summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVitaly Buka <vitalybuka@google.com>2024-09-18 16:47:13 -0700
committerVitaly Buka <vitalybuka@google.com>2024-09-18 16:47:13 -0700
commit7f1bd09ed8fa47d24ed69d3c1f97afc7535d4615 (patch)
treeb9e7e3aa2fe9fbc0a11d10aaa86f43ea6b94128c
parentf080291d3a56d5d6767af925782d5a30c52c93ae (diff)
parent999313debe8a87760b128e4469f17ec0ce1a4a8f (diff)
Created using spr 1.3.4
-rw-r--r--bolt/test/perf2bolt/lit.local.cfg5
-rw-r--r--clang-tools-extra/CODE_OWNERS.TXT2
-rw-r--r--clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp2
-rw-r--r--clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp27
-rw-r--r--clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp6
-rw-r--r--clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp42
-rw-r--r--clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h12
-rw-r--r--clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp12
-rw-r--r--clang-tools-extra/docs/ReleaseNotes.rst12
-rw-r--r--clang-tools-extra/docs/clang-rename.rst168
-rw-r--r--clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst3
-rw-r--r--clang-tools-extra/docs/clang-tidy/checks/readability/container-contains.rst38
-rw-r--r--clang-tools-extra/docs/index.rst1
-rw-r--r--clang-tools-extra/test/CMakeLists.txt3
-rw-r--r--clang-tools-extra/test/clang-apply-replacements/ClangRenameClassReplacements.cpp11
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp11
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp6
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-strings.cpp4
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp10
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays.cpp4
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-crash.cpp23
-rw-r--r--clang-tools-extra/test/clang-tidy/checkers/readability/container-contains.cpp187
-rw-r--r--clang/docs/ClangFormattedStatus.rst5
-rw-r--r--clang/docs/LanguageExtensions.rst1
-rw-r--r--clang/docs/ReleaseNotes.rst13
-rw-r--r--clang/docs/tools/clang-formatted-files.txt1
-rw-r--r--clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h17
-rw-r--r--clang/include/clang/Basic/Attr.td8
-rw-r--r--clang/include/clang/Basic/AttrDocs.td83
-rw-r--r--clang/include/clang/Basic/Builtins.td6
-rw-r--r--clang/include/clang/Basic/Diagnostic.h270
-rw-r--r--clang/include/clang/Basic/DiagnosticIDs.h7
-rw-r--r--clang/include/clang/Basic/DiagnosticParseKinds.td9
-rw-r--r--clang/include/clang/Basic/DiagnosticSemaKinds.td7
-rw-r--r--clang/include/clang/Basic/PartialDiagnostic.h5
-rw-r--r--clang/include/clang/Sema/Sema.h6
-rw-r--r--clang/lib/AST/ASTContext.cpp3
-rw-r--r--clang/lib/Basic/Diagnostic.cpp85
-rw-r--r--clang/lib/Basic/DiagnosticIDs.cpp21
-rw-r--r--clang/lib/Basic/SourceManager.cpp23
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp3
-rw-r--r--clang/lib/CodeGen/CGHLSLRuntime.cpp7
-rw-r--r--clang/lib/Driver/ToolChains/AMDGPU.cpp4
-rw-r--r--clang/lib/Driver/ToolChains/Clang.cpp19
-rw-r--r--clang/lib/Driver/ToolChains/CommonArgs.cpp16
-rw-r--r--clang/lib/Driver/ToolChains/CommonArgs.h3
-rw-r--r--clang/lib/Driver/ToolChains/Cuda.cpp2
-rw-r--r--clang/lib/Format/FormatTokenLexer.cpp7
-rw-r--r--clang/lib/Format/TokenAnnotator.cpp54
-rw-r--r--clang/lib/Frontend/Rewrite/FixItRewriter.cpp4
-rw-r--r--clang/lib/Frontend/TextDiagnosticPrinter.cpp2
-rw-r--r--clang/lib/Headers/CMakeLists.txt1
-rw-r--r--clang/lib/Headers/avx10_2copyintrin.h34
-rw-r--r--clang/lib/Headers/hlsl/hlsl_intrinsics.h71
-rw-r--r--clang/lib/Headers/immintrin.h1
-rw-r--r--clang/lib/Parse/ParseDecl.cpp8
-rw-r--r--clang/lib/Sema/Sema.cpp19
-rw-r--r--clang/lib/Sema/SemaBase.cpp2
-rw-r--r--clang/lib/Sema/SemaCast.cpp7
-rw-r--r--clang/lib/Sema/SemaChecking.cpp2
-rw-r--r--clang/lib/Sema/SemaCoroutine.cpp40
-rw-r--r--clang/lib/Sema/SemaDecl.cpp2
-rw-r--r--clang/lib/Sema/SemaExpr.cpp4
-rw-r--r--clang/lib/Sema/SemaHLSL.cpp310
-rw-r--r--clang/lib/Sema/SemaOpenMP.cpp211
-rw-r--r--clang/lib/Sema/SemaTemplate.cpp55
-rw-r--r--clang/lib/Sema/SemaTemplateDeduction.cpp7
-rw-r--r--clang/lib/Sema/SemaTemplateInstantiate.cpp4
-rw-r--r--clang/lib/Sema/SemaTemplateInstantiateDecl.cpp10
-rw-r--r--clang/lib/Serialization/ASTReader.cpp15
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp50
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp15
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp5
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h4
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp21
-rw-r--r--clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp18
-rw-r--r--clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm17
-rw-r--r--clang/test/Analysis/stream-error.c22
-rw-r--r--clang/test/Analysis/stream-note.c9
-rw-r--r--clang/test/Analysis/stream.c10
-rw-r--r--clang/test/C/C23/n3030.c93
-rw-r--r--clang/test/CMakeLists.txt1
-rw-r--r--clang/test/CodeGen/X86/avx512copy-builtins.c17
-rw-r--r--clang/test/CodeGen/builtins-elementwise-math.c37
-rw-r--r--clang/test/CodeGenCoroutines/coro-await-elidable.cpp40
-rw-r--r--clang/test/CodeGenHLSL/builtins/countbits.hlsl80
-rw-r--r--clang/test/CodeGenHLSL/wavesize.hlsl34
-rw-r--r--clang/test/Driver/openmp-offload-gpu.c2
-rw-r--r--clang/test/Misc/pragma-attribute-supported-attributes-list.test1
-rw-r--r--clang/test/Modules/pr108732.cppm14
-rw-r--r--clang/test/PCH/race-condition.cpp41
-rw-r--r--clang/test/Parser/cxx-bad-cast-diagnose-broken-template.cpp26
-rw-r--r--clang/test/Sema/builtins-elementwise-math.c33
-rw-r--r--clang/test/Sema/countbits-errors.hlsl28
-rw-r--r--clang/test/Sema/fixed-enum.c25
-rw-r--r--clang/test/SemaCXX/builtins-elementwise-math.cpp8
-rw-r--r--clang/test/SemaCXX/cxx2a-consteval.cpp24
-rw-r--r--clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl21
-rw-r--r--clang/test/SemaHLSL/resource_binding_attr_error.hlsl17
-rw-r--r--clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl21
-rw-r--r--clang/test/SemaHLSL/resource_binding_attr_error_space.hlsl62
-rw-r--r--clang/test/SemaTemplate/GH18291.cpp9
-rw-r--r--clang/test/SemaTemplate/concepts-out-of-line-def.cpp44
-rw-r--r--clang/test/clang-rename/ClassAsTemplateArgument.cpp21
-rw-r--r--clang/test/clang-rename/ClassFindByName.cpp10
-rw-r--r--clang/test/clang-rename/ClassSimpleRenaming.cpp14
-rw-r--r--clang/test/clang-rename/ClassTestMulti.cpp11
-rw-r--r--clang/test/clang-rename/ClassTestMultiByName.cpp8
-rw-r--r--clang/test/clang-rename/ComplexFunctionOverride.cpp47
-rw-r--r--clang/test/clang-rename/ComplicatedClassType.cpp63
-rw-r--r--clang/test/clang-rename/Ctor.cpp14
-rw-r--r--clang/test/clang-rename/CtorInitializer.cpp17
-rw-r--r--clang/test/clang-rename/DeclRefExpr.cpp24
-rw-r--r--clang/test/clang-rename/ForceMulti.cpp8
-rw-r--r--clang/test/clang-rename/ForwardClassDecl.cpp4
-rw-r--r--clang/test/clang-rename/FunctionMacro.cpp20
-rw-r--r--clang/test/clang-rename/FunctionOverride.cpp13
-rw-r--r--clang/test/clang-rename/FunctionTemplate.cpp19
-rw-r--r--clang/test/clang-rename/FunctionWithClassFindByName.cpp12
-rw-r--r--clang/test/clang-rename/IncludeHeaderWithSymbol.cpp10
-rw-r--r--clang/test/clang-rename/Inputs/HeaderWithSymbol.h1
-rw-r--r--clang/test/clang-rename/Inputs/OffsetToNewName.yaml6
-rw-r--r--clang/test/clang-rename/Inputs/QualifiedNameToNewName.yaml6
-rw-r--r--clang/test/clang-rename/InvalidNewName.cpp2
-rw-r--r--clang/test/clang-rename/InvalidOffset.cpp9
-rw-r--r--clang/test/clang-rename/InvalidQualifiedName.cpp4
-rw-r--r--clang/test/clang-rename/MemberExprMacro.cpp22
-rw-r--r--clang/test/clang-rename/Namespace.cpp13
-rw-r--r--clang/test/clang-rename/NoNewName.cpp4
-rw-r--r--clang/test/clang-rename/NonExistFile.cpp2
-rw-r--r--clang/test/clang-rename/TemplateClassInstantiation.cpp42
-rw-r--r--clang/test/clang-rename/TemplateCtor.cpp10
-rw-r--r--clang/test/clang-rename/TemplateTypename.cpp24
-rw-r--r--clang/test/clang-rename/TemplatedClassFunction.cpp27
-rw-r--r--clang/test/clang-rename/Typedef.cpp8
-rw-r--r--clang/test/clang-rename/UserDefinedConversion.cpp26
-rw-r--r--clang/test/clang-rename/Variable.cpp33
-rw-r--r--clang/test/clang-rename/VariableMacro.cpp21
-rw-r--r--clang/test/clang-rename/VariableTemplate.cpp32
-rw-r--r--clang/test/clang-rename/YAMLInput.cpp10
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c8
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c.expected8
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/infer-indentation.c8
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/infer-indentation.c.expected11
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c11
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c.expected12
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/multiple-errors.c6
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/multiple-errors.c.expected9
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c8
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c.expected13
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/no-checks.c3
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/no-checks.c.expected4
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/no-diags.c5
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/no-diags.c.expected5
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c4
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c.expected4
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c5
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c.expected5
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/update-same-line.c4
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/update-same-line.c.expected4
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/update-single-check.c4
-rw-r--r--clang/test/utils/update-verify-tests/Inputs/update-single-check.c.expected4
-rw-r--r--clang/test/utils/update-verify-tests/duplicate-diag.test4
-rw-r--r--clang/test/utils/update-verify-tests/infer-indentation.test3
-rw-r--r--clang/test/utils/update-verify-tests/leave-existing-diags.test4
-rw-r--r--clang/test/utils/update-verify-tests/lit.local.cfg28
-rw-r--r--clang/test/utils/update-verify-tests/multiple-errors.test3
-rw-r--r--clang/test/utils/update-verify-tests/multiple-missing-errors-same-line.test3
-rw-r--r--clang/test/utils/update-verify-tests/no-checks.test3
-rw-r--r--clang/test/utils/update-verify-tests/no-diags.test4
-rw-r--r--clang/test/utils/update-verify-tests/no-expected-diags.test4
-rw-r--r--clang/test/utils/update-verify-tests/non-default-prefix.test4
-rw-r--r--clang/test/utils/update-verify-tests/update-same-line.test4
-rw-r--r--clang/test/utils/update-verify-tests/update-single-check.test3
-rw-r--r--clang/tools/CMakeLists.txt1
-rw-r--r--clang/tools/clang-rename/CMakeLists.txt26
-rw-r--r--clang/tools/clang-rename/ClangRename.cpp242
-rw-r--r--clang/tools/clang-rename/clang-rename.el80
-rw-r--r--clang/tools/clang-rename/clang-rename.py70
-rw-r--r--clang/unittests/Basic/DiagnosticTest.cpp19
-rw-r--r--clang/unittests/CMakeLists.txt1
-rw-r--r--clang/unittests/Driver/DXCModeTest.cpp5
-rw-r--r--clang/unittests/Format/FormatTest.cpp12
-rw-r--r--clang/unittests/Format/FormatTestBase.h14
-rw-r--r--clang/unittests/Format/FormatTestProto.cpp2
-rw-r--r--clang/unittests/Format/FormatTestTextProto.cpp90
-rw-r--r--clang/unittests/Format/TokenAnnotatorTest.cpp8
-rw-r--r--clang/unittests/Rename/CMakeLists.txt29
-rw-r--r--clang/unittests/Rename/ClangRenameTest.h116
-rw-r--r--clang/unittests/Rename/RenameAliasTest.cpp303
-rw-r--r--clang/unittests/Rename/RenameClassTest.cpp820
-rw-r--r--clang/unittests/Rename/RenameEnumTest.cpp189
-rw-r--r--clang/unittests/Rename/RenameFunctionTest.cpp573
-rw-r--r--clang/unittests/Rename/RenameMemberTest.cpp228
-rw-r--r--clang/utils/UpdateVerifyTests/core.py452
-rw-r--r--clang/utils/update-verify-tests.py38
-rw-r--r--clang/www/c_status.html2
-rw-r--r--compiler-rt/lib/asan/asan_rtl.cpp3
-rw-r--r--compiler-rt/lib/dfsan/dfsan.cpp2
-rw-r--r--compiler-rt/lib/hwasan/hwasan.cpp4
-rw-r--r--compiler-rt/lib/interception/interception_win.cpp26
-rw-r--r--compiler-rt/lib/lsan/lsan.cpp2
-rw-r--r--compiler-rt/lib/memprof/memprof_rtl.cpp3
-rw-r--r--compiler-rt/lib/msan/msan.cpp3
-rw-r--r--compiler-rt/lib/rtsan/CMakeLists.txt2
-rw-r--r--compiler-rt/lib/rtsan/rtsan_context.cpp2
-rw-r--r--compiler-rt/lib/rtsan/rtsan_flags.cpp1
-rw-r--r--compiler-rt/lib/rtsan/tests/rtsan_test_main.cpp17
-rw-r--r--compiler-rt/lib/rtsan/tests/rtsan_test_utilities.h4
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp1
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp1
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp4
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp73
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp3
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp9
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_win.cpp3
-rw-r--r--compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp2
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp1
-rw-r--r--compiler-rt/lib/ubsan/ubsan_init.cpp2
-rw-r--r--compiler-rt/test/rtsan/basic.cpp1
-rw-r--r--compiler-rt/test/rtsan/lit.cfg.py16
-rw-r--r--compiler-rt/test/rtsan/sanity_check_pure_c.c28
-rw-r--r--flang/include/flang/Evaluate/traverse.h2
-rw-r--r--flang/include/flang/Semantics/type.h9
-rw-r--r--flang/lib/Evaluate/characteristics.cpp5
-rw-r--r--flang/lib/Frontend/FrontendActions.cpp9
-rw-r--r--flang/lib/Frontend/TextDiagnosticPrinter.cpp2
-rw-r--r--flang/lib/Optimizer/Dialect/FIRType.cpp5
-rw-r--r--flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp27
-rw-r--r--flang/lib/Parser/parsing.cpp4
-rw-r--r--flang/lib/Semantics/check-declarations.cpp2
-rw-r--r--flang/lib/Semantics/resolve-names.cpp49
-rw-r--r--flang/lib/Semantics/type.cpp11
-rw-r--r--flang/runtime/io-api.cpp2
-rw-r--r--flang/runtime/io-stmt.cpp7
-rw-r--r--flang/test/Semantics/OpenMP/bad_module_subroutine.f906
-rw-r--r--flang/test/Semantics/get_team.f902
-rw-r--r--flang/test/Semantics/implicit16.f9012
-rw-r--r--flang/test/Semantics/modfile68.f9042
-rw-r--r--flang/test/Semantics/modproc01.f908
-rw-r--r--flang/test/Transforms/debug-107988.fir23
-rw-r--r--libc/cmake/modules/prepare_libc_gpu_build.cmake4
-rw-r--r--libcxx/cmake/caches/AMDGPU.cmake2
-rw-r--r--libcxx/cmake/caches/NVPTX.cmake2
-rw-r--r--libcxx/include/__algorithm/mismatch.h8
-rw-r--r--libcxx/include/__algorithm/simd_utils.h77
-rw-r--r--lld/COFF/InputFiles.h4
-rw-r--r--lld/COFF/MapFile.cpp3
-rw-r--r--lld/COFF/SymbolTable.cpp14
-rw-r--r--lld/COFF/SymbolTable.h4
-rw-r--r--lld/ELF/Driver.cpp553
-rw-r--r--lld/test/COFF/import_weak_alias.test20
-rw-r--r--lld/test/wasm/unsupported-pic-relocations.s39
-rw-r--r--lld/test/wasm/unsupported-pic-relocations64.s39
-rw-r--r--lld/wasm/Relocations.cpp16
-rw-r--r--lldb/include/lldb/Utility/Scalar.h10
-rw-r--r--lldb/include/lldb/Utility/Status.h85
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp7
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp31
-rw-r--r--lldb/source/Utility/Scalar.cpp49
-rw-r--r--lldb/source/Utility/Status.cpp261
-rw-r--r--lldb/test/API/lang/cpp/fpnan/Makefile3
-rw-r--r--lldb/test/API/lang/cpp/fpnan/TestFPNaN.py130
-rw-r--r--lldb/test/API/lang/cpp/fpnan/main.cpp8
-rw-r--r--lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py8
-rw-r--r--lldb/tools/lldb-dap/package-lock.json4
-rw-r--r--lldb/tools/lldb-dap/package.json11
-rw-r--r--lldb/tools/lldb-dap/src-ts/extension.ts11
-rw-r--r--lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerTest.cpp3
-rw-r--r--lldb/unittests/TestingSupport/Host/SocketTestUtilities.cpp23
-rw-r--r--lldb/unittests/Utility/StatusTest.cpp8
-rw-r--r--llvm/cmake/config-ix.cmake2
-rw-r--r--llvm/docs/LangRef.rst34
-rw-r--r--llvm/include/llvm/Analysis/TargetLibraryInfo.def15
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAG.h10
-rw-r--r--llvm/include/llvm/CodeGen/SwitchLoweringUtils.h8
-rw-r--r--llvm/include/llvm/CodeGen/VirtRegMap.h8
-rw-r--r--llvm/include/llvm/Config/config.h.cmake6
-rw-r--r--llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h2
-rw-r--r--llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h14
-rw-r--r--llvm/include/llvm/SandboxIR/SandboxIR.h36
-rw-r--r--llvm/include/llvm/Support/GlobPattern.h9
-rw-r--r--llvm/include/llvm/Support/raw_ostream.h19
-rw-r--r--llvm/include/llvm/TableGen/Record.h10
-rw-r--r--llvm/include/llvm/Transforms/IPO/FunctionImport.h12
-rw-r--r--llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h1
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h62
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h7
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h104
-rw-r--r--llvm/lib/Analysis/Loads.cpp34
-rw-r--r--llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp2
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp6
-rw-r--r--llvm/lib/CodeGen/MachineVerifier.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp5
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp8
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp43
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h10
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp4
-rw-r--r--llvm/lib/CodeGen/SwitchLoweringUtils.cpp4
-rw-r--r--llvm/lib/Demangle/MicrosoftDemangle.cpp4
-rw-r--r--llvm/lib/Demangle/MicrosoftDemangleNodes.cpp2
-rw-r--r--llvm/lib/MC/MCParser/AsmParser.cpp6
-rw-r--r--llvm/lib/MC/MCParser/MasmParser.cpp6
-rw-r--r--llvm/lib/SandboxIR/SandboxIR.cpp29
-rw-r--r--llvm/lib/TableGen/Record.cpp22
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td9
-rw-r--r--llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp15
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp47
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp14
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp113
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp5
-rw-r--r--llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp22
-rw-r--r--llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp12
-rw-r--r--llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp8
-rw-r--r--llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp11
-rw-r--r--llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp8
-rw-r--r--llvm/lib/Target/DirectX/DXIL.td11
-rw-r--r--llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp10
-rw-r--r--llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp8
-rw-r--r--llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp10
-rw-r--r--llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp8
-rw-r--r--llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp8
-rw-r--r--llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp10
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp1
-rw-r--r--llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp123
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp14
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp66
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp238
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp2
-rw-r--r--llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp10
-rw-r--r--llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp8
-rw-r--r--llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp8
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp45
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp70
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp12
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp20
-rw-r--r--llvm/lib/Target/X86/X86DomainReassignment.cpp57
-rw-r--r--llvm/lib/Target/X86/X86FixupVectorConstants.cpp8
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp46
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h4
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX10.td110
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td124
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td3
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp8
-rw-r--r--llvm/lib/Target/X86/X86MCInstLower.cpp20
-rw-r--r--llvm/lib/Target/X86/X86SchedIceLake.td20
-rw-r--r--llvm/lib/Target/X86/X86SchedSapphireRapids.td14
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeServer.td20
-rw-r--r--llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp8
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp5
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp22
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h5
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp64
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp108
-rw-r--r--llvm/lib/Transforms/Utils/BuildLibCalls.cpp12
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp33
-rw-r--r--llvm/lib/Transforms/Vectorize/CMakeLists.txt1
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp26
-rw-r--r--llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp53
-rw-r--r--llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp45
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp11
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h9
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp1
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp11
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp7
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanUtils.cpp12
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanUtils.h9
-rw-r--r--llvm/test/Analysis/CostModel/RISCV/fround.ll16
-rw-r--r--llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll55
-rw-r--r--llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll10
-rw-r--r--llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir4
-rw-r--r--llvm/test/CodeGen/DirectX/countbits.ll47
-rw-r--r--llvm/test/CodeGen/NVPTX/vector-loads.ll9
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll264
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll441
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-fp-ceil-floor.mir98
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir84
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir114
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir72
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir90
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll536
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll460
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll466
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll943
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/floor-vp.ll536
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll195
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll562
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll195
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll562
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll488
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll424
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll460
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll461
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll424
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll84
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll530
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/rint-vp.ll507
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/round-vp.ll538
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll538
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll538
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll1913
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll1162
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll243
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll256
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll681
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfclass-sdnode.ll55
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll262
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll244
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll643
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll2449
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll402
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll565
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll247
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll291
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll247
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll291
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll243
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll256
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll114
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll109
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll257
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll262
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll256
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll643
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll340
-rw-r--r--llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll21
-rw-r--r--llvm/test/CodeGen/WebAssembly/exception.ll1
-rw-r--r--llvm/test/CodeGen/X86/avx512copy-intrinsics.ll35
-rw-r--r--llvm/test/CodeGen/X86/comi-flags.ll237
-rw-r--r--llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll40
-rw-r--r--llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll16
-rw-r--r--llvm/test/Demangle/ms-placeholder-return-type.test18
-rw-r--r--llvm/test/MC/Disassembler/X86/avx10.2-com-ef-32.txt195
-rw-r--r--llvm/test/MC/Disassembler/X86/avx10.2-com-ef-64.txt195
-rw-r--r--llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt34
-rw-r--r--llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt34
-rw-r--r--llvm/test/MC/WebAssembly/annotations.s35
-rw-r--r--llvm/test/MC/WebAssembly/eh-assembly.s2
-rw-r--r--llvm/test/MC/X86/avx10.2-com-ef-32-att.s194
-rw-r--r--llvm/test/MC/X86/avx10.2-com-ef-32-intel.s194
-rw-r--r--llvm/test/MC/X86/avx10.2-com-ef-64-att.s194
-rw-r--r--llvm/test/MC/X86/avx10.2-com-ef-64-intel.s194
-rw-r--r--llvm/test/MC/X86/avx10.2-copy-32-att.s82
-rw-r--r--llvm/test/MC/X86/avx10.2-copy-32-intel.s81
-rw-r--r--llvm/test/MC/X86/avx10.2-copy-64-att.s97
-rw-r--r--llvm/test/MC/X86/avx10.2-copy-64-intel.s97
-rw-r--r--llvm/test/MachineVerifier/test_g_insert_subvector.mir3
-rw-r--r--llvm/test/TableGen/x86-fold-tables.inc8
-rw-r--r--llvm/test/ThinLTO/X86/ctxprof.ll11
-rw-r--r--llvm/test/Transforms/InferFunctionAttrs/annotate.ll9
-rw-r--r--llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll79
-rw-r--r--llvm/test/Transforms/InstCombine/bitcast.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/fmod.ll30
-rw-r--r--llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll106
-rw-r--r--llvm/test/Transforms/InstCombine/phi.ll51
-rw-r--r--llvm/test/Transforms/LICM/hoist-deref-load.ll81
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll72
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll16
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll20
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll2
-rw-r--r--llvm/test/Transforms/LoopVectorize/pointer-induction.ll32
-rw-r--r--llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll6
-rw-r--r--llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll274
-rw-r--r--llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll198
-rw-r--r--llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s50
-rw-r--r--llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml20
-rw-r--r--llvm/unittests/Analysis/TargetLibraryInfoTest.cpp3
-rw-r--r--llvm/unittests/SandboxIR/PassTest.cpp2
-rw-r--r--llvm/unittests/SandboxIR/SandboxIRTest.cpp70
-rw-r--r--llvm/unittests/SandboxIR/TrackerTest.cpp31
-rw-r--r--llvm/unittests/Support/raw_ostream_test.cpp13
-rw-r--r--llvm/unittests/TargetParser/Host.cpp12
-rw-r--r--llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt2
-rw-r--r--llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp56
-rw-r--r--llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp81
-rw-r--r--llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp7
-rw-r--r--llvm/unittests/Transforms/Vectorize/VPlanTestBase.h18
-rw-r--r--llvm/utils/TableGen/AsmMatcherEmitter.cpp147
-rw-r--r--llvm/utils/TableGen/AsmWriterEmitter.cpp41
-rw-r--r--llvm/utils/TableGen/CallingConvEmitter.cpp45
-rw-r--r--llvm/utils/TableGen/CodeEmitterGen.cpp75
-rw-r--r--llvm/utils/TableGen/CodeGenMapTable.cpp179
-rw-r--r--llvm/utils/TableGen/Common/CodeGenSchedule.cpp116
-rw-r--r--llvm/utils/TableGen/Common/CodeGenSchedule.h35
-rw-r--r--llvm/utils/TableGen/CompressInstEmitter.cpp92
-rw-r--r--llvm/utils/TableGen/DAGISelEmitter.cpp10
-rw-r--r--llvm/utils/TableGen/DAGISelMatcherGen.cpp20
-rw-r--r--llvm/utils/TableGen/DFAEmitter.cpp4
-rw-r--r--llvm/utils/TableGen/DFAPacketizerEmitter.cpp43
-rw-r--r--llvm/utils/TableGen/DXILEmitter.cpp52
-rw-r--r--llvm/utils/TableGen/DecoderEmitter.cpp28
-rw-r--r--llvm/utils/TableGen/FastISelEmitter.cpp15
-rw-r--r--llvm/utils/TableGen/InstrDocsEmitter.cpp20
-rw-r--r--llvm/utils/TableGen/InstrInfoEmitter.cpp3
-rw-r--r--llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp58
-rw-r--r--llvm/utils/TableGen/RegisterInfoEmitter.cpp2
-rw-r--r--llvm/utils/TableGen/SubtargetEmitter.cpp34
-rw-r--r--llvm/utils/TableGen/TableGenBackends.h2
-rw-r--r--llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp39
-rw-r--r--llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h3
-rw-r--r--llvm/utils/TableGen/X86FoldTablesEmitter.cpp12
-rw-r--r--llvm/utils/TableGen/X86InstrMappingEmitter.cpp29
-rw-r--r--llvm/utils/TableGen/X86ManualInstrMapping.def1
-rw-r--r--llvm/utils/TableGen/X86MnemonicTables.cpp10
-rw-r--r--llvm/utils/TableGen/X86RecognizableInstr.cpp9
-rw-r--r--llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/clang/test/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/clang/tools/clang-rename/BUILD.gn14
-rw-r--r--llvm/utils/gn/secondary/clang/unittests/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/clang/unittests/Rename/BUILD.gn28
-rw-r--r--llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn2
-rw-r--r--llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn3
-rw-r--r--llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn6
-rw-r--r--llvm/utils/yaml-bench/YAMLBench.cpp11
-rw-r--r--mlir/include/mlir-c/Pass.h8
-rw-r--r--mlir/include/mlir/Dialect/GPU/Transforms/Passes.h10
-rw-r--r--mlir/lib/Bindings/Python/Pass.cpp13
-rw-r--r--mlir/lib/CAPI/IR/Pass.cpp17
-rw-r--r--mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp37
-rw-r--r--mlir/lib/IR/AsmPrinter.cpp10
-rw-r--r--mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi9
-rw-r--r--mlir/test/IR/print-attr-type-aliases.mlir27
-rw-r--r--mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp5
-rw-r--r--mlir/test/python/execution_engine.py16
-rw-r--r--mlir/test/python/pass_manager.py30
-rw-r--r--mlir/test/tblgen-to-irdl/CMathDialect.td12
-rw-r--r--mlir/test/tblgen-to-irdl/TestDialect.td17
-rw-r--r--mlir/tools/tblgen-to-irdl/OpDefinitionsGen.cpp36
-rw-r--r--offload/test/lit.cfg7
-rw-r--r--openmp/runtime/src/CMakeLists.txt6
-rw-r--r--utils/bazel/llvm-project-overlay/clang/BUILD.bazel15
-rw-r--r--utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel26
-rw-r--r--utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h6
-rw-r--r--utils/bazel/llvm_configs/config.h.cmake6
540 files changed, 28842 insertions, 10580 deletions
diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg
index 4ee9ad08cc78..0fecf913aa98 100644
--- a/bolt/test/perf2bolt/lit.local.cfg
+++ b/bolt/test/perf2bolt/lit.local.cfg
@@ -1,4 +1,5 @@
import shutil
+import subprocess
-if shutil.which("perf") is not None:
- config.available_features.add("perf") \ No newline at end of file
+if shutil.which("perf") is not None and subprocess.run(["perf", "record", "-e", "cycles:u", "-o", "/dev/null", "--", "perf", "--version"], capture_output=True).returncode == 0:
+ config.available_features.add("perf")
diff --git a/clang-tools-extra/CODE_OWNERS.TXT b/clang-tools-extra/CODE_OWNERS.TXT
index 4cf80aa2b0b8..2831ec7e25f5 100644
--- a/clang-tools-extra/CODE_OWNERS.TXT
+++ b/clang-tools-extra/CODE_OWNERS.TXT
@@ -23,7 +23,7 @@ D: clang-tidy
N: Manuel Klimek
E: klimek@google.com
-D: clang-rename, all parts of clang-tools-extra not covered by someone else
+D: all parts of clang-tools-extra not covered by someone else
N: Sam McCall
E: sammccall@google.com
diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
index 200bb87a5ac3..4c75b4227011 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
+++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
@@ -380,7 +380,6 @@ void ClangTidyDiagnosticConsumer::HandleDiagnostic(
++Context.Stats.ErrorsIgnoredNOLINT;
// Ignored a warning, should ignore related notes as well
LastErrorWasIgnored = true;
- Context.DiagEngine->Clear();
for (const auto &Error : SuppressionErrors)
Context.diag(Error);
return;
@@ -457,7 +456,6 @@ void ClangTidyDiagnosticConsumer::HandleDiagnostic(
if (Info.hasSourceManager())
checkFilters(Info.getLocation(), Info.getSourceManager());
- Context.DiagEngine->Clear();
for (const auto &Error : SuppressionErrors)
Context.diag(Error);
}
diff --git a/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp b/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp
index 89790ea70cf2..98778192dbd3 100644
--- a/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp
@@ -9,6 +9,7 @@
#include "AvoidCArraysCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
using namespace clang::ast_matchers;
@@ -60,6 +61,7 @@ void AvoidCArraysCheck::registerMatchers(MatchFinder *Finder) {
Finder->addMatcher(
typeLoc(hasValidBeginLoc(), hasType(arrayType()),
+ optionally(hasParent(parmVarDecl().bind("param_decl"))),
unless(anyOf(hasParent(parmVarDecl(isArgvOfMain())),
hasParent(varDecl(isExternC())),
hasParent(fieldDecl(
@@ -72,11 +74,28 @@ void AvoidCArraysCheck::registerMatchers(MatchFinder *Finder) {
void AvoidCArraysCheck::check(const MatchFinder::MatchResult &Result) {
const auto *ArrayType = Result.Nodes.getNodeAs<TypeLoc>("typeloc");
-
+ const bool IsInParam =
+ Result.Nodes.getNodeAs<ParmVarDecl>("param_decl") != nullptr;
+ const bool IsVLA = ArrayType->getTypePtr()->isVariableArrayType();
+ enum class RecommendType { Array, Vector, Span };
+ llvm::SmallVector<const char *> RecommendTypes{};
+ if (IsVLA) {
+ RecommendTypes.push_back("std::vector<>");
+ } else if (ArrayType->getTypePtr()->isIncompleteArrayType() && IsInParam) {
+ // in function parameter, we also don't know the size of
+ // IncompleteArrayType.
+ if (Result.Context->getLangOpts().CPlusPlus20)
+ RecommendTypes.push_back("std::span<>");
+ else {
+ RecommendTypes.push_back("std::array<>");
+ RecommendTypes.push_back("std::vector<>");
+ }
+ } else {
+ RecommendTypes.push_back("std::array<>");
+ }
diag(ArrayType->getBeginLoc(),
- "do not declare %select{C-style|C VLA}0 arrays, use "
- "%select{std::array<>|std::vector<>}0 instead")
- << ArrayType->getTypePtr()->isVariableArrayType();
+ "do not declare %select{C-style|C VLA}0 arrays, use %1 instead")
+ << IsVLA << llvm::join(RecommendTypes, " or ");
}
} // namespace clang::tidy::modernize
diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp
index a1786ba5acfd..1c6a1618ebbc 100644
--- a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp
@@ -262,7 +262,7 @@ StatementMatcher makeIteratorLoopMatcher(bool IsReverse) {
/// EndVarName: 'j' (as a VarDecl)
/// In the second example only:
/// EndCallName: 'container.size()' (as a CXXMemberCallExpr) or
-/// 'size(contaner)' (as a CallExpr)
+/// 'size(container)' (as a CallExpr)
///
/// Client code will need to make sure that:
/// - The containers on which 'size()' is called is the container indexed.
@@ -491,7 +491,7 @@ static bool isDirectMemberExpr(const Expr *E) {
}
/// Given an expression that represents an usage of an element from the
-/// containter that we are iterating over, returns false when it can be
+/// container that we are iterating over, returns false when it can be
/// guaranteed this element cannot be modified as a result of this usage.
static bool canBeModified(ASTContext *Context, const Expr *E) {
if (E->getType().isConstQualified())
@@ -922,7 +922,7 @@ bool LoopConvertCheck::isConvertible(ASTContext *Context,
const ast_matchers::BoundNodes &Nodes,
const ForStmt *Loop,
LoopFixerKind FixerKind) {
- // In self contained diagnosics mode we don't want dependancies on other
+ // In self contained diagnostic mode we don't want dependencies on other
// loops, otherwise, If we already modified the range of this for loop, don't
// do any further updates on this iteration.
if (areDiagsSelfContained())
diff --git a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp
index dbb50a060e59..698231d777d2 100644
--- a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp
@@ -13,30 +13,40 @@
using namespace clang::ast_matchers;
namespace clang::tidy::readability {
-
void ContainerContainsCheck::registerMatchers(MatchFinder *Finder) {
- const auto SupportedContainers = hasType(
- hasUnqualifiedDesugaredType(recordType(hasDeclaration(cxxRecordDecl(
- hasAnyName("::std::set", "::std::unordered_set", "::std::map",
- "::std::unordered_map", "::std::multiset",
- "::std::unordered_multiset", "::std::multimap",
- "::std::unordered_multimap"))))));
+ const auto HasContainsMatchingParamType = hasMethod(
+ cxxMethodDecl(isConst(), parameterCountIs(1), returns(booleanType()),
+ hasName("contains"), unless(isDeleted()), isPublic(),
+ hasParameter(0, hasType(hasUnqualifiedDesugaredType(
+ equalsBoundNode("parameterType"))))));
const auto CountCall =
- cxxMemberCallExpr(on(SupportedContainers),
- callee(cxxMethodDecl(hasName("count"))),
- argumentCountIs(1))
+ cxxMemberCallExpr(
+ argumentCountIs(1),
+ callee(cxxMethodDecl(
+ hasName("count"),
+ hasParameter(0, hasType(hasUnqualifiedDesugaredType(
+ type().bind("parameterType")))),
+ ofClass(cxxRecordDecl(HasContainsMatchingParamType)))))
.bind("call");
const auto FindCall =
- cxxMemberCallExpr(on(SupportedContainers),
- callee(cxxMethodDecl(hasName("find"))),
- argumentCountIs(1))
+ cxxMemberCallExpr(
+ argumentCountIs(1),
+ callee(cxxMethodDecl(
+ hasName("find"),
+ hasParameter(0, hasType(hasUnqualifiedDesugaredType(
+ type().bind("parameterType")))),
+ ofClass(cxxRecordDecl(HasContainsMatchingParamType)))))
.bind("call");
- const auto EndCall = cxxMemberCallExpr(on(SupportedContainers),
- callee(cxxMethodDecl(hasName("end"))),
- argumentCountIs(0));
+ const auto EndCall = cxxMemberCallExpr(
+ argumentCountIs(0),
+ callee(
+ cxxMethodDecl(hasName("end"),
+ // In the matchers below, FindCall should always appear
+ // before EndCall so 'parameterType' is properly bound.
+ ofClass(cxxRecordDecl(HasContainsMatchingParamType)))));
const auto Literal0 = integerLiteral(equals(0));
const auto Literal1 = integerLiteral(equals(1));
diff --git a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h
index 2e8276d684cd..753603ed8253 100644
--- a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h
+++ b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h
@@ -13,8 +13,9 @@
namespace clang::tidy::readability {
-/// Finds usages of `container.count()` and `find() == end()` which should be
-/// replaced by a call to the `container.contains()` method introduced in C++20.
+/// Finds usages of `container.count()` and
+/// `container.find() == container.end()` which should be replaced by a call
+/// to the `container.contains()` method.
///
/// For the user-facing documentation see:
/// http://clang.llvm.org/extra/clang-tidy/checks/readability/container-contains.html
@@ -24,10 +25,11 @@ public:
: ClangTidyCheck(Name, Context) {}
void registerMatchers(ast_matchers::MatchFinder *Finder) final;
void check(const ast_matchers::MatchFinder::MatchResult &Result) final;
-
-protected:
bool isLanguageVersionSupported(const LangOptions &LO) const final {
- return LO.CPlusPlus20;
+ return LO.CPlusPlus;
+ }
+ std::optional<TraversalKind> getCheckTraversalKind() const override {
+ return TK_AsIs;
}
};
diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp
index 021d731f8f17..cf9b42828568 100644
--- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp
@@ -305,33 +305,33 @@ TEST_F(ConfigCompileTests, DiagnosticSuppression) {
{
auto D = DiagEngine.Report(diag::warn_unreachable);
EXPECT_TRUE(isDiagnosticSuppressed(
- Diag{&DiagEngine}, Conf.Diagnostics.Suppress, LangOptions()));
+ Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions()));
}
// Subcategory not respected/suppressed.
{
auto D = DiagEngine.Report(diag::warn_unreachable_break);
EXPECT_FALSE(isDiagnosticSuppressed(
- Diag{&DiagEngine}, Conf.Diagnostics.Suppress, LangOptions()));
+ Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions()));
}
{
auto D = DiagEngine.Report(diag::warn_unused_variable);
EXPECT_TRUE(isDiagnosticSuppressed(
- Diag{&DiagEngine}, Conf.Diagnostics.Suppress, LangOptions()));
+ Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions()));
}
{
auto D = DiagEngine.Report(diag::err_typecheck_bool_condition);
EXPECT_TRUE(isDiagnosticSuppressed(
- Diag{&DiagEngine}, Conf.Diagnostics.Suppress, LangOptions()));
+ Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions()));
}
{
auto D = DiagEngine.Report(diag::err_unexpected_friend);
EXPECT_TRUE(isDiagnosticSuppressed(
- Diag{&DiagEngine}, Conf.Diagnostics.Suppress, LangOptions()));
+ Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions()));
}
{
auto D = DiagEngine.Report(diag::warn_alloca);
EXPECT_TRUE(isDiagnosticSuppressed(
- Diag{&DiagEngine}, Conf.Diagnostics.Suppress, LangOptions()));
+ Diag{&DiagEngine, D}, Conf.Diagnostics.Suppress, LangOptions()));
}
Frag.Diagnostics.Suppress.emplace_back("*");
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 79501b563b4e..82a761bd7f40 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -87,9 +87,6 @@ Improvements to clang-doc
Improvements to clang-query
---------------------------
-Improvements to clang-rename
-----------------------------
-
The improvements are...
Improvements to clang-tidy
@@ -137,6 +134,11 @@ Changes in existing checks
<clang-tidy/checks/misc/definitions-in-headers>` check by rewording the
diagnostic note that suggests adding ``inline``.
+- Improved :doc:`modernize-avoid-c-arrays
+ <clang-tidy/checks/modernize/avoid-c-arrays>` check to suggest using ``std::span``
+ as a replacement for parameters of incomplete C array type in C++20 and
+ ``std::array`` or ``std::vector`` before C++20.
+
- Improved :doc:`modernize-use-std-format
<clang-tidy/checks/modernize/use-std-format>` check to support replacing
member function calls too.
@@ -163,6 +165,10 @@ Changes in existing checks
<clang-tidy/checks/performance/avoid-endl>` check to use ``std::endl`` as
placeholder when lexer cannot get source text.
+- Improved :doc:`readability-container-contains
+ <clang-tidy/checks/readability/container-contains>` check to let it work on
+ any class that has a ``contains`` method.
+
- Improved :doc:`readability-implicit-bool-conversion
<clang-tidy/checks/readability/implicit-bool-conversion>` check
by adding the option `UseUpperCaseLiteralSuffix` to select the
diff --git a/clang-tools-extra/docs/clang-rename.rst b/clang-tools-extra/docs/clang-rename.rst
deleted file mode 100644
index e13d8c3ad25f..000000000000
--- a/clang-tools-extra/docs/clang-rename.rst
+++ /dev/null
@@ -1,168 +0,0 @@
-============
-Clang-Rename
-============
-
-.. contents::
-
-See also:
-
-.. toctree::
- :maxdepth: 1
-
-
-:program:`clang-rename` is a C++ refactoring tool. Its purpose is to perform
-efficient renaming actions in large-scale projects such as renaming classes,
-functions, variables, arguments, namespaces etc.
-
-The tool is in a very early development stage, so you might encounter bugs and
-crashes. Submitting reports with information about how to reproduce the issue
-to `the LLVM bugtracker <https://bugs.llvm.org>`_ will definitely help the
-project. If you have any ideas or suggestions, you might want to put a feature
-request there.
-
-Using Clang-Rename
-==================
-
-:program:`clang-rename` is a `LibTooling
-<https://clang.llvm.org/docs/LibTooling.html>`_-based tool, and it's easier to
-work with if you set up a compile command database for your project (for an
-example of how to do this see `How To Setup Tooling For LLVM
-<https://clang.llvm.org/docs/HowToSetupToolingForLLVM.html>`_). You can also
-specify compilation options on the command line after `--`:
-
-.. code-block:: console
-
- $ clang-rename -offset=42 -new-name=foo test.cpp -- -Imy_project/include -DMY_DEFINES ...
-
-
-To get an offset of a symbol in a file run
-
-.. code-block:: console
-
- $ grep -FUbo 'foo' file.cpp
-
-
-The tool currently supports renaming actions inside a single translation unit
-only. It is planned to extend the tool's functionality to support multi-TU
-renaming actions in the future.
-
-:program:`clang-rename` also aims to be easily integrated into popular text
-editors, such as Vim and Emacs, and improve the workflow of users.
-
-Although a command line interface exists, it is highly recommended to use the
-text editor interface instead for better experience.
-
-You can also identify one or more symbols to be renamed by giving the fully
-qualified name:
-
-.. code-block:: console
-
- $ clang-rename -qualified-name=foo -new-name=bar test.cpp
-
-Renaming multiple symbols at once is supported, too. However,
-:program:`clang-rename` doesn't accept both `-offset` and `-qualified-name` at
-the same time. So, you can either specify multiple `-offset` or
-`-qualified-name`.
-
-.. code-block:: console
-
- $ clang-rename -offset=42 -new-name=bar1 -offset=150 -new-name=bar2 test.cpp
-
-or
-
-.. code-block:: console
-
- $ clang-rename -qualified-name=foo1 -new-name=bar1 -qualified-name=foo2 -new-name=bar2 test.cpp
-
-
-Alternatively, {offset | qualified-name} / new-name pairs can be put into a YAML
-file:
-
-.. code-block:: yaml
-
- ---
- - Offset: 42
- NewName: bar1
- - Offset: 150
- NewName: bar2
- ...
-
-or
-
-.. code-block:: yaml
-
- ---
- - QualifiedName: foo1
- NewName: bar1
- - QualifiedName: foo2
- NewName: bar2
- ...
-
-That way you can avoid spelling out all the names as command line arguments:
-
-.. code-block:: console
-
- $ clang-rename -input=test.yaml test.cpp
-
-:program:`clang-rename` offers the following options:
-
-.. code-block:: console
-
- $ clang-rename --help
- USAGE: clang-rename [subcommand] [options] <source0> [... <sourceN>]
-
- OPTIONS:
-
- Generic Options:
-
- -help - Display available options (-help-hidden for more)
- -help-list - Display list of available options (-help-list-hidden for more)
- -version - Display the version of this program
-
- clang-rename common options:
-
- -export-fixes=<filename> - YAML file to store suggested fixes in.
- -extra-arg=<string> - Additional argument to append to the compiler command line
- Can be used several times.
- -extra-arg-before=<string> - Additional argument to prepend to the compiler command line
- Can be used several times.
- -force - Ignore nonexistent qualified names.
- -i - Overwrite edited <file>s.
- -input=<string> - YAML file to load oldname-newname pairs from.
- -new-name=<string> - The new name to change the symbol to.
- -offset=<uint> - Locates the symbol by offset as opposed to <line>:<column>.
- -p <string> - Build path
- -pl - Print the locations affected by renaming to stderr.
- -pn - Print the found symbol's name prior to renaming to stderr.
- -qualified-name=<string> - The fully qualified name of the symbol.
-
-Vim Integration
-===============
-
-You can call :program:`clang-rename` directly from Vim! To set up
-:program:`clang-rename` integration for Vim see
-`clang/tools/clang-rename/clang-rename.py
-<https://github.com/llvm/llvm-project/blob/main/clang/tools/clang-rename/clang-rename.py>`_.
-
-Please note that **you have to save all buffers, in which the replacement will
-happen before running the tool**.
-
-Once installed, you can point your cursor to symbols you want to rename, press
-`<leader>cr` and type new desired name. The `<leader> key
-<http://vim.wikia.com/wiki/Mapping_keys_in_Vim_-_Tutorial_(Part_3)#Map_leader>`_
-is a reference to a specific key defined by the mapleader variable and is bound
-to backslash by default.
-
-Emacs Integration
-=================
-
-You can also use :program:`clang-rename` while using Emacs! To set up
-:program:`clang-rename` integration for Emacs see
-`clang-rename/tool/clang-rename.el
-<https://github.com/llvm/llvm-project/blob/main/clang/tools/clang-rename/clang-rename.el>`_.
-
-Once installed, you can point your cursor to symbols you want to rename, press
-`M-X`, type `clang-rename` and new desired name.
-
-Please note that **you have to save all buffers, in which the replacement will
-happen before running the tool**.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst
index 8f13ca4466a3..2d72352989ab 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst
@@ -10,6 +10,9 @@ modernize-avoid-c-arrays
Finds C-style array types and recommend to use ``std::array<>`` /
``std::vector<>``. All types of C arrays are diagnosed.
+For incomplete C-style array types appeared in parameters, It would be better to
+use ``std::span`` / ``gsl::span`` as replacement.
+
However, fix-it are potentially dangerous in header files and are therefore not
emitted right now.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/container-contains.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/container-contains.rst
index b28daecf7a2c..1cfbf4c511c5 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/readability/container-contains.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/readability/container-contains.rst
@@ -3,23 +3,31 @@
readability-container-contains
==============================
-Finds usages of ``container.count()`` and ``container.find() == container.end()`` which should be replaced by a call to the ``container.contains()`` method introduced in C++20.
+Finds usages of ``container.count()`` and
+``container.find() == container.end()`` which should be replaced by a call to
+the ``container.contains()`` method.
-Whether an element is contained inside a container should be checked with ``contains`` instead of ``count``/``find`` because ``contains`` conveys the intent more clearly. Furthermore, for containers which permit multiple entries per key (``multimap``, ``multiset``, ...), ``contains`` is more efficient than ``count`` because ``count`` has to do unnecessary additional work.
+Whether an element is contained inside a container should be checked with
+``contains`` instead of ``count``/``find`` because ``contains`` conveys the
+intent more clearly. Furthermore, for containers which permit multiple entries
+per key (``multimap``, ``multiset``, ...), ``contains`` is more efficient than
+``count`` because ``count`` has to do unnecessary additional work.
Examples:
-=========================================== ==============================
-Initial expression Result
-------------------------------------------- ------------------------------
-``myMap.find(x) == myMap.end()`` ``!myMap.contains(x)``
-``myMap.find(x) != myMap.end()`` ``myMap.contains(x)``
-``if (myMap.count(x))`` ``if (myMap.contains(x))``
-``bool exists = myMap.count(x)`` ``bool exists = myMap.contains(x)``
-``bool exists = myMap.count(x) > 0`` ``bool exists = myMap.contains(x)``
-``bool exists = myMap.count(x) >= 1`` ``bool exists = myMap.contains(x)``
-``bool missing = myMap.count(x) == 0`` ``bool missing = !myMap.contains(x)``
-=========================================== ==============================
+====================================== =====================================
+Initial expression Result
+-------------------------------------- -------------------------------------
+``myMap.find(x) == myMap.end()`` ``!myMap.contains(x)``
+``myMap.find(x) != myMap.end()`` ``myMap.contains(x)``
+``if (myMap.count(x))`` ``if (myMap.contains(x))``
+``bool exists = myMap.count(x)`` ``bool exists = myMap.contains(x)``
+``bool exists = myMap.count(x) > 0`` ``bool exists = myMap.contains(x)``
+``bool exists = myMap.count(x) >= 1`` ``bool exists = myMap.contains(x)``
+``bool missing = myMap.count(x) == 0`` ``bool missing = !myMap.contains(x)``
+====================================== =====================================
-This check applies to ``std::set``, ``std::unordered_set``, ``std::map``, ``std::unordered_map`` and the corresponding multi-key variants.
-It is only active for C++20 and later, as the ``contains`` method was only added in C++20.
+This check will apply to any class that has a ``contains`` method, notably
+including ``std::set``, ``std::unordered_set``, ``std::map``, and
+``std::unordered_map`` as of C++20, and ``std::string`` and ``std::string_view``
+as of C++23.
diff --git a/clang-tools-extra/docs/index.rst b/clang-tools-extra/docs/index.rst
index d5c00b89a155..9f7324fcf741 100644
--- a/clang-tools-extra/docs/index.rst
+++ b/clang-tools-extra/docs/index.rst
@@ -19,7 +19,6 @@ Contents
clang-include-fixer
modularize
pp-trace
- clang-rename
clangd <https://clangd.llvm.org/>
clang-doc
diff --git a/clang-tools-extra/test/CMakeLists.txt b/clang-tools-extra/test/CMakeLists.txt
index 0953ff2531e1..d72a117166a0 100644
--- a/clang-tools-extra/test/CMakeLists.txt
+++ b/clang-tools-extra/test/CMakeLists.txt
@@ -28,9 +28,6 @@ configure_lit_site_cfg(
)
set(CLANG_TOOLS_TEST_DEPS
- # For the clang-apply-replacements test that uses clang-rename.
- clang-rename
-
# For the clang-doc tests that emit bitcode files.
llvm-bcanalyzer
diff --git a/clang-tools-extra/test/clang-apply-replacements/ClangRenameClassReplacements.cpp b/clang-tools-extra/test/clang-apply-replacements/ClangRenameClassReplacements.cpp
deleted file mode 100644
index 2b478bbf900d..000000000000
--- a/clang-tools-extra/test/clang-apply-replacements/ClangRenameClassReplacements.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-// RUN: rm -rf %t
-// RUN: mkdir -p %t/fixes
-// RUN: cat %s > %t.cpp
-// RUN: clang-rename -offset=254 -new-name=Bar -export-fixes=%t/fixes/clang-rename.yaml %t.cpp --
-// RUN: clang-apply-replacements %t
-// RUN: sed 's,//.*,,' %t.cpp | FileCheck %s
-
-class Foo {}; // CHECK: class Bar {};
-
-// Use grep -FUbo 'Foo' <file> to get the correct offset of Cla when changing
-// this file.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp
new file mode 100644
index 000000000000..e53cfeba0e46
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp
@@ -0,0 +1,11 @@
+// RUN: %check_clang_tidy -std=c++20 %s modernize-avoid-c-arrays %t
+
+int f1(int data[], int size) {
+ // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: do not declare C-style arrays, use std::span<> instead
+ int f4[] = {1, 2};
+ // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead
+}
+
+int f2(int data[100]) {
+ // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: do not declare C-style arrays, use std::array<> instead
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp
index 6549422f393a..ad12b3d6f95b 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp
@@ -1,7 +1,7 @@
-// RUN: %check_clang_tidy %s modernize-avoid-c-arrays %t
+// RUN: %check_clang_tidy -std=c++17 %s modernize-avoid-c-arrays %t
int not_main(int argc, char *argv[]) {
- // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use std::array<> instead
+ // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead
int f4[] = {1, 2};
// CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead
}
@@ -11,7 +11,7 @@ int main(int argc, char *argv[]) {
// CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead
auto not_main = [](int argc, char *argv[]) {
- // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use std::array<> instead
+ // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead
int f6[] = {1, 2};
// CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not declare C-style arrays, use std::array<> instead
};
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-strings.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-strings.cpp
index f6d64848f9e3..b607068f5b7c 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-strings.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-strings.cpp
@@ -1,4 +1,4 @@
-// RUN: %check_clang_tidy %s modernize-avoid-c-arrays %t -- \
+// RUN: %check_clang_tidy -std=c++17 %s modernize-avoid-c-arrays %t -- \
// RUN: -config='{CheckOptions: { modernize-avoid-c-arrays.AllowStringArrays: true }}'
const char name[] = "name";
@@ -6,4 +6,4 @@ const char array[] = {'n', 'a', 'm', 'e', '\0'};
// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: do not declare C-style arrays, use std::array<> instead [modernize-avoid-c-arrays]
void takeCharArray(const char name[]);
-// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: do not declare C-style arrays, use std::array<> instead [modernize-avoid-c-arrays]
+// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead [modernize-avoid-c-arrays]
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp
index 22a4016f79f4..c04edf2b5aea 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp
@@ -1,8 +1,8 @@
-// RUN: %check_clang_tidy %s modernize-avoid-c-arrays %t
+// RUN: %check_clang_tidy -std=c++17 %s modernize-avoid-c-arrays %t
int not_main(int argc, char *argv[], char *argw[]) {
- // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use std::array<> instead
- // CHECK-MESSAGES: :[[@LINE-2]]:38: warning: do not declare C-style arrays, use std::array<> instead
+ // CHECK-MESSAGES: :[[@LINE-1]]:24: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead
+ // CHECK-MESSAGES: :[[@LINE-2]]:38: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead
int f4[] = {1, 2};
// CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead
}
@@ -12,8 +12,8 @@ int main(int argc, char *argv[], char *argw[]) {
// CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not declare C-style arrays, use std::array<> instead
auto not_main = [](int argc, char *argv[], char *argw[]) {
- // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use std::array<> instead
- // CHECK-MESSAGES: :[[@LINE-2]]:46: warning: do not declare C-style arrays, use std::array<> instead
+ // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead
+ // CHECK-MESSAGES: :[[@LINE-2]]:46: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead
int f6[] = {1, 2};
// CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not declare C-style arrays, use std::array<> instead
};
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays.cpp
index ce99f0821b22..b0aaa4962a83 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays.cpp
@@ -1,4 +1,4 @@
-// RUN: %check_clang_tidy %s modernize-avoid-c-arrays %t
+// RUN: %check_clang_tidy -std=c++17 %s modernize-avoid-c-arrays %t
int a[] = {1, 2};
// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: do not declare C-style arrays, use std::array<> instead
@@ -91,4 +91,4 @@ const char name[] = "Some string";
// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: do not declare C-style arrays, use std::array<> instead [modernize-avoid-c-arrays]
void takeCharArray(const char name[]);
-// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: do not declare C-style arrays, use std::array<> instead [modernize-avoid-c-arrays]
+// CHECK-MESSAGES: :[[@LINE-1]]:26: warning: do not declare C-style arrays, use std::array<> or std::vector<> instead [modernize-avoid-c-arrays]
diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-crash.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-crash.cpp
new file mode 100644
index 000000000000..99c2fe905bdf
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-crash.cpp
@@ -0,0 +1,23 @@
+// RUN: %check_clang_tidy -std=c++14-or-later %s performance-unnecessary-value-param %t
+
+// The test case used to crash clang-tidy.
+// https://github.com/llvm/llvm-project/issues/108963
+
+struct A
+{
+ template<typename T> A(T&&) {}
+};
+
+struct B
+{
+ ~B();
+};
+
+struct C
+{
+ A a;
+ C(B, int i) : a(i) {}
+ // CHECK-MESSAGES: [[@LINE-1]]:6: warning: the parameter #1 is copied for each invocation but only used as a const reference; consider making it a const reference
+};
+
+C c(B(), 0);
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/container-contains.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/container-contains.cpp
index 0ecb61b2e7df..906515b075d4 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/container-contains.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/container-contains.cpp
@@ -240,7 +240,7 @@ int testMacroExpansion(std::unordered_set<int> &MySet) {
return 0;
}
-// The following map has the same interface like `std::map`.
+// The following map has the same interface as `std::map`.
template <class Key, class T>
struct CustomMap {
unsigned count(const Key &K) const;
@@ -249,13 +249,180 @@ struct CustomMap {
void *end();
};
-// The clang-tidy check is currently hard-coded against the `std::` containers
-// and hence won't annotate the following instance. We might change this in the
-// future and also detect the following case.
-void *testDifferentCheckTypes(CustomMap<int, int> &MyMap) {
- if (MyMap.count(0))
- // NO-WARNING.
- // CHECK-FIXES: if (MyMap.count(0))
- return nullptr;
- return MyMap.find(2);
+void testDifferentCheckTypes(CustomMap<int, int> &MyMap) {
+ if (MyMap.count(0)) {};
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: if (MyMap.contains(0)) {};
+
+ MyMap.find(0) != MyMap.end();
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: MyMap.contains(0);
+}
+
+struct MySubmap : public CustomMap<int, int> {};
+
+void testSubclass(MySubmap& MyMap) {
+ if (MyMap.count(0)) {};
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: if (MyMap.contains(0)) {};
+}
+
+using UsingMap = CustomMap<int, int>;
+struct MySubmap2 : public UsingMap {};
+using UsingMap2 = MySubmap2;
+
+void testUsing(UsingMap2& MyMap) {
+ if (MyMap.count(0)) {};
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: if (MyMap.contains(0)) {};
+}
+
+template <class Key, class T>
+struct CustomMapContainsDeleted {
+ unsigned count(const Key &K) const;
+ bool contains(const Key &K) const = delete;
+ void *find(const Key &K);
+ void *end();
+};
+
+struct SubmapContainsDeleted : public CustomMapContainsDeleted<int, int> {};
+
+void testContainsDeleted(CustomMapContainsDeleted<int, int> &MyMap,
+ SubmapContainsDeleted &MyMap2) {
+ // No warning if the `contains` method is deleted.
+ if (MyMap.count(0)) {};
+ if (MyMap2.count(0)) {};
+}
+
+template <class Key, class T>
+struct CustomMapPrivateContains {
+ unsigned count(const Key &K) const;
+ void *find(const Key &K);
+ void *end();
+
+private:
+ bool contains(const Key &K) const;
+};
+
+struct SubmapPrivateContains : public CustomMapPrivateContains<int, int> {};
+
+void testPrivateContains(CustomMapPrivateContains<int, int> &MyMap,
+ SubmapPrivateContains &MyMap2) {
+ // No warning if the `contains` method is not public.
+ if (MyMap.count(0)) {};
+ if (MyMap2.count(0)) {};
+}
+
+struct MyString {};
+
+struct WeirdNonMatchingContains {
+ unsigned count(char) const;
+ bool contains(const MyString&) const;
+};
+
+void testWeirdNonMatchingContains(WeirdNonMatchingContains &MyMap) {
+ // No warning if there is no `contains` method with the right type.
+ if (MyMap.count('a')) {};
+}
+
+template <class T>
+struct SmallPtrSet {
+ using ConstPtrType = const T*;
+ unsigned count(ConstPtrType Ptr) const;
+ bool contains(ConstPtrType Ptr) const;
+};
+
+template <class T>
+struct SmallPtrPtrSet {
+ using ConstPtrType = const T**;
+ unsigned count(ConstPtrType Ptr) const;
+ bool contains(ConstPtrType Ptr) const;
+};
+
+template <class T>
+struct SmallPtrPtrPtrSet {
+ using ConstPtrType = const T***;
+ unsigned count(ConstPtrType Ptr) const;
+ bool contains(ConstPtrType Ptr) const;
+};
+
+void testSmallPtrSet(const int ***Ptr,
+ SmallPtrSet<int> &MySet,
+ SmallPtrPtrSet<int> &MySet2,
+ SmallPtrPtrPtrSet<int> &MySet3) {
+ if (MySet.count(**Ptr)) {};
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: if (MySet.contains(**Ptr)) {};
+ if (MySet2.count(*Ptr)) {};
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: if (MySet2.contains(*Ptr)) {};
+ if (MySet3.count(Ptr)) {};
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: if (MySet3.contains(Ptr)) {};
+}
+
+struct X {};
+struct Y : public X {};
+
+void testSubclassEntry(SmallPtrSet<X>& Set, Y* Entry) {
+ if (Set.count(Entry)) {}
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: if (Set.contains(Entry)) {}
+}
+
+struct WeirdPointerApi {
+ unsigned count(int** Ptr) const;
+ bool contains(int* Ptr) const;
+};
+
+void testWeirdApi(WeirdPointerApi& Set, int* E) {
+ if (Set.count(&E)) {}
+}
+
+void testIntUnsigned(std::set<int>& S, unsigned U) {
+ if (S.count(U)) {}
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: if (S.contains(U)) {}
+}
+
+template <class T>
+struct CustomSetConvertible {
+ unsigned count(const T &K) const;
+ bool contains(const T &K) const;
+};
+
+struct A {};
+struct B { B() = default; B(const A&) {} };
+struct C { operator A() const; };
+
+void testConvertibleTypes() {
+ CustomSetConvertible<B> MyMap;
+ if (MyMap.count(A())) {};
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: if (MyMap.contains(A())) {};
+
+ CustomSetConvertible<A> MyMap2;
+ if (MyMap2.count(C())) {};
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: if (MyMap2.contains(C())) {};
+
+ if (MyMap2.count(C()) != 0) {};
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: if (MyMap2.contains(C())) {};
+}
+
+template<class U>
+using Box = const U& ;
+
+template <class T>
+struct CustomBoxedSet {
+ unsigned count(Box<T> K) const;
+ bool contains(Box<T> K) const;
+};
+
+void testBox() {
+ CustomBoxedSet<int> Set;
+ if (Set.count(0)) {};
+ // CHECK-MESSAGES: :[[@LINE-1]]:{{[0-9]+}}: warning: use 'contains' to check for membership [readability-container-contains]
+ // CHECK-FIXES: if (Set.contains(0)) {};
}
diff --git a/clang/docs/ClangFormattedStatus.rst b/clang/docs/ClangFormattedStatus.rst
index 0ee0782879ef..b917e077679b 100644
--- a/clang/docs/ClangFormattedStatus.rst
+++ b/clang/docs/ClangFormattedStatus.rst
@@ -809,11 +809,6 @@ tree in terms of conformance to :doc:`ClangFormat` as of: March 06, 2022 17:32:2
- `4`
- `0`
- :good:`100%`
- * - clang/tools/clang-rename
- - `1`
- - `1`
- - `0`
- - :good:`100%`
* - clang/tools/clang-repl
- `1`
- `1`
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index c08697282cbf..f62f90fb9650 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -667,6 +667,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
T __builtin_elementwise_log(T x) return the natural logarithm of x floating point types
T __builtin_elementwise_log2(T x) return the base 2 logarithm of x floating point types
T __builtin_elementwise_log10(T x) return the base 10 logarithm of x floating point types
+ T __builtin_elementwise_popcount(T x) return the number of 1 bits in x integer types
T __builtin_elementwise_pow(T x, T y) return x raised to the power of y floating point types
T __builtin_elementwise_bitreverse(T x) return the integer represented after reversing the bits of x integer types
T __builtin_elementwise_exp(T x) returns the base-e exponential, e^x, of the specified value floating point types
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d92b59334f8f..d10b28431007 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -39,6 +39,8 @@ code bases.
- The ``le32`` and ``le64`` targets have been removed.
+- The ``clang-rename`` tool has been removed.
+
C/C++ Language Potentially Breaking Changes
-------------------------------------------
@@ -114,6 +116,7 @@ C++ Language Changes
- Accept C++26 user-defined ``static_assert`` messages in C++11 as an extension.
+- Add ``__builtin_elementwise_popcount`` builtin for integer types only.
C++2c Feature Support
^^^^^^^^^^^^^^^^^^^^^
@@ -252,7 +255,10 @@ Attribute Changes in Clang
(#GH106864)
- Introduced a new attribute ``[[clang::coro_await_elidable]]`` on coroutine return types
- to express elideability at call sites where the coroutine is co_awaited as a prvalue.
+ to express elideability at call sites where the coroutine is invoked under a safe elide context.
+
+- Introduced a new attribute ``[[clang::coro_await_elidable_argument]]`` on function parameters
+ to propagate safe elide context to arguments if such function is also under a safe elide context.
Improvements to Clang's diagnostics
-----------------------------------
@@ -392,8 +398,11 @@ Bug Fixes to C++ Support
- A follow-up fix was added for (#GH61460), as the previous fix was not entirely correct. (#GH86361)
- Fixed a crash in the typo correction of an invalid CTAD guide. (#GH107887)
- Fixed a crash when clang tries to subtitute parameter pack while retaining the parameter
- pack. #GH63819, #GH107560
+ pack. (#GH63819), (#GH107560)
- Fix a crash when a static assert declaration has an invalid close location. (#GH108687)
+- Avoided a redundant friend declaration instantiation under a certain ``consteval`` context. (#GH107175)
+- Fixed an assertion failure in debug mode, and potential crashes in release mode, when
+ diagnosing a failed cast caused indirectly by a failed implicit conversion to the type of the constructor parameter.
Bug Fixes to AST Handling
^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt
index 48ded9c75455..fa40ea74fb7e 100644
--- a/clang/docs/tools/clang-formatted-files.txt
+++ b/clang/docs/tools/clang-formatted-files.txt
@@ -608,7 +608,6 @@ clang/tools/clang-refactor/ClangRefactor.cpp
clang/tools/clang-refactor/TestSupport.cpp
clang/tools/clang-refactor/TestSupport.h
clang/tools/clang-refactor/ToolRefactoringResultConsumer.h
-clang/tools/clang-rename/ClangRename.cpp
clang/tools/clang-repl/ClangRepl.cpp
clang/tools/clang-scan-deps/ClangScanDeps.cpp
clang/tools/clang-shlib/clang-shlib.cpp
diff --git a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h
index b7b84852168e..c7a5b016c949 100644
--- a/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h
+++ b/clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h
@@ -118,10 +118,19 @@ public:
static FunctionParmMutationAnalyzer *
getFunctionParmMutationAnalyzer(const FunctionDecl &Func, ASTContext &Context,
ExprMutationAnalyzer::Memoized &Memorized) {
- auto [it, Inserted] = Memorized.FuncParmAnalyzer.try_emplace(&Func);
- if (Inserted)
- it->second = std::unique_ptr<FunctionParmMutationAnalyzer>(
- new FunctionParmMutationAnalyzer(Func, Context, Memorized));
+ auto it = Memorized.FuncParmAnalyzer.find(&Func);
+ if (it == Memorized.FuncParmAnalyzer.end()) {
+ // Creating a new instance of FunctionParmMutationAnalyzer below may add
+ // additional elements to FuncParmAnalyzer. If we did try_emplace before
+ // creating a new instance, the returned iterator of try_emplace could be
+ // invalidated.
+ it =
+ Memorized.FuncParmAnalyzer
+ .try_emplace(&Func, std::unique_ptr<FunctionParmMutationAnalyzer>(
+ new FunctionParmMutationAnalyzer(
+ Func, Context, Memorized)))
+ .first;
+ }
return it->getSecond().get();
}
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 35b9716e13ff..ce86116680d7 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1258,6 +1258,14 @@ def CoroAwaitElidable : InheritableAttr {
let SimpleHandler = 1;
}
+def CoroAwaitElidableArgument : InheritableAttr {
+ let Spellings = [Clang<"coro_await_elidable_argument">];
+ let Subjects = SubjectList<[ParmVar]>;
+ let LangOpts = [CPlusPlus];
+ let Documentation = [CoroAwaitElidableArgumentDoc];
+ let SimpleHandler = 1;
+}
+
// OSObject-based attributes.
def OSConsumed : InheritableParamAttr {
let Spellings = [Clang<"os_consumed">];
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index cc9bc499c9cc..8ef151b3f2fd 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -8258,15 +8258,23 @@ but do not pass them to the underlying coroutine or pass them by value.
def CoroAwaitElidableDoc : Documentation {
let Category = DocCatDecl;
let Content = [{
-The ``[[clang::coro_await_elidable]]`` is a class attribute which can be applied
-to a coroutine return type.
+The ``[[clang::coro_await_elidable]]`` is a class attribute which can be
+applied to a coroutine return type. It provides a hint to the compiler to apply
+Heap Allocation Elision more aggressively.
-When a coroutine function that returns such a type calls another coroutine function,
-the compiler performs heap allocation elision when the call to the coroutine function
-is immediately co_awaited as a prvalue. In this case, the coroutine frame for the
-callee will be a local variable within the enclosing braces in the caller's stack
-frame. And the local variable, like other variables in coroutines, may be collected
-into the coroutine frame, which may be allocated on the heap.
+When a coroutine function returns such a type, a direct call expression therein
+that returns a prvalue of a type attributed ``[[clang::coro_await_elidable]]``
+is said to be under a safe elide context if one of the following is true:
+- it is the immediate right-hand side operand to a co_await expression.
+- it is an argument to a ``[[clang::coro_await_elidable_argument]]`` parameter
+or parameter pack of another direct call expression under a safe elide context.
+
+Do note that the safe elide context applies only to the call expression itself,
+and the context does not transitively include any of its subexpressions unless
+exceptional rules of ``[[clang::coro_await_elidable_argument]]`` apply.
+
+The compiler performs heap allocation elision on call expressions under a safe
+elide context, if the callee is a coroutine.
Example:
@@ -8281,8 +8289,63 @@ Example:
co_await t;
}
-The behavior is undefined if the caller coroutine is destroyed earlier than the
-callee coroutine.
+Such elision replaces the heap allocated activation frame of the callee coroutine
+with a local variable within the enclosing braces in the caller's stack frame.
+The local variable, like other variables in coroutines, may be collected into the
+coroutine frame, which may be allocated on the heap. The behavior is undefined
+if the caller coroutine is destroyed earlier than the callee coroutine.
+
+}];
+}
+
+def CoroAwaitElidableArgumentDoc : Documentation {
+ let Category = DocCatDecl;
+ let Content = [{
+
+The ``[[clang::coro_await_elidable_argument]]`` is a function parameter attribute.
+It works in conjunction with ``[[clang::coro_await_elidable]]`` to propagate a
+safe elide context to a parameter or parameter pack if the function is called
+under a safe elide context.
+
+This is sometimes necessary on utility functions used to compose or modify the
+behavior of a callee coroutine.
+
+Example:
+
+.. code-block:: c++
+
+ template <typename T>
+ class [[clang::coro_await_elidable]] Task { ... };
+
+ template <typename... T>
+ class [[clang::coro_await_elidable]] WhenAll { ... };
+
+ // `when_all` is a utility function that composes coroutines. It does not
+ // need to be a coroutine to propagate.
+ template <typename... T>
+ WhenAll<T...> when_all([[clang::coro_await_elidable_argument]] Task<T> tasks...);
+
+ Task<int> foo();
+ Task<int> bar();
+ Task<void> example1() {
+ // `when_all``, `foo``, and `bar` are all elide safe because `when_all` is
+ // under a safe elide context and, thanks to the [[clang::coro_await_elidable_argument]]
+ // attribute, such context is propagated to foo and bar.
+ co_await when_all(foo(), bar());
+ }
+
+ Task<void> example2() {
+ // `when_all` and `bar` are elide safe. `foo` is not elide safe.
+ auto f = foo();
+ co_await when_all(f, bar());
+ }
+
+
+ Task<void> example3() {
+ // None of the calls are elide safe.
+ auto t = when_all(foo(), bar());
+ co_await t;
+ }
}];
}
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 6cf03d27055c..8c5d7ad763bf 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1322,6 +1322,12 @@ def ElementwiseLog10 : Builtin {
let Prototype = "void(...)";
}
+def ElementwisePopcount : Builtin {
+ let Spellings = ["__builtin_elementwise_popcount"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
def ElementwisePow : Builtin {
let Spellings = ["__builtin_elementwise_pow"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h
index 54b69e985402..e17ed8f98afa 100644
--- a/clang/include/clang/Basic/Diagnostic.h
+++ b/clang/include/clang/Basic/Diagnostic.h
@@ -183,6 +183,41 @@ struct DiagnosticStorage {
DiagnosticStorage() = default;
};
+/// An allocator for DiagnosticStorage objects, which uses a small cache to
+/// objects, used to reduce malloc()/free() traffic for partial diagnostics.
+class DiagStorageAllocator {
+ static const unsigned NumCached = 16;
+ DiagnosticStorage Cached[NumCached];
+ DiagnosticStorage *FreeList[NumCached];
+ unsigned NumFreeListEntries;
+
+public:
+ DiagStorageAllocator();
+ ~DiagStorageAllocator();
+
+ /// Allocate new storage.
+ DiagnosticStorage *Allocate() {
+ if (NumFreeListEntries == 0)
+ return new DiagnosticStorage;
+
+ DiagnosticStorage *Result = FreeList[--NumFreeListEntries];
+ Result->NumDiagArgs = 0;
+ Result->DiagRanges.clear();
+ Result->FixItHints.clear();
+ return Result;
+ }
+
+ /// Free the given storage object.
+ void Deallocate(DiagnosticStorage *S) {
+ if (S >= Cached && S <= Cached + NumCached) {
+ FreeList[NumFreeListEntries++] = S;
+ return;
+ }
+
+ delete S;
+ }
+};
+
/// Concrete class used by the front-end to report problems and issues.
///
/// This massages the diagnostics (e.g. handling things like "report warnings
@@ -522,27 +557,6 @@ private:
void *ArgToStringCookie = nullptr;
ArgToStringFnTy ArgToStringFn;
- /// ID of the "delayed" diagnostic, which is a (typically
- /// fatal) diagnostic that had to be delayed because it was found
- /// while emitting another diagnostic.
- unsigned DelayedDiagID;
-
- /// First string argument for the delayed diagnostic.
- std::string DelayedDiagArg1;
-
- /// Second string argument for the delayed diagnostic.
- std::string DelayedDiagArg2;
-
- /// Third string argument for the delayed diagnostic.
- std::string DelayedDiagArg3;
-
- /// Optional flag value.
- ///
- /// Some flags accept values, for instance: -Wframe-larger-than=<value> and
- /// -Rpass=<value>. The content of this string is emitted after the flag name
- /// and '='.
- std::string FlagValue;
-
public:
explicit DiagnosticsEngine(IntrusiveRefCntPtr<DiagnosticIDs> Diags,
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts,
@@ -949,70 +963,18 @@ public:
void Report(const StoredDiagnostic &storedDiag);
- /// Determine whethere there is already a diagnostic in flight.
- bool isDiagnosticInFlight() const {
- return CurDiagID != std::numeric_limits<unsigned>::max();
- }
-
- /// Set the "delayed" diagnostic that will be emitted once
- /// the current diagnostic completes.
- ///
- /// If a diagnostic is already in-flight but the front end must
- /// report a problem (e.g., with an inconsistent file system
- /// state), this routine sets a "delayed" diagnostic that will be
- /// emitted after the current diagnostic completes. This should
- /// only be used for fatal errors detected at inconvenient
- /// times. If emitting a delayed diagnostic causes a second delayed
- /// diagnostic to be introduced, that second delayed diagnostic
- /// will be ignored.
- ///
- /// \param DiagID The ID of the diagnostic being delayed.
- ///
- /// \param Arg1 A string argument that will be provided to the
- /// diagnostic. A copy of this string will be stored in the
- /// DiagnosticsEngine object itself.
- ///
- /// \param Arg2 A string argument that will be provided to the
- /// diagnostic. A copy of this string will be stored in the
- /// DiagnosticsEngine object itself.
- ///
- /// \param Arg3 A string argument that will be provided to the
- /// diagnostic. A copy of this string will be stored in the
- /// DiagnosticsEngine object itself.
- void SetDelayedDiagnostic(unsigned DiagID, StringRef Arg1 = "",
- StringRef Arg2 = "", StringRef Arg3 = "");
-
- /// Clear out the current diagnostic.
- void Clear() { CurDiagID = std::numeric_limits<unsigned>::max(); }
-
- /// Return the value associated with this diagnostic flag.
- StringRef getFlagValue() const { return FlagValue; }
-
private:
// This is private state used by DiagnosticBuilder. We put it here instead of
// in DiagnosticBuilder in order to keep DiagnosticBuilder a small lightweight
- // object. This implementation choice means that we can only have one
- // diagnostic "in flight" at a time, but this seems to be a reasonable
- // tradeoff to keep these objects small. Assertions verify that only one
- // diagnostic is in flight at a time.
+ // object. This implementation choice means that we can only have a few
+ // diagnostics "in flight" at a time, but this seems to be a reasonable
+ // tradeoff to keep these objects small.
friend class Diagnostic;
friend class DiagnosticBuilder;
friend class DiagnosticErrorTrap;
friend class DiagnosticIDs;
friend class PartialDiagnostic;
- /// Report the delayed diagnostic.
- void ReportDelayed();
-
- /// The location of the current diagnostic that is in flight.
- SourceLocation CurDiagLoc;
-
- /// The ID of the current diagnostic that is in flight.
- ///
- /// This is set to std::numeric_limits<unsigned>::max() when there is no
- /// diagnostic in flight.
- unsigned CurDiagID;
-
enum {
/// The maximum number of arguments we can hold.
///
@@ -1022,7 +984,7 @@ private:
MaxArguments = DiagnosticStorage::MaxArguments,
};
- DiagnosticStorage DiagStorage;
+ DiagStorageAllocator DiagAllocator;
DiagnosticMapping makeUserMapping(diag::Severity Map, SourceLocation L) {
bool isPragma = L.isValid();
@@ -1042,8 +1004,8 @@ private:
/// Used to report a diagnostic that is finally fully formed.
///
/// \returns true if the diagnostic was emitted, false if it was suppressed.
- bool ProcessDiag() {
- return Diags->ProcessDiag(*this);
+ bool ProcessDiag(const DiagnosticBuilder &DiagBuilder) {
+ return Diags->ProcessDiag(*this, DiagBuilder);
}
/// @name Diagnostic Emission
@@ -1058,14 +1020,10 @@ protected:
// Sema::Diag() patterns.
friend class Sema;
- /// Emit the current diagnostic and clear the diagnostic state.
+ /// Emit the diagnostic
///
/// \param Force Emit the diagnostic regardless of suppression settings.
- bool EmitCurrentDiagnostic(bool Force = false);
-
- unsigned getCurrentDiagID() const { return CurDiagID; }
-
- SourceLocation getCurrentDiagLoc() const { return CurDiagLoc; }
+ bool EmitDiagnostic(const DiagnosticBuilder &DB, bool Force = false);
/// @}
};
@@ -1118,40 +1076,7 @@ public:
///
class StreamingDiagnostic {
public:
- /// An allocator for DiagnosticStorage objects, which uses a small cache to
- /// objects, used to reduce malloc()/free() traffic for partial diagnostics.
- class DiagStorageAllocator {
- static const unsigned NumCached = 16;
- DiagnosticStorage Cached[NumCached];
- DiagnosticStorage *FreeList[NumCached];
- unsigned NumFreeListEntries;
-
- public:
- DiagStorageAllocator();
- ~DiagStorageAllocator();
-
- /// Allocate new storage.
- DiagnosticStorage *Allocate() {
- if (NumFreeListEntries == 0)
- return new DiagnosticStorage;
-
- DiagnosticStorage *Result = FreeList[--NumFreeListEntries];
- Result->NumDiagArgs = 0;
- Result->DiagRanges.clear();
- Result->FixItHints.clear();
- return Result;
- }
-
- /// Free the given storage object.
- void Deallocate(DiagnosticStorage *S) {
- if (S >= Cached && S <= Cached + NumCached) {
- FreeList[NumFreeListEntries++] = S;
- return;
- }
-
- delete S;
- }
- };
+ using DiagStorageAllocator = clang::DiagStorageAllocator;
protected:
mutable DiagnosticStorage *DiagStorage = nullptr;
@@ -1240,11 +1165,6 @@ public:
protected:
StreamingDiagnostic() = default;
- /// Construct with an external storage not owned by itself. The allocator
- /// is a null pointer in this case.
- explicit StreamingDiagnostic(DiagnosticStorage *Storage)
- : DiagStorage(Storage) {}
-
/// Construct with a storage allocator which will manage the storage. The
/// allocator is not a null pointer in this case.
explicit StreamingDiagnostic(DiagStorageAllocator &Alloc)
@@ -1275,9 +1195,20 @@ protected:
class DiagnosticBuilder : public StreamingDiagnostic {
friend class DiagnosticsEngine;
friend class PartialDiagnostic;
+ friend class Diagnostic;
mutable DiagnosticsEngine *DiagObj = nullptr;
+ SourceLocation DiagLoc;
+ unsigned DiagID;
+
+ /// Optional flag value.
+ ///
+ /// Some flags accept values, for instance: -Wframe-larger-than=<value> and
+ /// -Rpass=<value>. The content of this string is emitted after the flag name
+ /// and '='.
+ mutable std::string FlagValue;
+
/// Status variable indicating if this diagnostic is still active.
///
// NOTE: This field is redundant with DiagObj (IsActive iff (DiagObj == 0)),
@@ -1291,16 +1222,8 @@ class DiagnosticBuilder : public StreamingDiagnostic {
DiagnosticBuilder() = default;
- explicit DiagnosticBuilder(DiagnosticsEngine *diagObj)
- : StreamingDiagnostic(&diagObj->DiagStorage), DiagObj(diagObj),
- IsActive(true) {
- assert(diagObj && "DiagnosticBuilder requires a valid DiagnosticsEngine!");
- assert(DiagStorage &&
- "DiagnosticBuilder requires a valid DiagnosticStorage!");
- DiagStorage->NumDiagArgs = 0;
- DiagStorage->DiagRanges.clear();
- DiagStorage->FixItHints.clear();
- }
+ DiagnosticBuilder(DiagnosticsEngine *DiagObj, SourceLocation DiagLoc,
+ unsigned DiagID);
protected:
/// Clear out the current diagnostic.
@@ -1326,7 +1249,7 @@ protected:
if (!isActive()) return false;
// Process the diagnostic.
- bool Result = DiagObj->EmitCurrentDiagnostic(IsForceEmit);
+ bool Result = DiagObj->EmitDiagnostic(*this, IsForceEmit);
// This diagnostic is dead.
Clear();
@@ -1337,13 +1260,7 @@ protected:
public:
/// Copy constructor. When copied, this "takes" the diagnostic info from the
/// input and neuters it.
- DiagnosticBuilder(const DiagnosticBuilder &D) : StreamingDiagnostic() {
- DiagObj = D.DiagObj;
- DiagStorage = D.DiagStorage;
- IsActive = D.IsActive;
- IsForceEmit = D.IsForceEmit;
- D.Clear();
- }
+ DiagnosticBuilder(const DiagnosticBuilder &D);
template <typename T> const DiagnosticBuilder &operator<<(const T &V) const {
assert(isActive() && "Clients must not add to cleared diagnostic!");
@@ -1375,7 +1292,7 @@ public:
return *this;
}
- void addFlagValue(StringRef V) const { DiagObj->FlagValue = std::string(V); }
+ void addFlagValue(StringRef V) const { FlagValue = std::string(V); }
};
struct AddFlagValue {
@@ -1550,12 +1467,7 @@ const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
inline DiagnosticBuilder DiagnosticsEngine::Report(SourceLocation Loc,
unsigned DiagID) {
- assert(CurDiagID == std::numeric_limits<unsigned>::max() &&
- "Multiple diagnostics in flight at once!");
- CurDiagLoc = Loc;
- CurDiagID = DiagID;
- FlagValue.clear();
- return DiagnosticBuilder(this);
+ return DiagnosticBuilder(this, Loc, DiagID);
}
const StreamingDiagnostic &operator<<(const StreamingDiagnostic &DB,
@@ -1570,24 +1482,29 @@ inline DiagnosticBuilder DiagnosticsEngine::Report(unsigned DiagID) {
//===----------------------------------------------------------------------===//
/// A little helper class (which is basically a smart pointer that forwards
-/// info from DiagnosticsEngine) that allows clients to enquire about the
-/// currently in-flight diagnostic.
+/// info from DiagnosticsEngine and DiagnosticStorage) that allows clients to
+/// enquire about the diagnostic.
class Diagnostic {
const DiagnosticsEngine *DiagObj;
+ SourceLocation DiagLoc;
+ unsigned DiagID;
+ std::string FlagValue;
+ const DiagnosticStorage &DiagStorage;
std::optional<StringRef> StoredDiagMessage;
public:
- explicit Diagnostic(const DiagnosticsEngine *DO) : DiagObj(DO) {}
- Diagnostic(const DiagnosticsEngine *DO, StringRef storedDiagMessage)
- : DiagObj(DO), StoredDiagMessage(storedDiagMessage) {}
+ Diagnostic(const DiagnosticsEngine *DO, const DiagnosticBuilder &DiagBuilder);
+ Diagnostic(const DiagnosticsEngine *DO, SourceLocation DiagLoc,
+ unsigned DiagID, const DiagnosticStorage &DiagStorage,
+ StringRef StoredDiagMessage);
const DiagnosticsEngine *getDiags() const { return DiagObj; }
- unsigned getID() const { return DiagObj->CurDiagID; }
- const SourceLocation &getLocation() const { return DiagObj->CurDiagLoc; }
+ unsigned getID() const { return DiagID; }
+ const SourceLocation &getLocation() const { return DiagLoc; }
bool hasSourceManager() const { return DiagObj->hasSourceManager(); }
SourceManager &getSourceManager() const { return DiagObj->getSourceManager();}
- unsigned getNumArgs() const { return DiagObj->DiagStorage.NumDiagArgs; }
+ unsigned getNumArgs() const { return DiagStorage.NumDiagArgs; }
/// Return the kind of the specified index.
///
@@ -1597,8 +1514,7 @@ public:
/// \pre Idx < getNumArgs()
DiagnosticsEngine::ArgumentKind getArgKind(unsigned Idx) const {
assert(Idx < getNumArgs() && "Argument index out of range!");
- return (DiagnosticsEngine::ArgumentKind)
- DiagObj->DiagStorage.DiagArgumentsKind[Idx];
+ return (DiagnosticsEngine::ArgumentKind)DiagStorage.DiagArgumentsKind[Idx];
}
/// Return the provided argument string specified by \p Idx.
@@ -1606,7 +1522,7 @@ public:
const std::string &getArgStdStr(unsigned Idx) const {
assert(getArgKind(Idx) == DiagnosticsEngine::ak_std_string &&
"invalid argument accessor!");
- return DiagObj->DiagStorage.DiagArgumentsStr[Idx];
+ return DiagStorage.DiagArgumentsStr[Idx];
}
/// Return the specified C string argument.
@@ -1614,8 +1530,7 @@ public:
const char *getArgCStr(unsigned Idx) const {
assert(getArgKind(Idx) == DiagnosticsEngine::ak_c_string &&
"invalid argument accessor!");
- return reinterpret_cast<const char *>(
- DiagObj->DiagStorage.DiagArgumentsVal[Idx]);
+ return reinterpret_cast<const char *>(DiagStorage.DiagArgumentsVal[Idx]);
}
/// Return the specified signed integer argument.
@@ -1623,7 +1538,7 @@ public:
int64_t getArgSInt(unsigned Idx) const {
assert(getArgKind(Idx) == DiagnosticsEngine::ak_sint &&
"invalid argument accessor!");
- return (int64_t)DiagObj->DiagStorage.DiagArgumentsVal[Idx];
+ return (int64_t)DiagStorage.DiagArgumentsVal[Idx];
}
/// Return the specified unsigned integer argument.
@@ -1631,7 +1546,7 @@ public:
uint64_t getArgUInt(unsigned Idx) const {
assert(getArgKind(Idx) == DiagnosticsEngine::ak_uint &&
"invalid argument accessor!");
- return DiagObj->DiagStorage.DiagArgumentsVal[Idx];
+ return DiagStorage.DiagArgumentsVal[Idx];
}
/// Return the specified IdentifierInfo argument.
@@ -1640,7 +1555,7 @@ public:
assert(getArgKind(Idx) == DiagnosticsEngine::ak_identifierinfo &&
"invalid argument accessor!");
return reinterpret_cast<IdentifierInfo *>(
- DiagObj->DiagStorage.DiagArgumentsVal[Idx]);
+ DiagStorage.DiagArgumentsVal[Idx]);
}
/// Return the specified non-string argument in an opaque form.
@@ -1648,37 +1563,32 @@ public:
uint64_t getRawArg(unsigned Idx) const {
assert(getArgKind(Idx) != DiagnosticsEngine::ak_std_string &&
"invalid argument accessor!");
- return DiagObj->DiagStorage.DiagArgumentsVal[Idx];
+ return DiagStorage.DiagArgumentsVal[Idx];
}
/// Return the number of source ranges associated with this diagnostic.
- unsigned getNumRanges() const {
- return DiagObj->DiagStorage.DiagRanges.size();
- }
+ unsigned getNumRanges() const { return DiagStorage.DiagRanges.size(); }
/// \pre Idx < getNumRanges()
const CharSourceRange &getRange(unsigned Idx) const {
assert(Idx < getNumRanges() && "Invalid diagnostic range index!");
- return DiagObj->DiagStorage.DiagRanges[Idx];
+ return DiagStorage.DiagRanges[Idx];
}
/// Return an array reference for this diagnostic's ranges.
- ArrayRef<CharSourceRange> getRanges() const {
- return DiagObj->DiagStorage.DiagRanges;
- }
+ ArrayRef<CharSourceRange> getRanges() const { return DiagStorage.DiagRanges; }
- unsigned getNumFixItHints() const {
- return DiagObj->DiagStorage.FixItHints.size();
- }
+ unsigned getNumFixItHints() const { return DiagStorage.FixItHints.size(); }
const FixItHint &getFixItHint(unsigned Idx) const {
assert(Idx < getNumFixItHints() && "Invalid index!");
- return DiagObj->DiagStorage.FixItHints[Idx];
+ return DiagStorage.FixItHints[Idx];
}
- ArrayRef<FixItHint> getFixItHints() const {
- return DiagObj->DiagStorage.FixItHints;
- }
+ ArrayRef<FixItHint> getFixItHints() const { return DiagStorage.FixItHints; }
+
+ /// Return the value associated with this diagnostic flag.
+ StringRef getFlagValue() const { return FlagValue; }
/// Format this diagnostic into a string, substituting the
/// formal arguments into the %0 slots.
diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h
index daad66f49953..1fa38ed6066e 100644
--- a/clang/include/clang/Basic/DiagnosticIDs.h
+++ b/clang/include/clang/Basic/DiagnosticIDs.h
@@ -24,6 +24,7 @@
namespace clang {
class DiagnosticsEngine;
+ class DiagnosticBuilder;
class SourceLocation;
// Import the diagnostic enums themselves.
@@ -486,11 +487,13 @@ private:
///
/// \returns \c true if the diagnostic was emitted, \c false if it was
/// suppressed.
- bool ProcessDiag(DiagnosticsEngine &Diag) const;
+ bool ProcessDiag(DiagnosticsEngine &Diag,
+ const DiagnosticBuilder &DiagBuilder) const;
/// Used to emit a diagnostic that is finally fully formed,
/// ignoring suppression.
- void EmitDiag(DiagnosticsEngine &Diag, Level DiagLevel) const;
+ void EmitDiag(DiagnosticsEngine &Diag, const DiagnosticBuilder &DiagBuilder,
+ Level DiagLevel) const;
/// Whether the diagnostic may leave the AST in a state where some
/// invariants can break.
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 1afadb3bff75..78510e61a639 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -113,9 +113,12 @@ def ext_cxx11_enum_fixed_underlying_type : Extension<
def ext_ms_c_enum_fixed_underlying_type : Extension<
"enumeration types with a fixed underlying type are a Microsoft extension">,
InGroup<MicrosoftFixedEnum>;
-def ext_clang_c_enum_fixed_underlying_type : Extension<
- "enumeration types with a fixed underlying type are a Clang extension">,
- InGroup<DiagGroup<"fixed-enum-extension">>;
+def ext_c23_enum_fixed_underlying_type : Extension<
+ "enumeration types with a fixed underlying type are a C23 extension">,
+ InGroup<C23>;
+def warn_c17_compat_enum_fixed_underlying_type : Warning<
+ "enumeration types with a fixed underlying type are incompatible with C standards before C23">,
+ DefaultIgnore, InGroup<CPre23Compat>;
def warn_cxx98_compat_enum_fixed_underlying_type : Warning<
"enumeration types with a fixed underlying type are incompatible with C++98">,
InGroup<CXX98Compat>, DefaultIgnore;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e8b64f3c5a01..ba813af960af 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3290,7 +3290,7 @@ def err_attribute_unsupported_m_profile
def err_duplicate_target_attribute
: Error<"%select{unsupported|duplicate|unknown}0%select{| CPU|"
" tune CPU}1 '%2' in the '%select{target|target_clones|target_version}3' "
- "attribute string; ">;
+ "attribute string;">;
// The err_*_attribute_argument_not_int are separate because they're used by
// VerifyIntegerConstantExpression.
def err_aligned_attribute_argument_not_int : Error<
@@ -9915,7 +9915,7 @@ def err_defaulted_comparison_constexpr_mismatch : Error<
"three-way comparison operator}0 cannot be "
"declared %select{constexpr|consteval}2 because "
"%select{it|for which the corresponding implicit 'operator==' }0 "
- "invokes a non-constexpr comparison function ">;
+ "invokes a non-constexpr comparison function">;
def note_defaulted_comparison_not_constexpr : Note<
"non-constexpr comparison function would be used to compare "
"%select{|member %1|base class %1}0">;
@@ -11559,7 +11559,7 @@ def err_omp_wrong_device_function_call : Error<
"function with 'device_type(%0)' is not available on %select{device|host}1">;
def note_omp_marked_device_type_here : Note<"marked as 'device_type(%0)' here">;
def err_omp_declare_target_has_local_vars : Error<
- "local variable '%0' should not be used in 'declare target' directive; ">;
+ "local variable '%0' should not be used in 'declare target' directive;">;
def warn_omp_declare_target_after_first_use : Warning<
"declaration marked as declare target after first use, it may lead to incorrect results">,
InGroup<OpenMPTarget>;
@@ -12381,6 +12381,7 @@ def warn_hlsl_deprecated_register_type_b: Warning<"binding type 'b' only applies
def warn_hlsl_deprecated_register_type_i: Warning<"binding type 'i' ignored. The 'integer constant' binding type is no longer supported">, InGroup<LegacyConstantRegisterBinding>, DefaultError;
def err_hlsl_unsupported_register_number : Error<"register number should be an integer">;
def err_hlsl_expected_space : Error<"invalid space specifier '%0' used; expected 'space' followed by an integer, like space1">;
+def err_hlsl_space_on_global_constant : Error<"register space cannot be specified on global constants">;
def warn_hlsl_packoffset_mix : Warning<"cannot mix packoffset elements with nonpackoffset elements in a cbuffer">,
InGroup<HLSLMixPackOffset>;
def err_hlsl_packoffset_overlap : Error<"packoffset overlap between %0, %1">;
diff --git a/clang/include/clang/Basic/PartialDiagnostic.h b/clang/include/clang/Basic/PartialDiagnostic.h
index 507d789c54ff..4bf6049d08fd 100644
--- a/clang/include/clang/Basic/PartialDiagnostic.h
+++ b/clang/include/clang/Basic/PartialDiagnostic.h
@@ -166,13 +166,10 @@ public:
void EmitToString(DiagnosticsEngine &Diags,
SmallVectorImpl<char> &Buf) const {
- // FIXME: It should be possible to render a diagnostic to a string without
- // messing with the state of the diagnostics engine.
DiagnosticBuilder DB(Diags.Report(getDiagID()));
Emit(DB);
- Diagnostic(&Diags).FormatDiagnostic(Buf);
+ Diagnostic(&Diags, DB).FormatDiagnostic(Buf);
DB.Clear();
- Diags.Clear();
}
/// Clear out this partial diagnostic, giving it a new diagnostic ID
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 0809ac1b144e..e1c3a99cfa16 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -626,10 +626,10 @@ public:
const llvm::MapVector<FieldDecl *, DeleteLocs> &
getMismatchingDeleteExpressions() const;
- /// Cause the active diagnostic on the DiagosticsEngine to be
- /// emitted. This is closely coupled to the SemaDiagnosticBuilder class and
+ /// Cause the built diagnostic to be emitted on the DiagosticsEngine.
+ /// This is closely coupled to the SemaDiagnosticBuilder class and
/// should not be used elsewhere.
- void EmitCurrentDiagnostic(unsigned DiagID);
+ void EmitDiagnostic(unsigned DiagID, const DiagnosticBuilder &DB);
void addImplicitTypedef(StringRef Name, QualType T);
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 67841a30a571..ebd4a41ee636 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -3377,7 +3377,8 @@ static void encodeTypeForFunctionPointerAuth(const ASTContext &Ctx,
#include "clang/Basic/HLSLIntangibleTypes.def"
case BuiltinType::Dependent:
llvm_unreachable("should never get here");
- case BuiltinType::AMDGPUBufferRsrc:
+#define AMDGPU_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
+#include "clang/Basic/AMDGPUTypes.def"
case BuiltinType::WasmExternRef:
#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/RISCVVTypes.def"
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index ecff80a50906..0bd6845085b7 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -126,9 +126,7 @@ void DiagnosticsEngine::Reset(bool soft /*=false*/) {
TrapNumErrorsOccurred = 0;
TrapNumUnrecoverableErrorsOccurred = 0;
- CurDiagID = std::numeric_limits<unsigned>::max();
LastDiagLevel = DiagnosticIDs::Ignored;
- DelayedDiagID = 0;
if (!soft) {
// Clear state related to #pragma diagnostic.
@@ -143,23 +141,6 @@ void DiagnosticsEngine::Reset(bool soft /*=false*/) {
}
}
-void DiagnosticsEngine::SetDelayedDiagnostic(unsigned DiagID, StringRef Arg1,
- StringRef Arg2, StringRef Arg3) {
- if (DelayedDiagID)
- return;
-
- DelayedDiagID = DiagID;
- DelayedDiagArg1 = Arg1.str();
- DelayedDiagArg2 = Arg2.str();
- DelayedDiagArg3 = Arg3.str();
-}
-
-void DiagnosticsEngine::ReportDelayed() {
- unsigned ID = DelayedDiagID;
- DelayedDiagID = 0;
- Report(ID) << DelayedDiagArg1 << DelayedDiagArg2 << DelayedDiagArg3;
-}
-
DiagnosticMapping &
DiagnosticsEngine::DiagState::getOrAddMapping(diag::kind Diag) {
std::pair<iterator, bool> Result =
@@ -503,39 +484,31 @@ void DiagnosticsEngine::setSeverityForAll(diag::Flavor Flavor,
}
void DiagnosticsEngine::Report(const StoredDiagnostic &storedDiag) {
- assert(CurDiagID == std::numeric_limits<unsigned>::max() &&
- "Multiple diagnostics in flight at once!");
-
- CurDiagLoc = storedDiag.getLocation();
- CurDiagID = storedDiag.getID();
- DiagStorage.NumDiagArgs = 0;
-
- DiagStorage.DiagRanges.clear();
+ DiagnosticStorage DiagStorage;
DiagStorage.DiagRanges.append(storedDiag.range_begin(),
storedDiag.range_end());
- DiagStorage.FixItHints.clear();
DiagStorage.FixItHints.append(storedDiag.fixit_begin(),
storedDiag.fixit_end());
assert(Client && "DiagnosticConsumer not set!");
Level DiagLevel = storedDiag.getLevel();
- Diagnostic Info(this, storedDiag.getMessage());
+ Diagnostic Info(this, storedDiag.getLocation(), storedDiag.getID(),
+ DiagStorage, storedDiag.getMessage());
Client->HandleDiagnostic(DiagLevel, Info);
if (Client->IncludeInDiagnosticCounts()) {
if (DiagLevel == DiagnosticsEngine::Warning)
++NumWarnings;
}
-
- CurDiagID = std::numeric_limits<unsigned>::max();
}
-bool DiagnosticsEngine::EmitCurrentDiagnostic(bool Force) {
+bool DiagnosticsEngine::EmitDiagnostic(const DiagnosticBuilder &DB,
+ bool Force) {
assert(getClient() && "DiagnosticClient not set!");
bool Emitted;
if (Force) {
- Diagnostic Info(this);
+ Diagnostic Info(this, DB);
// Figure out the diagnostic level of this message.
DiagnosticIDs::Level DiagLevel
@@ -544,24 +517,50 @@ bool DiagnosticsEngine::EmitCurrentDiagnostic(bool Force) {
Emitted = (DiagLevel != DiagnosticIDs::Ignored);
if (Emitted) {
// Emit the diagnostic regardless of suppression level.
- Diags->EmitDiag(*this, DiagLevel);
+ Diags->EmitDiag(*this, DB, DiagLevel);
}
} else {
// Process the diagnostic, sending the accumulated information to the
// DiagnosticConsumer.
- Emitted = ProcessDiag();
+ Emitted = ProcessDiag(DB);
}
- // Clear out the current diagnostic object.
- Clear();
+ return Emitted;
+}
+
+DiagnosticBuilder::DiagnosticBuilder(DiagnosticsEngine *DiagObj,
+ SourceLocation DiagLoc, unsigned DiagID)
+ : StreamingDiagnostic(DiagObj->DiagAllocator), DiagObj(DiagObj),
+ DiagLoc(DiagLoc), DiagID(DiagID), IsActive(true) {
+ assert(DiagObj && "DiagnosticBuilder requires a valid DiagnosticsEngine!");
+}
- // If there was a delayed diagnostic, emit it now.
- if (!Force && DelayedDiagID)
- ReportDelayed();
+DiagnosticBuilder::DiagnosticBuilder(const DiagnosticBuilder &D)
+ : StreamingDiagnostic() {
+ DiagLoc = D.DiagLoc;
+ DiagID = D.DiagID;
+ FlagValue = D.FlagValue;
+ DiagObj = D.DiagObj;
+ DiagStorage = D.DiagStorage;
+ D.DiagStorage = nullptr;
+ Allocator = D.Allocator;
+ IsActive = D.IsActive;
+ IsForceEmit = D.IsForceEmit;
+ D.Clear();
+}
- return Emitted;
+Diagnostic::Diagnostic(const DiagnosticsEngine *DO,
+ const DiagnosticBuilder &DiagBuilder)
+ : DiagObj(DO), DiagLoc(DiagBuilder.DiagLoc), DiagID(DiagBuilder.DiagID),
+ FlagValue(DiagBuilder.FlagValue), DiagStorage(*DiagBuilder.getStorage()) {
}
+Diagnostic::Diagnostic(const DiagnosticsEngine *DO, SourceLocation DiagLoc,
+ unsigned DiagID, const DiagnosticStorage &DiagStorage,
+ StringRef StoredDiagMessage)
+ : DiagObj(DO), DiagLoc(DiagLoc), DiagID(DiagID), DiagStorage(DiagStorage),
+ StoredDiagMessage(StoredDiagMessage) {}
+
DiagnosticConsumer::~DiagnosticConsumer() = default;
void DiagnosticConsumer::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
@@ -1216,13 +1215,13 @@ bool ForwardingDiagnosticConsumer::IncludeInDiagnosticCounts() const {
return Target.IncludeInDiagnosticCounts();
}
-PartialDiagnostic::DiagStorageAllocator::DiagStorageAllocator() {
+DiagStorageAllocator::DiagStorageAllocator() {
for (unsigned I = 0; I != NumCached; ++I)
FreeList[I] = Cached + I;
NumFreeListEntries = NumCached;
}
-PartialDiagnostic::DiagStorageAllocator::~DiagStorageAllocator() {
+DiagStorageAllocator::~DiagStorageAllocator() {
// Don't assert if we are in a CrashRecovery context, as this invariant may
// be invalidated during a crash.
assert((NumFreeListEntries == NumCached ||
diff --git a/clang/lib/Basic/DiagnosticIDs.cpp b/clang/lib/Basic/DiagnosticIDs.cpp
index cae6642bd4bd..031d9d7817d1 100644
--- a/clang/lib/Basic/DiagnosticIDs.cpp
+++ b/clang/lib/Basic/DiagnosticIDs.cpp
@@ -566,7 +566,7 @@ DiagnosticIDs::getDiagnosticSeverity(unsigned DiagID, SourceLocation Loc,
// If explicitly requested, map fatal errors to errors.
if (Result == diag::Severity::Fatal &&
- Diag.CurDiagID != diag::fatal_too_many_errors && Diag.FatalsAsError)
+ DiagID != diag::fatal_too_many_errors && Diag.FatalsAsError)
Result = diag::Severity::Error;
bool ShowInSystemHeader =
@@ -800,8 +800,9 @@ StringRef DiagnosticIDs::getNearestOption(diag::Flavor Flavor,
/// ProcessDiag - This is the method used to report a diagnostic that is
/// finally fully formed.
-bool DiagnosticIDs::ProcessDiag(DiagnosticsEngine &Diag) const {
- Diagnostic Info(&Diag);
+bool DiagnosticIDs::ProcessDiag(DiagnosticsEngine &Diag,
+ const DiagnosticBuilder &DiagBuilder) const {
+ Diagnostic Info(&Diag, DiagBuilder);
assert(Diag.getClient() && "DiagnosticClient not set!");
@@ -867,22 +868,24 @@ bool DiagnosticIDs::ProcessDiag(DiagnosticsEngine &Diag) const {
// stop a flood of bogus errors.
if (Diag.ErrorLimit && Diag.NumErrors > Diag.ErrorLimit &&
DiagLevel == DiagnosticIDs::Error) {
- Diag.SetDelayedDiagnostic(diag::fatal_too_many_errors);
+ Diag.Report(diag::fatal_too_many_errors);
return false;
}
}
// Make sure we set FatalErrorOccurred to ensure that the notes from the
// diagnostic that caused `fatal_too_many_errors` won't be emitted.
- if (Diag.CurDiagID == diag::fatal_too_many_errors)
+ if (Info.getID() == diag::fatal_too_many_errors)
Diag.FatalErrorOccurred = true;
// Finally, report it.
- EmitDiag(Diag, DiagLevel);
+ EmitDiag(Diag, DiagBuilder, DiagLevel);
return true;
}
-void DiagnosticIDs::EmitDiag(DiagnosticsEngine &Diag, Level DiagLevel) const {
- Diagnostic Info(&Diag);
+void DiagnosticIDs::EmitDiag(DiagnosticsEngine &Diag,
+ const DiagnosticBuilder &DiagBuilder,
+ Level DiagLevel) const {
+ Diagnostic Info(&Diag, DiagBuilder);
assert(DiagLevel != DiagnosticIDs::Ignored && "Cannot emit ignored diagnostics!");
Diag.Client->HandleDiagnostic((DiagnosticsEngine::Level)DiagLevel, Info);
@@ -890,8 +893,6 @@ void DiagnosticIDs::EmitDiag(DiagnosticsEngine &Diag, Level DiagLevel) const {
if (DiagLevel == DiagnosticIDs::Warning)
++Diag.NumWarnings;
}
-
- Diag.CurDiagID = ~0U;
}
bool DiagnosticIDs::isUnrecoverable(unsigned DiagID) const {
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index d6ec26af80aa..65a8a7253e05 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -130,13 +130,8 @@ ContentCache::getBufferOrNone(DiagnosticsEngine &Diag, FileManager &FM,
// the file could also have been removed during processing. Since we can't
// really deal with this situation, just create an empty buffer.
if (!BufferOrError) {
- if (Diag.isDiagnosticInFlight())
- Diag.SetDelayedDiagnostic(diag::err_cannot_open_file,
- ContentsEntry->getName(),
- BufferOrError.getError().message());
- else
- Diag.Report(Loc, diag::err_cannot_open_file)
- << ContentsEntry->getName() << BufferOrError.getError().message();
+ Diag.Report(Loc, diag::err_cannot_open_file)
+ << ContentsEntry->getName() << BufferOrError.getError().message();
return std::nullopt;
}
@@ -153,12 +148,7 @@ ContentCache::getBufferOrNone(DiagnosticsEngine &Diag, FileManager &FM,
// ContentsEntry::getSize() could have the wrong size. Use
// MemoryBuffer::getBufferSize() instead.
if (Buffer->getBufferSize() >= std::numeric_limits<unsigned>::max()) {
- if (Diag.isDiagnosticInFlight())
- Diag.SetDelayedDiagnostic(diag::err_file_too_large,
- ContentsEntry->getName());
- else
- Diag.Report(Loc, diag::err_file_too_large)
- << ContentsEntry->getName();
+ Diag.Report(Loc, diag::err_file_too_large) << ContentsEntry->getName();
return std::nullopt;
}
@@ -168,12 +158,7 @@ ContentCache::getBufferOrNone(DiagnosticsEngine &Diag, FileManager &FM,
// have come from a stat cache).
if (!ContentsEntry->isNamedPipe() &&
Buffer->getBufferSize() != (size_t)ContentsEntry->getSize()) {
- if (Diag.isDiagnosticInFlight())
- Diag.SetDelayedDiagnostic(diag::err_file_modified,
- ContentsEntry->getName());
- else
- Diag.Report(Loc, diag::err_file_modified)
- << ContentsEntry->getName();
+ Diag.Report(Loc, diag::err_file_modified) << ContentsEntry->getName();
return std::nullopt;
}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a52e880a7642..7e18aafcdd4b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3834,6 +3834,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_elementwise_floor:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::floor, "elt.floor"));
+ case Builtin::BI__builtin_elementwise_popcount:
+ return RValue::get(emitBuiltinWithOneOverloadedType<1>(
+ *this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
case Builtin::BI__builtin_elementwise_roundeven:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index bec0a29e34fc..59d8fc830dcc 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -338,6 +338,13 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
NumThreadsAttr->getZ());
Fn->addFnAttr(NumThreadsKindStr, NumThreadsStr);
}
+ if (HLSLWaveSizeAttr *WaveSizeAttr = FD->getAttr<HLSLWaveSizeAttr>()) {
+ const StringRef WaveSizeKindStr = "hlsl.wavesize";
+ std::string WaveSizeStr =
+ formatv("{0},{1},{2}", WaveSizeAttr->getMin(), WaveSizeAttr->getMax(),
+ WaveSizeAttr->getPreferred());
+ Fn->addFnAttr(WaveSizeKindStr, WaveSizeStr);
+ }
Fn->addFnAttr(llvm::Attribute::NoInline);
}
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 74f70573c5fe..2c85d21ebd73 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -648,8 +648,6 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
Args.MakeArgString("-plugin-opt=-mattr=" + llvm::join(Features, ",")));
}
- addGPULibraries(getToolChain(), Args, CmdArgs);
-
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
C.addCommand(std::make_unique<Command>(
@@ -1089,4 +1087,4 @@ bool AMDGPUToolChain::shouldSkipSanitizeOption(
return true;
}
return false;
-} \ No newline at end of file
+}
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 494883500342..c00df5f5bc72 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -9223,6 +9223,25 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
A->claim();
}
+ // Pass in the C library for GPUs if present and not disabled.
+ if (!Args.hasArg(options::OPT_nostdlib, options::OPT_r, options::OPT_nogpulib,
+ options::OPT_nodefaultlibs, options::OPT_nolibc,
+ options::OPT_nogpulibc)) {
+ forAllAssociatedToolChains(C, JA, getToolChain(), [&](const ToolChain &TC) {
+ // The device C library is only available for NVPTX and AMDGPU targets
+ // currently.
+ if (!TC.getTriple().isNVPTX() && !TC.getTriple().isAMDGPU())
+ return;
+ bool HasLibC = TC.getStdlibIncludePath().has_value();
+ if (HasLibC) {
+ CmdArgs.push_back(Args.MakeArgString(
+ "--device-linker=" + TC.getTripleString() + "=" + "-lc"));
+ CmdArgs.push_back(Args.MakeArgString(
+ "--device-linker=" + TC.getTripleString() + "=" + "-lm"));
+ }
+ });
+ }
+
// If we disable the GPU C library support it needs to be forwarded to the
// link job.
if (!Args.hasFlag(options::OPT_gpulibc, options::OPT_nogpulibc, true))
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 502aba2ce4aa..043d9e487644 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -510,22 +510,6 @@ void tools::addLinkerCompressDebugSectionsOption(
}
}
-void tools::addGPULibraries(const ToolChain &TC, const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) {
- if (Args.hasArg(options::OPT_nostdlib, options::OPT_r,
- options::OPT_nodefaultlibs, options::OPT_nolibc,
- options::OPT_nogpulibc))
- return;
-
- // If the user's toolchain has the 'include/<triple>/` path, we assume it
- // supports the standard C libraries for the GPU and include them.
- bool HasLibC = TC.getStdlibIncludePath().has_value();
- if (HasLibC) {
- CmdArgs.push_back("-lc");
- CmdArgs.push_back("-lm");
- }
-}
-
void tools::AddTargetFeature(const ArgList &Args,
std::vector<StringRef> &Features,
OptSpecifier OnOpt, OptSpecifier OffOpt,
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h
index 0c97398dfcfa..8695d3fe5b55 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.h
+++ b/clang/lib/Driver/ToolChains/CommonArgs.h
@@ -35,9 +35,6 @@ void addLinkerCompressDebugSectionsOption(const ToolChain &TC,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs);
-void addGPULibraries(const ToolChain &TC, const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs);
-
void claimNoWarnArgs(const llvm::opt::ArgList &Args);
bool addSanitizerRuntimes(const ToolChain &TC, const llvm::opt::ArgList &Args,
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index ef44ffa5594d..509cd87b28c3 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -635,8 +635,6 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
for (StringRef Feature : Features)
CmdArgs.append({"--feature", Args.MakeArgString(Feature)});
- addGPULibraries(getToolChain(), Args, CmdArgs);
-
// Add paths for the default clang library path.
SmallString<256> DefaultLibPath =
llvm::sys::path::parent_path(TC.getDriver().Dir);
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index e21b5a882b77..63949b2e26bd 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -100,6 +100,13 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
FirstInLineIndex = Tokens.size() - 1;
} while (Tokens.back()->isNot(tok::eof));
+ if (Style.InsertNewlineAtEOF) {
+ auto &TokEOF = *Tokens.back();
+ if (TokEOF.NewlinesBefore == 0) {
+ TokEOF.NewlinesBefore = 1;
+ TokEOF.OriginalColumn = 0;
+ }
+ }
return Tokens;
}
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index dfa703aed0d3..6f09835bad3a 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -2840,11 +2840,14 @@ private:
if (AfterRParen->isOneOf(tok::identifier, tok::kw_this))
return true;
- // Look for a cast `( x ) (`.
- if (AfterRParen->is(tok::l_paren) && BeforeRParen->Previous) {
- if (BeforeRParen->is(tok::identifier) &&
- BeforeRParen->Previous->is(tok::l_paren)) {
- return true;
+ // Look for a cast `( x ) (`, where x may be a qualified identifier.
+ if (AfterRParen->is(tok::l_paren)) {
+ for (const auto *Prev = BeforeRParen; Prev->is(tok::identifier);) {
+ Prev = Prev->Previous;
+ if (Prev->is(tok::coloncolon))
+ Prev = Prev->Previous;
+ if (Prev == LParen)
+ return true;
}
}
@@ -3704,11 +3707,6 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
auto *First = Line.First;
First->SpacesRequiredBefore = 1;
First->CanBreakBefore = First->MustBreakBefore;
-
- if (First->is(tok::eof) && First->NewlinesBefore == 0 &&
- Style.InsertNewlineAtEOF) {
- First->NewlinesBefore = 1;
- }
}
// This function heuristically determines whether 'Current' starts the name of a
@@ -4418,31 +4416,29 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
Right.MatchingParen == &Left && Line.Children.empty()) {
return Style.SpaceInEmptyBlock;
}
- if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) ||
- (Left.is(tok::l_brace) && Left.isNot(BK_Block) &&
- Right.is(tok::r_brace) && Right.isNot(BK_Block))) {
- return Style.SpacesInParensOptions.InEmptyParentheses;
- }
- if (Style.SpacesInParens == FormatStyle::SIPO_Custom &&
- Style.SpacesInParensOptions.ExceptDoubleParentheses &&
- Left.is(tok::r_paren) && Right.is(tok::r_paren)) {
- auto *InnerLParen = Left.MatchingParen;
- if (InnerLParen && InnerLParen->Previous == Right.MatchingParen) {
- InnerLParen->SpacesRequiredBefore = 0;
- return false;
+ if (Style.SpacesInParens == FormatStyle::SIPO_Custom) {
+ if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) ||
+ (Left.is(tok::l_brace) && Left.isNot(BK_Block) &&
+ Right.is(tok::r_brace) && Right.isNot(BK_Block))) {
+ return Style.SpacesInParensOptions.InEmptyParentheses;
+ }
+ if (Style.SpacesInParensOptions.ExceptDoubleParentheses &&
+ Left.is(tok::r_paren) && Right.is(tok::r_paren)) {
+ auto *InnerLParen = Left.MatchingParen;
+ if (InnerLParen && InnerLParen->Previous == Right.MatchingParen) {
+ InnerLParen->SpacesRequiredBefore = 0;
+ return false;
+ }
}
- }
- if (Style.SpacesInParensOptions.InConditionalStatements) {
const FormatToken *LeftParen = nullptr;
if (Left.is(tok::l_paren))
LeftParen = &Left;
else if (Right.is(tok::r_paren) && Right.MatchingParen)
LeftParen = Right.MatchingParen;
- if (LeftParen) {
- if (LeftParen->is(TT_ConditionLParen))
- return true;
- if (LeftParen->Previous && isKeywordWithCondition(*LeftParen->Previous))
- return true;
+ if (LeftParen && (LeftParen->is(TT_ConditionLParen) ||
+ (LeftParen->Previous &&
+ isKeywordWithCondition(*LeftParen->Previous)))) {
+ return Style.SpacesInParensOptions.InConditionalStatements;
}
}
diff --git a/clang/lib/Frontend/Rewrite/FixItRewriter.cpp b/clang/lib/Frontend/Rewrite/FixItRewriter.cpp
index 44dfaf20eae7..7309553e3bc0 100644
--- a/clang/lib/Frontend/Rewrite/FixItRewriter.cpp
+++ b/clang/lib/Frontend/Rewrite/FixItRewriter.cpp
@@ -200,10 +200,8 @@ void FixItRewriter::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
/// Emit a diagnostic via the adapted diagnostic client.
void FixItRewriter::Diag(SourceLocation Loc, unsigned DiagID) {
// When producing this diagnostic, we temporarily bypass ourselves,
- // clear out any current diagnostic, and let the downstream client
- // format the diagnostic.
+ // and let the downstream client format the diagnostic.
Diags.setClient(Client, false);
- Diags.Clear();
Diags.Report(Loc, DiagID);
Diags.setClient(this, false);
}
diff --git a/clang/lib/Frontend/TextDiagnosticPrinter.cpp b/clang/lib/Frontend/TextDiagnosticPrinter.cpp
index c2fea3d03f0c..28f7218dc23f 100644
--- a/clang/lib/Frontend/TextDiagnosticPrinter.cpp
+++ b/clang/lib/Frontend/TextDiagnosticPrinter.cpp
@@ -84,7 +84,7 @@ static void printDiagnosticOptions(raw_ostream &OS,
if (!Opt.empty()) {
OS << (Started ? "," : " [")
<< (Level == DiagnosticsEngine::Remark ? "-R" : "-W") << Opt;
- StringRef OptValue = Info.getDiags()->getFlagValue();
+ StringRef OptValue = Info.getFlagValue();
if (!OptValue.empty())
OS << "=" << OptValue;
Started = true;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 4c75c638b41b..f5cc07c303f9 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -156,6 +156,7 @@ set(x86_files
avx10_2_512satcvtintrin.h
avx10_2bf16intrin.h
avx10_2convertintrin.h
+ avx10_2copyintrin.h
avx10_2minmaxintrin.h
avx10_2niintrin.h
avx10_2satcvtdsintrin.h
diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h
new file mode 100644
index 000000000000..7fc31190781d
--- /dev/null
+++ b/clang/lib/Headers/avx10_2copyintrin.h
@@ -0,0 +1,34 @@
+/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2COPYINTRIN_H
+#define __AVX10_2COPYINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
+ __min_vector_width__(128)))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) {
+ return (__m128i)__builtin_shufflevector(
+ (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) {
+ return (__m128i)__builtin_shufflevector(
+ (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8);
+}
+
+#undef __DEFAULT_FN_ATTRS128
+
+#endif // __AVX10_2COPYINTRIN_H
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 6a50d50ebd34..6cd6a2caf199 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -651,6 +651,77 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh)
float4 cosh(float4);
//===----------------------------------------------------------------------===//
+// count bits builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T countbits(T Val)
+/// \brief Return the number of bits (per component) set in the input integer.
+/// \param Val The input value.
+
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int16_t countbits(int16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int16_t2 countbits(int16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int16_t3 countbits(int16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int16_t4 countbits(int16_t4);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint16_t countbits(uint16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint16_t2 countbits(uint16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint16_t3 countbits(uint16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint16_t4 countbits(uint16_t4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int countbits(int);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int2 countbits(int2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int3 countbits(int3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int4 countbits(int4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint countbits(uint);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint2 countbits(uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint3 countbits(uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint4 countbits(uint4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int64_t countbits(int64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int64_t2 countbits(int64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int64_t3 countbits(int64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int64_t4 countbits(int64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint64_t countbits(uint64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint64_t2 countbits(uint64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint64_t3 countbits(uint64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint64_t4 countbits(uint64_t4);
+
+//===----------------------------------------------------------------------===//
// dot product builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 280154f3c102..3fbabffa98df 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -651,6 +651,7 @@ _storebe_i64(void * __P, long long __D) {
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
#include <avx10_2bf16intrin.h>
#include <avx10_2convertintrin.h>
+#include <avx10_2copyintrin.h>
#include <avx10_2minmaxintrin.h>
#include <avx10_2niintrin.h>
#include <avx10_2satcvtdsintrin.h>
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 1f56884be392..a04eed9873c0 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -5439,18 +5439,20 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS,
BaseRange = SourceRange(ColonLoc, DeclaratorInfo.getSourceRange().getEnd());
- if (!getLangOpts().ObjC && !getLangOpts().C23) {
+ if (!getLangOpts().ObjC) {
if (getLangOpts().CPlusPlus11)
Diag(ColonLoc, diag::warn_cxx98_compat_enum_fixed_underlying_type)
<< BaseRange;
else if (getLangOpts().CPlusPlus)
Diag(ColonLoc, diag::ext_cxx11_enum_fixed_underlying_type)
<< BaseRange;
- else if (getLangOpts().MicrosoftExt)
+ else if (getLangOpts().MicrosoftExt && !getLangOpts().C23)
Diag(ColonLoc, diag::ext_ms_c_enum_fixed_underlying_type)
<< BaseRange;
else
- Diag(ColonLoc, diag::ext_clang_c_enum_fixed_underlying_type)
+ Diag(ColonLoc, getLangOpts().C23
+ ? diag::warn_c17_compat_enum_fixed_underlying_type
+ : diag::ext_c23_enum_fixed_underlying_type)
<< BaseRange;
}
}
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 85cbbe7750c2..69d724124718 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -1590,7 +1590,7 @@ LangAS Sema::getDefaultCXXMethodAddrSpace() const {
return LangAS::Default;
}
-void Sema::EmitCurrentDiagnostic(unsigned DiagID) {
+void Sema::EmitDiagnostic(unsigned DiagID, const DiagnosticBuilder &DB) {
// FIXME: It doesn't make sense to me that DiagID is an incoming argument here
// and yet we also use the current diag ID on the DiagnosticsEngine. This has
// been made more painfully obvious by the refactor that introduced this
@@ -1598,9 +1598,9 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) {
// eliminated. If it truly cannot be (for example, there is some reentrancy
// issue I am not seeing yet), then there should at least be a clarifying
// comment somewhere.
+ Diagnostic DiagInfo(&Diags, DB);
if (std::optional<TemplateDeductionInfo *> Info = isSFINAEContext()) {
- switch (DiagnosticIDs::getDiagnosticSFINAEResponse(
- Diags.getCurrentDiagID())) {
+ switch (DiagnosticIDs::getDiagnosticSFINAEResponse(DiagInfo.getID())) {
case DiagnosticIDs::SFINAE_Report:
// We'll report the diagnostic below.
break;
@@ -1613,13 +1613,11 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) {
// Make a copy of this suppressed diagnostic and store it with the
// template-deduction information.
if (*Info && !(*Info)->hasSFINAEDiagnostic()) {
- Diagnostic DiagInfo(&Diags);
(*Info)->addSFINAEDiagnostic(DiagInfo.getLocation(),
PartialDiagnostic(DiagInfo, Context.getDiagAllocator()));
}
Diags.setLastDiagnosticIgnored(true);
- Diags.Clear();
return;
case DiagnosticIDs::SFINAE_AccessControl: {
@@ -1630,7 +1628,7 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) {
if (!AccessCheckingSFINAE && !getLangOpts().CPlusPlus11)
break;
- SourceLocation Loc = Diags.getCurrentDiagLoc();
+ SourceLocation Loc = DiagInfo.getLocation();
// Suppress this diagnostic.
++NumSFINAEErrors;
@@ -1638,16 +1636,13 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) {
// Make a copy of this suppressed diagnostic and store it with the
// template-deduction information.
if (*Info && !(*Info)->hasSFINAEDiagnostic()) {
- Diagnostic DiagInfo(&Diags);
(*Info)->addSFINAEDiagnostic(DiagInfo.getLocation(),
PartialDiagnostic(DiagInfo, Context.getDiagAllocator()));
}
Diags.setLastDiagnosticIgnored(true);
- Diags.Clear();
- // Now the diagnostic state is clear, produce a C++98 compatibility
- // warning.
+ // Now produce a C++98 compatibility warning.
Diag(Loc, diag::warn_cxx98_compat_sfinae_access_control);
// The last diagnostic which Sema produced was ignored. Suppress any
@@ -1660,14 +1655,12 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) {
// Make a copy of this suppressed diagnostic and store it with the
// template-deduction information;
if (*Info) {
- Diagnostic DiagInfo(&Diags);
(*Info)->addSuppressedDiagnostic(DiagInfo.getLocation(),
PartialDiagnostic(DiagInfo, Context.getDiagAllocator()));
}
// Suppress this diagnostic.
Diags.setLastDiagnosticIgnored(true);
- Diags.Clear();
return;
}
}
@@ -1677,7 +1670,7 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) {
Context.setPrintingPolicy(getPrintingPolicy());
// Emit the diagnostic.
- if (!Diags.EmitCurrentDiagnostic())
+ if (!Diags.EmitDiagnostic(DB))
return;
// If this is not a note, and we're in a template instantiation
diff --git a/clang/lib/Sema/SemaBase.cpp b/clang/lib/Sema/SemaBase.cpp
index a2f12d622e8c..5c24f21b469b 100644
--- a/clang/lib/Sema/SemaBase.cpp
+++ b/clang/lib/Sema/SemaBase.cpp
@@ -26,7 +26,7 @@ SemaBase::ImmediateDiagBuilder::~ImmediateDiagBuilder() {
Clear();
// Dispatch to Sema to emit the diagnostic.
- SemaRef.EmitCurrentDiagnostic(DiagID);
+ SemaRef.EmitDiagnostic(DiagID, *this);
}
PartialDiagnostic SemaBase::PDiag(unsigned DiagID) {
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index f01b22a72915..6ac620184347 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -446,7 +446,12 @@ static bool tryDiagnoseOverloadedCast(Sema &S, CastType CT,
: InitializationKind::CreateCast(/*type range?*/ range);
InitializationSequence sequence(S, entity, initKind, src);
- assert(sequence.Failed() && "initialization succeeded on second try?");
+ // It could happen that a constructor failed to be used because
+ // it requires a temporary of a broken type. Still, it will be found when
+ // looking for a match.
+ if (!sequence.Failed())
+ return false;
+
switch (sequence.getFailureKind()) {
default: return false;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 99500daca295..d2570119c343 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2795,7 +2795,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
if (BuiltinElementwiseMath(TheCall))
return ExprError();
break;
-
+ case Builtin::BI__builtin_elementwise_popcount:
case Builtin::BI__builtin_elementwise_bitreverse: {
if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
return ExprError();
diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp
index a574d56646f3..89a0beadc61f 100644
--- a/clang/lib/Sema/SemaCoroutine.cpp
+++ b/clang/lib/Sema/SemaCoroutine.cpp
@@ -849,12 +849,28 @@ static bool isAttributedCoroAwaitElidable(const QualType &QT) {
return Record && Record->hasAttr<CoroAwaitElidableAttr>();
}
-static bool isCoroAwaitElidableCall(Expr *Operand) {
- if (!Operand->isPRValue()) {
- return false;
- }
+static void applySafeElideContext(Expr *Operand) {
+ auto *Call = dyn_cast<CallExpr>(Operand->IgnoreImplicit());
+ if (!Call || !Call->isPRValue())
+ return;
+
+ if (!isAttributedCoroAwaitElidable(Call->getType()))
+ return;
+
+ Call->setCoroElideSafe();
- return isAttributedCoroAwaitElidable(Operand->getType());
+ // Check parameter
+ auto *Fn = llvm::dyn_cast_if_present<FunctionDecl>(Call->getCalleeDecl());
+ if (!Fn)
+ return;
+
+ size_t ParmIdx = 0;
+ for (ParmVarDecl *PD : Fn->parameters()) {
+ if (PD->hasAttr<CoroAwaitElidableArgumentAttr>())
+ applySafeElideContext(Call->getArg(ParmIdx));
+
+ ParmIdx++;
+ }
}
// Attempts to resolve and build a CoawaitExpr from "raw" inputs, bailing out to
@@ -880,14 +896,12 @@ ExprResult Sema::BuildUnresolvedCoawaitExpr(SourceLocation Loc, Expr *Operand,
}
auto *RD = Promise->getType()->getAsCXXRecordDecl();
- bool AwaitElidable =
- isCoroAwaitElidableCall(Operand) &&
- isAttributedCoroAwaitElidable(
- getCurFunctionDecl(/*AllowLambda=*/true)->getReturnType());
-
- if (AwaitElidable)
- if (auto *Call = dyn_cast<CallExpr>(Operand->IgnoreImplicit()))
- Call->setCoroElideSafe();
+
+ bool CurFnAwaitElidable = isAttributedCoroAwaitElidable(
+ getCurFunctionDecl(/*AllowLambda=*/true)->getReturnType());
+
+ if (CurFnAwaitElidable)
+ applySafeElideContext(Operand);
Expr *Transformed = Operand;
if (lookupMember(*this, "await_transform", RD, Loc)) {
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 8557c25b93a8..31bf50a32a83 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -9768,7 +9768,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
bool ImplicitInlineCXX20 = !getLangOpts().CPlusPlusModules ||
NewFD->isConstexpr() || NewFD->isConsteval() ||
!NewFD->getOwningModule() ||
- NewFD->isFromExplicitGlobalModule() ||
+ NewFD->isFromGlobalModule() ||
NewFD->getOwningModule()->isHeaderLikeModule();
bool isInline = D.getDeclSpec().isInlineSpecified();
bool isVirtual = D.getDeclSpec().isVirtualSpecified();
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 80c252c79e4d..2f7e9c754ce0 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17557,7 +17557,7 @@ static void RemoveNestedImmediateInvocation(
else
break;
}
- /// ConstantExpr are the first layer of implicit node to be removed so if
+ /// ConstantExprs are the first layer of implicit node to be removed so if
/// Init isn't a ConstantExpr, no ConstantExpr will be skipped.
if (auto *CE = dyn_cast<ConstantExpr>(Init);
CE && CE->isImmediateInvocation())
@@ -17570,7 +17570,7 @@ static void RemoveNestedImmediateInvocation(
}
ExprResult TransformLambdaExpr(LambdaExpr *E) {
// Do not rebuild lambdas to avoid creating a new type.
- // Lambdas have already been processed inside their eval context.
+ // Lambdas have already been processed inside their eval contexts.
return E;
}
bool AlwaysRebuild() { return false; }
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 67792be994fa..03b7c2edb605 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -40,6 +40,48 @@
#include <utility>
using namespace clang;
+using llvm::dxil::ResourceClass;
+
+enum class RegisterType { SRV, UAV, CBuffer, Sampler, C, I, Invalid };
+
+static RegisterType getRegisterType(ResourceClass RC) {
+ switch (RC) {
+ case ResourceClass::SRV:
+ return RegisterType::SRV;
+ case ResourceClass::UAV:
+ return RegisterType::UAV;
+ case ResourceClass::CBuffer:
+ return RegisterType::CBuffer;
+ case ResourceClass::Sampler:
+ return RegisterType::Sampler;
+ }
+ llvm_unreachable("unexpected ResourceClass value");
+}
+
+static RegisterType getRegisterType(StringRef Slot) {
+ switch (Slot[0]) {
+ case 't':
+ case 'T':
+ return RegisterType::SRV;
+ case 'u':
+ case 'U':
+ return RegisterType::UAV;
+ case 'b':
+ case 'B':
+ return RegisterType::CBuffer;
+ case 's':
+ case 'S':
+ return RegisterType::Sampler;
+ case 'c':
+ case 'C':
+ return RegisterType::C;
+ case 'i':
+ case 'I':
+ return RegisterType::I;
+ default:
+ return RegisterType::Invalid;
+ }
+}
SemaHLSL::SemaHLSL(Sema &S) : SemaBase(S) {}
@@ -586,8 +628,7 @@ bool clang::CreateHLSLAttributedResourceType(
LocEnd = A->getRange().getEnd();
switch (A->getKind()) {
case attr::HLSLResourceClass: {
- llvm::dxil::ResourceClass RC =
- cast<HLSLResourceClassAttr>(A)->getResourceClass();
+ ResourceClass RC = cast<HLSLResourceClassAttr>(A)->getResourceClass();
if (HasResourceClass) {
S.Diag(A->getLocation(), ResAttrs.ResourceClass == RC
? diag::warn_duplicate_attribute_exact
@@ -672,7 +713,7 @@ bool SemaHLSL::handleResourceTypeAttr(const ParsedAttr &AL) {
SourceLocation ArgLoc = Loc->Loc;
// Validate resource class value
- llvm::dxil::ResourceClass RC;
+ ResourceClass RC;
if (!HLSLResourceClassAttr::ConvertStrToResourceClass(Identifier, RC)) {
Diag(ArgLoc, diag::warn_attribute_type_not_supported)
<< "ResourceClass" << Identifier;
@@ -750,28 +791,6 @@ SemaHLSL::TakeLocForHLSLAttribute(const HLSLAttributedResourceType *RT) {
return LocInfo;
}
-struct RegisterBindingFlags {
- bool Resource = false;
- bool UDT = false;
- bool Other = false;
- bool Basic = false;
-
- bool SRV = false;
- bool UAV = false;
- bool CBV = false;
- bool Sampler = false;
-
- bool ContainsNumeric = false;
- bool DefaultGlobals = false;
-
- // used only when Resource == true
- std::optional<llvm::dxil::ResourceClass> ResourceClass;
-};
-
-static bool isDeclaredWithinCOrTBuffer(const Decl *TheDecl) {
- return TheDecl && isa<HLSLBufferDecl>(TheDecl->getDeclContext());
-}
-
// get the record decl from a var decl that we expect
// represents a resource
static CXXRecordDecl *getRecordDeclFromVarDecl(VarDecl *VD) {
@@ -786,24 +805,6 @@ static CXXRecordDecl *getRecordDeclFromVarDecl(VarDecl *VD) {
return TheRecordDecl;
}
-static void updateResourceClassFlagsFromDeclResourceClass(
- RegisterBindingFlags &Flags, llvm::hlsl::ResourceClass DeclResourceClass) {
- switch (DeclResourceClass) {
- case llvm::hlsl::ResourceClass::SRV:
- Flags.SRV = true;
- break;
- case llvm::hlsl::ResourceClass::UAV:
- Flags.UAV = true;
- break;
- case llvm::hlsl::ResourceClass::CBuffer:
- Flags.CBV = true;
- break;
- case llvm::hlsl::ResourceClass::Sampler:
- Flags.Sampler = true;
- break;
- }
-}
-
const HLSLAttributedResourceType *
findAttributedResourceTypeOnField(VarDecl *VD) {
assert(VD != nullptr && "expected VarDecl");
@@ -817,8 +818,10 @@ findAttributedResourceTypeOnField(VarDecl *VD) {
return nullptr;
}
-static void updateResourceClassFlagsFromRecordType(RegisterBindingFlags &Flags,
- const RecordType *RT) {
+// Iterate over RecordType fields and return true if any of them matched the
+// register type
+static bool ContainsResourceForRegisterType(Sema &S, const RecordType *RT,
+ RegisterType RegType) {
llvm::SmallVector<const Type *> TypesToScan;
TypesToScan.emplace_back(RT);
@@ -827,8 +830,8 @@ static void updateResourceClassFlagsFromRecordType(RegisterBindingFlags &Flags,
while (T->isArrayType())
T = T->getArrayElementTypeNoTypeQual();
if (T->isIntegralOrEnumerationType() || T->isFloatingType()) {
- Flags.ContainsNumeric = true;
- continue;
+ if (RegType == RegisterType::C)
+ return true;
}
const RecordType *RT = T->getAs<RecordType>();
if (!RT)
@@ -839,100 +842,84 @@ static void updateResourceClassFlagsFromRecordType(RegisterBindingFlags &Flags,
const Type *FieldTy = FD->getType().getTypePtr();
if (const HLSLAttributedResourceType *AttrResType =
dyn_cast<HLSLAttributedResourceType>(FieldTy)) {
- updateResourceClassFlagsFromDeclResourceClass(
- Flags, AttrResType->getAttrs().ResourceClass);
- continue;
+ ResourceClass RC = AttrResType->getAttrs().ResourceClass;
+ if (getRegisterType(RC) == RegType)
+ return true;
+ } else {
+ TypesToScan.emplace_back(FD->getType().getTypePtr());
}
- TypesToScan.emplace_back(FD->getType().getTypePtr());
}
}
+ return false;
}
-static RegisterBindingFlags HLSLFillRegisterBindingFlags(Sema &S,
- Decl *TheDecl) {
- RegisterBindingFlags Flags;
+static void CheckContainsResourceForRegisterType(Sema &S,
+ SourceLocation &ArgLoc,
+ Decl *D, RegisterType RegType,
+ bool SpecifiedSpace) {
+ int RegTypeNum = static_cast<int>(RegType);
// check if the decl type is groupshared
- if (TheDecl->hasAttr<HLSLGroupSharedAddressSpaceAttr>()) {
- Flags.Other = true;
- return Flags;
+ if (D->hasAttr<HLSLGroupSharedAddressSpaceAttr>()) {
+ S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << RegTypeNum;
+ return;
}
// Cbuffers and Tbuffers are HLSLBufferDecl types
- if (HLSLBufferDecl *CBufferOrTBuffer = dyn_cast<HLSLBufferDecl>(TheDecl)) {
- Flags.Resource = true;
- Flags.ResourceClass = CBufferOrTBuffer->isCBuffer()
- ? llvm::dxil::ResourceClass::CBuffer
- : llvm::dxil::ResourceClass::SRV;
+ if (HLSLBufferDecl *CBufferOrTBuffer = dyn_cast<HLSLBufferDecl>(D)) {
+ ResourceClass RC = CBufferOrTBuffer->isCBuffer() ? ResourceClass::CBuffer
+ : ResourceClass::SRV;
+ if (RegType != getRegisterType(RC))
+ S.Diag(D->getLocation(), diag::err_hlsl_binding_type_mismatch)
+ << RegTypeNum;
+ return;
}
+
// Samplers, UAVs, and SRVs are VarDecl types
- else if (VarDecl *TheVarDecl = dyn_cast<VarDecl>(TheDecl)) {
- if (const HLSLAttributedResourceType *AttrResType =
- findAttributedResourceTypeOnField(TheVarDecl)) {
- Flags.Resource = true;
- Flags.ResourceClass = AttrResType->getAttrs().ResourceClass;
- } else {
- const clang::Type *TheBaseType = TheVarDecl->getType().getTypePtr();
- while (TheBaseType->isArrayType())
- TheBaseType = TheBaseType->getArrayElementTypeNoTypeQual();
-
- if (TheBaseType->isArithmeticType()) {
- Flags.Basic = true;
- if (!isDeclaredWithinCOrTBuffer(TheDecl) &&
- (TheBaseType->isIntegralType(S.getASTContext()) ||
- TheBaseType->isFloatingType()))
- Flags.DefaultGlobals = true;
- } else if (TheBaseType->isRecordType()) {
- Flags.UDT = true;
- const RecordType *TheRecordTy = TheBaseType->getAs<RecordType>();
- updateResourceClassFlagsFromRecordType(Flags, TheRecordTy);
- } else
- Flags.Other = true;
- }
- } else {
- llvm_unreachable("expected be VarDecl or HLSLBufferDecl");
+ assert(isa<VarDecl>(D) && "D is expected to be VarDecl or HLSLBufferDecl");
+ VarDecl *VD = cast<VarDecl>(D);
+
+ // Resource
+ if (const HLSLAttributedResourceType *AttrResType =
+ findAttributedResourceTypeOnField(VD)) {
+ if (RegType != getRegisterType(AttrResType->getAttrs().ResourceClass))
+ S.Diag(D->getLocation(), diag::err_hlsl_binding_type_mismatch)
+ << RegTypeNum;
+ return;
}
- return Flags;
-}
-enum class RegisterType { SRV, UAV, CBuffer, Sampler, C, I, Invalid };
+ const clang::Type *Ty = VD->getType().getTypePtr();
+ while (Ty->isArrayType())
+ Ty = Ty->getArrayElementTypeNoTypeQual();
-static RegisterType getRegisterType(llvm::dxil::ResourceClass RC) {
- switch (RC) {
- case llvm::dxil::ResourceClass::SRV:
- return RegisterType::SRV;
- case llvm::dxil::ResourceClass::UAV:
- return RegisterType::UAV;
- case llvm::dxil::ResourceClass::CBuffer:
- return RegisterType::CBuffer;
- case llvm::dxil::ResourceClass::Sampler:
- return RegisterType::Sampler;
- }
- llvm_unreachable("unexpected ResourceClass value");
-}
+ // Basic types
+ if (Ty->isArithmeticType()) {
+ bool DeclaredInCOrTBuffer = isa<HLSLBufferDecl>(D->getDeclContext());
+ if (SpecifiedSpace && !DeclaredInCOrTBuffer)
+ S.Diag(ArgLoc, diag::err_hlsl_space_on_global_constant);
-static RegisterType getRegisterType(StringRef Slot) {
- switch (Slot[0]) {
- case 't':
- case 'T':
- return RegisterType::SRV;
- case 'u':
- case 'U':
- return RegisterType::UAV;
- case 'b':
- case 'B':
- return RegisterType::CBuffer;
- case 's':
- case 'S':
- return RegisterType::Sampler;
- case 'c':
- case 'C':
- return RegisterType::C;
- case 'i':
- case 'I':
- return RegisterType::I;
- default:
- return RegisterType::Invalid;
+ if (!DeclaredInCOrTBuffer &&
+ (Ty->isIntegralType(S.getASTContext()) || Ty->isFloatingType())) {
+ // Default Globals
+ if (RegType == RegisterType::CBuffer)
+ S.Diag(ArgLoc, diag::warn_hlsl_deprecated_register_type_b);
+ else if (RegType != RegisterType::C)
+ S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << RegTypeNum;
+ } else {
+ if (RegType == RegisterType::C)
+ S.Diag(ArgLoc, diag::warn_hlsl_register_type_c_packoffset);
+ else
+ S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << RegTypeNum;
+ }
+ } else if (Ty->isRecordType()) {
+ // Class/struct types - walk the declaration and check each field and
+ // subclass
+ if (!ContainsResourceForRegisterType(S, Ty->getAs<RecordType>(), RegType))
+ S.Diag(D->getLocation(), diag::warn_hlsl_user_defined_type_missing_member)
+ << RegTypeNum;
+ } else {
+ // Anything else is an error
+ S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << RegTypeNum;
}
}
@@ -969,73 +956,19 @@ static void ValidateMultipleRegisterAnnotations(Sema &S, Decl *TheDecl,
}
static void DiagnoseHLSLRegisterAttribute(Sema &S, SourceLocation &ArgLoc,
- Decl *TheDecl, RegisterType regType) {
+ Decl *D, RegisterType RegType,
+ bool SpecifiedSpace) {
// exactly one of these two types should be set
- assert(((isa<VarDecl>(TheDecl) && !isa<HLSLBufferDecl>(TheDecl)) ||
- (!isa<VarDecl>(TheDecl) && isa<HLSLBufferDecl>(TheDecl))) &&
+ assert(((isa<VarDecl>(D) && !isa<HLSLBufferDecl>(D)) ||
+ (!isa<VarDecl>(D) && isa<HLSLBufferDecl>(D))) &&
"expecting VarDecl or HLSLBufferDecl");
- RegisterBindingFlags Flags = HLSLFillRegisterBindingFlags(S, TheDecl);
- assert((int)Flags.Other + (int)Flags.Resource + (int)Flags.Basic +
- (int)Flags.UDT ==
- 1 &&
- "only one resource analysis result should be expected");
-
- int regTypeNum = static_cast<int>(regType);
-
- // first, if "other" is set, emit an error
- if (Flags.Other) {
- S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << regTypeNum;
- return;
- }
+ // check if the declaration contains resource matching the register type
+ CheckContainsResourceForRegisterType(S, ArgLoc, D, RegType, SpecifiedSpace);
// next, if multiple register annotations exist, check that none conflict.
- ValidateMultipleRegisterAnnotations(S, TheDecl, regType);
-
- // next, if resource is set, make sure the register type in the register
- // annotation is compatible with the variable's resource type.
- if (Flags.Resource) {
- RegisterType expRegType = getRegisterType(Flags.ResourceClass.value());
- if (regType != expRegType) {
- S.Diag(TheDecl->getLocation(), diag::err_hlsl_binding_type_mismatch)
- << regTypeNum;
- }
- return;
- }
-
- // next, handle diagnostics for when the "basic" flag is set
- if (Flags.Basic) {
- if (Flags.DefaultGlobals) {
- if (regType == RegisterType::CBuffer)
- S.Diag(ArgLoc, diag::warn_hlsl_deprecated_register_type_b);
- else if (regType != RegisterType::C)
- S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << regTypeNum;
- return;
- }
-
- if (regType == RegisterType::C)
- S.Diag(ArgLoc, diag::warn_hlsl_register_type_c_packoffset);
- else
- S.Diag(ArgLoc, diag::err_hlsl_binding_type_mismatch) << regTypeNum;
-
- return;
- }
-
- // finally, we handle the udt case
- if (Flags.UDT) {
- const bool ExpectedRegisterTypesForUDT[] = {
- Flags.SRV, Flags.UAV, Flags.CBV, Flags.Sampler, Flags.ContainsNumeric};
- assert((size_t)regTypeNum < std::size(ExpectedRegisterTypesForUDT) &&
- "regType has unexpected value");
-
- if (!ExpectedRegisterTypesForUDT[regTypeNum])
- S.Diag(TheDecl->getLocation(),
- diag::warn_hlsl_user_defined_type_missing_member)
- << regTypeNum;
-
- return;
- }
+ ValidateMultipleRegisterAnnotations(S, D, RegType);
}
void SemaHLSL::handleResourceBindingAttr(Decl *TheDecl, const ParsedAttr &AL) {
@@ -1059,7 +992,9 @@ void SemaHLSL::handleResourceBindingAttr(Decl *TheDecl, const ParsedAttr &AL) {
SourceLocation ArgLoc = Loc->Loc;
SourceLocation SpaceArgLoc;
+ bool SpecifiedSpace = false;
if (AL.getNumArgs() == 2) {
+ SpecifiedSpace = true;
Slot = Str;
if (!AL.isArgIdent(1)) {
Diag(AL.getLoc(), diag::err_attribute_argument_type)
@@ -1107,7 +1042,8 @@ void SemaHLSL::handleResourceBindingAttr(Decl *TheDecl, const ParsedAttr &AL) {
return;
}
- DiagnoseHLSLRegisterAttribute(SemaRef, ArgLoc, TheDecl, regType);
+ DiagnoseHLSLRegisterAttribute(SemaRef, ArgLoc, TheDecl, regType,
+ SpecifiedSpace);
HLSLResourceBindingAttr *NewAttr =
HLSLResourceBindingAttr::Create(getASTContext(), Slot, Space, AL);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index b952ffbd69f5..9afb8cea26fe 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -2861,113 +2861,120 @@ void SemaOpenMP::EndOpenMPDSABlock(Stmt *CurDirective) {
// clause requires an accessible, unambiguous default constructor for the
// class type, unless the list item is also specified in a firstprivate
// clause.
- if (const auto *D = dyn_cast_or_null<OMPExecutableDirective>(CurDirective)) {
- for (OMPClause *C : D->clauses()) {
- if (auto *Clause = dyn_cast<OMPLastprivateClause>(C)) {
- SmallVector<Expr *, 8> PrivateCopies;
- for (Expr *DE : Clause->varlist()) {
- if (DE->isValueDependent() || DE->isTypeDependent()) {
- PrivateCopies.push_back(nullptr);
- continue;
- }
- auto *DRE = cast<DeclRefExpr>(DE->IgnoreParens());
- auto *VD = cast<VarDecl>(DRE->getDecl());
- QualType Type = VD->getType().getNonReferenceType();
- const DSAStackTy::DSAVarData DVar =
- DSAStack->getTopDSA(VD, /*FromParent=*/false);
- if (DVar.CKind == OMPC_lastprivate) {
- // Generate helper private variable and initialize it with the
- // default value. The address of the original variable is replaced
- // by the address of the new private variable in CodeGen. This new
- // variable is not added to IdResolver, so the code in the OpenMP
- // region uses original variable for proper diagnostics.
- VarDecl *VDPrivate = buildVarDecl(
- SemaRef, DE->getExprLoc(), Type.getUnqualifiedType(),
- VD->getName(), VD->hasAttrs() ? &VD->getAttrs() : nullptr, DRE);
- SemaRef.ActOnUninitializedDecl(VDPrivate);
- if (VDPrivate->isInvalidDecl()) {
- PrivateCopies.push_back(nullptr);
- continue;
- }
- PrivateCopies.push_back(buildDeclRefExpr(
- SemaRef, VDPrivate, DE->getType(), DE->getExprLoc()));
- } else {
- // The variable is also a firstprivate, so initialization sequence
- // for private copy is generated already.
- PrivateCopies.push_back(nullptr);
- }
- }
- Clause->setPrivateCopies(PrivateCopies);
+
+ auto FinalizeLastprivate = [&](OMPLastprivateClause *Clause) {
+ SmallVector<Expr *, 8> PrivateCopies;
+ for (Expr *DE : Clause->varlist()) {
+ if (DE->isValueDependent() || DE->isTypeDependent()) {
+ PrivateCopies.push_back(nullptr);
continue;
}
- // Finalize nontemporal clause by handling private copies, if any.
- if (auto *Clause = dyn_cast<OMPNontemporalClause>(C)) {
- SmallVector<Expr *, 8> PrivateRefs;
- for (Expr *RefExpr : Clause->varlist()) {
- assert(RefExpr && "NULL expr in OpenMP nontemporal clause.");
- SourceLocation ELoc;
- SourceRange ERange;
- Expr *SimpleRefExpr = RefExpr;
- auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange);
- if (Res.second)
- // It will be analyzed later.
- PrivateRefs.push_back(RefExpr);
- ValueDecl *D = Res.first;
- if (!D)
- continue;
-
- const DSAStackTy::DSAVarData DVar =
- DSAStack->getTopDSA(D, /*FromParent=*/false);
- PrivateRefs.push_back(DVar.PrivateCopy ? DVar.PrivateCopy
- : SimpleRefExpr);
- }
- Clause->setPrivateRefs(PrivateRefs);
+ auto *DRE = cast<DeclRefExpr>(DE->IgnoreParens());
+ auto *VD = cast<VarDecl>(DRE->getDecl());
+ QualType Type = VD->getType().getNonReferenceType();
+ const DSAStackTy::DSAVarData DVar =
+ DSAStack->getTopDSA(VD, /*FromParent=*/false);
+ if (DVar.CKind != OMPC_lastprivate) {
+ // The variable is also a firstprivate, so initialization sequence
+ // for private copy is generated already.
+ PrivateCopies.push_back(nullptr);
continue;
}
- if (auto *Clause = dyn_cast<OMPUsesAllocatorsClause>(C)) {
- for (unsigned I = 0, E = Clause->getNumberOfAllocators(); I < E; ++I) {
- OMPUsesAllocatorsClause::Data D = Clause->getAllocatorData(I);
- auto *DRE = dyn_cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts());
- if (!DRE)
- continue;
- ValueDecl *VD = DRE->getDecl();
- if (!VD || !isa<VarDecl>(VD))
- continue;
- DSAStackTy::DSAVarData DVar =
- DSAStack->getTopDSA(VD, /*FromParent=*/false);
- // OpenMP [2.12.5, target Construct]
- // Memory allocators that appear in a uses_allocators clause cannot
- // appear in other data-sharing attribute clauses or data-mapping
- // attribute clauses in the same construct.
- Expr *MapExpr = nullptr;
- if (DVar.RefExpr ||
- DSAStack->checkMappableExprComponentListsForDecl(
- VD, /*CurrentRegionOnly=*/true,
- [VD, &MapExpr](
- OMPClauseMappableExprCommon::MappableExprComponentListRef
- MapExprComponents,
- OpenMPClauseKind C) {
- auto MI = MapExprComponents.rbegin();
- auto ME = MapExprComponents.rend();
- if (MI != ME &&
- MI->getAssociatedDeclaration()->getCanonicalDecl() ==
- VD->getCanonicalDecl()) {
- MapExpr = MI->getAssociatedExpression();
- return true;
- }
- return false;
- })) {
- Diag(D.Allocator->getExprLoc(),
- diag::err_omp_allocator_used_in_clauses)
- << D.Allocator->getSourceRange();
- if (DVar.RefExpr)
- reportOriginalDsa(SemaRef, DSAStack, VD, DVar);
- else
- Diag(MapExpr->getExprLoc(), diag::note_used_here)
- << MapExpr->getSourceRange();
- }
- }
+ // Generate helper private variable and initialize it with the
+ // default value. The address of the original variable is replaced
+ // by the address of the new private variable in CodeGen. This new
+ // variable is not added to IdResolver, so the code in the OpenMP
+ // region uses original variable for proper diagnostics.
+ VarDecl *VDPrivate = buildVarDecl(
+ SemaRef, DE->getExprLoc(), Type.getUnqualifiedType(), VD->getName(),
+ VD->hasAttrs() ? &VD->getAttrs() : nullptr, DRE);
+ SemaRef.ActOnUninitializedDecl(VDPrivate);
+ if (VDPrivate->isInvalidDecl()) {
+ PrivateCopies.push_back(nullptr);
+ continue;
+ }
+ PrivateCopies.push_back(buildDeclRefExpr(
+ SemaRef, VDPrivate, DE->getType(), DE->getExprLoc()));
+ }
+ Clause->setPrivateCopies(PrivateCopies);
+ };
+
+ auto FinalizeNontemporal = [&](OMPNontemporalClause *Clause) {
+ // Finalize nontemporal clause by handling private copies, if any.
+ SmallVector<Expr *, 8> PrivateRefs;
+ for (Expr *RefExpr : Clause->varlist()) {
+ assert(RefExpr && "NULL expr in OpenMP nontemporal clause.");
+ SourceLocation ELoc;
+ SourceRange ERange;
+ Expr *SimpleRefExpr = RefExpr;
+ auto Res = getPrivateItem(SemaRef, SimpleRefExpr, ELoc, ERange);
+ if (Res.second)
+ // It will be analyzed later.
+ PrivateRefs.push_back(RefExpr);
+ ValueDecl *D = Res.first;
+ if (!D)
continue;
+
+ const DSAStackTy::DSAVarData DVar =
+ DSAStack->getTopDSA(D, /*FromParent=*/false);
+ PrivateRefs.push_back(DVar.PrivateCopy ? DVar.PrivateCopy
+ : SimpleRefExpr);
+ }
+ Clause->setPrivateRefs(PrivateRefs);
+ };
+
+ auto FinalizeAllocators = [&](OMPUsesAllocatorsClause *Clause) {
+ for (unsigned I = 0, E = Clause->getNumberOfAllocators(); I < E; ++I) {
+ OMPUsesAllocatorsClause::Data D = Clause->getAllocatorData(I);
+ auto *DRE = dyn_cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ ValueDecl *VD = DRE->getDecl();
+ if (!VD || !isa<VarDecl>(VD))
+ continue;
+ DSAStackTy::DSAVarData DVar =
+ DSAStack->getTopDSA(VD, /*FromParent=*/false);
+ // OpenMP [2.12.5, target Construct]
+ // Memory allocators that appear in a uses_allocators clause cannot
+ // appear in other data-sharing attribute clauses or data-mapping
+ // attribute clauses in the same construct.
+ Expr *MapExpr = nullptr;
+ if (DVar.RefExpr ||
+ DSAStack->checkMappableExprComponentListsForDecl(
+ VD, /*CurrentRegionOnly=*/true,
+ [VD, &MapExpr](
+ OMPClauseMappableExprCommon::MappableExprComponentListRef
+ MapExprComponents,
+ OpenMPClauseKind C) {
+ auto MI = MapExprComponents.rbegin();
+ auto ME = MapExprComponents.rend();
+ if (MI != ME &&
+ MI->getAssociatedDeclaration()->getCanonicalDecl() ==
+ VD->getCanonicalDecl()) {
+ MapExpr = MI->getAssociatedExpression();
+ return true;
+ }
+ return false;
+ })) {
+ Diag(D.Allocator->getExprLoc(), diag::err_omp_allocator_used_in_clauses)
+ << D.Allocator->getSourceRange();
+ if (DVar.RefExpr)
+ reportOriginalDsa(SemaRef, DSAStack, VD, DVar);
+ else
+ Diag(MapExpr->getExprLoc(), diag::note_used_here)
+ << MapExpr->getSourceRange();
+ }
+ }
+ };
+
+ if (const auto *D = dyn_cast_or_null<OMPExecutableDirective>(CurDirective)) {
+ for (OMPClause *C : D->clauses()) {
+ if (auto *Clause = dyn_cast<OMPLastprivateClause>(C)) {
+ FinalizeLastprivate(Clause);
+ } else if (auto *Clause = dyn_cast<OMPNontemporalClause>(C)) {
+ FinalizeNontemporal(Clause);
+ } else if (auto *Clause = dyn_cast<OMPUsesAllocatorsClause>(C)) {
+ FinalizeAllocators(Clause);
}
}
// Check allocate clauses.
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index e5ea02a919f4..b052afede2cd 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -5508,50 +5508,31 @@ bool Sema::CheckTemplateArgumentList(
}
// Check whether we have a default argument.
- TemplateArgumentLoc Arg;
+ bool HasDefaultArg;
// Retrieve the default template argument from the template
// parameter. For each kind of template parameter, we substitute the
// template arguments provided thus far and any "outer" template arguments
// (when the template parameter was part of a nested template) into
// the default argument.
- if (TemplateTypeParmDecl *TTP = dyn_cast<TemplateTypeParmDecl>(*Param)) {
- if (!hasReachableDefaultArgument(TTP))
- return diagnoseMissingArgument(*this, TemplateLoc, Template, TTP,
+ TemplateArgumentLoc Arg = SubstDefaultTemplateArgumentIfAvailable(
+ Template, TemplateLoc, RAngleLoc, *Param, SugaredConverted,
+ CanonicalConverted, HasDefaultArg);
+
+ if (Arg.getArgument().isNull()) {
+ if (!HasDefaultArg) {
+ if (TemplateTypeParmDecl *TTP = dyn_cast<TemplateTypeParmDecl>(*Param))
+ return diagnoseMissingArgument(*this, TemplateLoc, Template, TTP,
+ NewArgs);
+ if (NonTypeTemplateParmDecl *NTTP =
+ dyn_cast<NonTypeTemplateParmDecl>(*Param))
+ return diagnoseMissingArgument(*this, TemplateLoc, Template, NTTP,
+ NewArgs);
+ return diagnoseMissingArgument(*this, TemplateLoc, Template,
+ cast<TemplateTemplateParmDecl>(*Param),
NewArgs);
-
- if (SubstDefaultTemplateArgument(*this, Template, TemplateLoc, RAngleLoc,
- TTP, SugaredConverted,
- CanonicalConverted, Arg))
- return true;
- } else if (NonTypeTemplateParmDecl *NTTP
- = dyn_cast<NonTypeTemplateParmDecl>(*Param)) {
- if (!hasReachableDefaultArgument(NTTP))
- return diagnoseMissingArgument(*this, TemplateLoc, Template, NTTP,
- NewArgs);
-
- if (SubstDefaultTemplateArgument(*this, Template, TemplateLoc, RAngleLoc,
- NTTP, SugaredConverted,
- CanonicalConverted, Arg))
- return true;
- } else {
- TemplateTemplateParmDecl *TempParm
- = cast<TemplateTemplateParmDecl>(*Param);
-
- if (!hasReachableDefaultArgument(TempParm))
- return diagnoseMissingArgument(*this, TemplateLoc, Template, TempParm,
- NewArgs);
-
- NestedNameSpecifierLoc QualifierLoc;
- TemplateName Name = SubstDefaultTemplateArgument(
- *this, Template, TemplateLoc, RAngleLoc, TempParm, SugaredConverted,
- CanonicalConverted, QualifierLoc);
- if (Name.isNull())
- return true;
-
- Arg = TemplateArgumentLoc(
- Context, TemplateArgument(Name), QualifierLoc,
- TempParm->getDefaultArgument().getTemplateNameLoc());
+ }
+ return true;
}
// Introduce an instantiation record that describes where we are using
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index b50648d5752c..7d83b86a0073 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -5505,8 +5505,11 @@ static TemplateDeductionResult CheckDeductionConsistency(
Sema::ArgumentPackSubstitutionIndexRAII PackIndex(
S, ArgIdx != -1 ? ::getPackIndexForParam(S, FTD, MLTAL, ArgIdx) : -1);
bool IsIncompleteSubstitution = false;
- QualType InstP = S.SubstType(P, MLTAL, FTD->getLocation(), FTD->getDeclName(),
- &IsIncompleteSubstitution);
+ // FIXME: A substitution can be incomplete on a non-structural part of the
+ // type. Use the canonical type for now, until the TemplateInstantiator can
+ // deal with that.
+ QualType InstP = S.SubstType(P.getCanonicalType(), MLTAL, FTD->getLocation(),
+ FTD->getDeclName(), &IsIncompleteSubstitution);
if (InstP.isNull() && !IsIncompleteSubstitution)
return TemplateDeductionResult::SubstitutionFailure;
if (!CheckConsistency)
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index c42cc250bb90..55f38743e276 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -1673,6 +1673,10 @@ namespace {
}
ExprResult TransformLambdaExpr(LambdaExpr *E) {
+ // Do not rebuild lambdas to avoid creating a new type.
+ // Lambdas have already been processed inside their eval contexts.
+ if (SemaRef.RebuildingImmediateInvocation)
+ return E;
LocalInstantiationScope Scope(SemaRef, /*CombineWithOuterScope=*/true);
Sema::ConstraintEvalRAII<TemplateInstantiator> RAII(*this);
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index e97a7d768b93..e055c87e7838 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -6294,9 +6294,13 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
if (!SubstRecord) {
// T can be a dependent TemplateSpecializationType when performing a
- // substitution for building a deduction guide.
- assert(CodeSynthesisContexts.back().Kind ==
- CodeSynthesisContext::BuildingDeductionGuides);
+ // substitution for building a deduction guide or for template
+ // argument deduction in the process of rebuilding immediate
+ // expressions. (Because the default argument that involves a lambda
+ // is untransformed and thus could be dependent at this point.)
+ assert(SemaRef.RebuildingImmediateInvocation ||
+ CodeSynthesisContexts.back().Kind ==
+ CodeSynthesisContext::BuildingDeductionGuides);
// Return a nullptr as a sentinel value, we handle it properly in
// the TemplateInstantiator::TransformInjectedClassNameType
// override, which we transform it to a TemplateSpecializationType.
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 4fae6ff02ea9..7efcc81e194d 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1382,7 +1382,7 @@ bool ASTReader::ReadVisibleDeclContextStorage(ModuleFile &M,
void ASTReader::Error(StringRef Msg) const {
Error(diag::err_fe_pch_malformed, Msg);
- if (PP.getLangOpts().Modules && !Diags.isDiagnosticInFlight() &&
+ if (PP.getLangOpts().Modules &&
!PP.getHeaderSearchInfo().getModuleCachePath().empty()) {
Diag(diag::note_module_cache_path)
<< PP.getHeaderSearchInfo().getModuleCachePath();
@@ -1391,10 +1391,7 @@ void ASTReader::Error(StringRef Msg) const {
void ASTReader::Error(unsigned DiagID, StringRef Arg1, StringRef Arg2,
StringRef Arg3) const {
- if (Diags.isDiagnosticInFlight())
- Diags.SetDelayedDiagnostic(DiagID, Arg1, Arg2, Arg3);
- else
- Diag(DiagID) << Arg1 << Arg2 << Arg3;
+ Diag(DiagID) << Arg1 << Arg2 << Arg3;
}
void ASTReader::Error(llvm::Error &&Err) const {
@@ -2713,7 +2710,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
// For an overridden file, there is nothing to validate.
if (!Overridden && FileChange.Kind != Change::None) {
- if (Complain && !Diags.isDiagnosticInFlight()) {
+ if (Complain) {
// Build a list of the PCH imports that got us here (in reverse).
SmallVector<ModuleFile *, 4> ImportStack(1, &F);
while (!ImportStack.back()->ImportedBy.empty())
@@ -3689,10 +3686,8 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries,
SLocSpaceSize);
if (!F.SLocEntryBaseID) {
- if (!Diags.isDiagnosticInFlight()) {
- Diags.Report(SourceLocation(), diag::remark_sloc_usage);
- SourceMgr.noteSLocAddressSpaceUsage(Diags);
- }
+ Diags.Report(SourceLocation(), diag::remark_sloc_usage);
+ SourceMgr.noteSLocAddressSpaceUsage(Diags);
return llvm::createStringError(std::errc::invalid_argument,
"ran out of source locations");
}
diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 8bb7880a3cc2..0a823a1126ce 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -1835,6 +1835,46 @@ StreamChecker::ensureStreamNonNull(SVal StreamVal, const Expr *StreamE,
return StateNotNull;
}
+namespace {
+class StreamClosedVisitor final : public BugReporterVisitor {
+ const SymbolRef StreamSym;
+ bool Satisfied = false;
+
+public:
+ explicit StreamClosedVisitor(SymbolRef StreamSym) : StreamSym(StreamSym) {}
+
+ static void *getTag() {
+ static int Tag = 0;
+ return &Tag;
+ }
+
+ void Profile(llvm::FoldingSetNodeID &ID) const override {
+ ID.AddPointer(getTag());
+ ID.AddPointer(StreamSym);
+ }
+
+ PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
+ BugReporterContext &BRC,
+ PathSensitiveBugReport &BR) override {
+ if (Satisfied)
+ return nullptr;
+ const StreamState *PredSS =
+ N->getFirstPred()->getState()->get<StreamMap>(StreamSym);
+ if (PredSS && PredSS->isClosed())
+ return nullptr;
+
+ const Stmt *S = N->getStmtForDiagnostics();
+ if (!S)
+ return nullptr;
+ Satisfied = true;
+ PathDiagnosticLocation Pos(S, BRC.getSourceManager(),
+ N->getLocationContext());
+ llvm::StringLiteral Msg = "Stream is closed here";
+ return std::make_shared<PathDiagnosticEventPiece>(Pos, Msg);
+ }
+};
+} // namespace
+
ProgramStateRef StreamChecker::ensureStreamOpened(SVal StreamVal,
CheckerContext &C,
ProgramStateRef State) const {
@@ -1849,11 +1889,11 @@ ProgramStateRef StreamChecker::ensureStreamOpened(SVal StreamVal,
if (SS->isClosed()) {
// Using a stream pointer after 'fclose' causes undefined behavior
// according to cppreference.com .
- ExplodedNode *N = C.generateErrorNode();
- if (N) {
- C.emitReport(std::make_unique<PathSensitiveBugReport>(
- BT_UseAfterClose,
- "Stream might be already closed. Causes undefined behaviour.", N));
+ if (ExplodedNode *N = C.generateErrorNode()) {
+ auto R = std::make_unique<PathSensitiveBugReport>(
+ BT_UseAfterClose, "Use of a stream that might be already closed", N);
+ R->addVisitor<StreamClosedVisitor>(Sym);
+ C.emitReport(std::move(R));
return nullptr;
}
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
index be07cf51eefb..394cb26f03cf 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
@@ -12,6 +12,7 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/ExprCXX.h"
+#include "clang/AST/ExprObjC.h"
#include <optional>
namespace clang {
@@ -35,6 +36,12 @@ bool tryToFindPtrOrigin(
break;
}
}
+ if (auto *POE = dyn_cast<PseudoObjectExpr>(E)) {
+ if (auto *RF = POE->getResultExpr()) {
+ E = RF;
+ continue;
+ }
+ }
if (auto *tempExpr = dyn_cast<ParenExpr>(E)) {
E = tempExpr->getSubExpr();
continue;
@@ -88,7 +95,7 @@ bool tryToFindPtrOrigin(
continue;
}
- if (isReturnValueRefCounted(callee))
+ if (isRefType(callee->getReturnType()))
return callback(E, true);
if (isSingleton(callee))
@@ -100,6 +107,12 @@ bool tryToFindPtrOrigin(
}
}
}
+ if (auto *ObjCMsgExpr = dyn_cast<ObjCMessageExpr>(E)) {
+ if (auto *Method = ObjCMsgExpr->getMethodDecl()) {
+ if (isRefType(Method->getReturnType()))
+ return callback(E, true);
+ }
+ }
if (auto *unaryOp = dyn_cast<UnaryOperator>(E)) {
// FIXME: Currently accepts ANY unary operator. Is it OK?
E = unaryOp->getSubExpr();
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
index f48b2fd9dca7..9da3e54e4543 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
@@ -123,9 +123,8 @@ bool isCtorOfRefCounted(const clang::FunctionDecl *F) {
|| FunctionName == "Identifier";
}
-bool isReturnValueRefCounted(const clang::FunctionDecl *F) {
- assert(F);
- QualType type = F->getReturnType();
+bool isRefType(const clang::QualType T) {
+ QualType type = T;
while (!type.isNull()) {
if (auto *elaboratedT = type->getAs<ElaboratedType>()) {
type = elaboratedT->desugar();
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
index 2932e62ad06e..e2d0342bebd5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
@@ -62,8 +62,8 @@ bool isRefType(const std::string &Name);
/// false if not.
bool isCtorOfRefCounted(const clang::FunctionDecl *F);
-/// \returns true if \p F returns a ref-counted object, false if not.
-bool isReturnValueRefCounted(const clang::FunctionDecl *F);
+/// \returns true if \p T is RefPtr, Ref, or its variant, false if not.
+bool isRefType(const clang::QualType T);
/// \returns true if \p M is getter of a ref-counted class, false if not.
std::optional<bool> isGetterOfRefCounted(const clang::CXXMethodDecl* Method);
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp
index ecba5f9aa23e..e80246f49a31 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp
@@ -72,7 +72,7 @@ public:
if (name == "ensureOnMainThread" || name == "ensureOnMainRunLoop") {
for (unsigned i = 0; i < CE->getNumArgs(); ++i) {
auto *Arg = CE->getArg(i);
- if (VisitLabmdaArgument(Arg))
+ if (VisitLambdaArgument(Arg))
return true;
}
}
@@ -80,17 +80,24 @@ public:
return false;
}
- bool VisitLabmdaArgument(const Expr *E) {
+ bool VisitLambdaArgument(const Expr *E) {
E = E->IgnoreParenCasts();
if (auto *TempE = dyn_cast<CXXBindTemporaryExpr>(E))
E = TempE->getSubExpr();
+ E = E->IgnoreParenCasts();
+ if (auto *Ref = dyn_cast<DeclRefExpr>(E)) {
+ if (auto *VD = dyn_cast_or_null<VarDecl>(Ref->getDecl()))
+ return VisitLambdaArgument(VD->getInit());
+ return false;
+ }
+ if (auto *Lambda = dyn_cast<LambdaExpr>(E)) {
+ if (VisitBody(Lambda->getBody()))
+ return true;
+ }
if (auto *ConstructE = dyn_cast<CXXConstructExpr>(E)) {
for (unsigned i = 0; i < ConstructE->getNumArgs(); ++i) {
- auto *Arg = ConstructE->getArg(i);
- if (auto *Lambda = dyn_cast<LambdaExpr>(Arg)) {
- if (VisitBody(Lambda->getBody()))
- return true;
- }
+ if (VisitLambdaArgument(ConstructE->getArg(i)))
+ return true;
}
}
return false;
diff --git a/clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp b/clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp
index 01527addb529..33c60ea8ca64 100644
--- a/clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp
+++ b/clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp
@@ -119,6 +119,11 @@ public:
ensureOnMainThread([this] {
delete static_cast<const T*>(this);
});
+ } else if constexpr (destructionThread == DestructionThread::MainRunLoop) {
+ auto deleteThis = [this] {
+ delete static_cast<const T*>(this);
+ };
+ ensureOnMainThread(deleteThis);
}
}
@@ -230,3 +235,16 @@ public:
private:
FancyRefCountedClass4();
};
+
+class FancyRefCountedClass5 final : public ThreadSafeRefCounted<FancyRefCountedClass5, DestructionThread::MainRunLoop> {
+public:
+ static Ref<FancyRefCountedClass5> create()
+ {
+ return adoptRef(*new FancyRefCountedClass5());
+ }
+
+ virtual ~FancyRefCountedClass5();
+
+private:
+ FancyRefCountedClass5();
+};
diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm
index db0c5b19eec5..9ad1880e9d11 100644
--- a/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm
+++ b/clang/test/Analysis/Checkers/WebKit/uncounted-obj-arg.mm
@@ -24,3 +24,20 @@
}
@end
+
+class RefCountedObject {
+public:
+ void ref() const;
+ void deref() const;
+ Ref<RefCountedObject> copy() const;
+};
+
+@interface WrapperObj : NSObject
+
+- (Ref<RefCountedObject>)_protectedWebExtensionControllerConfiguration;
+
+@end
+
+static void foo(WrapperObj *configuration) {
+ configuration._protectedWebExtensionControllerConfiguration->copy();
+}
diff --git a/clang/test/Analysis/stream-error.c b/clang/test/Analysis/stream-error.c
index 3f791d133464..9de56c082e82 100644
--- a/clang/test/Analysis/stream-error.c
+++ b/clang/test/Analysis/stream-error.c
@@ -96,7 +96,7 @@ void error_fread(void) {
}
}
fclose(F);
- Ret = fread(Buf, 1, 10, F); // expected-warning {{Stream might be already closed}}
+ Ret = fread(Buf, 1, 10, F); // expected-warning {{Use of a stream that might be already closed}}
}
void error_fwrite(void) {
@@ -113,7 +113,7 @@ void error_fwrite(void) {
fwrite(0, 1, 10, F); // expected-warning {{might be 'indeterminate'}}
}
fclose(F);
- Ret = fwrite(0, 1, 10, F); // expected-warning {{Stream might be already closed}}
+ Ret = fwrite(0, 1, 10, F); // expected-warning {{Use of a stream that might be already closed}}
}
void error_fgetc(void) {
@@ -135,7 +135,7 @@ void error_fgetc(void) {
}
}
fclose(F);
- fgetc(F); // expected-warning {{Stream might be already closed}}
+ fgetc(F); // expected-warning {{Use of a stream that might be already closed}}
}
void error_fgets(void) {
@@ -158,7 +158,7 @@ void error_fgets(void) {
}
}
fclose(F);
- fgets(Buf, sizeof(Buf), F); // expected-warning {{Stream might be already closed}}
+ fgets(Buf, sizeof(Buf), F); // expected-warning {{Use of a stream that might be already closed}}
}
void error_fputc(int fd) {
@@ -176,7 +176,7 @@ void error_fputc(int fd) {
fputc('Y', F); // no-warning
}
fclose(F);
- fputc('A', F); // expected-warning {{Stream might be already closed}}
+ fputc('A', F); // expected-warning {{Use of a stream that might be already closed}}
}
void error_fputs(void) {
@@ -194,7 +194,7 @@ void error_fputs(void) {
fputs("QWD", F); // expected-warning {{might be 'indeterminate'}}
}
fclose(F);
- fputs("ABC", F); // expected-warning {{Stream might be already closed}}
+ fputs("ABC", F); // expected-warning {{Use of a stream that might be already closed}}
}
void error_fprintf(void) {
@@ -211,7 +211,7 @@ void error_fprintf(void) {
fprintf(F, "bbb"); // expected-warning {{might be 'indeterminate'}}
}
fclose(F);
- fprintf(F, "ccc"); // expected-warning {{Stream might be already closed}}
+ fprintf(F, "ccc"); // expected-warning {{Use of a stream that might be already closed}}
}
void error_fscanf(int *A) {
@@ -236,7 +236,7 @@ void error_fscanf(int *A) {
}
}
fclose(F);
- fscanf(F, "ccc"); // expected-warning {{Stream might be already closed}}
+ fscanf(F, "ccc"); // expected-warning {{Use of a stream that might be already closed}}
}
void error_ungetc(int TestIndeterminate) {
@@ -256,7 +256,7 @@ void error_ungetc(int TestIndeterminate) {
ungetc('X', F); // expected-warning {{might be 'indeterminate'}}
}
fclose(F);
- ungetc('A', F); // expected-warning {{Stream might be already closed}}
+ ungetc('A', F); // expected-warning {{Use of a stream that might be already closed}}
}
void error_getdelim(char *P, size_t Sz) {
@@ -278,7 +278,7 @@ void error_getdelim(char *P, size_t Sz) {
}
}
fclose(F);
- getdelim(&P, &Sz, '\n', F); // expected-warning {{Stream might be already closed}}
+ getdelim(&P, &Sz, '\n', F); // expected-warning {{Use of a stream that might be already closed}}
}
void error_getline(char *P, size_t Sz) {
@@ -300,7 +300,7 @@ void error_getline(char *P, size_t Sz) {
}
}
fclose(F);
- getline(&P, &Sz, F); // expected-warning {{Stream might be already closed}}
+ getline(&P, &Sz, F); // expected-warning {{Use of a stream that might be already closed}}
}
void write_after_eof_is_allowed(void) {
diff --git a/clang/test/Analysis/stream-note.c b/clang/test/Analysis/stream-note.c
index 3aef707d5005..2b5d1edb2814 100644
--- a/clang/test/Analysis/stream-note.c
+++ b/clang/test/Analysis/stream-note.c
@@ -264,3 +264,12 @@ void error_fseek_read_eof(void) {
fgetc(F); // no warning
fclose(F);
}
+
+void check_note_at_use_after_close(void) {
+ FILE *F = tmpfile();
+ if (!F) // expected-note {{'F' is non-null}} expected-note {{Taking false branch}}
+ return;
+ fclose(F); // expected-note {{Stream is closed here}}
+ rewind(F); // expected-warning {{Use of a stream that might be already closed}}
+ // expected-note@-1 {{Use of a stream that might be already closed}}
+}
diff --git a/clang/test/Analysis/stream.c b/clang/test/Analysis/stream.c
index b9a5b1ba8cd4..758b40cca493 100644
--- a/clang/test/Analysis/stream.c
+++ b/clang/test/Analysis/stream.c
@@ -185,7 +185,7 @@ void f_double_close(void) {
if (!p)
return;
fclose(p);
- fclose(p); // expected-warning {{Stream might be already closed}}
+ fclose(p); // expected-warning {{Use of a stream that might be already closed}}
}
void f_double_close_alias(void) {
@@ -194,7 +194,7 @@ void f_double_close_alias(void) {
return;
FILE *p2 = p1;
fclose(p1);
- fclose(p2); // expected-warning {{Stream might be already closed}}
+ fclose(p2); // expected-warning {{Use of a stream that might be already closed}}
}
void f_use_after_close(void) {
@@ -202,7 +202,7 @@ void f_use_after_close(void) {
if (!p)
return;
fclose(p);
- clearerr(p); // expected-warning {{Stream might be already closed}}
+ clearerr(p); // expected-warning {{Use of a stream that might be already closed}}
}
void f_open_after_close(void) {
@@ -266,7 +266,7 @@ void check_freopen_2(void) {
if (f2) {
// Check if f1 and f2 point to the same stream.
fclose(f1);
- fclose(f2); // expected-warning {{Stream might be already closed.}}
+ fclose(f2); // expected-warning {{Use of a stream that might be already closed}}
} else {
// Reopen failed.
// f1 is non-NULL but points to a possibly invalid stream.
@@ -370,7 +370,7 @@ void fflush_after_fclose(void) {
if ((Ret = fflush(F)) != 0)
clang_analyzer_eval(Ret == EOF); // expected-warning {{TRUE}}
fclose(F);
- fflush(F); // expected-warning {{Stream might be already closed}}
+ fflush(F); // expected-warning {{Use of a stream that might be already closed}}
}
void fflush_on_open_failed_stream(void) {
diff --git a/clang/test/C/C23/n3030.c b/clang/test/C/C23/n3030.c
new file mode 100644
index 000000000000..9e1405a2e0e1
--- /dev/null
+++ b/clang/test/C/C23/n3030.c
@@ -0,0 +1,93 @@
+// RUN: %clang_cc1 -verify -triple x86_64-unknown-linux-gnu -fsyntax-only -std=c23 %s -pedantic -Wall
+
+#include <limits.h>
+
+enum us : unsigned short {
+ us_max = USHRT_MAX,
+ us_violation, // expected-error {{enumerator value 65536 is not representable in the underlying type 'unsigned short'}}
+ us_violation_2 = us_max + 1, // expected-error {{enumerator value is not representable in the underlying type 'unsigned short'}}
+ us_wrap_around_to_zero = (unsigned short)(USHRT_MAX + 1) /* Okay: conversion
+ done in constant expression before conversion to
+ underlying type: unsigned semantics okay. */
+};
+
+enum ui : unsigned int {
+ ui_max = UINT_MAX,
+ ui_violation, // expected-error {{enumerator value 4294967296 is not representable in the underlying type 'unsigned int'}}
+ ui_no_violation = ui_max + 1,
+ ui_wrap_around_to_zero = (unsigned int)(UINT_MAX + 1)
+};
+
+enum E1 : short;
+enum E2 : short; // expected-note {{previous}}
+enum E3; // expected-warning {{ISO C forbids forward references to 'enum' types}}
+enum E4 : unsigned long long;
+
+enum E1 : short { m11, m12 };
+enum E1 x = m11;
+
+enum E2 : long { // expected-error {{enumeration redeclared with different underlying type 'long' (was 'short')}}
+ m21,
+ m22
+};
+
+enum E3 { // expected-note {{definition of 'enum E3' is not complete until the closing '}'}}
+ // expected-note@-1 {{previous}}
+ m31,
+ m32,
+ m33 = sizeof(enum E3) // expected-error {{invalid application of 'sizeof' to an incomplete type 'enum E3'}}
+};
+enum E3 : int; // expected-error {{enumeration previously declared with nonfixed underlying type}}
+
+enum E4 : unsigned long long {
+ m40 = sizeof(enum E4),
+ m41 = ULLONG_MAX,
+ m42 // expected-error {{enumerator value 18446744073709551616 is not representable in the underlying type 'unsigned long long'}}
+};
+
+enum E5 y; // expected-error {{tentative definition has type 'enum E5' that is never completed}}
+ // expected-warning@-1 {{ISO C forbids forward references to 'enum' types}}
+ // expected-note@-2 {{forward declaration of 'enum E5'}}
+enum E6 : long int z; // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}}
+enum E7 : long int = 0; // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}}
+ // expected-error@-1 {{expected identifier or '('}}
+
+enum underlying : unsigned char { b0 };
+
+constexpr int a = _Generic(b0, int: 2, unsigned char: 1, default: 0);
+constexpr int b = _Generic((enum underlying)b0, int: 2, unsigned char: 1, default: 0);
+static_assert(a == 1);
+static_assert(b == 1);
+
+void f1(enum a : long b); // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}}
+ // expected-warning@-1 {{declaration of 'enum a' will not be visible outside of this function}}
+void f2(enum c : long{x} d); // expected-warning {{declaration of 'enum c' will not be visible outside of this function}}
+enum e : int f3(); // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}}
+
+typedef enum t u; // expected-warning {{ISO C forbids forward references to 'enum' types}}
+typedef enum v : short W; // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}}
+typedef enum q : short { s } R;
+
+struct s1 {
+ int x;
+ enum e:int : 1; // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}}
+ int y;
+};
+
+enum forward; // expected-warning {{ISO C forbids forward references to 'enum' types}}
+extern enum forward fwd_val0; /* Constraint violation: incomplete type */
+extern enum forward *fwd_ptr0; // expected-note {{previous}}
+extern int
+ *fwd_ptr0; // expected-error {{redeclaration of 'fwd_ptr0' with a different type: 'int *' vs 'enum forward *'}}
+
+enum forward1 : int;
+extern enum forward1 fwd_val1;
+extern int fwd_val1;
+extern enum forward1 *fwd_ptr1;
+extern int *fwd_ptr1;
+
+enum ee1 : short;
+enum e : short f = 0; // expected-error {{non-defining declaration of enumeration with a fixed underlying type is only permitted as a standalone declaration; missing list of enumerators?}}
+enum g : short { yyy } h = yyy;
+
+enum ee2 : typeof ((enum ee3 : short { A })0, (short)0);
diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
index 299a35723b59..2d84b0d73053 100644
--- a/clang/test/CMakeLists.txt
+++ b/clang/test/CMakeLists.txt
@@ -72,7 +72,6 @@ list(APPEND CLANG_TEST_DEPS
clang-tblgen
clang-offload-bundler
clang-import-test
- clang-rename
clang-refactor
clang-diff
clang-installapi
diff --git a/clang/test/CodeGen/X86/avx512copy-builtins.c b/clang/test/CodeGen/X86/avx512copy-builtins.c
new file mode 100644
index 000000000000..06f7507bde53
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx512copy-builtins.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128i test_mm_move_epi32(__m128i A) {
+ // CHECK-LABEL: test_mm_move_epi32
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ return _mm_move_epi32(A);
+}
+
+__m128i test_mm_move_epi16(__m128i A) {
+ // CHECK-LABEL: test_mm_move_epi16
+ // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ return _mm_move_epi16(A);
+}
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 8fb52992c0fe..7e094a52653e 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -570,6 +570,43 @@ void test_builtin_elementwise_log2(float f1, float f2, double d1, double d2,
vf2 = __builtin_elementwise_log2(vf1);
}
+void test_builtin_elementwise_popcount(si8 vi1, si8 vi2,
+ long long int i1, long long int i2, short si,
+ _BitInt(31) bi1, _BitInt(31) bi2) {
+
+
+ // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
+ // CHECK-NEXT: call i64 @llvm.ctpop.i64(i64 [[I1]])
+ i2 = __builtin_elementwise_popcount(i1);
+
+ // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
+ // CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[VI1]])
+ vi2 = __builtin_elementwise_popcount(vi1);
+
+ // CHECK: [[CVI2:%.+]] = load <8 x i16>, ptr %cvi2, align 16
+ // CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[CVI2]])
+ const si8 cvi2 = vi2;
+ vi2 = __builtin_elementwise_popcount(cvi2);
+
+ // CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
+ // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
+ // CHECK-NEXT: call i31 @llvm.ctpop.i31(i31 [[LOADEDV]])
+ bi2 = __builtin_elementwise_popcount(bi1);
+
+ // CHECK: [[IA1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
+ // CHECK-NEXT: call i32 @llvm.ctpop.i32(i32 [[IA1]])
+ b = __builtin_elementwise_popcount(int_as_one);
+
+ // CHECK: call i32 @llvm.ctpop.i32(i32 -10)
+ b = __builtin_elementwise_popcount(-10);
+
+ // CHECK: [[SI:%.+]] = load i16, ptr %si.addr, align 2
+ // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
+ // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.ctpop.i32(i32 [[SI_EXT]])
+ // CHECK-NEXT: = trunc i32 [[RES]] to i16
+ si = __builtin_elementwise_popcount(si);
+}
+
void test_builtin_elementwise_pow(float f1, float f2, double d1, double d2,
float4 vf1, float4 vf2) {
diff --git a/clang/test/CodeGenCoroutines/coro-await-elidable.cpp b/clang/test/CodeGenCoroutines/coro-await-elidable.cpp
index 8512995dfad4..deb19b4a5004 100644
--- a/clang/test/CodeGenCoroutines/coro-await-elidable.cpp
+++ b/clang/test/CodeGenCoroutines/coro-await-elidable.cpp
@@ -84,4 +84,44 @@ Task<int> nonelidable() {
co_return 1;
}
+// CHECK-LABEL: define{{.*}} @_Z8addTasksO4TaskIiES1_{{.*}} {
+Task<int> addTasks([[clang::coro_await_elidable_argument]] Task<int> &&t1, Task<int> &&t2) {
+ int i1 = co_await t1;
+ int i2 = co_await t2;
+ co_return i1 + i2;
+}
+
+// CHECK-LABEL: define{{.*}} @_Z10returnSamei{{.*}} {
+Task<int> returnSame(int i) {
+ co_return i;
+}
+
+// CHECK-LABEL: define{{.*}} @_Z21elidableWithMustAwaitv{{.*}} {
+Task<int> elidableWithMustAwait() {
+ // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 2) #[[ELIDE_SAFE]]
+ // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 3){{$}}
+ co_return co_await addTasks(returnSame(2), returnSame(3));
+}
+
+template <typename... Args>
+Task<int> sumAll([[clang::coro_await_elidable_argument]] Args && ... tasks);
+
+// CHECK-LABEL: define{{.*}} @_Z16elidableWithPackv{{.*}} {
+Task<int> elidableWithPack() {
+ // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 1){{$}}
+ // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 2) #[[ELIDE_SAFE]]
+ // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 3) #[[ELIDE_SAFE]]
+ auto t = returnSame(1);
+ co_return co_await sumAll(t, returnSame(2), returnSame(3));
+}
+
+
+// CHECK-LABEL: define{{.*}} @_Z25elidableWithPackRecursivev{{.*}} {
+Task<int> elidableWithPackRecursive() {
+ // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 1) #[[ELIDE_SAFE]]
+ // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 2){{$}}
+ // CHECK: call void @_Z10returnSamei(ptr {{.*}}, i32 noundef 3) #[[ELIDE_SAFE]]
+ co_return co_await sumAll(addTasks(returnSame(1), returnSame(2)), returnSame(3));
+}
+
// CHECK: attributes #[[ELIDE_SAFE]] = { coro_elide_safe }
diff --git a/clang/test/CodeGenHLSL/builtins/countbits.hlsl b/clang/test/CodeGenHLSL/builtins/countbits.hlsl
new file mode 100644
index 000000000000..8dfe977bfae6
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/countbits.hlsl
@@ -0,0 +1,80 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
+
+#ifdef __HLSL_ENABLE_16_BIT
+// CHECK-LABEL: test_countbits_ushort
+// CHECK: call i16 @llvm.ctpop.i16
+uint16_t test_countbits_ushort(uint16_t p0)
+{
+ return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_ushort2
+// CHECK: call <2 x i16> @llvm.ctpop.v2i16
+uint16_t2 test_countbits_ushort2(uint16_t2 p0)
+{
+ return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_ushort3
+// CHECK: call <3 x i16> @llvm.ctpop.v3i16
+uint16_t3 test_countbits_ushort3(uint16_t3 p0)
+{
+ return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_ushort4
+// CHECK: call <4 x i16> @llvm.ctpop.v4i16
+uint16_t4 test_countbits_ushort4(uint16_t4 p0)
+{
+ return countbits(p0);
+}
+#endif
+
+// CHECK-LABEL: test_countbits_uint
+// CHECK: call i32 @llvm.ctpop.i32
+int test_countbits_uint(uint p0)
+{
+ return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_uint2
+// CHECK: call <2 x i32> @llvm.ctpop.v2i32
+uint2 test_countbits_uint2(uint2 p0)
+{
+ return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_uint3
+// CHECK: call <3 x i32> @llvm.ctpop.v3i32
+uint3 test_countbits_uint3(uint3 p0)
+{
+ return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_uint4
+// CHECK: call <4 x i32> @llvm.ctpop.v4i32
+uint4 test_countbits_uint4(uint4 p0)
+{
+ return countbits(p0);
+}
+
+// CHECK-LABEL: test_countbits_long
+// CHECK: call i64 @llvm.ctpop.i64
+uint64_t test_countbits_long(uint64_t p0)
+{
+ return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_long2
+// CHECK: call <2 x i64> @llvm.ctpop.v2i64
+uint64_t2 test_countbits_long2(uint64_t2 p0)
+{
+ return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_long3
+// CHECK: call <3 x i64> @llvm.ctpop.v3i64
+uint64_t3 test_countbits_long3(uint64_t3 p0)
+{
+ return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_long4
+// CHECK: call <4 x i64> @llvm.ctpop.v4i64
+uint64_t4 test_countbits_long4(uint64_t4 p0)
+{
+ return countbits(p0);
+}
diff --git a/clang/test/CodeGenHLSL/wavesize.hlsl b/clang/test/CodeGenHLSL/wavesize.hlsl
new file mode 100644
index 000000000000..fcb817ce0d06
--- /dev/null
+++ b/clang/test/CodeGenHLSL/wavesize.hlsl
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.6-compute %s -DSM66 -hlsl-entry foo \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.8-compute %s -DNO_PREFERR -hlsl-entry foo \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefix=NO_PREFERR
+
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.8-compute %s -hlsl-entry foo \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefix=CHECK-SM68
+
+
+// Make sure wavesize attribute get correct value for sm66 and sm68.
+// CHECK:define void @foo()
+// CHECK:"hlsl.wavesize"="8,0,0"
+
+// NO_PREFERR:define void @foo()
+// NO_PREFERR:"hlsl.wavesize"="8,128,0"
+
+// CHECK-SM68:define void @foo()
+// CHECK-SM68:"hlsl.wavesize"="8,128,64"
+
+[numthreads(16,8,1)]
+#ifdef SM66
+[WaveSize(8)]
+#elif NO_PREFERR
+[WaveSize(8, 128)]
+#else
+[WaveSize(8, 128, 64)]
+#endif
+void foo() {
+
+}
diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c
index ef6cbdded6a6..f6e2245dcdbc 100644
--- a/clang/test/Driver/openmp-offload-gpu.c
+++ b/clang/test/Driver/openmp-offload-gpu.c
@@ -377,4 +377,4 @@
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --offload-arch=sm_52 -nogpulibc -nogpuinc %s 2>&1 \
// RUN: | FileCheck --check-prefix=LIBC-GPU %s
-// LIBC-GPU: clang-linker-wrapper{{.*}}"--device-compiler=-nolibc"
+// LIBC-GPU-NOT: clang-linker-wrapper{{.*}}"--device-linker"
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index baa1816358b1..914f94c08a9f 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -60,6 +60,7 @@
// CHECK-NEXT: ConsumableSetOnRead (SubjectMatchRule_record)
// CHECK-NEXT: Convergent (SubjectMatchRule_function)
// CHECK-NEXT: CoroAwaitElidable (SubjectMatchRule_record)
+// CHECK-NEXT: CoroAwaitElidableArgument (SubjectMatchRule_variable_is_parameter)
// CHECK-NEXT: CoroDisableLifetimeBound (SubjectMatchRule_function)
// CHECK-NEXT: CoroLifetimeBound (SubjectMatchRule_record)
// CHECK-NEXT: CoroOnlyDestroyWhenComplete (SubjectMatchRule_record)
diff --git a/clang/test/Modules/pr108732.cppm b/clang/test/Modules/pr108732.cppm
new file mode 100644
index 000000000000..f3b495aa826c
--- /dev/null
+++ b/clang/test/Modules/pr108732.cppm
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -std=c++20 %s -ast-dump | FileCheck %s
+export module mod;
+
+extern "C++" {
+class C
+{
+public:
+bool foo() const {
+ return true;
+}
+};
+}
+
+// CHECK: foo {{.*}}implicit-inline
diff --git a/clang/test/PCH/race-condition.cpp b/clang/test/PCH/race-condition.cpp
new file mode 100644
index 000000000000..752b0cc3ff62
--- /dev/null
+++ b/clang/test/PCH/race-condition.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -fallow-pch-with-compiler-errors -std=c++20 -x c++-header -emit-pch %s -o %t -verify
+// RUN: %clang_cc1 -fallow-pch-with-compiler-errors -std=c++20 -include-pch %t %s -verify
+#ifndef HEADER_H
+#define HEADER_H
+
+#include "bad_include.h"
+// expected-error@6{{'bad_include.h' file not found}}
+
+template <bool, class = void> struct enable_if {};
+template <class T> struct enable_if<true, T> { typedef T type; };
+template <bool B, class T = void> using enable_if_t = typename enable_if<B, T>::type;
+
+template <typename> struct meta { static constexpr int value = 0; };
+template <> struct meta<int> { static constexpr int value = 1; };
+template <> struct meta<float> { static constexpr int value = 2; };
+
+namespace N {
+inline namespace inner {
+
+template <class T>
+constexpr enable_if_t<meta<T>::value == 0, void> midpoint(T) {}
+
+template <class U>
+constexpr enable_if_t<meta<U>::value == 1, void> midpoint(U) {}
+
+template <class F>
+constexpr enable_if_t<meta<F>::value == 2, void> midpoint(F) {}
+
+} // namespace inner
+} // namespace N
+
+#else
+
+// expected-error@27{{'N::midpoint' has different definitions in different modules; defined here first difference is 1st parameter with type 'F'}}
+// expected-error@24{{'N::midpoint' has different definitions in different modules; defined here first difference is 1st parameter with type 'U'}}
+// expected-note@21{{but in '' found 1st parameter with type 'T'}}
+int x = N::something;
+// expected-error@37{{no member named 'something' in namespace 'N'}}
+// expected-note@21{{but in '' found 1st parameter with type 'T'}}
+
+#endif
diff --git a/clang/test/Parser/cxx-bad-cast-diagnose-broken-template.cpp b/clang/test/Parser/cxx-bad-cast-diagnose-broken-template.cpp
new file mode 100644
index 000000000000..3500975d9369
--- /dev/null
+++ b/clang/test/Parser/cxx-bad-cast-diagnose-broken-template.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -fsyntax-only -verify %s
+
+template<typename>
+struct StringTrait {};
+
+template< int N >
+struct StringTrait< const char[ N ] > {
+ typedef char CharType;
+ static const MissingIntT length = N - 1; // expected-error {{unknown type name 'MissingIntT'}}
+};
+
+class String {
+public:
+ template <typename T>
+ String(T& str, typename StringTrait<T>::CharType = 0);
+};
+
+
+class Exception {
+public:
+ Exception(String const&);
+};
+
+void foo() {
+ throw Exception("some error"); // expected-error {{functional-style cast from 'const char[11]' to 'Exception' is not allowed}}
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 628274380ae5..1727be1d6286 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -505,6 +505,39 @@ void test_builtin_elementwise_log2(int i, float f, double d, float4 v, int3 iv,
// expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
}
+void test_builtin_elementwise_popcount(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
+
+ struct Foo s = __builtin_elementwise_popcount(i);
+ // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+ i = __builtin_elementwise_popcount();
+ // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+ i = __builtin_elementwise_popcount(f);
+ // expected-error@-1 {{1st argument must be a vector of integers (was 'float')}}
+
+ i = __builtin_elementwise_popcount(f, f);
+ // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+ u = __builtin_elementwise_popcount(d);
+ // expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
+
+ v = __builtin_elementwise_popcount(v);
+ // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
+
+ int2 i2 = __builtin_elementwise_popcount(iv);
+ // expected-error@-1 {{initializing 'int2' (vector of 2 'int' values) with an expression of incompatible type 'int3' (vector of 3 'int' values)}}
+
+ iv = __builtin_elementwise_popcount(i2);
+ // expected-error@-1 {{assigning to 'int3' (vector of 3 'int' values) from incompatible type 'int2' (vector of 2 'int' values)}}
+
+ unsigned3 u3 = __builtin_elementwise_popcount(iv);
+ // expected-error@-1 {{initializing 'unsigned3' (vector of 3 'unsigned int' values) with an expression of incompatible type 'int3' (vector of 3 'int' values)}}
+
+ iv = __builtin_elementwise_popcount(u3);
+ // expected-error@-1 {{assigning to 'int3' (vector of 3 'int' values) from incompatible type 'unsigned3' (vector of 3 'unsigned int' values)}}
+}
+
void test_builtin_elementwise_pow(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
i = __builtin_elementwise_pow(p, d);
// expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
diff --git a/clang/test/Sema/countbits-errors.hlsl b/clang/test/Sema/countbits-errors.hlsl
new file mode 100644
index 000000000000..0fd36fe78d79
--- /dev/null
+++ b/clang/test/Sema/countbits-errors.hlsl
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -finclude-default-header
+// -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only
+// -disable-llvm-passes -verify
+
+double2 test_int_builtin(double2 p0) {
+ return __builtin_hlsl_elementwise_countbits(p0);
+ // expected-error@-1 {{passing 'double2' (aka 'vector<double, 2>') to
+ // parameter of incompatible type
+ // '__attribute__((__vector_size__(2 * sizeof(int)))) int'
+ // (vector of 2 'int' values)}}
+}
+
+float test_ambiguous(float p0) {
+ return countbits(p0);
+ // expected-error@-1 {{call to 'countbits' is ambiguous}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+}
+
+float test_float_builtin(float p0) {
+ return __builtin_hlsl_elementwise_countbits(p0);
+ // expected-error@-1 {{passing 'double' to parameter of incompatible type
+ // 'int'}}
+}
diff --git a/clang/test/Sema/fixed-enum.c b/clang/test/Sema/fixed-enum.c
index 954ff8c452b8..2b02def0e178 100644
--- a/clang/test/Sema/fixed-enum.c
+++ b/clang/test/Sema/fixed-enum.c
@@ -5,9 +5,9 @@
// RUN: %clang_cc1 -pedantic -std=c11 -xc -DC11 -verify %s
// RUN: %clang_cc1 -Weverything -std=c11 -xc -fms-extensions -DMS -verify %s
// RUN: %clang_cc1 -Weverything -std=c2x -xc -DC23 -verify %s
-// RUN: %clang_cc1 -pedantic -std=c2x -xc -DC23 -verify %s
+// RUN: %clang_cc1 -pedantic -std=c2x -xc -DC23 -verify -Wpre-c23-compat %s
// RUN: %clang_cc1 -Weverything -std=c23 -xc -DC23 -verify %s
-// RUN: %clang_cc1 -pedantic -std=c23 -xc -DC23 -verify %s
+// RUN: %clang_cc1 -pedantic -std=c23 -xc -DC23 -verify -Wpre-c23-compat %s
// RUN: %clang_cc1 -Weverything -std=c23 -xc -fms-extensions -DC23 -verify %s
enum X : int {e};
@@ -15,12 +15,14 @@ enum X : int {e};
// expected-warning@-2{{enumeration types with a fixed underlying type are incompatible with C++98}}
#elif defined(CXX03)
// expected-warning@-4{{enumeration types with a fixed underlying type are a C++11 extension}}
-#elif defined(OBJC) || defined(C23)
-// No diagnostic
+#elif defined(OBJC)
+// diagnostic
+#elif defined(C23)
+// expected-warning@-8{{enumeration types with a fixed underlying type are incompatible with C standards before C23}}
#elif defined(C11)
-// expected-warning@-8{{enumeration types with a fixed underlying type are a Clang extension}}
+// expected-warning@-10{{enumeration types with a fixed underlying type are a C23 extension}}
#elif defined(MS)
-// expected-warning@-10{{enumeration types with a fixed underlying type are a Microsoft extension}}
+// expected-warning@-12{{enumeration types with a fixed underlying type are a Microsoft extension}}
#endif
// Don't warn about the forward declaration in any language mode.
@@ -29,16 +31,23 @@ enum Fwd : int { e2 };
#if !defined(OBJC) && !defined(C23)
// expected-warning@-3 {{enumeration types with a fixed underlying type}}
// expected-warning@-3 {{enumeration types with a fixed underlying type}}
+#elif defined(C23)
+// expected-warning@-6 {{enumeration types with a fixed underlying type are incompatible with C standards before C23}}
+// expected-warning@-6 {{enumeration types with a fixed underlying type are incompatible with C standards before C23}}
#endif
// Always error on the incompatible redeclaration.
enum BadFwd : int;
#if !defined(OBJC) && !defined(C23)
// expected-warning@-2 {{enumeration types with a fixed underlying type}}
+#elif defined(C23)
+// expected-warning@-4 {{enumeration types with a fixed underlying type are incompatible with C standards before C23}}
#endif
-// expected-note@-4 {{previous declaration is here}}
+// expected-note@-6 {{previous declaration is here}}
enum BadFwd : char { e3 };
#if !defined(OBJC) && !defined(C23)
// expected-warning@-2 {{enumeration types with a fixed underlying type}}
+#elif defined(C23)
+// expected-warning@-4 {{enumeration types with a fixed underlying type are incompatible with C standards before C23}}
#endif
-// expected-error@-4 {{enumeration redeclared with different underlying type 'char' (was 'int')}}
+// expected-error@-6 {{enumeration redeclared with different underlying type 'char' (was 'int')}}
diff --git a/clang/test/SemaCXX/builtins-elementwise-math.cpp b/clang/test/SemaCXX/builtins-elementwise-math.cpp
index 898d869f4c81..c3d8bc593c0b 100644
--- a/clang/test/SemaCXX/builtins-elementwise-math.cpp
+++ b/clang/test/SemaCXX/builtins-elementwise-math.cpp
@@ -269,3 +269,11 @@ void test_builtin_elementwise_bitreverse() {
static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(a))>::value);
static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(b))>::value);
}
+
+void test_builtin_elementwise_popcount() {
+ const int a = 2;
+ int b = 1;
+ static_assert(!is_const<decltype(__builtin_elementwise_popcount(a))>::value);
+ static_assert(!is_const<decltype(__builtin_elementwise_popcount(b))>::value);
+}
+
diff --git a/clang/test/SemaCXX/cxx2a-consteval.cpp b/clang/test/SemaCXX/cxx2a-consteval.cpp
index 81923617f637..ae331055c52b 100644
--- a/clang/test/SemaCXX/cxx2a-consteval.cpp
+++ b/clang/test/SemaCXX/cxx2a-consteval.cpp
@@ -1248,3 +1248,27 @@ void test() {
}
}
+
+// Test that we don't redundantly instantiate the friend declaration in
+// RemoveNestedImmediateInvocation(). Otherwise, we would end up with spurious
+// redefinition errors.
+namespace GH107175 {
+
+consteval void consteval_func() {}
+
+template <auto> struct define_f {
+ friend void foo() {}
+};
+
+template <auto = [] {}> struct A {};
+
+struct B {
+ template <auto T> consteval void func() { (void)define_f<T>{}; }
+};
+
+int main() {
+ B{}.func<A{}>();
+ consteval_func();
+}
+
+} // namespace GH107175
diff --git a/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl
new file mode 100644
index 000000000000..8d5f0abb2860
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -finclude-default-header
+// -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only
+// -disable-llvm-passes -verify -verify-ignore-unexpected
+
+
+double test_int_builtin(double p0) {
+ return countbits(p0);
+ // expected-error@-1 {{call to 'countbits' is ambiguous}}
+}
+
+double2 test_int_builtin_2(double2 p0) {
+ return __builtin_elementwise_popcount(p0);
+ // expected-error@-1 {{1st argument must be a vector of integers
+ // (was 'double2' (aka 'vector<double, 2>'))}}
+}
+
+double test_int_builtin_3(float p0) {
+ return __builtin_elementwise_popcount(p0);
+ // expected-error@-1 {{1st argument must be a vector of integers
+ // (was 'float')}}
+}
diff --git a/clang/test/SemaHLSL/resource_binding_attr_error.hlsl b/clang/test/SemaHLSL/resource_binding_attr_error.hlsl
index cb728dca838c..74aff79f0e37 100644
--- a/clang/test/SemaHLSL/resource_binding_attr_error.hlsl
+++ b/clang/test/SemaHLSL/resource_binding_attr_error.hlsl
@@ -13,16 +13,9 @@ float a : register(c0);
cbuffer b : register(i0) {
}
-// expected-error@+1 {{invalid space specifier 's2' used; expected 'space' followed by an integer, like space1}}
-cbuffer c : register(b0, s2) {
-}
// expected-error@+1 {{register number should be an integer}}
-cbuffer d : register(bf, s2) {
-
-}
-// expected-error@+1 {{invalid space specifier 'spaces' used; expected 'space' followed by an integer, like space1}}
-cbuffer e : register(b2, spaces) {
+cbuffer c : register(bf, s2) {
}
@@ -35,9 +28,8 @@ cbuffer B : register(space1) {}
// expected-error@+1 {{wrong argument format for hlsl attribute, use b2 instead}}
cbuffer C : register(b 2) {}
-// expected-error@+2 {{wrong argument format for hlsl attribute, use b2 instead}}
-// expected-error@+1 {{wrong argument format for hlsl attribute, use space3 instead}}
-cbuffer D : register(b 2, space 3) {}
+// expected-error@+1 {{wrong argument format for hlsl attribute, use b2 instead}}
+cbuffer D : register(b 2, space3) {}
// expected-error@+1 {{'register' attribute only applies to cbuffer/tbuffer and external global variables}}
static MyTemplatedSRV<float> U : register(u5);
@@ -61,9 +53,6 @@ void foo2() {
extern MyTemplatedSRV<float> U2 : register(u5);
}
-// expected-error@+1 {{binding type 'u' only applies to UAV resources}}
-float b : register(u0, space1);
-
// expected-error@+1 {{'register' attribute only applies to cbuffer/tbuffer and external global variables}}
void bar(MyTemplatedSRV<float> U : register(u3)) {
diff --git a/clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl b/clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl
index 0a547ed66af0..760c057630a7 100644
--- a/clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl
+++ b/clang/test/SemaHLSL/resource_binding_attr_error_basic.hlsl
@@ -3,37 +3,40 @@
// expected-error@+1{{binding type 't' only applies to SRV resources}}
float f1 : register(t0);
-
-float f2 : register(c0);
+// expected-error@+1 {{binding type 'u' only applies to UAV resources}}
+float f2 : register(u0);
// expected-error@+1{{binding type 'b' only applies to constant buffers. The 'bool constant' binding type is no longer supported}}
float f3 : register(b9);
+// expected-error@+1 {{binding type 's' only applies to sampler state}}
+float f4 : register(s0);
+
// expected-error@+1{{binding type 'i' ignored. The 'integer constant' binding type is no longer supported}}
-float f4 : register(i9);
+float f5 : register(i9);
// expected-error@+1{{binding type 'x' is invalid}}
-float f5 : register(x9);
+float f6 : register(x9);
cbuffer g_cbuffer1 {
// expected-error@+1{{binding type 'c' ignored in buffer declaration. Did you mean 'packoffset'?}}
- float f6 : register(c2);
+ float f7 : register(c2);
};
tbuffer g_tbuffer1 {
// expected-error@+1{{binding type 'c' ignored in buffer declaration. Did you mean 'packoffset'?}}
- float f7 : register(c2);
+ float f8 : register(c2);
};
cbuffer g_cbuffer2 {
// expected-error@+1{{binding type 'b' only applies to constant buffer resources}}
- float f8 : register(b2);
+ float f9 : register(b2);
};
tbuffer g_tbuffer2 {
// expected-error@+1{{binding type 'i' ignored. The 'integer constant' binding type is no longer supported}}
- float f9 : register(i2);
+ float f10 : register(i2);
};
// expected-error@+1{{binding type 'c' only applies to numeric variables in the global scope}}
-RWBuffer<float> f10 : register(c3);
+RWBuffer<float> f11 : register(c3);
diff --git a/clang/test/SemaHLSL/resource_binding_attr_error_space.hlsl b/clang/test/SemaHLSL/resource_binding_attr_error_space.hlsl
new file mode 100644
index 000000000000..70e64e6ca752
--- /dev/null
+++ b/clang/test/SemaHLSL/resource_binding_attr_error_space.hlsl
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -o - -fsyntax-only %s -verify
+
+// valid
+cbuffer cbuf {
+ RWBuffer<int> r : register(u0, space0);
+}
+
+cbuffer cbuf2 {
+ struct x {
+ // this test validates that no diagnostic is emitted on the space parameter, because
+ // this register annotation is not in the global scope.
+ // expected-error@+1 {{'register' attribute only applies to cbuffer/tbuffer and external global variables}}
+ RWBuffer<int> E : register(u2, space3);
+ };
+}
+
+struct MyStruct {
+ RWBuffer<int> E;
+};
+
+cbuffer cbuf3 {
+ // valid
+ MyStruct E : register(u2, space3);
+}
+
+// valid
+MyStruct F : register(u3, space4);
+
+cbuffer cbuf4 {
+ // this test validates that no diagnostic is emitted on the space parameter, because
+ // this register annotation is not in the global scope.
+ // expected-error@+1 {{binding type 'u' only applies to UAV resources}}
+ float a : register(u2, space3);
+}
+
+// expected-error@+1 {{invalid space specifier 's2' used; expected 'space' followed by an integer, like space1}}
+cbuffer a : register(b0, s2) {
+
+}
+
+// expected-error@+1 {{invalid space specifier 'spaces' used; expected 'space' followed by an integer, like space1}}
+cbuffer b : register(b2, spaces) {
+
+}
+
+// expected-error@+1 {{wrong argument format for hlsl attribute, use space3 instead}}
+cbuffer c : register(b2, space 3) {}
+
+// expected-error@+1 {{register space cannot be specified on global constants}}
+int d : register(c2, space3);
+
+// expected-error@+1 {{register space cannot be specified on global constants}}
+int e : register(c2, space0);
+
+// expected-error@+1 {{register space cannot be specified on global constants}}
+int f : register(c2, space00);
+
+// valid
+RWBuffer<int> g : register(u2, space0);
+
+// valid
+RWBuffer<int> h : register(u2, space0);
diff --git a/clang/test/SemaTemplate/GH18291.cpp b/clang/test/SemaTemplate/GH18291.cpp
index 820564ffa6f1..2e9754b65617 100644
--- a/clang/test/SemaTemplate/GH18291.cpp
+++ b/clang/test/SemaTemplate/GH18291.cpp
@@ -112,3 +112,12 @@ namespace static_vs_nonstatic {
}
} // namespace explicit_obj_param
} // namespace static_vs_nonstatic
+
+namespace incomplete_on_sugar {
+ template <unsigned P, class T> void f(T[P]) = delete;
+ template <unsigned P> void f(int[][P]);
+ void test() {
+ int array[1][8];
+ f<8>(array);
+ }
+} // namespace incomplete_on_sugar
diff --git a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
index 333187b0d74a..5450d105a6f5 100644
--- a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
+++ b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
@@ -622,3 +622,47 @@ void A<T>::method(Ts&... ts)
} {}
}
+
+namespace GH63782 {
+// GH63782 was also fixed by PR #80594, so let's add a test for it.
+
+template<bool... Vals>
+constexpr bool All = (Vals && ...);
+
+template<bool... Bs>
+class Class {
+ template<typename>
+ requires All<Bs...>
+ void Foo();
+};
+
+template<bool... Bs>
+template<typename>
+requires All<Bs...>
+void Class<Bs...>::Foo() {
+};
+
+} // namespace GH63782
+
+namespace eve {
+// Reduced from the "eve" project
+
+template <typename... Ts>
+struct tuple {
+ template <int I0> requires(I0 <= sizeof...(Ts))
+ constexpr auto split();
+};
+
+template <typename... Ts>
+template <int I0>
+requires(I0 <= sizeof...(Ts))
+constexpr auto tuple<Ts...>::split(){
+ return 0;
+}
+
+int foo() {
+ tuple<int, float> x;
+ return x.split<0>();
+}
+
+} // namespace eve
diff --git a/clang/test/clang-rename/ClassAsTemplateArgument.cpp b/clang/test/clang-rename/ClassAsTemplateArgument.cpp
deleted file mode 100644
index 2e09a5b529e7..000000000000
--- a/clang/test/clang-rename/ClassAsTemplateArgument.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-class Foo /* Test 1 */ {}; // CHECK: class Bar /* Test 1 */ {};
-
-template <typename T>
-void func() {}
-
-template <typename T>
-class Baz {};
-
-int main() {
- func<Foo>(); // CHECK: func<Bar>();
- Baz<Foo> /* Test 2 */ obj; // CHECK: Baz<Bar> /* Test 2 */ obj;
- return 0;
-}
-
-// Test 1.
-// RUN: clang-rename -offset=7 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=215 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/ClassFindByName.cpp b/clang/test/clang-rename/ClassFindByName.cpp
deleted file mode 100644
index 4430891ec4b1..000000000000
--- a/clang/test/clang-rename/ClassFindByName.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-class Foo { // CHECK: class Bar {
-};
-
-int main() {
- Foo *Pointer = 0; // CHECK: Bar *Pointer = 0;
- return 0;
-}
-
-// Test 1.
-// RUN: clang-rename -qualified-name=Foo -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
diff --git a/clang/test/clang-rename/ClassSimpleRenaming.cpp b/clang/test/clang-rename/ClassSimpleRenaming.cpp
deleted file mode 100644
index 086f55736cb7..000000000000
--- a/clang/test/clang-rename/ClassSimpleRenaming.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-class Foo /* Test 1 */ { // CHECK: class Bar /* Test 1 */ {
-public:
- void foo(int x);
-};
-
-void Foo::foo(int x) /* Test 2 */ {} // CHECK: void Bar::foo(int x) /* Test 2 */ {}
-
-// Test 1.
-// RUN: clang-rename -offset=6 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=109 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/ClassTestMulti.cpp b/clang/test/clang-rename/ClassTestMulti.cpp
deleted file mode 100644
index 81e65c760652..000000000000
--- a/clang/test/clang-rename/ClassTestMulti.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-class Foo1 /* Offset 1 */ { // CHECK: class Bar1 /* Offset 1 */ {
-};
-
-class Foo2 /* Offset 2 */ { // CHECK: class Bar2 /* Offset 2 */ {
-};
-
-// Test 1.
-// RUN: clang-rename -offset=6 -new-name=Bar1 -offset=76 -new-name=Bar2 %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/ClassTestMultiByName.cpp b/clang/test/clang-rename/ClassTestMultiByName.cpp
deleted file mode 100644
index 61b69a1bdf4c..000000000000
--- a/clang/test/clang-rename/ClassTestMultiByName.cpp
+++ /dev/null
@@ -1,8 +0,0 @@
-class Foo1 { // CHECK: class Bar1
-};
-
-class Foo2 { // CHECK: class Bar2
-};
-
-// Test 1.
-// RUN: clang-rename -qualified-name=Foo1 -new-name=Bar1 -qualified-name=Foo2 -new-name=Bar2 %s -- | sed 's,//.*,,' | FileCheck %s
diff --git a/clang/test/clang-rename/ComplexFunctionOverride.cpp b/clang/test/clang-rename/ComplexFunctionOverride.cpp
deleted file mode 100644
index ccf3a20e5400..000000000000
--- a/clang/test/clang-rename/ComplexFunctionOverride.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-struct A {
- virtual void foo() {} /* Test 1 */ // CHECK: virtual void bar() {}
-};
-
-struct B : A {
- void foo() override {} /* Test 2 */ // CHECK: void bar() override {}
-};
-
-struct C : B {
- void foo() override {} /* Test 3 */ // CHECK: void bar() override {}
-};
-
-struct D : B {
- void foo() override {} /* Test 4 */ // CHECK: void bar() override {}
-};
-
-struct E : D {
- void foo() override {} /* Test 5 */ // CHECK: void bar() override {}
-};
-
-int main() {
- A a;
- a.foo(); // CHECK: a.bar();
- B b;
- b.foo(); // CHECK: b.bar();
- C c;
- c.foo(); // CHECK: c.bar();
- D d;
- d.foo(); // CHECK: d.bar();
- E e;
- e.foo(); // CHECK: e.bar();
- return 0;
-}
-
-// Test 1.
-// RUN: clang-rename -offset=26 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=109 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 3.
-// RUN: clang-rename -offset=201 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 4.
-// RUN: clang-rename -offset=293 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 5.
-// RUN: clang-rename -offset=385 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'foo.*' <file>
diff --git a/clang/test/clang-rename/ComplicatedClassType.cpp b/clang/test/clang-rename/ComplicatedClassType.cpp
deleted file mode 100644
index 880195303127..000000000000
--- a/clang/test/clang-rename/ComplicatedClassType.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-// Forward declaration.
-class Foo; /* Test 1 */ // CHECK: class Bar; /* Test 1 */
-
-class Baz {
- virtual int getValue() const = 0;
-};
-
-class Foo : public Baz { /* Test 2 */// CHECK: class Bar : public Baz {
-public:
- Foo(int value = 0) : x(value) {} // CHECK: Bar(int value = 0) : x(value) {}
-
- Foo &operator++(int) { // CHECK: Bar &operator++(int) {
- x++;
- return *this;
- }
-
- bool operator<(Foo const &rhs) { // CHECK: bool operator<(Bar const &rhs) {
- return this->x < rhs.x;
- }
-
- int getValue() const {
- return 0;
- }
-
-private:
- int x;
-};
-
-int main() {
- Foo *Pointer = 0; // CHECK: Bar *Pointer = 0;
- Foo Variable = Foo(10); // CHECK: Bar Variable = Bar(10);
- for (Foo it; it < Variable; it++) { // CHECK: for (Bar it; it < Variable; it++) {
- }
- const Foo *C = new Foo(); // CHECK: const Bar *C = new Bar();
- const_cast<Foo *>(C)->getValue(); // CHECK: const_cast<Bar *>(C)->getValue();
- Foo foo; // CHECK: Bar foo;
- const Baz &BazReference = foo;
- const Baz *BazPointer = &foo;
- dynamic_cast<const Foo &>(BazReference).getValue(); /* Test 3 */ // CHECK: dynamic_cast<const Bar &>(BazReference).getValue();
- dynamic_cast<const Foo *>(BazPointer)->getValue(); /* Test 4 */ // CHECK: dynamic_cast<const Bar *>(BazPointer)->getValue();
- reinterpret_cast<const Foo *>(BazPointer)->getValue(); /* Test 5 */ // CHECK: reinterpret_cast<const Bar *>(BazPointer)->getValue();
- static_cast<const Foo &>(BazReference).getValue(); /* Test 6 */ // CHECK: static_cast<const Bar &>(BazReference).getValue();
- static_cast<const Foo *>(BazPointer)->getValue(); /* Test 7 */ // CHECK: static_cast<const Bar *>(BazPointer)->getValue();
- return 0;
-}
-
-// Test 1.
-// RUN: clang-rename -offset=30 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=155 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s
-// Test 3.
-// RUN: clang-rename -offset=1133 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s
-// Test 4.
-// RUN: clang-rename -offset=1266 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s
-// Test 5.
-// RUN: clang-rename -offset=1402 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s
-// Test 6.
-// RUN: clang-rename -offset=1533 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s
-// Test 7.
-// RUN: clang-rename -offset=1665 -new-name=Bar %s -- -frtti | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/Ctor.cpp b/clang/test/clang-rename/Ctor.cpp
deleted file mode 100644
index 9908a4123ddf..000000000000
--- a/clang/test/clang-rename/Ctor.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-class Foo { // CHECK: class Bar {
-public:
- Foo(); /* Test 1 */ // CHECK: Bar();
-};
-
-Foo::Foo() /* Test 2 */ {} // CHECK: Bar::Bar() /* Test 2 */ {}
-
-// Test 1.
-// RUN: clang-rename -offset=62 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=116 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/CtorInitializer.cpp b/clang/test/clang-rename/CtorInitializer.cpp
deleted file mode 100644
index fed4f5b06c27..000000000000
--- a/clang/test/clang-rename/CtorInitializer.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-class Baz {};
-
-class Qux {
- Baz Foo; /* Test 1 */ // CHECK: Baz Bar;
-public:
- Qux();
-};
-
-Qux::Qux() : Foo() /* Test 2 */ {} // CHECK: Qux::Qux() : Bar() /* Test 2 */ {}
-
-// Test 1.
-// RUN: clang-rename -offset=33 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=118 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/DeclRefExpr.cpp b/clang/test/clang-rename/DeclRefExpr.cpp
deleted file mode 100644
index 6462862d82ad..000000000000
--- a/clang/test/clang-rename/DeclRefExpr.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-class C {
-public:
- static int Foo; /* Test 1 */ // CHECK: static int Bar;
-};
-
-int foo(int x) { return 0; }
-#define MACRO(a) foo(a)
-
-int main() {
- C::Foo = 1; /* Test 2 */ // CHECK: C::Bar = 1;
- MACRO(C::Foo); // CHECK: MACRO(C::Bar);
- int y = C::Foo; /* Test 3 */ // CHECK: int y = C::Bar;
- return 0;
-}
-
-// Test 1.
-// RUN: clang-rename -offset=31 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=152 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 3.
-// RUN: clang-rename -offset=271 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/ForceMulti.cpp b/clang/test/clang-rename/ForceMulti.cpp
deleted file mode 100644
index 41983ce260c8..000000000000
--- a/clang/test/clang-rename/ForceMulti.cpp
+++ /dev/null
@@ -1,8 +0,0 @@
-class B /* Test 1 */ { // CHECK: class B2 /* Test 1 */ {
-};
-
-class D : public B /* Test 1 */ { // CHECK: class D : public B2 /* Test 1 */ {
-};
-
-// Test 1.
-// RUN: clang-rename -force -qualified-name B -new-name B2 -qualified-name E -new-name E2 %s -- | sed 's,//.*,,' | FileCheck %s
diff --git a/clang/test/clang-rename/ForwardClassDecl.cpp b/clang/test/clang-rename/ForwardClassDecl.cpp
deleted file mode 100644
index ef731a16d6e0..000000000000
--- a/clang/test/clang-rename/ForwardClassDecl.cpp
+++ /dev/null
@@ -1,4 +0,0 @@
-class Foo; // CHECK: class Bar;
-Foo *f(); // CHECK: Bar *f();
-
-// RUN: clang-rename -offset=6 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
diff --git a/clang/test/clang-rename/FunctionMacro.cpp b/clang/test/clang-rename/FunctionMacro.cpp
deleted file mode 100644
index 6e87026ec706..000000000000
--- a/clang/test/clang-rename/FunctionMacro.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-#define moo foo // CHECK: #define moo macro_function
-
-int foo() /* Test 1 */ { // CHECK: int macro_function() /* Test 1 */ {
- return 42;
-}
-
-void boo(int value) {}
-
-void qoo() {
- foo(); // CHECK: macro_function();
- boo(foo()); // CHECK: boo(macro_function());
- moo();
- boo(moo());
-}
-
-// Test 1.
-// RUN: clang-rename -offset=68 -new-name=macro_function %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'foo.*' <file>
diff --git a/clang/test/clang-rename/FunctionOverride.cpp b/clang/test/clang-rename/FunctionOverride.cpp
deleted file mode 100644
index adfeb739e66d..000000000000
--- a/clang/test/clang-rename/FunctionOverride.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-class A { virtual void foo(); /* Test 1 */ }; // CHECK: class A { virtual void bar();
-class B : public A { void foo(); /* Test 2 */ }; // CHECK: class B : public A { void bar();
-class C : public B { void foo(); /* Test 3 */ }; // CHECK: class C : public B { void bar();
-
-// Test 1.
-// RUN: clang-rename -offset=23 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=116 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 3.
-// RUN: clang-rename -offset=209 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'foo.*' <file>
diff --git a/clang/test/clang-rename/FunctionTemplate.cpp b/clang/test/clang-rename/FunctionTemplate.cpp
deleted file mode 100644
index 51b2515b8894..000000000000
--- a/clang/test/clang-rename/FunctionTemplate.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-template <typename T>
-void Foo(T t); // CHECK: void Bar(T t);
-
-template <>
-void Foo(int a); // CHECK: void Bar(int a);
-
-void test() {
- Foo<double>(1); // CHECK: Bar<double>(1);
-}
-
-// Test 1.
-// RUN: clang-rename -offset=28 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=81 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 3.
-// RUN: clang-rename -offset=137 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/FunctionWithClassFindByName.cpp b/clang/test/clang-rename/FunctionWithClassFindByName.cpp
deleted file mode 100644
index 2cae09a1c244..000000000000
--- a/clang/test/clang-rename/FunctionWithClassFindByName.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-void foo() {
-}
-
-class Foo { // CHECK: class Bar
-};
-
-int main() {
- Foo *Pointer = 0; // CHECK: Bar *Pointer = 0;
- return 0;
-}
-
-// RUN: clang-rename -qualified-name=Foo -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
diff --git a/clang/test/clang-rename/IncludeHeaderWithSymbol.cpp b/clang/test/clang-rename/IncludeHeaderWithSymbol.cpp
deleted file mode 100644
index cb2baee57b89..000000000000
--- a/clang/test/clang-rename/IncludeHeaderWithSymbol.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "Inputs/HeaderWithSymbol.h"
-
-int main() {
- return 0; // CHECK: {{^ return 0;}}
-}
-
-// Test 1.
-// The file IncludeHeaderWithSymbol.cpp doesn't contain the symbol Foo
-// and is expected to be written to stdout without modifications
-// RUN: clang-rename -qualified-name=Foo -new-name=Bar %s -- | FileCheck %s
diff --git a/clang/test/clang-rename/Inputs/HeaderWithSymbol.h b/clang/test/clang-rename/Inputs/HeaderWithSymbol.h
deleted file mode 100644
index 1fe02e89786c..000000000000
--- a/clang/test/clang-rename/Inputs/HeaderWithSymbol.h
+++ /dev/null
@@ -1 +0,0 @@
-struct Foo {};
diff --git a/clang/test/clang-rename/Inputs/OffsetToNewName.yaml b/clang/test/clang-rename/Inputs/OffsetToNewName.yaml
deleted file mode 100644
index d8e972880f36..000000000000
--- a/clang/test/clang-rename/Inputs/OffsetToNewName.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
----
-- Offset: 6
- NewName: Bar1
-- Offset: 44
- NewName: Bar2
-...
diff --git a/clang/test/clang-rename/Inputs/QualifiedNameToNewName.yaml b/clang/test/clang-rename/Inputs/QualifiedNameToNewName.yaml
deleted file mode 100644
index 6e3783671dfa..000000000000
--- a/clang/test/clang-rename/Inputs/QualifiedNameToNewName.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
----
-- QualifiedName: Foo1
- NewName: Bar1
-- QualifiedName: Foo2
- NewName: Bar2
-...
diff --git a/clang/test/clang-rename/InvalidNewName.cpp b/clang/test/clang-rename/InvalidNewName.cpp
deleted file mode 100644
index e6b38e59420a..000000000000
--- a/clang/test/clang-rename/InvalidNewName.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-// RUN: not clang-rename -new-name=class -offset=133 %s 2>&1 | FileCheck %s
-// CHECK: ERROR: new name is not a valid identifier in C++17.
diff --git a/clang/test/clang-rename/InvalidOffset.cpp b/clang/test/clang-rename/InvalidOffset.cpp
deleted file mode 100644
index 2ae04d01e4a7..000000000000
--- a/clang/test/clang-rename/InvalidOffset.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-#include "Inputs/HeaderWithSymbol.h"
-#define FOO int bar;
-FOO
-
-int foo;
-
-// RUN: not clang-rename -new-name=qux -offset=259 %s -- 2>&1 | FileCheck %s
-// CHECK-NOT: CHECK
-// CHECK: error: SourceLocation in file {{.*}}InvalidOffset.cpp at offset 259 is invalid
diff --git a/clang/test/clang-rename/InvalidQualifiedName.cpp b/clang/test/clang-rename/InvalidQualifiedName.cpp
deleted file mode 100644
index 5280e3939ccd..000000000000
--- a/clang/test/clang-rename/InvalidQualifiedName.cpp
+++ /dev/null
@@ -1,4 +0,0 @@
-struct S {
-};
-
-// RUN: clang-rename -force -qualified-name S2 -new-name=T %s --
diff --git a/clang/test/clang-rename/MemberExprMacro.cpp b/clang/test/clang-rename/MemberExprMacro.cpp
deleted file mode 100644
index 56cd8d95f6e8..000000000000
--- a/clang/test/clang-rename/MemberExprMacro.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-class Baz {
-public:
- int Foo; /* Test 1 */ // CHECK: int Bar;
-};
-
-int qux(int x) { return 0; }
-#define MACRO(a) qux(a)
-
-int main() {
- Baz baz;
- baz.Foo = 1; /* Test 2 */ // CHECK: baz.Bar = 1;
- MACRO(baz.Foo); // CHECK: MACRO(baz.Bar);
- int y = baz.Foo; // CHECK: int y = baz.Bar;
-}
-
-// Test 1.
-// RUN: clang-rename -offset=26 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=155 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/Namespace.cpp b/clang/test/clang-rename/Namespace.cpp
deleted file mode 100644
index ec9630fdedb6..000000000000
--- a/clang/test/clang-rename/Namespace.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-namespace gcc /* Test 1 */ { // CHECK: namespace clang /* Test 1 */ {
- int x;
-}
-
-void boo() {
- gcc::x = 42; // CHECK: clang::x = 42;
-}
-
-// Test 1.
-// RUN: clang-rename -offset=10 -new-name=clang %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/NoNewName.cpp b/clang/test/clang-rename/NoNewName.cpp
deleted file mode 100644
index 4f882d83b0c1..000000000000
--- a/clang/test/clang-rename/NoNewName.cpp
+++ /dev/null
@@ -1,4 +0,0 @@
-// Check for an error while -new-name argument has not been passed to
-// clang-rename.
-// RUN: not clang-rename -offset=133 %s 2>&1 | FileCheck %s
-// CHECK: clang-rename: -new-name must be specified.
diff --git a/clang/test/clang-rename/NonExistFile.cpp b/clang/test/clang-rename/NonExistFile.cpp
deleted file mode 100644
index f45839be8047..000000000000
--- a/clang/test/clang-rename/NonExistFile.cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-// RUN: not clang-rename -offset=0 -new-name=bar non-existing-file 2>&1 | FileCheck %s
-// CHECK: clang-rename: non-existing-file does not exist.
diff --git a/clang/test/clang-rename/TemplateClassInstantiation.cpp b/clang/test/clang-rename/TemplateClassInstantiation.cpp
deleted file mode 100644
index 493d0951df57..000000000000
--- a/clang/test/clang-rename/TemplateClassInstantiation.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-template <typename T>
-class Foo { /* Test 1 */ // CHECK: class Bar { /* Test 1 */
-public:
- T foo(T arg, T& ref, T* ptr) {
- T value;
- int number = 42;
- value = (T)number;
- value = static_cast<T>(number);
- return value;
- }
- static void foo(T value) {}
- T member;
-};
-
-template <typename T>
-void func() {
- Foo<T> obj; /* Test 2 */ // CHECK: Bar<T> obj;
- obj.member = T();
- Foo<T>::foo(); // CHECK: Bar<T>::foo();
-}
-
-int main() {
- Foo<int> i; /* Test 3 */ // CHECK: Bar<int> i;
- i.member = 0;
- Foo<int>::foo(0); // CHECK: Bar<int>::foo(0);
-
- Foo<bool> b; // CHECK: Bar<bool> b;
- b.member = false;
- Foo<bool>::foo(false); // CHECK: Bar<bool>::foo(false);
-
- return 0;
-}
-
-// Test 1.
-// RUN: clang-rename -offset=29 -new-name=Bar %s -- -fno-delayed-template-parsing | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=324 -new-name=Bar %s -- -fno-delayed-template-parsing | sed 's,//.*,,' | FileCheck %s
-// Test 3.
-// RUN: clang-rename -offset=463 -new-name=Bar %s -- -fno-delayed-template-parsing | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/TemplateCtor.cpp b/clang/test/clang-rename/TemplateCtor.cpp
deleted file mode 100644
index 9a59194ac3f4..000000000000
--- a/clang/test/clang-rename/TemplateCtor.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-class Foo { // CHECK: class Bar {
-public:
- template <typename T>
- Foo(); // CHECK: Bar();
-
- template <typename T>
- Foo(Foo &); // CHECK: Bar(Bar &);
-};
-
-// RUN: clang-rename -offset=6 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
diff --git a/clang/test/clang-rename/TemplateTypename.cpp b/clang/test/clang-rename/TemplateTypename.cpp
deleted file mode 100644
index 559ec1f9ade7..000000000000
--- a/clang/test/clang-rename/TemplateTypename.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-template <typename T /* Test 1 */> // CHECK: template <typename U /* Test 1 */>
-class Foo {
-T foo(T arg, T& ref, T* /* Test 2 */ ptr) { // CHECK: U foo(U arg, U& ref, U* /* Test 2 */ ptr) {
- T value; // CHECK: U value;
- int number = 42;
- value = (T)number; // CHECK: value = (U)number;
- value = static_cast<T /* Test 3 */>(number); // CHECK: value = static_cast<U /* Test 3 */>(number);
- return value;
-}
-
-static void foo(T value) {} // CHECK: static void foo(U value) {}
-
-T member; // CHECK: U member;
-};
-
-// Test 1.
-// RUN: clang-rename -offset=19 -new-name=U %s -- -fno-delayed-template-parsing | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=126 -new-name=U %s -- -fno-delayed-template-parsing | sed 's,//.*,,' | FileCheck %s
-// Test 3.
-// RUN: clang-rename -offset=392 -new-name=U %s -- -fno-delayed-template-parsing | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'T.*' <file>
diff --git a/clang/test/clang-rename/TemplatedClassFunction.cpp b/clang/test/clang-rename/TemplatedClassFunction.cpp
deleted file mode 100644
index d7f21e0847c9..000000000000
--- a/clang/test/clang-rename/TemplatedClassFunction.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-template <typename T>
-class A {
-public:
- void foo() /* Test 1 */ {} // CHECK: void bar() /* Test 1 */ {}
-};
-
-int main(int argc, char **argv) {
- A<int> a;
- A<double> b;
- A<float> c;
- a.foo(); /* Test 2 */ // CHECK: a.bar(); /* Test 2 */
- b.foo(); /* Test 3 */ // CHECK: b.bar(); /* Test 3 */
- c.foo(); /* Test 4 */ // CHECK: c.bar(); /* Test 4 */
- return 0;
-}
-
-// Test 1.
-// RUN: clang-rename -offset=48 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=191 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 3.
-// RUN: clang-rename -offset=255 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 4.
-// RUN: clang-rename -offset=319 -new-name=bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'foo.*' <file>
diff --git a/clang/test/clang-rename/Typedef.cpp b/clang/test/clang-rename/Typedef.cpp
deleted file mode 100644
index 64d337fae22c..000000000000
--- a/clang/test/clang-rename/Typedef.cpp
+++ /dev/null
@@ -1,8 +0,0 @@
-namespace std {
-class basic_string {};
-typedef basic_string string;
-} // namespace std
-
-std::string foo(); // // CHECK: std::new_string foo();
-
-// RUN: clang-rename -offset=93 -new-name=new_string %s -- | sed 's,//.*,,' | FileCheck %s
diff --git a/clang/test/clang-rename/UserDefinedConversion.cpp b/clang/test/clang-rename/UserDefinedConversion.cpp
deleted file mode 100644
index 60f251ab4483..000000000000
--- a/clang/test/clang-rename/UserDefinedConversion.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-class Foo { /* Test 1 */ // CHECK: class Bar {
-public:
- Foo() {} // CHECK: Bar() {}
-};
-
-class Baz {
-public:
- operator Foo() /* Test 2 */ const { // CHECK: operator Bar() /* Test 2 */ const {
- Foo foo; // CHECK: Bar foo;
- return foo;
- }
-};
-
-int main() {
- Baz boo;
- Foo foo = static_cast<Foo>(boo); // CHECK: Bar foo = static_cast<Bar>(boo);
- return 0;
-}
-
-// Test 1.
-// RUN: clang-rename -offset=7 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=164 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/Variable.cpp b/clang/test/clang-rename/Variable.cpp
deleted file mode 100644
index d7e670fb43ee..000000000000
--- a/clang/test/clang-rename/Variable.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#define NAMESPACE namespace A
-NAMESPACE {
-int Foo; /* Test 1 */ // CHECK: int Bar;
-}
-int Foo; // CHECK: int Foo;
-int Qux = Foo; // CHECK: int Qux = Foo;
-int Baz = A::Foo; /* Test 2 */ // CHECK: Baz = A::Bar;
-void fun() {
- struct {
- int Foo; // CHECK: int Foo;
- } b = {100};
- int Foo = 100; // CHECK: int Foo = 100;
- Baz = Foo; // CHECK: Baz = Foo;
- {
- extern int Foo; // CHECK: extern int Foo;
- Baz = Foo; // CHECK: Baz = Foo;
- Foo = A::Foo /* Test 3 */ + Baz; // CHECK: Foo = A::Bar /* Test 3 */ + Baz;
- A::Foo /* Test 4 */ = b.Foo; // CHECK: A::Bar /* Test 4 */ = b.Foo;
- }
- Foo = b.Foo; // Foo = b.Foo;
-}
-
-// Test 1.
-// RUN: clang-rename -offset=46 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=234 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 3.
-// RUN: clang-rename -offset=641 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 4.
-// RUN: clang-rename -offset=716 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/VariableMacro.cpp b/clang/test/clang-rename/VariableMacro.cpp
deleted file mode 100644
index 622e825d3e41..000000000000
--- a/clang/test/clang-rename/VariableMacro.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#define Baz Foo // CHECK: #define Baz Bar
-
-void foo(int value) {}
-
-void macro() {
- int Foo; /* Test 1 */ // CHECK: int Bar;
- Foo = 42; /* Test 2 */ // CHECK: Bar = 42;
- Baz -= 0;
- foo(Foo); /* Test 3 */ // CHECK: foo(Bar);
- foo(Baz);
-}
-
-// Test 1.
-// RUN: clang-rename -offset=88 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=129 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 3.
-// RUN: clang-rename -offset=191 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/VariableTemplate.cpp b/clang/test/clang-rename/VariableTemplate.cpp
deleted file mode 100644
index a345ede5a7f6..000000000000
--- a/clang/test/clang-rename/VariableTemplate.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-template <typename T, int U>
-bool Foo = true; // CHECK: bool Bar = true;
-
-// explicit template specialization
-template <>
-bool Foo<int, 0> = false; // CHECK: bool Bar<int, 0> = false;
-
-// partial template specialization
-template <typename T>
-bool Foo<T, 1> = false; // bool Bar<x, 1> = false;
-
-void k() {
- // ref to the explicit template specialization
- Foo<int, 0>; // CHECK: Bar<int, 0>;
- // ref to the primary template.
- Foo<double, 2>; // CHECK: Bar<double, 2>;
-}
-
-
-// Test 1.
-// RUN: clang-rename -offset=34 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -offset=128 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 3.
-// RUN: clang-rename -offset=248 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 4.
-// RUN: clang-rename -offset=357 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 5.
-// RUN: clang-rename -offset=431 -new-name=Bar %s -- | sed 's,//.*,,' | FileCheck %s
-
-// To find offsets after modifying the file, use:
-// grep -Ubo 'Foo.*' <file>
diff --git a/clang/test/clang-rename/YAMLInput.cpp b/clang/test/clang-rename/YAMLInput.cpp
deleted file mode 100644
index 55dbc6d66a5a..000000000000
--- a/clang/test/clang-rename/YAMLInput.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-class Foo1 { // CHECK: class Bar1
-};
-
-class Foo2 { // CHECK: class Bar2
-};
-
-// Test 1.
-// RUN: clang-rename -input %S/Inputs/OffsetToNewName.yaml %s -- | sed 's,//.*,,' | FileCheck %s
-// Test 2.
-// RUN: clang-rename -input %S/Inputs/QualifiedNameToNewName.yaml %s -- | sed 's,//.*,,' | FileCheck %s
diff --git a/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c b/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c
deleted file mode 100644
index 8c7e46c6eca9..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c
+++ /dev/null
@@ -1,8 +0,0 @@
-void foo() {
- // expected-error@+1{{use of undeclared identifier 'a'}}
- a = 2; a = 2;
- b = 2; b = 2;
- // expected-error@+1 3{{use of undeclared identifier 'c'}}
- c = 2; c = 2;
- // expected-error 2{{asdf}}
-}
diff --git a/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c.expected b/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c.expected
deleted file mode 100644
index 6214ff382f44..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/duplicate-diag.c.expected
+++ /dev/null
@@ -1,8 +0,0 @@
-void foo() {
- // expected-error@+1 2{{use of undeclared identifier 'a'}}
- a = 2; a = 2;
- // expected-error@+1 2{{use of undeclared identifier 'b'}}
- b = 2; b = 2;
- // expected-error@+1 2{{use of undeclared identifier 'c'}}
- c = 2; c = 2;
-}
diff --git a/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c b/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c
deleted file mode 100644
index 0210ac35fd5c..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c
+++ /dev/null
@@ -1,8 +0,0 @@
-void foo() {
- // expected-error@+1 2 {{use of undeclared identifier 'a'}}
- a = 2; a = 2; b = 2; b = 2; c = 2;
- // expected-error@+1 2 {{asdf}}
- d = 2;
- e = 2; f = 2; // expected-error 2 {{use of undeclared identifier 'e'}}
-}
-
diff --git a/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c.expected b/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c.expected
deleted file mode 100644
index 5c5aaeeef97a..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/infer-indentation.c.expected
+++ /dev/null
@@ -1,11 +0,0 @@
-void foo() {
- // expected-error@+3 {{use of undeclared identifier 'c'}}
- // expected-error@+2 2 {{use of undeclared identifier 'b'}}
- // expected-error@+1 2 {{use of undeclared identifier 'a'}}
- a = 2; a = 2; b = 2; b = 2; c = 2;
- // expected-error@+1 {{use of undeclared identifier 'd'}}
- d = 2;
- // expected-error@+1 {{use of undeclared identifier 'f'}}
- e = 2; f = 2; // expected-error {{use of undeclared identifier 'e'}}
-}
-
diff --git a/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c b/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c
deleted file mode 100644
index 1aa8d088e972..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c
+++ /dev/null
@@ -1,11 +0,0 @@
-void foo() {
- a = 2;
- // expected-error@-1{{use of undeclared identifier 'a'}}
- b = 2;// expected-error{{use of undeclared identifier 'b'}}
- c = 2;
- // expected-error@5{{use of undeclared identifier 'c'}}
- d = 2; // expected-error-re{{use of {{.*}} identifier 'd'}}
-
- e = 2; // error to trigger mismatch
-}
-
diff --git a/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c.expected b/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c.expected
deleted file mode 100644
index 6b621061bbfb..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/leave-existing-diags.c.expected
+++ /dev/null
@@ -1,12 +0,0 @@
-void foo() {
- a = 2;
- // expected-error@-1{{use of undeclared identifier 'a'}}
- b = 2;// expected-error{{use of undeclared identifier 'b'}}
- c = 2;
- // expected-error@5{{use of undeclared identifier 'c'}}
- d = 2; // expected-error-re{{use of {{.*}} identifier 'd'}}
-
- // expected-error@+1{{use of undeclared identifier 'e'}}
- e = 2; // error to trigger mismatch
-}
-
diff --git a/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c b/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c
deleted file mode 100644
index e230e0a337bf..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c
+++ /dev/null
@@ -1,6 +0,0 @@
-void foo() {
- a = 2;
- b = 2;
-
- c = 2;
-}
diff --git a/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c.expected b/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c.expected
deleted file mode 100644
index 27dc1f30a26f..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/multiple-errors.c.expected
+++ /dev/null
@@ -1,9 +0,0 @@
-void foo() {
- // expected-error@+1{{use of undeclared identifier 'a'}}
- a = 2;
- // expected-error@+1{{use of undeclared identifier 'b'}}
- b = 2;
-
- // expected-error@+1{{use of undeclared identifier 'c'}}
- c = 2;
-}
diff --git a/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c b/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c
deleted file mode 100644
index 03f723d44bbe..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c
+++ /dev/null
@@ -1,8 +0,0 @@
-void foo() {
- a = 2; b = 2; c = 2;
-}
-
-void bar() {
- x = 2; y = 2; z = 2;
- // expected-error@-1{{use of undeclared identifier 'x'}}
-}
diff --git a/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c.expected b/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c.expected
deleted file mode 100644
index 24b57f4353d9..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/multiple-missing-errors-same-line.c.expected
+++ /dev/null
@@ -1,13 +0,0 @@
-void foo() {
- // expected-error@+3{{use of undeclared identifier 'c'}}
- // expected-error@+2{{use of undeclared identifier 'b'}}
- // expected-error@+1{{use of undeclared identifier 'a'}}
- a = 2; b = 2; c = 2;
-}
-
-void bar() {
- x = 2; y = 2; z = 2;
- // expected-error@-1{{use of undeclared identifier 'x'}}
- // expected-error@-2{{use of undeclared identifier 'y'}}
- // expected-error@-3{{use of undeclared identifier 'z'}}
-}
diff --git a/clang/test/utils/update-verify-tests/Inputs/no-checks.c b/clang/test/utils/update-verify-tests/Inputs/no-checks.c
deleted file mode 100644
index 8fd1f7cd3337..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/no-checks.c
+++ /dev/null
@@ -1,3 +0,0 @@
-void foo() {
- bar = 2;
-}
diff --git a/clang/test/utils/update-verify-tests/Inputs/no-checks.c.expected b/clang/test/utils/update-verify-tests/Inputs/no-checks.c.expected
deleted file mode 100644
index e80548fbe50f..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/no-checks.c.expected
+++ /dev/null
@@ -1,4 +0,0 @@
-void foo() {
- // expected-error@+1{{use of undeclared identifier 'bar'}}
- bar = 2;
-}
diff --git a/clang/test/utils/update-verify-tests/Inputs/no-diags.c b/clang/test/utils/update-verify-tests/Inputs/no-diags.c
deleted file mode 100644
index 66d169be4394..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/no-diags.c
+++ /dev/null
@@ -1,5 +0,0 @@
-void foo() {
- // expected-error@+1{{asdf}}
- int a = 2;
-}
-
diff --git a/clang/test/utils/update-verify-tests/Inputs/no-diags.c.expected b/clang/test/utils/update-verify-tests/Inputs/no-diags.c.expected
deleted file mode 100644
index 052302849457..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/no-diags.c.expected
+++ /dev/null
@@ -1,5 +0,0 @@
-// expected-no-diagnostics
-void foo() {
- int a = 2;
-}
-
diff --git a/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c b/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c
deleted file mode 100644
index 78b72e1357da..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c
+++ /dev/null
@@ -1,4 +0,0 @@
-// expected-no-diagnostics
-void foo() {
- a = 2;
-}
diff --git a/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c.expected b/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c.expected
deleted file mode 100644
index d948ffce5618..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/no-expected-diags.c.expected
+++ /dev/null
@@ -1,4 +0,0 @@
-void foo() {
- // expected-error@+1{{use of undeclared identifier 'a'}}
- a = 2;
-}
diff --git a/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c b/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c
deleted file mode 100644
index 3d63eaf0f1b8..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c
+++ /dev/null
@@ -1,5 +0,0 @@
-void foo() {
- a = 2; // check-error{{asdf}}
- // expected-error@-1{ignored}}
-}
-
diff --git a/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c.expected b/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c.expected
deleted file mode 100644
index a877f8692212..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/non-default-prefix.c.expected
+++ /dev/null
@@ -1,5 +0,0 @@
-void foo() {
- a = 2; // check-error{{use of undeclared identifier 'a'}}
- // expected-error@-1{ignored}}
-}
-
diff --git a/clang/test/utils/update-verify-tests/Inputs/update-same-line.c b/clang/test/utils/update-verify-tests/Inputs/update-same-line.c
deleted file mode 100644
index 5278ce0c57c3..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/update-same-line.c
+++ /dev/null
@@ -1,4 +0,0 @@
-void foo() {
- bar = 2; // expected-error {{asdf}}
-}
-
diff --git a/clang/test/utils/update-verify-tests/Inputs/update-same-line.c.expected b/clang/test/utils/update-verify-tests/Inputs/update-same-line.c.expected
deleted file mode 100644
index 8ba47f788319..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/update-same-line.c.expected
+++ /dev/null
@@ -1,4 +0,0 @@
-void foo() {
- bar = 2; // expected-error {{use of undeclared identifier 'bar'}}
-}
-
diff --git a/clang/test/utils/update-verify-tests/Inputs/update-single-check.c b/clang/test/utils/update-verify-tests/Inputs/update-single-check.c
deleted file mode 100644
index 20b011bfc3d7..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/update-single-check.c
+++ /dev/null
@@ -1,4 +0,0 @@
-void foo() {
- // expected-error@+1{{asdf}}
- bar = 2;
-}
diff --git a/clang/test/utils/update-verify-tests/Inputs/update-single-check.c.expected b/clang/test/utils/update-verify-tests/Inputs/update-single-check.c.expected
deleted file mode 100644
index e80548fbe50f..000000000000
--- a/clang/test/utils/update-verify-tests/Inputs/update-single-check.c.expected
+++ /dev/null
@@ -1,4 +0,0 @@
-void foo() {
- // expected-error@+1{{use of undeclared identifier 'bar'}}
- bar = 2;
-}
diff --git a/clang/test/utils/update-verify-tests/duplicate-diag.test b/clang/test/utils/update-verify-tests/duplicate-diag.test
deleted file mode 100644
index db4b0fd86f08..000000000000
--- a/clang/test/utils/update-verify-tests/duplicate-diag.test
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: cp %S/Inputs/duplicate-diag.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests
-# RUN: diff --strip-trailing-cr %S/Inputs/duplicate-diag.c.expected %t.c
-# RUN: %clang_cc1 -verify %t.c
-
diff --git a/clang/test/utils/update-verify-tests/infer-indentation.test b/clang/test/utils/update-verify-tests/infer-indentation.test
deleted file mode 100644
index bd94dce4844e..000000000000
--- a/clang/test/utils/update-verify-tests/infer-indentation.test
+++ /dev/null
@@ -1,3 +0,0 @@
-# RUN: cp %S/Inputs/infer-indentation.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests
-# RUN: diff --strip-trailing-cr %S/Inputs/infer-indentation.c.expected %t.c
-# RUN: %clang_cc1 -verify %t.c
diff --git a/clang/test/utils/update-verify-tests/leave-existing-diags.test b/clang/test/utils/update-verify-tests/leave-existing-diags.test
deleted file mode 100644
index 8a723f157bf8..000000000000
--- a/clang/test/utils/update-verify-tests/leave-existing-diags.test
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: cp %S/Inputs/leave-existing-diags.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests
-# RUN: diff --strip-trailing-cr %S/Inputs/leave-existing-diags.c.expected %t.c
-# RUN: %clang_cc1 -verify %t.c
-
diff --git a/clang/test/utils/update-verify-tests/lit.local.cfg b/clang/test/utils/update-verify-tests/lit.local.cfg
deleted file mode 100644
index b0eebf337da5..000000000000
--- a/clang/test/utils/update-verify-tests/lit.local.cfg
+++ /dev/null
@@ -1,28 +0,0 @@
-import lit.util
-
-# python 2.7 backwards compatibility
-try:
- from shlex import quote as shell_quote
-except ImportError:
- from pipes import quote as shell_quote
-
-if config.standalone_build:
- # These tests require the update-verify-tests.py script from the clang
- # source tree, so skip these tests if we are doing standalone builds.
- config.unsupported = True
-else:
- config.suffixes = [".test"]
-
- script_path = os.path.join(
- config.clang_src_dir, "utils", "update-verify-tests.py"
- )
- python = shell_quote(config.python_executable)
- config.substitutions.append(
- (
- "%update-verify-tests",
- "%s %s" % (python, shell_quote(script_path)),
- )
- )
- # AIX 'diff' command doesn't support --strip-trailing-cr, but the internal
- # python implementation does, so use that for cross platform compatibility
- config.test_format = lit.formats.ShTest()
diff --git a/clang/test/utils/update-verify-tests/multiple-errors.test b/clang/test/utils/update-verify-tests/multiple-errors.test
deleted file mode 100644
index 1fcb6b7f2ca0..000000000000
--- a/clang/test/utils/update-verify-tests/multiple-errors.test
+++ /dev/null
@@ -1,3 +0,0 @@
-# RUN: cp %S/Inputs/multiple-errors.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests
-# RUN: diff --strip-trailing-cr %S/Inputs/multiple-errors.c.expected %t.c
-# RUN: %clang_cc1 -verify %t.c
diff --git a/clang/test/utils/update-verify-tests/multiple-missing-errors-same-line.test b/clang/test/utils/update-verify-tests/multiple-missing-errors-same-line.test
deleted file mode 100644
index 00338d7595cb..000000000000
--- a/clang/test/utils/update-verify-tests/multiple-missing-errors-same-line.test
+++ /dev/null
@@ -1,3 +0,0 @@
-# RUN: cp %S/Inputs/multiple-missing-errors-same-line.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests
-# RUN: diff --strip-trailing-cr %S/Inputs/multiple-missing-errors-same-line.c.expected %t.c
-# RUN: %clang_cc1 -verify %t.c
diff --git a/clang/test/utils/update-verify-tests/no-checks.test b/clang/test/utils/update-verify-tests/no-checks.test
deleted file mode 100644
index 5fdbdcbac952..000000000000
--- a/clang/test/utils/update-verify-tests/no-checks.test
+++ /dev/null
@@ -1,3 +0,0 @@
-# RUN: cp %S/Inputs/no-checks.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests
-# RUN: diff --strip-trailing-cr %S/Inputs/no-checks.c.expected %t.c
-# RUN: %clang_cc1 -verify %t.c
diff --git a/clang/test/utils/update-verify-tests/no-diags.test b/clang/test/utils/update-verify-tests/no-diags.test
deleted file mode 100644
index 825fd0219deb..000000000000
--- a/clang/test/utils/update-verify-tests/no-diags.test
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: cp %S/Inputs/no-diags.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests
-# RUN: diff --strip-trailing-cr %S/Inputs/no-diags.c.expected %t.c
-# RUN: %clang_cc1 -verify %t.c
-
diff --git a/clang/test/utils/update-verify-tests/no-expected-diags.test b/clang/test/utils/update-verify-tests/no-expected-diags.test
deleted file mode 100644
index be475c190da1..000000000000
--- a/clang/test/utils/update-verify-tests/no-expected-diags.test
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: cp %S/Inputs/no-expected-diags.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests
-# RUN: diff --strip-trailing-cr %S/Inputs/no-expected-diags.c.expected %t.c
-# RUN: %clang_cc1 -verify %t.c
-
diff --git a/clang/test/utils/update-verify-tests/non-default-prefix.test b/clang/test/utils/update-verify-tests/non-default-prefix.test
deleted file mode 100644
index 594dba4174d2..000000000000
--- a/clang/test/utils/update-verify-tests/non-default-prefix.test
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: cp %S/Inputs/non-default-prefix.c %t.c && not %clang_cc1 -verify=check %t.c 2>&1 | %update-verify-tests --prefix check
-# RUN: diff --strip-trailing-cr %S/Inputs/non-default-prefix.c.expected %t.c
-# RUN: %clang_cc1 -verify=check %t.c
-
diff --git a/clang/test/utils/update-verify-tests/update-same-line.test b/clang/test/utils/update-verify-tests/update-same-line.test
deleted file mode 100644
index b7e5d7a574ec..000000000000
--- a/clang/test/utils/update-verify-tests/update-same-line.test
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: cp %S/Inputs/update-same-line.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests
-# RUN: diff --strip-trailing-cr %S/Inputs/update-same-line.c.expected %t.c
-# RUN: %clang_cc1 -verify %t.c
-
diff --git a/clang/test/utils/update-verify-tests/update-single-check.test b/clang/test/utils/update-verify-tests/update-single-check.test
deleted file mode 100644
index b958d66b099d..000000000000
--- a/clang/test/utils/update-verify-tests/update-single-check.test
+++ /dev/null
@@ -1,3 +0,0 @@
-# RUN: cp %S/Inputs/update-single-check.c %t.c && not %clang_cc1 -verify %t.c 2>&1 | %update-verify-tests
-# RUN: diff --strip-trailing-cr %S/Inputs/update-single-check.c.expected %t.c
-# RUN: %clang_cc1 -verify %t.c
diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt
index f588a3634ee6..9a3512712a28 100644
--- a/clang/tools/CMakeLists.txt
+++ b/clang/tools/CMakeLists.txt
@@ -19,7 +19,6 @@ endif()
add_clang_subdirectory(c-index-test)
-add_clang_subdirectory(clang-rename)
add_clang_subdirectory(clang-refactor)
# For MinGW we only enable shared library if LLVM_LINK_LLVM_DYLIB=ON.
# Without that option resulting library is too close to 2^16 DLL exports limit.
diff --git a/clang/tools/clang-rename/CMakeLists.txt b/clang/tools/clang-rename/CMakeLists.txt
deleted file mode 100644
index f4c4e520520d..000000000000
--- a/clang/tools/clang-rename/CMakeLists.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-set(LLVM_LINK_COMPONENTS
- Option
- Support
- )
-
-add_clang_tool(clang-rename
- ClangRename.cpp
- )
-
-clang_target_link_libraries(clang-rename
- PRIVATE
- clangBasic
- clangFrontend
- clangRewrite
- clangSerialization
- clangTooling
- clangToolingCore
- clangToolingRefactoring
- )
-
-install(FILES clang-rename.py
- DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
- COMPONENT clang-rename)
-install(FILES clang-rename.el
- DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
- COMPONENT clang-rename)
diff --git a/clang/tools/clang-rename/ClangRename.cpp b/clang/tools/clang-rename/ClangRename.cpp
deleted file mode 100644
index f2ac0c4360e0..000000000000
--- a/clang/tools/clang-rename/ClangRename.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-//===--- tools/extra/clang-rename/ClangRename.cpp - Clang rename tool -----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file implements a clang-rename tool that automatically finds and
-/// renames symbols in C++ code.
-///
-//===----------------------------------------------------------------------===//
-
-#include "clang/Basic/Diagnostic.h"
-#include "clang/Basic/DiagnosticOptions.h"
-#include "clang/Basic/FileManager.h"
-#include "clang/Basic/IdentifierTable.h"
-#include "clang/Basic/LangOptions.h"
-#include "clang/Basic/SourceManager.h"
-#include "clang/Basic/TokenKinds.h"
-#include "clang/Frontend/TextDiagnosticPrinter.h"
-#include "clang/Rewrite/Core/Rewriter.h"
-#include "clang/Tooling/CommonOptionsParser.h"
-#include "clang/Tooling/Refactoring.h"
-#include "clang/Tooling/Refactoring/Rename/RenamingAction.h"
-#include "clang/Tooling/Refactoring/Rename/USRFindingAction.h"
-#include "clang/Tooling/ReplacementsYaml.h"
-#include "clang/Tooling/Tooling.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/YAMLTraits.h"
-#include "llvm/Support/raw_ostream.h"
-#include <string>
-#include <system_error>
-
-using namespace llvm;
-using namespace clang;
-
-/// An oldname -> newname rename.
-struct RenameAllInfo {
- unsigned Offset = 0;
- std::string QualifiedName;
- std::string NewName;
-};
-
-LLVM_YAML_IS_SEQUENCE_VECTOR(RenameAllInfo)
-
-namespace llvm {
-namespace yaml {
-
-/// Specialized MappingTraits to describe how a RenameAllInfo is
-/// (de)serialized.
-template <> struct MappingTraits<RenameAllInfo> {
- static void mapping(IO &IO, RenameAllInfo &Info) {
- IO.mapOptional("Offset", Info.Offset);
- IO.mapOptional("QualifiedName", Info.QualifiedName);
- IO.mapRequired("NewName", Info.NewName);
- }
-};
-
-} // end namespace yaml
-} // end namespace llvm
-
-static cl::OptionCategory ClangRenameOptions("clang-rename common options");
-
-static cl::list<unsigned> SymbolOffsets(
- "offset",
- cl::desc("Locates the symbol by offset as opposed to <line>:<column>."),
- cl::cat(ClangRenameOptions));
-static cl::opt<bool> Inplace("i", cl::desc("Overwrite edited <file>s."),
- cl::cat(ClangRenameOptions));
-static cl::list<std::string>
- QualifiedNames("qualified-name",
- cl::desc("The fully qualified name of the symbol."),
- cl::cat(ClangRenameOptions));
-
-static cl::list<std::string>
- NewNames("new-name", cl::desc("The new name to change the symbol to."),
- cl::cat(ClangRenameOptions));
-static cl::opt<bool> PrintName(
- "pn",
- cl::desc("Print the found symbol's name prior to renaming to stderr."),
- cl::cat(ClangRenameOptions));
-static cl::opt<bool> PrintLocations(
- "pl", cl::desc("Print the locations affected by renaming to stderr."),
- cl::cat(ClangRenameOptions));
-static cl::opt<std::string>
- ExportFixes("export-fixes",
- cl::desc("YAML file to store suggested fixes in."),
- cl::value_desc("filename"), cl::cat(ClangRenameOptions));
-static cl::opt<std::string>
- Input("input", cl::desc("YAML file to load oldname-newname pairs from."),
- cl::Optional, cl::cat(ClangRenameOptions));
-static cl::opt<bool> Force("force",
- cl::desc("Ignore nonexistent qualified names."),
- cl::cat(ClangRenameOptions));
-
-int main(int argc, const char **argv) {
- auto ExpectedParser =
- tooling::CommonOptionsParser::create(argc, argv, ClangRenameOptions);
- if (!ExpectedParser) {
- llvm::errs() << ExpectedParser.takeError();
- return 1;
- }
- tooling::CommonOptionsParser &OP = ExpectedParser.get();
-
- if (!Input.empty()) {
- // Populate QualifiedNames and NewNames from a YAML file.
- ErrorOr<std::unique_ptr<MemoryBuffer>> Buffer =
- llvm::MemoryBuffer::getFile(Input);
- if (!Buffer) {
- errs() << "clang-rename: failed to read " << Input << ": "
- << Buffer.getError().message() << "\n";
- return 1;
- }
-
- std::vector<RenameAllInfo> Infos;
- llvm::yaml::Input YAML(Buffer.get()->getBuffer());
- YAML >> Infos;
- for (const auto &Info : Infos) {
- if (!Info.QualifiedName.empty())
- QualifiedNames.push_back(Info.QualifiedName);
- else
- SymbolOffsets.push_back(Info.Offset);
- NewNames.push_back(Info.NewName);
- }
- }
-
- // Check the arguments for correctness.
- if (NewNames.empty()) {
- errs() << "clang-rename: -new-name must be specified.\n\n";
- return 1;
- }
-
- if (SymbolOffsets.empty() == QualifiedNames.empty()) {
- errs() << "clang-rename: -offset and -qualified-name can't be present at "
- "the same time.\n";
- return 1;
- }
-
- // Check if NewNames is a valid identifier in C++17.
- LangOptions Options;
- Options.CPlusPlus = true;
- Options.CPlusPlus17 = true;
- IdentifierTable Table(Options);
- for (const auto &NewName : NewNames) {
- auto NewNameTokKind = Table.get(NewName).getTokenID();
- if (!tok::isAnyIdentifier(NewNameTokKind)) {
- errs() << "ERROR: new name is not a valid identifier in C++17.\n\n";
- return 1;
- }
- }
-
- if (SymbolOffsets.size() + QualifiedNames.size() != NewNames.size()) {
- errs() << "clang-rename: number of symbol offsets(" << SymbolOffsets.size()
- << ") + number of qualified names (" << QualifiedNames.size()
- << ") must be equal to number of new names(" << NewNames.size()
- << ").\n\n";
- cl::PrintHelpMessage();
- return 1;
- }
-
- auto Files = OP.getSourcePathList();
- tooling::RefactoringTool Tool(OP.getCompilations(), Files);
- tooling::USRFindingAction FindingAction(SymbolOffsets, QualifiedNames, Force);
- Tool.run(tooling::newFrontendActionFactory(&FindingAction).get());
- const std::vector<std::vector<std::string>> &USRList =
- FindingAction.getUSRList();
- const std::vector<std::string> &PrevNames = FindingAction.getUSRSpellings();
- if (PrintName) {
- for (const auto &PrevName : PrevNames) {
- outs() << "clang-rename found name: " << PrevName << '\n';
- }
- }
-
- if (FindingAction.errorOccurred()) {
- // Diagnostics are already issued at this point.
- return 1;
- }
-
- // Perform the renaming.
- tooling::RenamingAction RenameAction(NewNames, PrevNames, USRList,
- Tool.getReplacements(), PrintLocations);
- std::unique_ptr<tooling::FrontendActionFactory> Factory =
- tooling::newFrontendActionFactory(&RenameAction);
- int ExitCode;
-
- if (Inplace) {
- ExitCode = Tool.runAndSave(Factory.get());
- } else {
- ExitCode = Tool.run(Factory.get());
-
- if (!ExportFixes.empty()) {
- std::error_code EC;
- llvm::raw_fd_ostream OS(ExportFixes, EC, llvm::sys::fs::OF_None);
- if (EC) {
- llvm::errs() << "Error opening output file: " << EC.message() << '\n';
- return 1;
- }
-
- // Export replacements.
- tooling::TranslationUnitReplacements TUR;
- const auto &FileToReplacements = Tool.getReplacements();
- for (const auto &Entry : FileToReplacements)
- TUR.Replacements.insert(TUR.Replacements.end(), Entry.second.begin(),
- Entry.second.end());
-
- yaml::Output YAML(OS);
- YAML << TUR;
- OS.close();
- return 0;
- }
-
- // Write every file to stdout. Right now we just barf the files without any
- // indication of which files start where, other than that we print the files
- // in the same order we see them.
- LangOptions DefaultLangOptions;
- IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
- TextDiagnosticPrinter DiagnosticPrinter(errs(), &*DiagOpts);
- DiagnosticsEngine Diagnostics(
- IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
- &DiagnosticPrinter, false);
- auto &FileMgr = Tool.getFiles();
- SourceManager Sources(Diagnostics, FileMgr);
- Rewriter Rewrite(Sources, DefaultLangOptions);
-
- Tool.applyAllReplacements(Rewrite);
- for (const auto &File : Files) {
- auto Entry = FileMgr.getOptionalFileRef(File);
- if (!Entry) {
- errs() << "clang-rename: " << File << " does not exist.\n";
- return 1;
- }
- const auto ID = Sources.getOrCreateFileID(*Entry, SrcMgr::C_User);
- Rewrite.getEditBuffer(ID).write(outs());
- }
- }
-
- return ExitCode;
-}
diff --git a/clang/tools/clang-rename/clang-rename.el b/clang/tools/clang-rename/clang-rename.el
deleted file mode 100644
index 3f47c11e2c75..000000000000
--- a/clang/tools/clang-rename/clang-rename.el
+++ /dev/null
@@ -1,80 +0,0 @@
-;;; clang-rename.el --- Renames every occurrence of a symbol found at <offset>. -*- lexical-binding: t; -*-
-
-;; Version: 0.1.0
-;; Keywords: tools, c
-
-;;; Commentary:
-
-;; To install clang-rename.el make sure the directory of this file is in your
-;; `load-path' and add
-;;
-;; (require 'clang-rename)
-;;
-;; to your .emacs configuration.
-
-;;; Code:
-
-(defgroup clang-rename nil
- "Integration with clang-rename"
- :group 'c)
-
-(defcustom clang-rename-binary "clang-rename"
- "Path to clang-rename executable."
- :type '(file :must-match t)
- :group 'clang-rename)
-
-;;;###autoload
-(defun clang-rename (new-name)
- "Rename all instances of the symbol at point to NEW-NAME using clang-rename."
- (interactive "sEnter a new name: ")
- (save-some-buffers :all)
- ;; clang-rename should not be combined with other operations when undoing.
- (undo-boundary)
- (let ((output-buffer (get-buffer-create "*clang-rename*")))
- (with-current-buffer output-buffer (erase-buffer))
- (let ((exit-code (call-process
- clang-rename-binary nil output-buffer nil
- (format "-offset=%d"
- ;; clang-rename wants file (byte) offsets, not
- ;; buffer (character) positions.
- (clang-rename--bufferpos-to-filepos
- ;; Emacs treats one character after a symbol as
- ;; part of the symbol, but clang-rename doesn’t.
- ;; Use the beginning of the current symbol, if
- ;; available, to resolve the inconsistency.
- (or (car (bounds-of-thing-at-point 'symbol))
- (point))
- 'exact))
- (format "-new-name=%s" new-name)
- "-i" (buffer-file-name))))
- (if (and (integerp exit-code) (zerop exit-code))
- ;; Success; revert current buffer so it gets the modifications.
- (progn
- (kill-buffer output-buffer)
- (revert-buffer :ignore-auto :noconfirm :preserve-modes))
- ;; Failure; append exit code to output buffer and display it.
- (let ((message (clang-rename--format-message
- "clang-rename failed with %s %s"
- (if (integerp exit-code) "exit status" "signal")
- exit-code)))
- (with-current-buffer output-buffer
- (insert ?\n message ?\n))
- (message "%s" message)
- (display-buffer output-buffer))))))
-
-(defalias 'clang-rename--bufferpos-to-filepos
- (if (fboundp 'bufferpos-to-filepos)
- 'bufferpos-to-filepos
- ;; Emacs 24 doesn’t have ‘bufferpos-to-filepos’, simulate it using
- ;; ‘position-bytes’.
- (lambda (position &optional _quality _coding-system)
- (1- (position-bytes position)))))
-
-;; ‘format-message’ is new in Emacs 25.1. Provide a fallback for older
-;; versions.
-(defalias 'clang-rename--format-message
- (if (fboundp 'format-message) 'format-message 'format))
-
-(provide 'clang-rename)
-
-;;; clang-rename.el ends here
diff --git a/clang/tools/clang-rename/clang-rename.py b/clang/tools/clang-rename/clang-rename.py
deleted file mode 100644
index 1cbabaf859a5..000000000000
--- a/clang/tools/clang-rename/clang-rename.py
+++ /dev/null
@@ -1,70 +0,0 @@
-"""
-Minimal clang-rename integration with Vim.
-
-Before installing make sure one of the following is satisfied:
-
-* clang-rename is in your PATH
-* `g:clang_rename_path` in ~/.vimrc points to valid clang-rename executable
-* `binary` in clang-rename.py points to valid to clang-rename executable
-
-To install, simply put this into your ~/.vimrc for python2 support
-
- noremap <leader>cr :pyf <path-to>/clang-rename.py<cr>
-
-For python3 use the following command (note the change from :pyf to :py3f)
-
- noremap <leader>cr :py3f <path-to>/clang-rename.py<cr>
-
-IMPORTANT NOTE: Before running the tool, make sure you saved the file.
-
-All you have to do now is to place a cursor on a variable/function/class which
-you would like to rename and press '<leader>cr'. You will be prompted for a new
-name if the cursor points to a valid symbol.
-"""
-
-from __future__ import absolute_import, division, print_function
-import vim
-import subprocess
-import sys
-
-
-def main():
- binary = "clang-rename"
- if vim.eval('exists("g:clang_rename_path")') == "1":
- binary = vim.eval("g:clang_rename_path")
-
- # Get arguments for clang-rename binary.
- offset = int(vim.eval('line2byte(line("."))+col(".")')) - 2
- if offset < 0:
- print(
- "Couldn't determine cursor position. Is your file empty?", file=sys.stderr
- )
- return
- filename = vim.current.buffer.name
-
- new_name_request_message = "type new name:"
- new_name = vim.eval("input('{}\n')".format(new_name_request_message))
-
- # Call clang-rename.
- command = [
- binary,
- filename,
- "-i",
- "-offset",
- str(offset),
- "-new-name",
- str(new_name),
- ]
- # FIXME: make it possible to run the tool on unsaved file.
- p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = p.communicate()
-
- if stderr:
- print(stderr)
-
- # Reload all buffers in Vim.
- vim.command("checktime")
-
-
-if __name__ == "__main__":
- main()
diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp
index 746901939171..691d74f697f2 100644
--- a/clang/unittests/Basic/DiagnosticTest.cpp
+++ b/clang/unittests/Basic/DiagnosticTest.cpp
@@ -17,9 +17,6 @@ using namespace llvm;
using namespace clang;
void clang::DiagnosticsTestHelper(DiagnosticsEngine &diag) {
- unsigned delayedDiagID = 0U;
-
- EXPECT_EQ(diag.DelayedDiagID, delayedDiagID);
EXPECT_FALSE(diag.DiagStates.empty());
EXPECT_TRUE(diag.DiagStatesByLoc.empty());
EXPECT_TRUE(diag.DiagStateOnPushStack.empty());
@@ -83,6 +80,21 @@ TEST(DiagnosticTest, fatalsAsError) {
}
}
+TEST(DiagnosticTest, tooManyErrorsIsAlwaysFatal) {
+ DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions,
+ new IgnoringDiagConsumer());
+ Diags.setFatalsAsError(true);
+
+ // Report a fatal_too_many_errors diagnostic to ensure that still
+ // acts as a fatal error despite downgrading fatal errors to errors.
+ Diags.Report(diag::fatal_too_many_errors);
+ EXPECT_TRUE(Diags.hasFatalErrorOccurred());
+
+ // Ensure that the severity of that diagnostic is really "fatal".
+ EXPECT_EQ(Diags.getDiagnosticLevel(diag::fatal_too_many_errors, {}),
+ DiagnosticsEngine::Level::Fatal);
+}
+
// Check that soft RESET works as intended
TEST(DiagnosticTest, softReset) {
DiagnosticsEngine Diags(new DiagnosticIDs(), new DiagnosticOptions,
@@ -104,7 +116,6 @@ TEST(DiagnosticTest, softReset) {
// Check for private variables of DiagnosticsEngine differentiating soft reset
DiagnosticsTestHelper(Diags);
- EXPECT_FALSE(Diags.isDiagnosticInFlight());
EXPECT_TRUE(Diags.isLastDiagnosticIgnored());
}
diff --git a/clang/unittests/CMakeLists.txt b/clang/unittests/CMakeLists.txt
index e43ee7bfa88a..85d265426ec8 100644
--- a/clang/unittests/CMakeLists.txt
+++ b/clang/unittests/CMakeLists.txt
@@ -48,7 +48,6 @@ if(NOT WIN32 AND CLANG_TOOL_LIBCLANG_BUILD)
add_subdirectory(libclang)
endif()
add_subdirectory(DirectoryWatcher)
-add_subdirectory(Rename)
add_subdirectory(Index)
add_subdirectory(InstallAPI)
add_subdirectory(Serialization)
diff --git a/clang/unittests/Driver/DXCModeTest.cpp b/clang/unittests/Driver/DXCModeTest.cpp
index 41ab30bc81d5..2a079a62f1bc 100644
--- a/clang/unittests/Driver/DXCModeTest.cpp
+++ b/clang/unittests/Driver/DXCModeTest.cpp
@@ -51,7 +51,6 @@ static void validateTargetProfile(
EXPECT_TRUE(C);
EXPECT_EQ(Diags.getNumErrors(), NumOfErrors);
EXPECT_STREQ(DiagConsumer->Errors.back().c_str(), ExpectError.data());
- Diags.Clear();
DiagConsumer->clear();
}
@@ -160,7 +159,6 @@ TEST(DxcModeTest, ValidatorVersionValidation) {
DiagConsumer->Errors.back().c_str(),
"invalid validator version : 0.1; if validator major version is 0, "
"minor version must also be 0");
- Diags.Clear();
DiagConsumer->clear();
Args = TheDriver.ParseArgStrings({"-validator-version", "1"}, false,
@@ -176,7 +174,6 @@ TEST(DxcModeTest, ValidatorVersionValidation) {
EXPECT_STREQ(DiagConsumer->Errors.back().c_str(),
"invalid validator version : 1; format of validator version is "
"\"<major>.<minor>\" (ex:\"1.4\")");
- Diags.Clear();
DiagConsumer->clear();
Args = TheDriver.ParseArgStrings({"-validator-version", "-Tlib_6_7"}, false,
@@ -193,7 +190,6 @@ TEST(DxcModeTest, ValidatorVersionValidation) {
DiagConsumer->Errors.back().c_str(),
"invalid validator version : -Tlib_6_7; format of validator version is "
"\"<major>.<minor>\" (ex:\"1.4\")");
- Diags.Clear();
DiagConsumer->clear();
Args = TheDriver.ParseArgStrings({"-validator-version", "foo"}, false,
@@ -210,7 +206,6 @@ TEST(DxcModeTest, ValidatorVersionValidation) {
DiagConsumer->Errors.back().c_str(),
"invalid validator version : foo; format of validator version is "
"\"<major>.<minor>\" (ex:\"1.4\")");
- Diags.Clear();
DiagConsumer->clear();
}
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 5ebf0d7068dd..53aa93a7a4fb 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -17282,6 +17282,12 @@ TEST_F(FormatTest, ConfigurableSpacesInParens) {
Spaces.SpacesInParens = FormatStyle::SIPO_Custom;
Spaces.SpacesInParensOptions = {};
Spaces.SpacesInParensOptions.Other = true;
+
+ EXPECT_FALSE(Spaces.SpacesInParensOptions.InConditionalStatements);
+ verifyFormat("if (a)\n"
+ " return;",
+ Spaces);
+
Spaces.SpacesInParensOptions.InConditionalStatements = true;
verifyFormat("do_something( ::globalVar );", Spaces);
verifyFormat("call( x, y, z );", Spaces);
@@ -27577,6 +27583,12 @@ TEST_F(FormatTest, InsertNewlineAtEOF) {
verifyNoChange("int i;\n", Style);
verifyFormat("int i;\n", "int i;", Style);
+
+ constexpr StringRef Code{"namespace {\n"
+ "int i;\n"
+ "} // namespace"};
+ verifyFormat(Code.str() + '\n', Code, Style,
+ {tooling::Range(19, 13)}); // line 3
}
TEST_F(FormatTest, KeepEmptyLinesAtEOF) {
diff --git a/clang/unittests/Format/FormatTestBase.h b/clang/unittests/Format/FormatTestBase.h
index 33110ca5d9ed..9d9472964fd3 100644
--- a/clang/unittests/Format/FormatTestBase.h
+++ b/clang/unittests/Format/FormatTestBase.h
@@ -61,23 +61,23 @@ protected:
return *Result;
}
- FormatStyle getStyleWithColumns(FormatStyle Style, unsigned ColumnLimit) {
+ FormatStyle getStyleWithColumns(FormatStyle Style,
+ unsigned ColumnLimit) const {
Style.ColumnLimit = ColumnLimit;
return Style;
}
- FormatStyle getLLVMStyleWithColumns(unsigned ColumnLimit) {
+ FormatStyle getLLVMStyleWithColumns(unsigned ColumnLimit) const {
return getStyleWithColumns(getLLVMStyle(), ColumnLimit);
}
- FormatStyle getGoogleStyleWithColumns(unsigned ColumnLimit) {
+ FormatStyle getGoogleStyleWithColumns(unsigned ColumnLimit) const {
return getStyleWithColumns(getGoogleStyle(), ColumnLimit);
}
- FormatStyle getTextProtoStyleWithColumns(unsigned ColumnLimit) {
- FormatStyle Style = getGoogleStyle(FormatStyle::FormatStyle::LK_TextProto);
- Style.ColumnLimit = ColumnLimit;
- return Style;
+ FormatStyle getTextProtoStyleWithColumns(unsigned ColumnLimit) const {
+ return getStyleWithColumns(getGoogleStyle(FormatStyle::LK_TextProto),
+ ColumnLimit);
}
bool _verifyFormat(const char *File, int Line, StringRef Expected,
diff --git a/clang/unittests/Format/FormatTestProto.cpp b/clang/unittests/Format/FormatTestProto.cpp
index 5adb532ae4a4..30ce57c545ec 100644
--- a/clang/unittests/Format/FormatTestProto.cpp
+++ b/clang/unittests/Format/FormatTestProto.cpp
@@ -516,8 +516,6 @@ TEST_F(FormatTestProto, AcceptsOperatorAsKeyInOptions) {
}
TEST_F(FormatTestProto, BreaksEntriesOfSubmessagesContainingSubmessages) {
- FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto);
- Style.ColumnLimit = 60;
// The column limit allows for the keys submessage to be put on 1 line, but we
// break it since it contains a submessage an another entry.
verifyFormat("option (MyProto.options) = {\n"
diff --git a/clang/unittests/Format/FormatTestTextProto.cpp b/clang/unittests/Format/FormatTestTextProto.cpp
index 23f46202a346..fd65c9a58db5 100644
--- a/clang/unittests/Format/FormatTestTextProto.cpp
+++ b/clang/unittests/Format/FormatTestTextProto.cpp
@@ -18,9 +18,7 @@ namespace {
class FormatTestTextProto : public FormatTestBase {
protected:
virtual FormatStyle getDefaultStyle() const override {
- FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto);
- Style.ColumnLimit = 60; // To make writing tests easier.
- return Style;
+ return getTextProtoStyleWithColumns(60);
}
};
@@ -126,7 +124,8 @@ TEST_F(FormatTestTextProto, ImplicitStringLiteralConcatenation) {
" 'bbbbb'");
verifyFormat("field_a: \"aaaaa\"\n"
" \"bbbbb\"");
- FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto);
+
+ auto Style = getDefaultStyle();
Style.AlwaysBreakBeforeMultilineStrings = true;
verifyFormat("field_a:\n"
" 'aaaaa'\n"
@@ -359,46 +358,40 @@ TEST_F(FormatTestTextProto, KeepsCommentsIndentedInList) {
}
TEST_F(FormatTestTextProto, UnderstandsHashComments) {
- FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto);
- Style.ColumnLimit = 60; // To make writing tests easier.
- EXPECT_EQ("aaa: 100\n"
- "## this is a double-hash comment.\n"
- "bb: 100\n"
- "## another double-hash comment.\n"
- "### a triple-hash comment\n"
- "cc: 200\n"
- "### another triple-hash comment\n"
- "#### a quadriple-hash comment\n"
- "dd: 100\n"
- "#### another quadriple-hash comment",
- format("aaa: 100\n"
- "##this is a double-hash comment.\n"
- "bb: 100\n"
- "## another double-hash comment.\n"
- "###a triple-hash comment\n"
- "cc: 200\n"
- "### another triple-hash comment\n"
- "####a quadriple-hash comment\n"
- "dd: 100\n"
- "#### another quadriple-hash comment",
- Style));
+ auto Style = getDefaultStyle();
+
+ verifyFormat("aaa: 100\n"
+ "## this is a double-hash comment.\n"
+ "bb: 100\n"
+ "## another double-hash comment.\n"
+ "### a triple-hash comment\n"
+ "cc: 200\n"
+ "### another triple-hash comment\n"
+ "#### a quadriple-hash comment\n"
+ "dd: 100\n"
+ "#### another quadriple-hash comment",
+ "aaa: 100\n"
+ "##this is a double-hash comment.\n"
+ "bb: 100\n"
+ "## another double-hash comment.\n"
+ "###a triple-hash comment\n"
+ "cc: 200\n"
+ "### another triple-hash comment\n"
+ "####a quadriple-hash comment\n"
+ "dd: 100\n"
+ "#### another quadriple-hash comment",
+ Style);
// Ensure we support a common pattern for naming sections.
- EXPECT_EQ("##############\n"
- "# section name\n"
- "##############",
- format("##############\n"
- "# section name\n"
- "##############",
- Style));
-
- EXPECT_EQ("///////////////\n"
- "// section name\n"
- "///////////////",
- format("///////////////\n"
- "// section name\n"
- "///////////////",
- Style));
+ verifyFormat("##############\n"
+ "# section name\n"
+ "##############",
+ Style);
+
+ verifyFormat("///////////////\n"
+ "// section name\n"
+ "///////////////",
+ Style);
}
TEST_F(FormatTestTextProto, FormatsExtensions) {
@@ -519,8 +512,8 @@ TEST_F(FormatTestTextProto, FormatsRepeatedListInitializers) {
" ]\n"
"}\n"
"key: value");
- FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto);
- Style.ColumnLimit = 60; // To make writing tests easier.
+
+ auto Style = getDefaultStyle();
Style.Cpp11BracedListStyle = true;
verifyFormat("keys: [1]", Style);
}
@@ -544,7 +537,6 @@ TEST_F(FormatTestTextProto, BreaksConsecutiveStringLiterals) {
}
TEST_F(FormatTestTextProto, PutsMultipleEntriesInExtensionsOnNewlines) {
- FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto);
verifyFormat("pppppppppp: {\n"
" ssssss: \"http://example.com/blahblahblah\"\n"
" ppppppp: \"sssss/MMMMMMMMMMMM\"\n"
@@ -556,12 +548,10 @@ TEST_F(FormatTestTextProto, PutsMultipleEntriesInExtensionsOnNewlines) {
" key: value\n"
" }\n"
"}",
- Style);
+ getGoogleStyle(FormatStyle::LK_TextProto));
}
TEST_F(FormatTestTextProto, BreaksAfterBraceFollowedByClosingBraceOnNextLine) {
- FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto);
- Style.ColumnLimit = 60;
verifyFormat("keys: [\n"
" data: { item: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' }\n"
"]");
@@ -571,10 +561,6 @@ TEST_F(FormatTestTextProto, BreaksAfterBraceFollowedByClosingBraceOnNextLine) {
}
TEST_F(FormatTestTextProto, BreaksEntriesOfSubmessagesContainingSubmessages) {
- FormatStyle Style = getGoogleStyle(FormatStyle::LK_TextProto);
- Style.ColumnLimit = 60;
- // The column limit allows for the keys submessage to be put on 1 line, but we
- // break it since it contains a submessage an another entry.
verifyFormat("key: valueeeeeeee\n"
"keys: {\n"
" item: 'aaaaaaaaaaaaaaaa'\n"
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index baa5ab0ac5e4..34c03d668a9a 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -781,6 +781,14 @@ TEST_F(TokenAnnotatorTest, UnderstandsCasts) {
EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_Unknown);
EXPECT_TOKEN(Tokens[10], tok::minus, TT_BinaryOperator);
+ Tokens = annotate("return (::Type)(1 + 2);");
+ ASSERT_EQ(Tokens.size(), 12u) << Tokens;
+ EXPECT_TOKEN(Tokens[4], tok::r_paren, TT_CastRParen);
+
+ Tokens = annotate("return (Namespace::Class)(1 + 2);");
+ ASSERT_EQ(Tokens.size(), 13u) << Tokens;
+ EXPECT_TOKEN(Tokens[5], tok::r_paren, TT_CastRParen);
+
auto Style = getLLVMStyle();
Style.TypeNames.push_back("Foo");
Tokens = annotate("#define FOO(bar) foo((Foo)&bar)", Style);
diff --git a/clang/unittests/Rename/CMakeLists.txt b/clang/unittests/Rename/CMakeLists.txt
deleted file mode 100644
index 6ec0c521551c..000000000000
--- a/clang/unittests/Rename/CMakeLists.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-set(LLVM_LINK_COMPONENTS
- FrontendOpenMP
- support
- )
-
-# We'd like clang/unittests/Tooling/RewriterTestContext.h in the test.
-include_directories(${CLANG_SOURCE_DIR})
-
-add_clang_unittest(ClangRenameTests
- RenameClassTest.cpp
- RenameEnumTest.cpp
- RenameAliasTest.cpp
- RenameMemberTest.cpp
- RenameFunctionTest.cpp
- )
-
-clang_target_link_libraries(ClangRenameTests
- PRIVATE
- clangAST
- clangASTMatchers
- clangBasic
- clangFormat
- clangFrontend
- clangRewrite
- clangSerialization
- clangTooling
- clangToolingCore
- clangToolingRefactoring
- )
diff --git a/clang/unittests/Rename/ClangRenameTest.h b/clang/unittests/Rename/ClangRenameTest.h
deleted file mode 100644
index 64033657b579..000000000000
--- a/clang/unittests/Rename/ClangRenameTest.h
+++ /dev/null
@@ -1,116 +0,0 @@
-//===-- ClangRenameTests.cpp - clang-rename unit tests --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_UNITTESTS_RENAME_CLANGRENAMETEST_H
-#define LLVM_CLANG_UNITTESTS_RENAME_CLANGRENAMETEST_H
-
-#include "unittests/Tooling/RewriterTestContext.h"
-#include "clang/ASTMatchers/ASTMatchFinder.h"
-#include "clang/Basic/FileManager.h"
-#include "clang/Basic/FileSystemOptions.h"
-#include "clang/Format/Format.h"
-#include "clang/Frontend/CompilerInstance.h"
-#include "clang/Frontend/PCHContainerOperations.h"
-#include "clang/Tooling/Refactoring.h"
-#include "clang/Tooling/Refactoring/Rename/RenamingAction.h"
-#include "clang/Tooling/Refactoring/Rename/USRFindingAction.h"
-#include "clang/Tooling/Tooling.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/VirtualFileSystem.h"
-#include "gtest/gtest.h"
-#include <memory>
-#include <string>
-#include <vector>
-
-namespace clang {
-namespace clang_rename {
-namespace test {
-
-struct Case {
- std::string Before;
- std::string After;
- std::string OldName;
- std::string NewName;
-};
-
-class ClangRenameTest : public testing::Test,
- public testing::WithParamInterface<Case> {
-protected:
- void AppendToHeader(StringRef Code) { HeaderContent += Code.str(); }
-
- std::string runClangRenameOnCode(llvm::StringRef Code,
- llvm::StringRef OldName,
- llvm::StringRef NewName) {
- std::string NewCode;
- llvm::raw_string_ostream(NewCode) << llvm::format(
- "#include \"%s\"\n%s", HeaderName.c_str(), Code.str().c_str());
- tooling::FileContentMappings FileContents = {{HeaderName, HeaderContent},
- {CCName, NewCode}};
- clang::RewriterTestContext Context;
- Context.createInMemoryFile(HeaderName, HeaderContent);
- clang::FileID InputFileID = Context.createInMemoryFile(CCName, NewCode);
-
- tooling::USRFindingAction FindingAction({}, {std::string(OldName)}, false);
- std::unique_ptr<tooling::FrontendActionFactory> USRFindingActionFactory =
- tooling::newFrontendActionFactory(&FindingAction);
-
- if (!tooling::runToolOnCodeWithArgs(
- USRFindingActionFactory->create(), NewCode, {"-std=c++11"}, CCName,
- "clang-rename", std::make_shared<PCHContainerOperations>(),
- FileContents))
- return "";
-
- const std::vector<std::vector<std::string>> &USRList =
- FindingAction.getUSRList();
- std::vector<std::string> NewNames = {std::string(NewName)};
- std::map<std::string, tooling::Replacements> FileToReplacements;
- tooling::QualifiedRenamingAction RenameAction(NewNames, USRList,
- FileToReplacements);
- auto RenameActionFactory = tooling::newFrontendActionFactory(&RenameAction);
- if (!tooling::runToolOnCodeWithArgs(
- RenameActionFactory->create(), NewCode, {"-std=c++11"}, CCName,
- "clang-rename", std::make_shared<PCHContainerOperations>(),
- FileContents))
- return "";
-
- formatAndApplyAllReplacements(FileToReplacements, Context.Rewrite, "llvm");
- return Context.getRewrittenText(InputFileID);
- }
-
- void CompareSnippets(StringRef Expected, StringRef Actual) {
- std::string ExpectedCode;
- llvm::raw_string_ostream(ExpectedCode) << llvm::format(
- "#include \"%s\"\n%s", HeaderName.c_str(), Expected.str().c_str());
- EXPECT_EQ(format(ExpectedCode), format(Actual));
- }
-
- std::string format(llvm::StringRef Code) {
- tooling::Replacements Replaces = format::reformat(
- format::getLLVMStyle(), Code, {tooling::Range(0, Code.size())});
- auto ChangedCode = tooling::applyAllReplacements(Code, Replaces);
- EXPECT_TRUE(static_cast<bool>(ChangedCode));
- if (!ChangedCode) {
- llvm::errs() << llvm::toString(ChangedCode.takeError());
- return "";
- }
- return *ChangedCode;
- }
-
- std::string HeaderContent;
- std::string HeaderName = "header.h";
- std::string CCName = "input.cc";
-};
-
-} // namespace test
-} // namespace clang_rename
-} // namesdpace clang
-
-#endif
diff --git a/clang/unittests/Rename/RenameAliasTest.cpp b/clang/unittests/Rename/RenameAliasTest.cpp
deleted file mode 100644
index 50fa2c104263..000000000000
--- a/clang/unittests/Rename/RenameAliasTest.cpp
+++ /dev/null
@@ -1,303 +0,0 @@
-//===-- RenameAliasTest.cpp - unit tests for renaming alias ---------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "ClangRenameTest.h"
-
-namespace clang {
-namespace clang_rename {
-namespace test {
-namespace {
-
-class RenameAliasTest : public ClangRenameTest {
-public:
- RenameAliasTest() {
- AppendToHeader(R"(
- #define MACRO(x) x
- namespace some_ns {
- class A {
- public:
- void foo() {}
- struct Nested {
- enum NestedEnum {
- E1, E2,
- };
- };
- };
- } // namespace some_ns
- namespace a {
- typedef some_ns::A TA;
- using UA = some_ns::A;
- } // namespace a
- namespace b {
- typedef some_ns::A TA;
- using UA = some_ns::A;
- }
- template <typename T> class ptr {};
- template <typename T>
-
- using TPtr = ptr<int>;
- )");
- }
-};
-
-INSTANTIATE_TEST_SUITE_P(
- RenameAliasTests, RenameAliasTest,
- testing::ValuesIn(std::vector<Case>({
- // basic functions
- {"void f(a::TA a1) {}", "void f(b::TB a1) {}", "a::TA", "b::TB"},
- {"void f(a::UA a1) {}", "void f(b::UB a1) {}", "a::UA", "b::UB"},
- {"void f(a::TA* a1) {}", "void f(b::TB* a1) {}", "a::TA", "b::TB"},
- {"void f(a::TA** a1) {}", "void f(b::TB** a1) {}", "a::TA", "b::TB"},
- {"a::TA f() { return a::TA(); }", "b::TB f() { return b::TB(); }",
- "a::TA", "b::TB"},
- {"a::TA f() { return a::UA(); }", "b::TB f() { return a::UA(); }",
- "a::TA", "b::TB"},
- {"a::TA f() { return a::UA(); }", "a::TA f() { return b::UB(); }",
- "a::UA", "b::UB"},
- {"void f() { a::TA a; }", "void f() { b::TB a; }", "a::TA", "b::TB"},
- {"void f(const a::TA& a1) {}", "void f(const b::TB& a1) {}", "a::TA",
- "b::TB"},
- {"void f(const a::UA& a1) {}", "void f(const b::UB& a1) {}", "a::UA",
- "b::UB"},
- {"void f(const a::TA* a1) {}", "void f(const b::TB* a1) {}", "a::TA",
- "b::TB"},
- {"namespace a { void f(TA a1) {} }",
- "namespace a { void f(b::TB a1) {} }", "a::TA", "b::TB"},
- {"void f(MACRO(a::TA) a1) {}", "void f(MACRO(b::TB) a1) {}", "a::TA",
- "b::TB"},
- {"void f(MACRO(a::TA a1)) {}", "void f(MACRO(b::TB a1)) {}", "a::TA",
- "b::TB"},
-
- // shorten/add namespace.
- {"namespace b { void f(a::UA a1) {} }",
- "namespace b {void f(UB a1) {} }", "a::UA", "b::UB"},
- {"namespace a { void f(UA a1) {} }",
- "namespace a {void f(b::UB a1) {} }", "a::UA", "b::UB"},
-
- // use namespace and typedefs
- {"struct S { using T = a::TA; T a_; };",
- "struct S { using T = b::TB; T a_; };", "a::TA", "b::TB"},
- {"using T = a::TA; T gA;", "using T = b::TB; T gA;", "a::TA", "b::TB"},
- {"using T = a::UA; T gA;", "using T = b::UB; T gA;", "a::UA", "b::UB"},
- {"typedef a::TA T; T gA;", "typedef b::TB T; T gA;", "a::TA", "b::TB"},
- {"typedef a::UA T; T gA;", "typedef b::UB T; T gA;", "a::UA", "b::UB"},
- {"typedef MACRO(a::TA) T; T gA;", "typedef MACRO(b::TB) T; T gA;",
- "a::TA", "b::TB"},
-
- // types in using shadows.
- {"using a::TA; TA gA;", "using b::TB; b::TB gA;", "a::TA", "b::TB"},
- {"using a::UA; UA gA;", "using b::UB; b::UB gA;", "a::UA", "b::UB"},
-
- // struct members and other oddities
- {"struct S : public a::TA {};", "struct S : public b::TB {};", "a::TA",
- "b::TB"},
- {"struct S : public a::UA {};", "struct S : public b::UB {};", "a::UA",
- "b::UB"},
- {"struct F { void f(a::TA a1) {} };",
- "struct F { void f(b::TB a1) {} };", "a::TA", "b::TB"},
- {"struct F { a::TA a_; };", "struct F { b::TB a_; };", "a::TA",
- "b::TB"},
- {"struct F { ptr<a::TA> a_; };", "struct F { ptr<b::TB> a_; };",
- "a::TA", "b::TB"},
- {"struct F { ptr<a::UA> a_; };", "struct F { ptr<b::UB> a_; };",
- "a::UA", "b::UB"},
-
- // types in nested name specifiers
- {"void f() { a::TA::Nested ne; }", "void f() { b::TB::Nested ne; }",
- "a::TA", "b::TB"},
- {"void f() { a::UA::Nested ne; }", "void f() { b::UB::Nested ne; }",
- "a::UA", "b::UB"},
- {"void f() { a::TA::Nested::NestedEnum e; }",
- "void f() { b::TB::Nested::NestedEnum e; }", "a::TA", "b::TB"},
- {"void f() { auto e = a::TA::Nested::NestedEnum::E1; }",
- "void f() { auto e = b::TB::Nested::NestedEnum::E1; }", "a::TA",
- "b::TB"},
- {"void f() { auto e = a::TA::Nested::E1; }",
- "void f() { auto e = b::TB::Nested::E1; }", "a::TA", "b::TB"},
-
- // templates
- {"template <typename T> struct Foo { T t; }; void f() { Foo<a::TA> "
- "foo; }",
- "template <typename T> struct Foo { T t; }; void f() { Foo<b::TB> "
- "foo; }",
- "a::TA", "b::TB"},
- {"template <typename T> struct Foo { a::TA a; };",
- "template <typename T> struct Foo { b::TB a; };", "a::TA", "b::TB"},
- {"template <typename T> void f(T t) {} void g() { f<a::TA>(a::TA()); }",
- "template <typename T> void f(T t) {} void g() { f<b::TB>(b::TB()); }",
- "a::TA", "b::TB"},
- {"template <typename T> void f(T t) {} void g() { f<a::UA>(a::UA()); }",
- "template <typename T> void f(T t) {} void g() { f<b::UB>(b::UB()); }",
- "a::UA", "b::UB"},
- {"template <typename T> int f() { return 1; } template <> int "
- "f<a::TA>() { return 2; } int g() { return f<a::TA>(); }",
- "template <typename T> int f() { return 1; } template <> int "
- "f<b::TB>() { return 2; } int g() { return f<b::TB>(); }",
- "a::TA", "b::TB"},
- {"struct Foo { template <typename T> T foo(); }; void g() { Foo f; "
- "auto a = f.template foo<a::TA>(); }",
- "struct Foo { template <typename T> T foo(); }; void g() { Foo f; "
- "auto a = f.template foo<b::TB>(); }",
- "a::TA", "b::TB"},
- {"struct Foo { template <typename T> T foo(); }; void g() { Foo f; "
- "auto a = f.template foo<a::UA>(); }",
- "struct Foo { template <typename T> T foo(); }; void g() { Foo f; "
- "auto a = f.template foo<b::UB>(); }",
- "a::UA", "b::UB"},
-
- // The following two templates are distilled from regressions found in
- // unique_ptr<> and type_traits.h
- {"template <typename T> struct outer { typedef T type; type Baz(); }; "
- "outer<a::TA> g_A;",
- "template <typename T> struct outer { typedef T type; type Baz(); }; "
- "outer<b::TB> g_A;",
- "a::TA", "b::TB"},
- {"template <typename T> struct nested { typedef T type; }; template "
- "<typename T> struct outer { typename nested<T>::type Foo(); }; "
- "outer<a::TA> g_A;",
- "template <typename T> struct nested { typedef T type; }; template "
- "<typename T> struct outer { typename nested<T>::type Foo(); }; "
- "outer<b::TB> g_A;",
- "a::TA", "b::TB"},
-
- // macros
- {"#define FOO(T, t) T t\nvoid f() { FOO(a::TA, a1); FOO(a::TA, a2); }",
- "#define FOO(T, t) T t\nvoid f() { FOO(b::TB, a1); FOO(b::TB, a2); }",
- "a::TA", "b::TB"},
- {"#define FOO(n) a::TA n\nvoid f() { FOO(a1); FOO(a2); }",
- "#define FOO(n) b::TB n\nvoid f() { FOO(a1); FOO(a2); }", "a::TA",
- "b::TB"},
- {"#define FOO(n) a::UA n\nvoid f() { FOO(a1); FOO(a2); }",
- "#define FOO(n) b::UB n\nvoid f() { FOO(a1); FOO(a2); }", "a::UA",
- "b::UB"},
-
- // Pointer to member functions
- {"auto gA = &a::TA::foo;", "auto gA = &b::TB::foo;", "a::TA", "b::TB"},
- {"using a::TA; auto gA = &TA::foo;",
- "using b::TB; auto gA = &b::TB::foo;", "a::TA", "b::TB"},
- {"typedef a::TA T; auto gA = &T::foo;",
- "typedef b::TB T; auto gA = &T::foo;", "a::TA", "b::TB"},
- {"auto gA = &MACRO(a::TA)::foo;", "auto gA = &MACRO(b::TB)::foo;",
- "a::TA", "b::TB"},
-
- // templated using alias.
- {"void f(TPtr<int> p) {}", "void f(NewTPtr<int> p) {}", "TPtr",
- "NewTPtr"},
- {"void f(::TPtr<int> p) {}", "void f(::NewTPtr<int> p) {}", "TPtr",
- "NewTPtr"},
- })));
-
-TEST_P(RenameAliasTest, RenameAlias) {
- auto Param = GetParam();
- assert(!Param.OldName.empty());
- assert(!Param.NewName.empty());
- std::string Actual =
- runClangRenameOnCode(Param.Before, Param.OldName, Param.NewName);
- CompareSnippets(Param.After, Actual);
-}
-
-TEST_F(RenameAliasTest, RenameTypedefDefinitions) {
- std::string Before = R"(
- class X {};
- typedef X TOld;
- )";
- std::string Expected = R"(
- class X {};
- typedef X TNew;
- )";
- std::string After = runClangRenameOnCode(Before, "TOld", "TNew");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameAliasTest, RenameUsingAliasDefinitions) {
- std::string Before = R"(
- class X {};
- using UOld = X;
- )";
- std::string Expected = R"(
- class X {};
- using UNew = X;
- )";
- std::string After = runClangRenameOnCode(Before, "UOld", "UNew");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameAliasTest, RenameTemplatedAliasDefinitions) {
- std::string Before = R"(
- template <typename T>
- class X { T t; };
-
- template <typename T>
- using Old = X<T>;
- )";
- std::string Expected = R"(
- template <typename T>
- class X { T t; };
-
- template <typename T>
- using New = X<T>;
- )";
- std::string After = runClangRenameOnCode(Before, "Old", "New");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameAliasTest, RenameAliasesInNamespaces) {
- std::string Before = R"(
- namespace x { class X {}; }
- namespace ns {
- using UOld = x::X;
- }
- )";
- std::string Expected = R"(
- namespace x { class X {}; }
- namespace ns {
- using UNew = x::X;
- }
- )";
- std::string After = runClangRenameOnCode(Before, "ns::UOld", "ns::UNew");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameAliasTest, AliasesInMacros) {
- std::string Before = R"(
- namespace x { class Old {}; }
- namespace ns {
- #define REF(alias) alias alias_var;
-
- #define ALIAS(old) \
- using old##Alias = x::old; \
- REF(old##Alias);
-
- ALIAS(Old);
-
- OldAlias old_alias;
- }
- )";
- std::string Expected = R"(
- namespace x { class Old {}; }
- namespace ns {
- #define REF(alias) alias alias_var;
-
- #define ALIAS(old) \
- using old##Alias = x::old; \
- REF(old##Alias);
-
- ALIAS(Old);
-
- NewAlias old_alias;
- }
- )";
- std::string After =
- runClangRenameOnCode(Before, "ns::OldAlias", "ns::NewAlias");
- CompareSnippets(Expected, After);
-}
-
-} // anonymous namespace
-} // namespace test
-} // namespace clang_rename
-} // namesdpace clang
diff --git a/clang/unittests/Rename/RenameClassTest.cpp b/clang/unittests/Rename/RenameClassTest.cpp
deleted file mode 100644
index 24370b5795e9..000000000000
--- a/clang/unittests/Rename/RenameClassTest.cpp
+++ /dev/null
@@ -1,820 +0,0 @@
-//===-- RenameClassTest.cpp - unit tests for renaming classes -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "ClangRenameTest.h"
-
-namespace clang {
-namespace clang_rename {
-namespace test {
-namespace {
-
-class RenameClassTest : public ClangRenameTest {
-public:
- RenameClassTest() {
- AppendToHeader(R"(
- namespace a {
- class Foo {
- public:
- struct Nested {
- enum NestedEnum {E1, E2};
- };
- void func() {}
- static int Constant;
- };
- class Goo {
- public:
- struct Nested {
- enum NestedEnum {E1, E2};
- };
- };
- int Foo::Constant = 1;
- } // namespace a
- namespace b {
- class Foo {};
- } // namespace b
-
- #define MACRO(x) x
-
- template<typename T> class ptr {};
- )");
- }
-};
-
-INSTANTIATE_TEST_SUITE_P(
- RenameClassTests, RenameClassTest,
- testing::ValuesIn(std::vector<Case>({
- // basic classes
- {"a::Foo f;", "b::Bar f;", "", ""},
- {"::a::Foo f;", "::b::Bar f;", "", ""},
- {"void f(a::Foo f) {}", "void f(b::Bar f) {}", "", ""},
- {"void f(a::Foo *f) {}", "void f(b::Bar *f) {}", "", ""},
- {"a::Foo f() { return a::Foo(); }", "b::Bar f() { return b::Bar(); }",
- "", ""},
- {"namespace a {a::Foo f() { return Foo(); }}",
- "namespace a {b::Bar f() { return b::Bar(); }}", "", ""},
- {"void f(const a::Foo& a1) {}", "void f(const b::Bar& a1) {}", "", ""},
- {"void f(const a::Foo* a1) {}", "void f(const b::Bar* a1) {}", "", ""},
- {"namespace a { void f(Foo a1) {} }",
- "namespace a { void f(b::Bar a1) {} }", "", ""},
- {"void f(MACRO(a::Foo) a1) {}", "void f(MACRO(b::Bar) a1) {}", "", ""},
- {"void f(MACRO(a::Foo a1)) {}", "void f(MACRO(b::Bar a1)) {}", "", ""},
- {"a::Foo::Nested ns;", "b::Bar::Nested ns;", "", ""},
- {"auto t = a::Foo::Constant;", "auto t = b::Bar::Constant;", "", ""},
- {"a::Foo::Nested ns;", "a::Foo::Nested2 ns;", "a::Foo::Nested",
- "a::Foo::Nested2"},
-
- // use namespace and typedefs
- {"using a::Foo; Foo gA;", "using b::Bar; b::Bar gA;", "", ""},
- {"using a::Foo; void f(Foo gA) {}", "using b::Bar; void f(Bar gA) {}",
- "", ""},
- {"using a::Foo; namespace x { Foo gA; }",
- "using b::Bar; namespace x { Bar gA; }", "", ""},
- {"struct S { using T = a::Foo; T a_; };",
- "struct S { using T = b::Bar; T a_; };", "", ""},
- {"using T = a::Foo; T gA;", "using T = b::Bar; T gA;", "", ""},
- {"typedef a::Foo T; T gA;", "typedef b::Bar T; T gA;", "", ""},
- {"typedef MACRO(a::Foo) T; T gA;", "typedef MACRO(b::Bar) T; T gA;", "",
- ""},
-
- // struct members and other oddities
- {"struct S : public a::Foo {};", "struct S : public b::Bar {};", "",
- ""},
- {"struct F { void f(a::Foo a1) {} };",
- "struct F { void f(b::Bar a1) {} };", "", ""},
- {"struct F { a::Foo a_; };", "struct F { b::Bar a_; };", "", ""},
- {"struct F { ptr<a::Foo> a_; };", "struct F { ptr<b::Bar> a_; };", "",
- ""},
-
- {"void f() { a::Foo::Nested ne; }", "void f() { b::Bar::Nested ne; }",
- "", ""},
- {"void f() { a::Goo::Nested ne; }", "void f() { a::Goo::Nested ne; }",
- "", ""},
- {"void f() { a::Foo::Nested::NestedEnum e; }",
- "void f() { b::Bar::Nested::NestedEnum e; }", "", ""},
- {"void f() { auto e = a::Foo::Nested::NestedEnum::E1; }",
- "void f() { auto e = b::Bar::Nested::NestedEnum::E1; }", "", ""},
- {"void f() { auto e = a::Foo::Nested::E1; }",
- "void f() { auto e = b::Bar::Nested::E1; }", "", ""},
-
- // templates
- {"template <typename T> struct Foo { T t; };\n"
- "void f() { Foo<a::Foo> foo; }",
- "template <typename T> struct Foo { T t; };\n"
- "void f() { Foo<b::Bar> foo; }",
- "", ""},
- {"template <typename T> struct Foo { a::Foo a; };",
- "template <typename T> struct Foo { b::Bar a; };", "", ""},
- {"template <typename T> void f(T t) {}\n"
- "void g() { f<a::Foo>(a::Foo()); }",
- "template <typename T> void f(T t) {}\n"
- "void g() { f<b::Bar>(b::Bar()); }",
- "", ""},
- {"template <typename T> int f() { return 1; }\n"
- "template <> int f<a::Foo>() { return 2; }\n"
- "int g() { return f<a::Foo>(); }",
- "template <typename T> int f() { return 1; }\n"
- "template <> int f<b::Bar>() { return 2; }\n"
- "int g() { return f<b::Bar>(); }",
- "", ""},
- {"struct Foo { template <typename T> T foo(); };\n"
- "void g() { Foo f; auto a = f.template foo<a::Foo>(); }",
- "struct Foo { template <typename T> T foo(); };\n"
- "void g() { Foo f; auto a = f.template foo<b::Bar>(); }",
- "", ""},
-
- // The following two templates are distilled from regressions found in
- // unique_ptr<> and type_traits.h
- {"template <typename T> struct outer {\n"
- " typedef T type;\n"
- " type Baz();\n"
- " };\n"
- " outer<a::Foo> g_A;",
- "template <typename T> struct outer {\n"
- " typedef T type;\n"
- " type Baz();\n"
- " };\n"
- " outer<b::Bar> g_A;",
- "", ""},
- {"template <typename T> struct nested { typedef T type; };\n"
- "template <typename T> struct outer { typename nested<T>::type Foo(); "
- "};\n"
- "outer<a::Foo> g_A;",
- "template <typename T> struct nested { typedef T type; };\n"
- "template <typename T> struct outer { typename nested<T>::type Foo(); "
- "};\n"
- "outer<b::Bar> g_A;",
- "", ""},
-
- // macros
- {"#define FOO(T, t) T t\n"
- "void f() { FOO(a::Foo, a1); FOO(a::Foo, a2); }",
- "#define FOO(T, t) T t\n"
- "void f() { FOO(b::Bar, a1); FOO(b::Bar, a2); }",
- "", ""},
- {"#define FOO(n) a::Foo n\n"
- " void f() { FOO(a1); FOO(a2); }",
- "#define FOO(n) b::Bar n\n"
- " void f() { FOO(a1); FOO(a2); }",
- "", ""},
-
- // Pointer to member functions
- {"auto gA = &a::Foo::func;", "auto gA = &b::Bar::func;", "", ""},
- {"using a::Foo; auto gA = &Foo::func;",
- "using b::Bar; auto gA = &b::Bar::func;", "", ""},
- {"using a::Foo; namespace x { auto gA = &Foo::func; }",
- "using b::Bar; namespace x { auto gA = &Bar::func; }", "", ""},
- {"typedef a::Foo T; auto gA = &T::func;",
- "typedef b::Bar T; auto gA = &T::func;", "", ""},
- {"auto gA = &MACRO(a::Foo)::func;", "auto gA = &MACRO(b::Bar)::func;",
- "", ""},
-
- // Short match inside a namespace
- {"namespace a { void f(Foo a1) {} }",
- "namespace a { void f(b::Bar a1) {} }", "", ""},
-
- // Correct match.
- {"using a::Foo; struct F { ptr<Foo> a_; };",
- "using b::Bar; struct F { ptr<Bar> a_; };", "", ""},
-
- // avoid false positives
- {"void f(b::Foo a) {}", "void f(b::Foo a) {}", "", ""},
- {"namespace b { void f(Foo a) {} }", "namespace b { void f(Foo a) {} }",
- "", ""},
-
- // friends, everyone needs friends.
- {"class Foo { int i; friend class a::Foo; };",
- "class Foo { int i; friend class b::Bar; };", "", ""},
- })) );
-
-TEST_P(RenameClassTest, RenameClasses) {
- auto Param = GetParam();
- std::string OldName = Param.OldName.empty() ? "a::Foo" : Param.OldName;
- std::string NewName = Param.NewName.empty() ? "b::Bar" : Param.NewName;
- std::string Actual = runClangRenameOnCode(Param.Before, OldName, NewName);
- CompareSnippets(Param.After, Actual);
-}
-
-class NamespaceDetectionTest : public ClangRenameTest {
-protected:
- NamespaceDetectionTest() {
- AppendToHeader(R"(
- class Old {};
- namespace o1 {
- class Old {};
- namespace o2 {
- class Old {};
- namespace o3 {
- class Old {};
- } // namespace o3
- } // namespace o2
- } // namespace o1
- )");
- }
-};
-
-INSTANTIATE_TEST_SUITE_P(
- RenameClassTest, NamespaceDetectionTest,
- ::testing::ValuesIn(std::vector<Case>({
- // Test old and new namespace overlap.
- {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }",
- "namespace o1 { namespace o2 { namespace o3 { New moo; } } }",
- "o1::o2::o3::Old", "o1::o2::o3::New"},
- {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }",
- "namespace o1 { namespace o2 { namespace o3 { n3::New moo; } } }",
- "o1::o2::o3::Old", "o1::o2::n3::New"},
- {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }",
- "namespace o1 { namespace o2 { namespace o3 { n2::n3::New moo; } } }",
- "o1::o2::o3::Old", "o1::n2::n3::New"},
- {"namespace o1 { namespace o2 { Old moo; } }",
- "namespace o1 { namespace o2 { New moo; } }", "::o1::o2::Old",
- "::o1::o2::New"},
- {"namespace o1 { namespace o2 { Old moo; } }",
- "namespace o1 { namespace o2 { n2::New moo; } }", "::o1::o2::Old",
- "::o1::n2::New"},
- {"namespace o1 { namespace o2 { Old moo; } }",
- "namespace o1 { namespace o2 { ::n1::n2::New moo; } }",
- "::o1::o2::Old", "::n1::n2::New"},
- {"namespace o1 { namespace o2 { Old moo; } }",
- "namespace o1 { namespace o2 { n1::n2::New moo; } }", "::o1::o2::Old",
- "n1::n2::New"},
-
- // Test old and new namespace with differing depths.
- {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }",
- "namespace o1 { namespace o2 { namespace o3 { New moo; } } }",
- "o1::o2::o3::Old", "::o1::New"},
- {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }",
- "namespace o1 { namespace o2 { namespace o3 { New moo; } } }",
- "o1::o2::o3::Old", "::o1::o2::New"},
- {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }",
- "namespace o1 { namespace o2 { namespace o3 { New moo; } } }",
- "o1::o2::o3::Old", "o1::New"},
- {"namespace o1 { namespace o2 { namespace o3 { Old moo; } } }",
- "namespace o1 { namespace o2 { namespace o3 { New moo; } } }",
- "o1::o2::o3::Old", "o1::o2::New"},
- {"Old moo;", "o1::New moo;", "::Old", "o1::New"},
- {"Old moo;", "o1::New moo;", "Old", "o1::New"},
- {"namespace o1 { ::Old moo; }", "namespace o1 { New moo; }", "Old",
- "o1::New"},
- {"namespace o1 { namespace o2 { Old moo; } }",
- "namespace o1 { namespace o2 { ::New moo; } }", "::o1::o2::Old",
- "::New"},
- {"namespace o1 { namespace o2 { Old moo; } }",
- "namespace o1 { namespace o2 { New moo; } }", "::o1::o2::Old", "New"},
-
- // Test moving into the new namespace at different levels.
- {"namespace n1 { namespace n2 { o1::o2::Old moo; } }",
- "namespace n1 { namespace n2 { New moo; } }", "::o1::o2::Old",
- "::n1::n2::New"},
- {"namespace n1 { namespace n2 { o1::o2::Old moo; } }",
- "namespace n1 { namespace n2 { New moo; } }", "::o1::o2::Old",
- "n1::n2::New"},
- {"namespace n1 { namespace n2 { o1::o2::Old moo; } }",
- "namespace n1 { namespace n2 { o2::New moo; } }", "::o1::o2::Old",
- "::n1::o2::New"},
- {"namespace n1 { namespace n2 { o1::o2::Old moo; } }",
- "namespace n1 { namespace n2 { o2::New moo; } }", "::o1::o2::Old",
- "n1::o2::New"},
- {"namespace n1 { namespace n2 { o1::o2::Old moo; } }",
- "namespace n1 { namespace n2 { ::o1::o2::New moo; } }",
- "::o1::o2::Old", "::o1::o2::New"},
- {"namespace n1 { namespace n2 { o1::o2::Old moo; } }",
- "namespace n1 { namespace n2 { o1::o2::New moo; } }", "::o1::o2::Old",
- "o1::o2::New"},
-
- // Test friends declarations.
- {"class Foo { friend class o1::Old; };",
- "class Foo { friend class o1::New; };", "o1::Old", "o1::New"},
- {"class Foo { int i; friend class o1::Old; };",
- "class Foo { int i; friend class ::o1::New; };", "::o1::Old",
- "::o1::New"},
- {"namespace o1 { class Foo { int i; friend class Old; }; }",
- "namespace o1 { class Foo { int i; friend class New; }; }", "o1::Old",
- "o1::New"},
- {"namespace o1 { class Foo { int i; friend class Old; }; }",
- "namespace o1 { class Foo { int i; friend class New; }; }",
- "::o1::Old", "::o1::New"},
- })) );
-
-TEST_P(NamespaceDetectionTest, RenameClasses) {
- auto Param = GetParam();
- std::string Actual =
- runClangRenameOnCode(Param.Before, Param.OldName, Param.NewName);
- CompareSnippets(Param.After, Actual);
-}
-
-class TemplatedClassRenameTest : public ClangRenameTest {
-protected:
- TemplatedClassRenameTest() {
- AppendToHeader(R"(
- template <typename T> struct Old {
- T t_;
- T f() { return T(); };
- static T s(T t) { return t; }
- };
- namespace ns {
- template <typename T> struct Old {
- T t_;
- T f() { return T(); };
- static T s(T t) { return t; }
- };
- } // namespace ns
-
- namespace o1 {
- namespace o2 {
- namespace o3 {
- template <typename T> struct Old {
- T t_;
- T f() { return T(); };
- static T s(T t) { return t; }
- };
- } // namespace o3
- } // namespace o2
- } // namespace o1
- )");
- }
-};
-
-INSTANTIATE_TEST_SUITE_P(
- RenameClassTests, TemplatedClassRenameTest,
- ::testing::ValuesIn(std::vector<Case>({
- {"Old<int> gI; Old<bool> gB;", "New<int> gI; New<bool> gB;", "Old",
- "New"},
- {"ns::Old<int> gI; ns::Old<bool> gB;",
- "ns::New<int> gI; ns::New<bool> gB;", "ns::Old", "ns::New"},
- {"auto gI = &Old<int>::f; auto gB = &Old<bool>::f;",
- "auto gI = &New<int>::f; auto gB = &New<bool>::f;", "Old", "New"},
- {"auto gI = &ns::Old<int>::f;", "auto gI = &ns::New<int>::f;",
- "ns::Old", "ns::New"},
-
- {"int gI = Old<int>::s(0); bool gB = Old<bool>::s(false);",
- "int gI = New<int>::s(0); bool gB = New<bool>::s(false);", "Old",
- "New"},
- {"int gI = ns::Old<int>::s(0); bool gB = ns::Old<bool>::s(false);",
- "int gI = ns::New<int>::s(0); bool gB = ns::New<bool>::s(false);",
- "ns::Old", "ns::New"},
-
- {"struct S { Old<int*> o_; };", "struct S { New<int*> o_; };", "Old",
- "New"},
- {"struct S { ns::Old<int*> o_; };", "struct S { ns::New<int*> o_; };",
- "ns::Old", "ns::New"},
-
- {"auto a = reinterpret_cast<Old<int>*>(new Old<int>);",
- "auto a = reinterpret_cast<New<int>*>(new New<int>);", "Old", "New"},
- {"auto a = reinterpret_cast<ns::Old<int>*>(new ns::Old<int>);",
- "auto a = reinterpret_cast<ns::New<int>*>(new ns::New<int>);",
- "ns::Old", "ns::New"},
- {"auto a = reinterpret_cast<const Old<int>*>(new Old<int>);",
- "auto a = reinterpret_cast<const New<int>*>(new New<int>);", "Old",
- "New"},
- {"auto a = reinterpret_cast<const ns::Old<int>*>(new ns::Old<int>);",
- "auto a = reinterpret_cast<const ns::New<int>*>(new ns::New<int>);",
- "ns::Old", "ns::New"},
-
- {"Old<bool>& foo();", "New<bool>& foo();", "Old", "New"},
- {"ns::Old<bool>& foo();", "ns::New<bool>& foo();", "ns::Old",
- "ns::New"},
- {"o1::o2::o3::Old<bool>& foo();", "o1::o2::o3::New<bool>& foo();",
- "o1::o2::o3::Old", "o1::o2::o3::New"},
- {"namespace ns { Old<bool>& foo(); }",
- "namespace ns { New<bool>& foo(); }", "ns::Old", "ns::New"},
- {"const Old<bool>& foo();", "const New<bool>& foo();", "Old", "New"},
- {"const ns::Old<bool>& foo();", "const ns::New<bool>& foo();",
- "ns::Old", "ns::New"},
-
- // FIXME: figure out why this only works when Moo gets
- // specialized at some point.
- {"template <typename T> struct Moo { Old<T> o_; }; Moo<int> m;",
- "template <typename T> struct Moo { New<T> o_; }; Moo<int> m;", "Old",
- "New"},
- {"template <typename T> struct Moo { ns::Old<T> o_; }; Moo<int> m;",
- "template <typename T> struct Moo { ns::New<T> o_; }; Moo<int> m;",
- "ns::Old", "ns::New"},
- })) );
-
-TEST_P(TemplatedClassRenameTest, RenameTemplateClasses) {
- auto Param = GetParam();
- std::string Actual =
- runClangRenameOnCode(Param.Before, Param.OldName, Param.NewName);
- CompareSnippets(Param.After, Actual);
-}
-
-TEST_F(ClangRenameTest, RenameClassWithOutOfLineMembers) {
- std::string Before = R"(
- class Old {
- public:
- Old();
- ~Old();
-
- Old* next();
-
- private:
- Old* next_;
- };
-
- Old::Old() {}
- Old::~Old() {}
- Old* Old::next() { return next_; }
- )";
- std::string Expected = R"(
- class New {
- public:
- New();
- ~New();
-
- New* next();
-
- private:
- New* next_;
- };
-
- New::New() {}
- New::~New() {}
- New* New::next() { return next_; }
- )";
- std::string After = runClangRenameOnCode(Before, "Old", "New");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(ClangRenameTest, RenameClassWithInlineMembers) {
- std::string Before = R"(
- class Old {
- public:
- Old() {}
- ~Old() {}
-
- Old* next() { return next_; }
-
- private:
- Old* next_;
- };
- )";
- std::string Expected = R"(
- class New {
- public:
- New() {}
- ~New() {}
-
- New* next() { return next_; }
-
- private:
- New* next_;
- };
- )";
- std::string After = runClangRenameOnCode(Before, "Old", "New");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(ClangRenameTest, RenameClassWithNamespaceWithInlineMembers) {
- std::string Before = R"(
- namespace ns {
- class Old {
- public:
- Old() {}
- ~Old() {}
-
- Old* next() { return next_; }
-
- private:
- Old* next_;
- };
- } // namespace ns
- )";
- std::string Expected = R"(
- namespace ns {
- class New {
- public:
- New() {}
- ~New() {}
-
- New* next() { return next_; }
-
- private:
- New* next_;
- };
- } // namespace ns
- )";
- std::string After = runClangRenameOnCode(Before, "ns::Old", "ns::New");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(ClangRenameTest, RenameClassWithNamespaceWithOutOfInlineMembers) {
- std::string Before = R"(
- namespace ns {
- class Old {
- public:
- Old();
- ~Old();
-
- Old* next();
-
- private:
- Old* next_;
- };
-
- Old::Old() {}
- Old::~Old() {}
- Old* Old::next() { return next_; }
- } // namespace ns
- )";
- std::string Expected = R"(
- namespace ns {
- class New {
- public:
- New();
- ~New();
-
- New* next();
-
- private:
- New* next_;
- };
-
- New::New() {}
- New::~New() {}
- New* New::next() { return next_; }
- } // namespace ns
- )";
- std::string After = runClangRenameOnCode(Before, "ns::Old", "ns::New");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(ClangRenameTest, RenameClassInInheritedConstructor) {
- // `using Base::Base;` will generate an implicit constructor containing usage
- // of `::ns::Old` which should not be matched.
- std::string Before = R"(
- namespace ns {
- class Old;
- class Old {
- int x;
- };
- class Base {
- protected:
- Old *moo_;
- public:
- Base(Old *moo) : moo_(moo) {}
- };
- class Derived : public Base {
- public:
- using Base::Base;
- };
- } // namespace ns
- int main() {
- ::ns::Old foo;
- ::ns::Derived d(&foo);
- return 0;
- })";
- std::string Expected = R"(
- namespace ns {
- class New;
- class New {
- int x;
- };
- class Base {
- protected:
- New *moo_;
- public:
- Base(New *moo) : moo_(moo) {}
- };
- class Derived : public Base {
- public:
- using Base::Base;
- };
- } // namespace ns
- int main() {
- ::ns::New foo;
- ::ns::Derived d(&foo);
- return 0;
- })";
- std::string After = runClangRenameOnCode(Before, "ns::Old", "ns::New");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(ClangRenameTest, DontRenameReferencesInImplicitFunction) {
- std::string Before = R"(
- namespace ns {
- class Old {
- };
- } // namespace ns
- struct S {
- int y;
- ns::Old old;
- };
- void f() {
- S s1, s2, s3;
- // This causes an implicit assignment operator to be created.
- s1 = s2 = s3;
- }
- )";
- std::string Expected = R"(
- namespace ns {
- class New {
- };
- } // namespace ns
- struct S {
- int y;
- ::new_ns::New old;
- };
- void f() {
- S s1, s2, s3;
- // This causes an implicit assignment operator to be created.
- s1 = s2 = s3;
- }
- )";
- std::string After = runClangRenameOnCode(Before, "ns::Old", "::new_ns::New");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(ClangRenameTest, ReferencesInLambdaFunctionParameters) {
- std::string Before = R"(
- template <class T>
- class function;
- template <class R, class... ArgTypes>
- class function<R(ArgTypes...)> {
- public:
- template <typename Functor>
- function(Functor f) {}
-
- function() {}
-
- R operator()(ArgTypes...) const {}
- };
-
- namespace ns {
- class Old {};
- void f() {
- function<void(Old)> func;
- }
- } // namespace ns)";
- std::string Expected = R"(
- template <class T>
- class function;
- template <class R, class... ArgTypes>
- class function<R(ArgTypes...)> {
- public:
- template <typename Functor>
- function(Functor f) {}
-
- function() {}
-
- R operator()(ArgTypes...) const {}
- };
-
- namespace ns {
- class New {};
- void f() {
- function<void(::new_ns::New)> func;
- }
- } // namespace ns)";
- std::string After = runClangRenameOnCode(Before, "ns::Old", "::new_ns::New");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(ClangRenameTest, DontChangeIfSameName) {
- std::string Before = R"(
- namespace foo {
- class Old {
- public:
- static void foo() {}
- };
- }
-
- void f(foo::Old * x) {
- foo::Old::foo() ;
- }
- using foo::Old;)";
- std::string Expected = R"(
- namespace foo {
- class Old {
- public:
- static void foo() {}
- };
- }
-
- void f(foo::Old * x) {
- foo::Old::foo() ;
- }
- using foo::Old;)";
- std::string After = runClangRenameOnCode(Before, "foo::Old", "foo::Old");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(ClangRenameTest, ChangeIfNewNameWithLeadingDotDot) {
- std::string Before = R"(
- namespace foo {
- class Old {
- public:
- static void foo() {}
- };
- }
-
- void f(foo::Old * x) {
- foo::Old::foo() ;
- }
- using foo::Old;)";
- std::string Expected = R"(
- namespace foo {
- class Old {
- public:
- static void foo() {}
- };
- }
-
- void f(::foo::Old * x) {
- ::foo::Old::foo() ;
- }
- using ::foo::Old;)";
- std::string After = runClangRenameOnCode(Before, "foo::Old", "::foo::Old");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(ClangRenameTest, ChangeIfSameNameWithLeadingDotDot) {
- std::string Before = R"(
- namespace foo {
- class Old {
- public:
- static void foo() {}
- };
- }
-
- void f(foo::Old * x) {
- foo::Old::foo() ;
- }
- using foo::Old;)";
- std::string Expected = R"(
- namespace foo {
- class Old {
- public:
- static void foo() {}
- };
- }
-
- void f(::foo::Old * x) {
- ::foo::Old::foo() ;
- }
- using ::foo::Old;)";
- std::string After = runClangRenameOnCode(Before, "::foo::Old", "::foo::Old");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameClassTest, UsingAlias) {
- std::string Before = R"(
- namespace a { struct A {}; }
-
- namespace foo {
- using Alias = a::A;
- Alias a;
- })";
- std::string Expected = R"(
- namespace a { struct B {}; }
-
- namespace foo {
- using Alias = b::B;
- Alias a;
- })";
- std::string After = runClangRenameOnCode(Before, "a::A", "b::B");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(ClangRenameTest, FieldDesignatedInitializers) {
- std::string Before = R"(
- struct S {
- int a;
- };
- void foo() {
- S s = { .a = 10 };
- s.a = 20;
- })";
- std::string Expected = R"(
- struct S {
- int b;
- };
- void foo() {
- S s = { .b = 10 };
- s.b = 20;
- })";
- std::string After = runClangRenameOnCode(Before, "S::a", "S::b");
- CompareSnippets(Expected, After);
-}
-
-// FIXME: investigate why the test fails when adding a new USR to the USRSet.
-TEST_F(ClangRenameTest, DISABLED_NestedTemplates) {
- std::string Before = R"(
- namespace a { template <typename T> struct A {}; }
- a::A<a::A<int>> foo;)";
- std::string Expected = R"(
- namespace a { template <typename T> struct B {}; }
- b::B<b::B<int>> foo;)";
- std::string After = runClangRenameOnCode(Before, "a::A", "b::B");
- CompareSnippets(Expected, After);
-}
-
-
-} // anonymous namespace
-} // namespace test
-} // namespace clang_rename
-} // namesdpace clang
diff --git a/clang/unittests/Rename/RenameEnumTest.cpp b/clang/unittests/Rename/RenameEnumTest.cpp
deleted file mode 100644
index dc3440047c4a..000000000000
--- a/clang/unittests/Rename/RenameEnumTest.cpp
+++ /dev/null
@@ -1,189 +0,0 @@
-#include "ClangRenameTest.h"
-
-namespace clang {
-namespace clang_rename {
-namespace test {
-namespace {
-
-class RenameEnumTest : public ClangRenameTest {
-public:
- RenameEnumTest() {
- AppendToHeader(R"(
- #define MACRO(x) x
- namespace a {
- enum A1 { Red };
- enum class A2 { Blue };
- struct C {
- enum NestedEnum { White };
- enum class NestedScopedEnum { Black };
- };
- namespace d {
- enum A3 { Orange };
- } // namespace d
- enum A4 { Pink };
- } // namespace a
- enum A5 { Green };)");
- }
-};
-
-INSTANTIATE_TEST_SUITE_P(
- RenameEnumTests, RenameEnumTest,
- testing::ValuesIn(std::vector<Case>({
- {"void f(a::A2 arg) { a::A2 t = a::A2::Blue; }",
- "void f(b::B2 arg) { b::B2 t = b::B2::Blue; }", "a::A2", "b::B2"},
- {"void f() { a::A1* t1; }", "void f() { b::B1* t1; }", "a::A1",
- "b::B1"},
- {"void f() { a::A2* t1; }", "void f() { b::B2* t1; }", "a::A2",
- "b::B2"},
- {"void f() { enum a::A2 t = a::A2::Blue; }",
- "void f() { enum b::B2 t = b::B2::Blue; }", "a::A2", "b::B2"},
- {"void f() { enum a::A2 t = a::A2::Blue; }",
- "void f() { enum b::B2 t = b::B2::Blue; }", "a::A2", "b::B2"},
-
- {"void f() { a::A1 t = a::Red; }", "void f() { b::B1 t = b::B1::Red; }",
- "a::A1", "b::B1"},
- {"void f() { a::A1 t = a::A1::Red; }",
- "void f() { b::B1 t = b::B1::Red; }", "a::A1", "b::B1"},
- {"void f() { auto t = a::Red; }", "void f() { auto t = b::B1::Red; }",
- "a::A1", "b::B1"},
- {"namespace b { void f() { a::A1 t = a::Red; } }",
- "namespace b { void f() { B1 t = B1::Red; } }", "a::A1", "b::B1"},
- {"void f() { a::d::A3 t = a::d::Orange; }",
- "void f() { a::b::B3 t = a::b::B3::Orange; }", "a::d::A3", "a::b::B3"},
- {"namespace a { void f() { a::d::A3 t = a::d::Orange; } }",
- "namespace a { void f() { b::B3 t = b::B3::Orange; } }", "a::d::A3",
- "a::b::B3"},
- {"void f() { A5 t = Green; }", "void f() { B5 t = Green; }", "A5",
- "B5"},
- // FIXME: the new namespace qualifier should be added to the unscoped
- // enum constant.
- {"namespace a { void f() { auto t = Green; } }",
- "namespace a { void f() { auto t = Green; } }", "a::A1", "b::B1"},
-
- // namespace qualifiers
- {"namespace a { void f(A1 a1) {} }",
- "namespace a { void f(b::B1 a1) {} }", "a::A1", "b::B1"},
- {"namespace a { void f(A2 a2) {} }",
- "namespace a { void f(b::B2 a2) {} }", "a::A2", "b::B2"},
- {"namespace b { void f(a::A1 a1) {} }",
- "namespace b { void f(B1 a1) {} }", "a::A1", "b::B1"},
- {"namespace b { void f(a::A2 a2) {} }",
- "namespace b { void f(B2 a2) {} }", "a::A2", "b::B2"},
-
- // nested enums
- {"void f() { a::C::NestedEnum t = a::C::White; }",
- "void f() { a::C::NewNestedEnum t = a::C::NewNestedEnum::White; }",
- "a::C::NestedEnum", "a::C::NewNestedEnum"},
- {"void f() { a::C::NestedScopedEnum t = a::C::NestedScopedEnum::Black; "
- "}",
- "void f() { a::C::NewNestedScopedEnum t = "
- "a::C::NewNestedScopedEnum::Black; }",
- "a::C::NestedScopedEnum", "a::C::NewNestedScopedEnum"},
-
- // macros
- {"void f(MACRO(a::A1) a1) {}", "void f(MACRO(b::B1) a1) {}", "a::A1",
- "b::B1"},
- {"void f(MACRO(a::A2) a2) {}", "void f(MACRO(b::B2) a2) {}", "a::A2",
- "b::B2"},
- {"#define FOO(T, t) T t\nvoid f() { FOO(a::A1, a1); }",
- "#define FOO(T, t) T t\nvoid f() { FOO(b::B1, a1); }", "a::A1",
- "b::B1"},
- {"#define FOO(T, t) T t\nvoid f() { FOO(a::A2, a2); }",
- "#define FOO(T, t) T t\nvoid f() { FOO(b::B2, a2); }", "a::A2",
- "b::B2"},
- {"#define FOO(n) a::A1 n\nvoid f() { FOO(a1); FOO(a2); }",
- "#define FOO(n) b::B1 n\nvoid f() { FOO(a1); FOO(a2); }", "a::A1",
- "b::B1"},
-
- // using and type alias
- {"using a::A1; A1 gA;", "using b::B1; b::B1 gA;", "a::A1", "b::B1"},
- {"using a::A2; A2 gA;", "using b::B2; b::B2 gA;", "a::A2", "b::B2"},
- {"struct S { using T = a::A1; T a_; };",
- "struct S { using T = b::B1; T a_; };", "a::A1", "b::B1"},
- {"using T = a::A1; T gA;", "using T = b::B1; T gA;", "a::A1", "b::B1"},
- {"using T = a::A2; T gA;", "using T = b::B2; T gA;", "a::A2", "b::B2"},
- {"typedef a::A1 T; T gA;", "typedef b::B1 T; T gA;", "a::A1", "b::B1"},
- {"typedef a::A2 T; T gA;", "typedef b::B2 T; T gA;", "a::A2", "b::B2"},
- {"typedef MACRO(a::A1) T; T gA;", "typedef MACRO(b::B1) T; T gA;",
- "a::A1", "b::B1"},
-
- // templates
- {"template<typename T> struct Foo { T t; }; void f() { Foo<a::A1> "
- "foo1; }",
- "template<typename T> struct Foo { T t; }; void f() { Foo<b::B1> "
- "foo1; }",
- "a::A1", "b::B1"},
- {"template<typename T> struct Foo { T t; }; void f() { Foo<a::A2> "
- "foo2; }",
- "template<typename T> struct Foo { T t; }; void f() { Foo<b::B2> "
- "foo2; }",
- "a::A2", "b::B2"},
- {"template<typename T> struct Foo { a::A1 a1; };",
- "template<typename T> struct Foo { b::B1 a1; };", "a::A1", "b::B1"},
- {"template<typename T> struct Foo { a::A2 a2; };",
- "template<typename T> struct Foo { b::B2 a2; };", "a::A2", "b::B2"},
- {"template<typename T> int f() { return 1; } template<> int f<a::A1>() "
- "{ return 2; } int g() { return f<a::A1>(); }",
- "template<typename T> int f() { return 1; } template<> int f<b::B1>() "
- "{ return 2; } int g() { return f<b::B1>(); }",
- "a::A1", "b::B1"},
- {"template<typename T> int f() { return 1; } template<> int f<a::A2>() "
- "{ return 2; } int g() { return f<a::A2>(); }",
- "template<typename T> int f() { return 1; } template<> int f<b::B2>() "
- "{ return 2; } int g() { return f<b::B2>(); }",
- "a::A2", "b::B2"},
- {"struct Foo { template <typename T> T foo(); }; void g() { Foo f; "
- "f.foo<a::A1>(); }",
- "struct Foo { template <typename T> T foo(); }; void g() { Foo f; "
- "f.foo<b::B1>(); }",
- "a::A1", "b::B1"},
- {"struct Foo { template <typename T> T foo(); }; void g() { Foo f; "
- "f.foo<a::A2>(); }",
- "struct Foo { template <typename T> T foo(); }; void g() { Foo f; "
- "f.foo<b::B2>(); }",
- "a::A2", "b::B2"},
- })) );
-
-TEST_P(RenameEnumTest, RenameEnums) {
- auto Param = GetParam();
- assert(!Param.OldName.empty());
- assert(!Param.NewName.empty());
- std::string Actual =
- runClangRenameOnCode(Param.Before, Param.OldName, Param.NewName);
- CompareSnippets(Param.After, Actual);
-}
-
-TEST_F(RenameEnumTest, RenameEnumDecl) {
- std::string Before = R"(
- namespace ns {
- enum Old1 { Blue };
- }
- )";
- std::string Expected = R"(
- namespace ns {
- enum New1 { Blue };
- }
- )";
- std::string After = runClangRenameOnCode(Before, "ns::Old1", "ns::New1");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameEnumTest, RenameScopedEnumDecl) {
- std::string Before = R"(
- namespace ns {
- enum class Old1 { Blue };
- }
- )";
- std::string Expected = R"(
- namespace ns {
- enum class New1 { Blue };
- }
- )";
- std::string After = runClangRenameOnCode(Before, "ns::Old1", "ns::New1");
- CompareSnippets(Expected, After);
-}
-
-} // anonymous namespace
-} // namespace test
-} // namespace clang_rename
-} // namesdpace clang
diff --git a/clang/unittests/Rename/RenameFunctionTest.cpp b/clang/unittests/Rename/RenameFunctionTest.cpp
deleted file mode 100644
index 1c9b112232eb..000000000000
--- a/clang/unittests/Rename/RenameFunctionTest.cpp
+++ /dev/null
@@ -1,573 +0,0 @@
-//===-- RenameFunctionTest.cpp - unit tests for renaming functions --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "ClangRenameTest.h"
-
-namespace clang {
-namespace clang_rename {
-namespace test {
-namespace {
-
-class RenameFunctionTest : public ClangRenameTest {
-public:
- RenameFunctionTest() {
- AppendToHeader(R"(
- struct A {
- static bool Foo();
- static bool Spam();
- };
- struct B {
- static void Same();
- static bool Foo();
- static int Eric(int x);
- };
- void Same(int x);
- int Eric(int x);
- namespace base {
- void Same();
- void ToNanoSeconds();
- void ToInt64NanoSeconds();
- })");
- }
-};
-
-TEST_F(RenameFunctionTest, RefactorsAFoo) {
- std::string Before = R"(
- void f() {
- A::Foo();
- ::A::Foo();
- })";
- std::string Expected = R"(
- void f() {
- A::Bar();
- ::A::Bar();
- })";
-
- std::string After = runClangRenameOnCode(Before, "A::Foo", "A::Bar");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, RefactorsNonCallingAFoo) {
- std::string Before = R"(
- bool g(bool (*func)()) {
- return func();
- }
- void f() {
- auto *ref1 = A::Foo;
- auto *ref2 = ::A::Foo;
- g(A::Foo);
- })";
- std::string Expected = R"(
- bool g(bool (*func)()) {
- return func();
- }
- void f() {
- auto *ref1 = A::Bar;
- auto *ref2 = ::A::Bar;
- g(A::Bar);
- })";
- std::string After = runClangRenameOnCode(Before, "A::Foo", "A::Bar");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, RefactorsEric) {
- std::string Before = R"(
- void f() {
- if (Eric(3)==4) ::Eric(2);
- })";
- std::string Expected = R"(
- void f() {
- if (Larry(3)==4) ::Larry(2);
- })";
- std::string After = runClangRenameOnCode(Before, "Eric", "Larry");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, RefactorsNonCallingEric) {
- std::string Before = R"(
- int g(int (*func)(int)) {
- return func(1);
- }
- void f() {
- auto *ref = ::Eric;
- g(Eric);
- })";
- std::string Expected = R"(
- int g(int (*func)(int)) {
- return func(1);
- }
- void f() {
- auto *ref = ::Larry;
- g(Larry);
- })";
- std::string After = runClangRenameOnCode(Before, "Eric", "Larry");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, DoesNotRefactorBFoo) {
- std::string Before = R"(
- void f() {
- B::Foo();
- })";
- std::string After = runClangRenameOnCode(Before, "A::Foo", "A::Bar");
- CompareSnippets(Before, After);
-}
-
-TEST_F(RenameFunctionTest, DoesNotRefactorBEric) {
- std::string Before = R"(
- void f() {
- B::Eric(2);
- })";
- std::string After = runClangRenameOnCode(Before, "Eric", "Larry");
- CompareSnippets(Before, After);
-}
-
-TEST_F(RenameFunctionTest, DoesNotRefactorCEric) {
- std::string Before = R"(
- namespace C { int Eric(int x); }
- void f() {
- if (C::Eric(3)==4) ::C::Eric(2);
- })";
- std::string Expected = R"(
- namespace C { int Eric(int x); }
- void f() {
- if (C::Eric(3)==4) ::C::Eric(2);
- })";
- std::string After = runClangRenameOnCode(Before, "Eric", "Larry");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, DoesNotRefactorEricInNamespaceC) {
- std::string Before = R"(
- namespace C {
- int Eric(int x);
- void f() {
- if (Eric(3)==4) Eric(2);
- }
- } // namespace C)";
- std::string After = runClangRenameOnCode(Before, "Eric", "Larry");
- CompareSnippets(Before, After);
-}
-
-TEST_F(RenameFunctionTest, NamespaceQualified) {
- std::string Before = R"(
- void f() {
- base::ToNanoSeconds();
- ::base::ToNanoSeconds();
- }
- void g() {
- using base::ToNanoSeconds;
- base::ToNanoSeconds();
- ::base::ToNanoSeconds();
- ToNanoSeconds();
- }
- namespace foo {
- namespace base {
- void ToNanoSeconds();
- void f() {
- base::ToNanoSeconds();
- }
- }
- void f() {
- ::base::ToNanoSeconds();
- }
- })";
- std::string Expected = R"(
- void f() {
- base::ToInt64NanoSeconds();
- ::base::ToInt64NanoSeconds();
- }
- void g() {
- using base::ToInt64NanoSeconds;
- base::ToInt64NanoSeconds();
- ::base::ToInt64NanoSeconds();
- base::ToInt64NanoSeconds();
- }
- namespace foo {
- namespace base {
- void ToNanoSeconds();
- void f() {
- base::ToNanoSeconds();
- }
- }
- void f() {
- ::base::ToInt64NanoSeconds();
- }
- })";
- std::string After = runClangRenameOnCode(Before, "base::ToNanoSeconds",
- "base::ToInt64NanoSeconds");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, RenameFunctionDecls) {
- std::string Before = R"(
- namespace na {
- void X();
- void X() {}
- })";
- std::string Expected = R"(
- namespace na {
- void Y();
- void Y() {}
- })";
- std::string After = runClangRenameOnCode(Before, "na::X", "na::Y");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, RenameTemplateFunctions) {
- std::string Before = R"(
- namespace na {
- template<typename T> T X();
- }
- namespace na { void f() { X<int>(); } }
- namespace nb { void g() { na::X <int>(); } }
- )";
- std::string Expected = R"(
- namespace na {
- template<typename T> T Y();
- }
- namespace na { void f() { nb::Y<int>(); } }
- namespace nb { void g() { Y<int>(); } }
- )";
- std::string After = runClangRenameOnCode(Before, "na::X", "nb::Y");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, RenameOutOfLineFunctionDecls) {
- std::string Before = R"(
- namespace na {
- void X();
- }
- void na::X() {}
- )";
- std::string Expected = R"(
- namespace na {
- void Y();
- }
- void na::Y() {}
- )";
- std::string After = runClangRenameOnCode(Before, "na::X", "na::Y");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, NewNamespaceWithoutLeadingDotDot) {
- std::string Before = R"(
- namespace old_ns {
- void X();
- void X() {}
- }
- // Assume that the reference is in another file.
- void f() { old_ns::X(); }
- namespace old_ns { void g() { X(); } }
- namespace new_ns { void h() { ::old_ns::X(); } }
- )";
- std::string Expected = R"(
- namespace old_ns {
- void Y();
- void Y() {}
- }
- // Assume that the reference is in another file.
- void f() { new_ns::Y(); }
- namespace old_ns { void g() { new_ns::Y(); } }
- namespace new_ns { void h() { Y(); } }
- )";
- std::string After = runClangRenameOnCode(Before, "::old_ns::X", "new_ns::Y");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, NewNamespaceWithLeadingDotDot) {
- std::string Before = R"(
- namespace old_ns {
- void X();
- void X() {}
- }
- // Assume that the reference is in another file.
- void f() { old_ns::X(); }
- namespace old_ns { void g() { X(); } }
- namespace new_ns { void h() { ::old_ns::X(); } }
- )";
- std::string Expected = R"(
- namespace old_ns {
- void Y();
- void Y() {}
- }
- // Assume that the reference is in another file.
- void f() { ::new_ns::Y(); }
- namespace old_ns { void g() { ::new_ns::Y(); } }
- namespace new_ns { void h() { Y(); } }
- )";
- std::string After =
- runClangRenameOnCode(Before, "::old_ns::X", "::new_ns::Y");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, DontRenameSymbolsDefinedInAnonymousNamespace) {
- std::string Before = R"(
- namespace old_ns {
- class X {};
- namespace {
- void X();
- void X() {}
- void f() { X(); }
- }
- }
- )";
- std::string Expected = R"(
- namespace old_ns {
- class Y {};
- namespace {
- void X();
- void X() {}
- void f() { X(); }
- }
- }
- )";
- std::string After =
- runClangRenameOnCode(Before, "::old_ns::X", "::old_ns::Y");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, NewNestedNamespace) {
- std::string Before = R"(
- namespace old_ns {
- void X();
- void X() {}
- }
- // Assume that the reference is in another file.
- namespace old_ns {
- void f() { X(); }
- }
- )";
- std::string Expected = R"(
- namespace old_ns {
- void X();
- void X() {}
- }
- // Assume that the reference is in another file.
- namespace old_ns {
- void f() { older_ns::X(); }
- }
- )";
- std::string After =
- runClangRenameOnCode(Before, "::old_ns::X", "::old_ns::older_ns::X");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, MoveFromGlobalToNamespaceWithoutLeadingDotDot) {
- std::string Before = R"(
- void X();
- void X() {}
-
- // Assume that the reference is in another file.
- namespace some_ns {
- void f() { X(); }
- }
- )";
- std::string Expected = R"(
- void X();
- void X() {}
-
- // Assume that the reference is in another file.
- namespace some_ns {
- void f() { ns::X(); }
- }
- )";
- std::string After =
- runClangRenameOnCode(Before, "::X", "ns::X");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, MoveFromGlobalToNamespaceWithLeadingDotDot) {
- std::string Before = R"(
- void Y() {}
-
- // Assume that the reference is in another file.
- namespace some_ns {
- void f() { Y(); }
- }
- )";
- std::string Expected = R"(
- void Y() {}
-
- // Assume that the reference is in another file.
- namespace some_ns {
- void f() { ::ns::Y(); }
- }
- )";
- std::string After =
- runClangRenameOnCode(Before, "::Y", "::ns::Y");
- CompareSnippets(Expected, After);
-}
-
-// FIXME: the rename of overloaded operator is not fully supported yet.
-TEST_F(RenameFunctionTest, DISABLED_DoNotRenameOverloadedOperatorCalls) {
- std::string Before = R"(
- namespace old_ns {
- class T { public: int x; };
- bool operator==(const T& lhs, const T& rhs) {
- return lhs.x == rhs.x;
- }
- } // namespace old_ns
-
- // Assume that the reference is in another file.
- bool f() {
- auto eq = old_ns::operator==;
- old_ns::T t1, t2;
- old_ns::operator==(t1, t2);
- return t1 == t2;
- }
- )";
- std::string Expected = R"(
- namespace old_ns {
- class T { public: int x; };
- bool operator==(const T& lhs, const T& rhs) {
- return lhs.x == rhs.x;
- }
- } // namespace old_ns
-
- // Assume that the reference is in another file.
- bool f() {
- auto eq = new_ns::operator==;
- old_ns::T t1, t2;
- new_ns::operator==(t1, t2);
- return t1 == t2;
- }
- )";
- std::string After =
- runClangRenameOnCode(Before, "old_ns::operator==", "new_ns::operator==");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, FunctionRefAsTemplate) {
- std::string Before = R"(
- void X();
-
- // Assume that the reference is in another file.
- namespace some_ns {
- template <void (*Func)(void)>
- class TIterator {};
-
- template <void (*Func)(void)>
- class T {
- public:
- typedef TIterator<Func> IterType;
- using TI = TIterator<Func>;
- void g() {
- Func();
- auto func = Func;
- TIterator<Func> iter;
- }
- };
-
-
- void f() { T<X> tx; tx.g(); }
- } // namespace some_ns
- )";
- std::string Expected = R"(
- void X();
-
- // Assume that the reference is in another file.
- namespace some_ns {
- template <void (*Func)(void)>
- class TIterator {};
-
- template <void (*Func)(void)>
- class T {
- public:
- typedef TIterator<Func> IterType;
- using TI = TIterator<Func>;
- void g() {
- Func();
- auto func = Func;
- TIterator<Func> iter;
- }
- };
-
-
- void f() { T<ns::X> tx; tx.g(); }
- } // namespace some_ns
- )";
- std::string After = runClangRenameOnCode(Before, "::X", "ns::X");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameFunctionTest, RenameFunctionInUsingDecl) {
- std::string Before = R"(
- using base::ToNanoSeconds;
- namespace old_ns {
- using base::ToNanoSeconds;
- void f() {
- using base::ToNanoSeconds;
- }
- }
- )";
- std::string Expected = R"(
- using base::ToInt64NanoSeconds;
- namespace old_ns {
- using base::ToInt64NanoSeconds;
- void f() {
- using base::ToInt64NanoSeconds;
- }
- }
- )";
- std::string After = runClangRenameOnCode(Before, "base::ToNanoSeconds",
- "base::ToInt64NanoSeconds");
- CompareSnippets(Expected, After);
-}
-
-// FIXME: Fix the complex the case where the symbol being renamed is located in
-// `std::function<decltype<renamed_symbol>>`.
-TEST_F(ClangRenameTest, DISABLED_ReferencesInLambdaFunctionParameters) {
- std::string Before = R"(
- template <class T>
- class function;
- template <class R, class... ArgTypes>
- class function<R(ArgTypes...)> {
- public:
- template <typename Functor>
- function(Functor f) {}
-
- function() {}
-
- R operator()(ArgTypes...) const {}
- };
-
- namespace ns {
- void Old() {}
- void f() {
- function<decltype(Old)> func;
- }
- } // namespace ns)";
- std::string Expected = R"(
- template <class T>
- class function;
- template <class R, class... ArgTypes>
- class function<R(ArgTypes...)> {
- public:
- template <typename Functor>
- function(Functor f) {}
-
- function() {}
-
- R operator()(ArgTypes...) const {}
- };
-
- namespace ns {
- void New() {}
- void f() {
- function<decltype(::new_ns::New)> func;
- }
- } // namespace ns)";
- std::string After = runClangRenameOnCode(Before, "ns::Old", "::new_ns::New");
- CompareSnippets(Expected, After);
-}
-
-} // anonymous namespace
-} // namespace test
-} // namespace clang_rename
-} // namesdpace clang
diff --git a/clang/unittests/Rename/RenameMemberTest.cpp b/clang/unittests/Rename/RenameMemberTest.cpp
deleted file mode 100644
index c16d16aa25f0..000000000000
--- a/clang/unittests/Rename/RenameMemberTest.cpp
+++ /dev/null
@@ -1,228 +0,0 @@
-//===-- ClangMemberTests.cpp - unit tests for renaming class members ------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "ClangRenameTest.h"
-
-namespace clang {
-namespace clang_rename {
-namespace test {
-namespace {
-
-class RenameMemberTest : public ClangRenameTest {
-public:
- RenameMemberTest() {
- AppendToHeader(R"(
- struct NA {
- void Foo();
- void NotFoo();
- static void SFoo();
- static void SNotFoo();
- int Moo;
- };
- struct A {
- virtual void Foo();
- void NotFoo();
- static void SFoo();
- static void SNotFoo();
- int Moo;
- int NotMoo;
- static int SMoo;
- };
- struct B : public A {
- void Foo() override;
- };
- template <typename T> struct TA {
- T* Foo();
- T* NotFoo();
- static T* SFoo();
- static T* NotSFoo();
- };
- template <typename T> struct TB : public TA<T> {};
- namespace ns {
- template <typename T> struct TA {
- T* Foo();
- T* NotFoo();
- static T* SFoo();
- static T* NotSFoo();
- static int SMoo;
- };
- template <typename T> struct TB : public TA<T> {};
- struct A {
- void Foo();
- void NotFoo();
- static void SFoo();
- static void SNotFoo();
- };
- struct B : public A {};
- struct C {
- template <class T>
- void SFoo(const T& t) {}
- template <class T>
- void Foo() {}
- };
- })");
- }
-};
-
-INSTANTIATE_TEST_SUITE_P(
- DISABLED_RenameTemplatedClassStaticVariableTest, RenameMemberTest,
- testing::ValuesIn(std::vector<Case>({
- // FIXME: support renaming static variables for template classes.
- {"void f() { ns::TA<int>::SMoo; }",
- "void f() { ns::TA<int>::SMeh; }", "ns::TA::SMoo", "ns::TA::SMeh"},
- })) );
-
-INSTANTIATE_TEST_SUITE_P(
- RenameMemberTest, RenameMemberTest,
- testing::ValuesIn(std::vector<Case>({
- // Normal methods and fields.
- {"void f() { A a; a.Foo(); }", "void f() { A a; a.Bar(); }", "A::Foo",
- "A::Bar"},
- {"void f() { ns::A a; a.Foo(); }", "void f() { ns::A a; a.Bar(); }",
- "ns::A::Foo", "ns::A::Bar"},
- {"void f() { A a; int x = a.Moo; }", "void f() { A a; int x = a.Meh; }",
- "A::Moo", "A::Meh"},
- {"void f() { B b; b.Foo(); }", "void f() { B b; b.Bar(); }", "B::Foo",
- "B::Bar"},
- {"void f() { ns::B b; b.Foo(); }", "void f() { ns::B b; b.Bar(); }",
- "ns::A::Foo", "ns::A::Bar"},
- {"void f() { B b; int x = b.Moo; }", "void f() { B b; int x = b.Meh; }",
- "A::Moo", "A::Meh"},
-
- // Static methods.
- {"void f() { A::SFoo(); }", "void f() { A::SBar(); }", "A::SFoo",
- "A::SBar"},
- {"void f() { ns::A::SFoo(); }", "void f() { ns::A::SBar(); }",
- "ns::A::SFoo", "ns::A::SBar"},
- {"void f() { TA<int>::SFoo(); }", "void f() { TA<int>::SBar(); }",
- "TA::SFoo", "TA::SBar"},
- {"void f() { ns::TA<int>::SFoo(); }",
- "void f() { ns::TA<int>::SBar(); }", "ns::TA::SFoo", "ns::TA::SBar"},
-
- // Static variables.
- {"void f() { A::SMoo; }",
- "void f() { A::SMeh; }", "A::SMoo", "A::SMeh"},
-
- // Templated methods.
- {"void f() { TA<int> a; a.Foo(); }", "void f() { TA<int> a; a.Bar(); }",
- "TA::Foo", "TA::Bar"},
- {"void f() { ns::TA<int> a; a.Foo(); }",
- "void f() { ns::TA<int> a; a.Bar(); }", "ns::TA::Foo", "ns::TA::Bar"},
- {"void f() { TB<int> b; b.Foo(); }", "void f() { TB<int> b; b.Bar(); }",
- "TA::Foo", "TA::Bar"},
- {"void f() { ns::TB<int> b; b.Foo(); }",
- "void f() { ns::TB<int> b; b.Bar(); }", "ns::TA::Foo", "ns::TA::Bar"},
- {"void f() { ns::C c; int x; c.SFoo(x); }",
- "void f() { ns::C c; int x; c.SBar(x); }", "ns::C::SFoo",
- "ns::C::SBar"},
- {"void f() { ns::C c; c.Foo<int>(); }",
- "void f() { ns::C c; c.Bar<int>(); }", "ns::C::Foo", "ns::C::Bar"},
-
- // Pointers to methods.
- {"void f() { auto p = &A::Foo; }", "void f() { auto p = &A::Bar; }",
- "A::Foo", "A::Bar"},
- {"void f() { auto p = &A::SFoo; }", "void f() { auto p = &A::SBar; }",
- "A::SFoo", "A::SBar"},
- {"void f() { auto p = &B::Foo; }", "void f() { auto p = &B::Bar; }",
- "B::Foo", "B::Bar"},
- {"void f() { auto p = &ns::A::Foo; }",
- "void f() { auto p = &ns::A::Bar; }", "ns::A::Foo", "ns::A::Bar"},
- {"void f() { auto p = &ns::A::SFoo; }",
- "void f() { auto p = &ns::A::SBar; }", "ns::A::SFoo", "ns::A::SBar"},
- {"void f() { auto p = &ns::C::SFoo<int>; }",
- "void f() { auto p = &ns::C::SBar<int>; }", "ns::C::SFoo",
- "ns::C::SBar"},
-
- // These methods are not declared or overridden in the subclass B, we
- // have to use the qualified name with parent class A to identify them.
- {"void f() { auto p = &ns::B::Foo; }",
- "void f() { auto p = &ns::B::Bar; }", "ns::A::Foo", "ns::B::Bar"},
- {"void f() { B::SFoo(); }", "void f() { B::SBar(); }", "A::SFoo",
- "B::SBar"},
- {"void f() { ns::B::SFoo(); }", "void f() { ns::B::SBar(); }",
- "ns::A::SFoo", "ns::B::SBar"},
- {"void f() { auto p = &B::SFoo; }", "void f() { auto p = &B::SBar; }",
- "A::SFoo", "B::SBar"},
- {"void f() { auto p = &ns::B::SFoo; }",
- "void f() { auto p = &ns::B::SBar; }", "ns::A::SFoo", "ns::B::SBar"},
- {"void f() { TB<int>::SFoo(); }", "void f() { TB<int>::SBar(); }",
- "TA::SFoo", "TB::SBar"},
- {"void f() { ns::TB<int>::SFoo(); }",
- "void f() { ns::TB<int>::SBar(); }", "ns::TA::SFoo", "ns::TB::SBar"},
- })) );
-
-TEST_P(RenameMemberTest, RenameMembers) {
- auto Param = GetParam();
- assert(!Param.OldName.empty());
- assert(!Param.NewName.empty());
- std::string Actual =
- runClangRenameOnCode(Param.Before, Param.OldName, Param.NewName);
- CompareSnippets(Param.After, Actual);
-}
-
-TEST_F(RenameMemberTest, RenameMemberInsideClassMethods) {
- std::string Before = R"(
- struct X {
- int Moo;
- void Baz() { Moo = 1; }
- };)";
- std::string Expected = R"(
- struct X {
- int Meh;
- void Baz() { Meh = 1; }
- };)";
- std::string After = runClangRenameOnCode(Before, "X::Moo", "Y::Meh");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameMemberTest, RenameMethodInsideClassMethods) {
- std::string Before = R"(
- struct X {
- void Foo() {}
- void Baz() { Foo(); }
- };)";
- std::string Expected = R"(
- struct X {
- void Bar() {}
- void Baz() { Bar(); }
- };)";
- std::string After = runClangRenameOnCode(Before, "X::Foo", "X::Bar");
- CompareSnippets(Expected, After);
-}
-
-TEST_F(RenameMemberTest, RenameCtorInitializer) {
- std::string Before = R"(
- class X {
- public:
- X();
- A a;
- A a2;
- B b;
- };
-
- X::X():a(), b() {}
- )";
- std::string Expected = R"(
- class X {
- public:
- X();
- A bar;
- A a2;
- B b;
- };
-
- X::X():bar(), b() {}
- )";
- std::string After = runClangRenameOnCode(Before, "X::a", "X::bar");
- CompareSnippets(Expected, After);
-}
-
-} // anonymous namespace
-} // namespace test
-} // namespace clang_rename
-} // namesdpace clang
diff --git a/clang/utils/UpdateVerifyTests/core.py b/clang/utils/UpdateVerifyTests/core.py
deleted file mode 100644
index d1350cdbb698..000000000000
--- a/clang/utils/UpdateVerifyTests/core.py
+++ /dev/null
@@ -1,452 +0,0 @@
-import sys
-import re
-
-DEBUG = False
-
-
-def dprint(*args):
- if DEBUG:
- print(*args, file=sys.stderr)
-
-
-class KnownException(Exception):
- pass
-
-
-def parse_error_category(s, prefix):
- if "no expected directives found" in s:
- return None
- parts = s.split("diagnostics")
- diag_category = parts[0]
- category_parts = parts[0].strip().strip("'").split("-")
- expected = category_parts[0]
- if expected != prefix:
- raise Exception(
- f"expected prefix '{prefix}', but found '{expected}'. Multiple verify prefixes are not supported."
- )
- diag_category = category_parts[1]
- if "seen but not expected" in parts[1]:
- seen = True
- elif "expected but not seen" in parts[1]:
- seen = False
- else:
- raise KnownException(f"unexpected category '{parts[1]}'")
- return (diag_category, seen)
-
-
-diag_error_re = re.compile(r"File (\S+) Line (\d+): (.+)")
-diag_error_re2 = re.compile(r"File \S+ Line \d+ \(directive at (\S+):(\d+)\): (.+)")
-
-
-def parse_diag_error(s):
- m = diag_error_re2.match(s)
- if not m:
- m = diag_error_re.match(s)
- if not m:
- return None
- return (m.group(1), int(m.group(2)), m.group(3))
-
-
-class Line:
- def __init__(self, content, line_n):
- self.content = content
- self.diag = None
- self.line_n = line_n
- self.targeting_diags = []
-
- def update_line_n(self, n):
- self.line_n = n
-
- def render(self):
- if not self.diag:
- return self.content
- assert "{{DIAG}}" in self.content
- res = self.content.replace("{{DIAG}}", self.diag.render())
- if not res.strip():
- return ""
- return res
-
-
-class Diag:
- def __init__(
- self,
- prefix,
- diag_content,
- category,
- parsed_target_line_n,
- line_is_absolute,
- count,
- line,
- is_re,
- whitespace_strings,
- is_from_source_file,
- ):
- self.prefix = prefix
- self.diag_content = diag_content
- self.category = category
- self.parsed_target_line_n = parsed_target_line_n
- self.line_is_absolute = line_is_absolute
- self.count = count
- self.line = line
- self.target = None
- self.is_re = is_re
- self.absolute_target()
- self.whitespace_strings = whitespace_strings
- self.is_from_source_file = is_from_source_file
-
- def decrement_count(self):
- self.count -= 1
- assert self.count >= 0
-
- def increment_count(self):
- assert self.count >= 0
- self.count += 1
-
- def unset_target(self):
- assert self.target is not None
- self.target.targeting_diags.remove(self)
- self.target = None
-
- def set_target(self, target):
- if self.target:
- self.unset_target()
- self.target = target
- self.target.targeting_diags.append(self)
-
- def absolute_target(self):
- if self.target:
- return self.target.line_n
- if self.line_is_absolute:
- return self.parsed_target_line_n
- return self.line.line_n + self.parsed_target_line_n
-
- def relative_target(self):
- return self.absolute_target() - self.line.line_n
-
- def take(self, other_diag):
- assert self.count == 0
- assert other_diag.count > 0
- assert other_diag.target == self.target
- assert not other_diag.line_is_absolute
- assert not other_diag.is_re and not self.is_re
- self.line_is_absolute = False
- self.diag_content = other_diag.diag_content
- self.count = other_diag.count
- self.category = other_diag.category
- self.count = other_diag.count
- other_diag.count = 0
-
- def render(self):
- assert self.count >= 0
- if self.count == 0:
- return ""
- line_location_s = ""
- if self.relative_target() != 0:
- if self.line_is_absolute:
- line_location_s = f"@{self.absolute_target()}"
- elif self.relative_target() > 0:
- line_location_s = f"@+{self.relative_target()}"
- else:
- line_location_s = (
- f"@{self.relative_target()}" # the minus sign is implicit
- )
- count_s = "" if self.count == 1 else f"{self.count}"
- re_s = "-re" if self.is_re else ""
- if self.whitespace_strings:
- whitespace1_s = self.whitespace_strings[0]
- whitespace2_s = self.whitespace_strings[1]
- whitespace3_s = self.whitespace_strings[2]
- else:
- whitespace1_s = " "
- whitespace2_s = ""
- whitespace3_s = ""
- if count_s and not whitespace2_s:
- whitespace2_s = " " # required to parse correctly
- elif not count_s and whitespace2_s == " ":
- """Don't emit a weird extra space.
- However if the whitespace is something other than the
- standard single space, let it be to avoid disrupting manual formatting.
- The existence of a non-empty whitespace2_s implies this was parsed with
- a count > 1 and then decremented, otherwise this whitespace would have
- been parsed as whitespace3_s.
- """
- whitespace2_s = ""
- return f"//{whitespace1_s}{self.prefix}-{self.category}{re_s}{line_location_s}{whitespace2_s}{count_s}{whitespace3_s}{{{{{self.diag_content}}}}}"
-
-
-expected_diag_re = re.compile(
- r"//(\s*)([a-zA-Z]+)-(note|warning|error)(-re)?(@[+-]?\d+)?(?:(\s*)(\d+))?(\s*)\{\{(.*)\}\}"
-)
-
-
-def parse_diag(line, filename, lines, prefix):
- s = line.content
- ms = expected_diag_re.findall(s)
- if not ms:
- return None
- if len(ms) > 1:
- raise KnownException(
- f"multiple diags on line {filename}:{line.line_n}. Aborting due to missing implementation."
- )
- [
- whitespace1_s,
- check_prefix,
- category_s,
- re_s,
- target_line_s,
- whitespace2_s,
- count_s,
- whitespace3_s,
- diag_s,
- ] = ms[0]
- if check_prefix != prefix:
- return None
- if not target_line_s:
- target_line_n = 0
- is_absolute = False
- elif target_line_s.startswith("@+"):
- target_line_n = int(target_line_s[2:])
- is_absolute = False
- elif target_line_s.startswith("@-"):
- target_line_n = int(target_line_s[1:])
- is_absolute = False
- else:
- target_line_n = int(target_line_s[1:])
- is_absolute = True
- count = int(count_s) if count_s else 1
- line.content = expected_diag_re.sub("{{DIAG}}", s)
-
- return Diag(
- prefix,
- diag_s,
- category_s,
- target_line_n,
- is_absolute,
- count,
- line,
- bool(re_s),
- [whitespace1_s, whitespace2_s, whitespace3_s],
- True,
- )
-
-
-def add_line(new_line, lines):
- lines.insert(new_line.line_n - 1, new_line)
- for i in range(new_line.line_n, len(lines)):
- line = lines[i]
- assert line.line_n == i
- line.update_line_n(i + 1)
- assert all(line.line_n == i + 1 for i, line in enumerate(lines))
-
-
-def remove_line(old_line, lines):
- lines.remove(old_line)
- for i in range(old_line.line_n - 1, len(lines)):
- line = lines[i]
- assert line.line_n == i + 2
- line.update_line_n(i + 1)
- assert all(line.line_n == i + 1 for i, line in enumerate(lines))
-
-
-indent_re = re.compile(r"\s*")
-
-
-def get_indent(s):
- return indent_re.match(s).group(0)
-
-
-def orig_line_n_to_new_line_n(line_n, orig_lines):
- return orig_lines[line_n - 1].line_n
-
-
-def add_diag(orig_line_n, diag_s, diag_category, lines, orig_lines, prefix):
- line_n = orig_line_n_to_new_line_n(orig_line_n, orig_lines)
- target = lines[line_n - 1]
- for other in target.targeting_diags:
- if other.is_re:
- raise KnownException(
- "mismatching diag on line with regex matcher. Skipping due to missing implementation"
- )
- reverse = (
- True
- if [other for other in target.targeting_diags if other.relative_target() < 0]
- else False
- )
-
- targeting = [
- other for other in target.targeting_diags if not other.line_is_absolute
- ]
- targeting.sort(reverse=reverse, key=lambda d: d.relative_target())
- prev_offset = 0
- prev_line = target
- direction = -1 if reverse else 1
- for d in targeting:
- if d.relative_target() != prev_offset + direction:
- break
- prev_offset = d.relative_target()
- prev_line = d.line
- total_offset = prev_offset - 1 if reverse else prev_offset + 1
- if reverse:
- new_line_n = prev_line.line_n + 1
- else:
- new_line_n = prev_line.line_n
- assert new_line_n == line_n + (not reverse) - total_offset
-
- new_line = Line(get_indent(prev_line.content) + "{{DIAG}}\n", new_line_n)
- add_line(new_line, lines)
-
- whitespace_strings = prev_line.diag.whitespace_strings if prev_line.diag else None
- new_diag = Diag(
- prefix,
- diag_s,
- diag_category,
- total_offset,
- False,
- 1,
- new_line,
- False,
- whitespace_strings,
- False,
- )
- new_line.diag = new_diag
- new_diag.set_target(target)
-
-
-def remove_dead_diags(lines):
- for line in lines:
- if not line.diag or line.diag.count != 0:
- continue
- if line.render() == "":
- remove_line(line, lines)
- else:
- assert line.diag.is_from_source_file
- for other_diag in line.targeting_diags:
- if (
- other_diag.is_from_source_file
- or other_diag.count == 0
- or other_diag.category != line.diag.category
- ):
- continue
- if other_diag.is_re or line.diag.is_re:
- continue
- line.diag.take(other_diag)
- remove_line(other_diag.line, lines)
-
-
-def has_live_diags(lines):
- for line in lines:
- if line.diag and line.diag.count > 0:
- return True
- return False
-
-
-def get_expected_no_diags_line_n(lines, prefix):
- for line in lines:
- if f"{prefix}-no-diagnostics" in line.content:
- return line.line_n
- return None
-
-
-def update_test_file(filename, diag_errors, prefix, updated_test_files):
- dprint(f"updating test file {filename}")
- if filename in updated_test_files:
- raise KnownException(f"{filename} already updated, but got new output")
- else:
- updated_test_files.add(filename)
- with open(filename, "r") as f:
- lines = [Line(line, i + 1) for i, line in enumerate(f.readlines())]
- orig_lines = list(lines)
- expected_no_diags_line_n = get_expected_no_diags_line_n(orig_lines, prefix)
-
- for line in lines:
- diag = parse_diag(line, filename, lines, prefix)
- if diag:
- line.diag = diag
- diag.set_target(lines[diag.absolute_target() - 1])
-
- for line_n, diag_s, diag_category, seen in diag_errors:
- if seen:
- continue
- # this is a diagnostic expected but not seen
- assert lines[line_n - 1].diag
- if diag_s != lines[line_n - 1].diag.diag_content:
- raise KnownException(
- f"{filename}:{line_n} - found diag {lines[line_n - 1].diag.diag_content} but expected {diag_s}"
- )
- if diag_category != lines[line_n - 1].diag.category:
- raise KnownException(
- f"{filename}:{line_n} - found {lines[line_n - 1].diag.category} diag but expected {diag_category}"
- )
- lines[line_n - 1].diag.decrement_count()
- diag_errors_left = []
- diag_errors.sort(reverse=True, key=lambda t: t[0])
- for line_n, diag_s, diag_category, seen in diag_errors:
- if not seen:
- continue
- target = orig_lines[line_n - 1]
- other_diags = [
- d
- for d in target.targeting_diags
- if d.diag_content == diag_s and d.category == diag_category
- ]
- other_diag = other_diags[0] if other_diags else None
- if other_diag:
- other_diag.increment_count()
- else:
- add_diag(line_n, diag_s, diag_category, lines, orig_lines, prefix)
- remove_dead_diags(lines)
- has_diags = has_live_diags(lines)
- with open(filename, "w") as f:
- if not has_diags and expected_no_diags_line_n is None:
- f.write("// expected-no-diagnostics\n")
- for line in lines:
- if has_diags and line.line_n == expected_no_diags_line_n:
- continue
- f.write(line.render())
-
-
-def update_test_files(errors, prefix):
- errors_by_file = {}
- for (filename, line, diag_s), (diag_category, seen) in errors:
- if filename not in errors_by_file:
- errors_by_file[filename] = []
- errors_by_file[filename].append((line, diag_s, diag_category, seen))
- updated_test_files = set()
- for filename, diag_errors in errors_by_file.items():
- try:
- update_test_file(filename, diag_errors, prefix, updated_test_files)
- except KnownException as e:
- return f"Error in update-verify-tests while updating {filename}: {e}"
- updated_files = list(updated_test_files)
- assert updated_files
- if len(updated_files) == 1:
- return f"updated file {updated_files[0]}"
- updated_files_s = "\n\t".join(updated_files)
- return "updated files:\n\t{updated_files_s}"
-
-
-def check_expectations(tool_output, prefix):
- """
- The entry point function.
- Called by the stand-alone update-verify-tests.py as well as litplugin.py.
- """
- curr = []
- curr_category = None
- try:
- for line in tool_output:
- if line.startswith("error: "):
- curr_category = parse_error_category(line[len("error: ") :], prefix)
- continue
-
- diag_error = parse_diag_error(line.strip())
- if diag_error:
- curr.append((diag_error, curr_category))
- else:
- dprint("no match")
- dprint(line.strip())
- except KnownException as e:
- return f"Error in update-verify-tests while parsing tool output: {e}"
- if curr:
- return update_test_files(curr, prefix)
- else:
- return "no mismatching diagnostics found"
diff --git a/clang/utils/update-verify-tests.py b/clang/utils/update-verify-tests.py
deleted file mode 100644
index e2874a8c049e..000000000000
--- a/clang/utils/update-verify-tests.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import sys
-import argparse
-from UpdateVerifyTests.core import check_expectations
-
-"""
- Pipe output from clang's -verify into this script to have the test case updated to expect the actual diagnostic output.
- When inserting new expected-* checks it will place them on the line before the location of the diagnostic, with an @+1,
- or @+N for some N if there are multiple diagnostics emitted on the same line. If the current checks are using @-N for
- this line, the new check will follow that convention also.
- Existing checks will be left untouched as much as possible, including their location and whitespace content, to minimize
- diffs. If inaccurate their count will be updated, or the check removed entirely.
-
- Missing features:
- - multiple prefixes on the same line (-verify=my-prefix,my-other-prefix)
- - multiple prefixes on separate RUN lines (RUN: -verify=my-prefix\nRUN: -verify my-other-prefix)
- - regexes with expected-*-re: existing ones will be left untouched if accurate, but the script will abort if there are any
- diagnostic mismatches on the same line.
- - multiple checks targeting the same line are supported, but a line may only contain one check
- - if multiple checks targeting the same line are failing the script is not guaranteed to produce a minimal diff
-
-Example usage:
- clang -verify [file] | python3 update-verify-tests.py
- clang -verify=check [file] | python3 update-verify-tests.py --prefix check
-"""
-
-
-def main():
- parser = argparse.ArgumentParser(description=__doc__)
- parser.add_argument(
- "--prefix", default="expected", help="The prefix passed to -verify"
- )
- args = parser.parse_args()
- output = check_expectations(sys.stdin.readlines(), args.prefix)
- print(output)
-
-
-if __name__ == "__main__":
- main()
diff --git a/clang/www/c_status.html b/clang/www/c_status.html
index 148405ec31a9..e5da7f3c87a5 100644
--- a/clang/www/c_status.html
+++ b/clang/www/c_status.html
@@ -697,7 +697,7 @@ conformance.</p>
<tr>
<td>Enhanced enumerations</td>
<td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3030.htm">N3030</a></td>
- <td class="unknown" align="center">Unknown</td>
+ <td class="unreleased" align="center">Clang 20</td>
</tr>
<tr>
<td>Freestanding C and IEC 60559 conformance scope reduction</td>
diff --git a/compiler-rt/lib/asan/asan_rtl.cpp b/compiler-rt/lib/asan/asan_rtl.cpp
index a390802af28d..19c6c210b564 100644
--- a/compiler-rt/lib/asan/asan_rtl.cpp
+++ b/compiler-rt/lib/asan/asan_rtl.cpp
@@ -478,9 +478,6 @@ static bool AsanInitInternal() {
if (flags()->start_deactivated)
AsanDeactivate();
- // interceptors
- InitTlsSize();
-
// Create main thread.
AsanThread *main_thread = CreateMainThread();
CHECK_EQ(0, main_thread->tid());
diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp
index 1972a07d15ac..886e93e5fa81 100644
--- a/compiler-rt/lib/dfsan/dfsan.cpp
+++ b/compiler-rt/lib/dfsan/dfsan.cpp
@@ -1262,6 +1262,8 @@ static void DFsanInit(int argc, char **argv, char **envp) {
CheckASLR();
+ InitializePlatformEarly();
+
if (!InitShadowWithReExec(dfsan_get_track_origins())) {
Printf("FATAL: DataflowSanitizer can not mmap the shadow memory.\n");
DumpProcessMap();
diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp
index ccdc0b4bc21b..24384d8b4d2c 100644
--- a/compiler-rt/lib/hwasan/hwasan.cpp
+++ b/compiler-rt/lib/hwasan/hwasan.cpp
@@ -357,8 +357,6 @@ __attribute__((constructor(0))) void __hwasan_init() {
hwasan_init_is_running = 1;
SanitizerToolName = "HWAddressSanitizer";
- InitTlsSize();
-
CacheBinaryName();
InitializeFlags();
@@ -367,6 +365,8 @@ __attribute__((constructor(0))) void __hwasan_init() {
__sanitizer_set_report_path(common_flags()->log_path);
+ InitializePlatformEarly();
+
AndroidTestTlsSlot();
DisableCoreDumperIfNecessary();
diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp
index a638e66eccee..a0ff124a89c9 100644
--- a/compiler-rt/lib/interception/interception_win.cpp
+++ b/compiler-rt/lib/interception/interception_win.cpp
@@ -130,6 +130,7 @@
#include "sanitizer_common/sanitizer_platform.h"
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
+#include <psapi.h>
namespace __interception {
@@ -385,7 +386,30 @@ void TestOnlyReleaseTrampolineRegions() {
}
}
-static uptr AllocateMemoryForTrampoline(uptr image_address, size_t size) {
+static uptr AllocateMemoryForTrampoline(uptr func_address, size_t size) {
+ uptr image_address = func_address;
+
+#if SANITIZER_WINDOWS64
+ // Allocate memory after the module (DLL or EXE file), but within 2GB
+ // of the start of the module so that any address within the module can be
+ // referenced with PC-relative operands.
+ // This allows us to not just jump to the trampoline with a PC-relative
+ // offset, but to relocate any instructions that we copy to the trampoline
+ // which have references to the original module. If we can't find the base
+ // address of the module (e.g. if func_address is in mmap'ed memory), just
+ // use func_address as is.
+ HMODULE module;
+ if (::GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
+ GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+ (LPCWSTR)func_address, &module)) {
+ MODULEINFO module_info;
+ if (::GetModuleInformation(::GetCurrentProcess(), module,
+ &module_info, sizeof(module_info))) {
+ image_address = (uptr)module_info.lpBaseOfDll;
+ }
+ }
+#endif
+
// Find a region within 2G with enough space to allocate |size| bytes.
TrampolineMemoryRegion *region = nullptr;
for (size_t bucket = 0; bucket < kMaxTrampolineRegion; ++bucket) {
diff --git a/compiler-rt/lib/lsan/lsan.cpp b/compiler-rt/lib/lsan/lsan.cpp
index 7a27b600f203..798294b499e2 100644
--- a/compiler-rt/lib/lsan/lsan.cpp
+++ b/compiler-rt/lib/lsan/lsan.cpp
@@ -92,10 +92,10 @@ extern "C" void __lsan_init() {
CacheBinaryName();
AvoidCVE_2016_2143();
InitializeFlags();
+ InitializePlatformEarly();
InitCommonLsan();
InitializeAllocator();
ReplaceSystemMalloc();
- InitTlsSize();
InitializeInterceptors();
InitializeThreads();
InstallDeadlySignalHandlers(LsanOnDeadlySignal);
diff --git a/compiler-rt/lib/memprof/memprof_rtl.cpp b/compiler-rt/lib/memprof/memprof_rtl.cpp
index cf4bde808bfa..2cc6c2df5a6f 100644
--- a/compiler-rt/lib/memprof/memprof_rtl.cpp
+++ b/compiler-rt/lib/memprof/memprof_rtl.cpp
@@ -213,9 +213,6 @@ static void MemprofInitInternal() {
InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir);
- // interceptors
- InitTlsSize();
-
// Create main thread.
MemprofThread *main_thread = CreateMainThread();
CHECK_EQ(0, main_thread->tid());
diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp
index 2ee05f43ec5e..6c27ab21eeeb 100644
--- a/compiler-rt/lib/msan/msan.cpp
+++ b/compiler-rt/lib/msan/msan.cpp
@@ -457,10 +457,11 @@ void __msan_init() {
__sanitizer_set_report_path(common_flags()->log_path);
+ InitializePlatformEarly();
+
InitializeInterceptors();
InstallAtForkHandler();
CheckASLR();
- InitTlsSize();
InstallDeadlySignalHandlers(MsanOnDeadlySignal);
InstallAtExitHandler(); // Needs __cxa_atexit interceptor.
diff --git a/compiler-rt/lib/rtsan/CMakeLists.txt b/compiler-rt/lib/rtsan/CMakeLists.txt
index 3f146a757a97..07a21b49eb45 100644
--- a/compiler-rt/lib/rtsan/CMakeLists.txt
+++ b/compiler-rt/lib/rtsan/CMakeLists.txt
@@ -29,6 +29,8 @@ set(RTSAN_LINK_LIBS
${COMPILER_RT_UNWINDER_LINK_LIBS}
${COMPILER_RT_CXX_LINK_LIBS})
+append_rtti_flag(OFF RTSAN_CFLAGS)
+
if(APPLE)
add_compiler_rt_object_libraries(RTRtsan
OS ${SANITIZER_COMMON_SUPPORTED_OS}
diff --git a/compiler-rt/lib/rtsan/rtsan_context.cpp b/compiler-rt/lib/rtsan/rtsan_context.cpp
index 8609394fa222..e69fb259798d 100644
--- a/compiler-rt/lib/rtsan/rtsan_context.cpp
+++ b/compiler-rt/lib/rtsan/rtsan_context.cpp
@@ -62,7 +62,7 @@ static __rtsan::Context &GetContextForThisThreadImpl() {
Until then, and to keep the first PRs small, only the exit mode
is available.
*/
-static void InvokeViolationDetectedAction() { exit(EXIT_FAILURE); }
+static void InvokeViolationDetectedAction() { Die(); }
__rtsan::Context::Context() = default;
diff --git a/compiler-rt/lib/rtsan/rtsan_flags.cpp b/compiler-rt/lib/rtsan/rtsan_flags.cpp
index beab2a2fc5d8..9c90d23d7426 100644
--- a/compiler-rt/lib/rtsan/rtsan_flags.cpp
+++ b/compiler-rt/lib/rtsan/rtsan_flags.cpp
@@ -35,6 +35,7 @@ void __rtsan::InitializeFlags() {
{
CommonFlags cf;
cf.CopyFrom(*common_flags());
+ cf.exitcode = 43;
cf.external_symbolizer_path = GetEnv("RTSAN_SYMBOLIZER_PATH");
OverrideCommonFlags(cf);
}
diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_main.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_main.cpp
index 255ac9497103..50c726e09f28 100644
--- a/compiler-rt/lib/rtsan/tests/rtsan_test_main.cpp
+++ b/compiler-rt/lib/rtsan/tests/rtsan_test_main.cpp
@@ -8,8 +8,25 @@
//
//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_platform.h"
#include "sanitizer_test_utils.h"
+// Default RTSAN_OPTIONS for the unit tests.
+extern "C" const char *__rtsan_default_options() {
+#if SANITIZER_APPLE
+ // On Darwin, we default to `abort_on_error=1`, which would make tests run
+ // much slower. Let's override this and run lit tests with 'abort_on_error=0'
+ // and make sure we do not overwhelm the syslog while testing. Also, let's
+ // turn symbolization off to speed up testing, especially when not running
+ // with llvm-symbolizer but with atos.
+ return "symbolize=false:abort_on_error=0:log_to_syslog=0";
+#else
+ // Let's turn symbolization off to speed up testing (more than 3 times speedup
+ // observed).
+ return "symbolize=false";
+#endif
+}
+
int main(int argc, char **argv) {
testing::GTEST_FLAG(death_test_style) = "threadsafe";
testing::InitGoogleTest(&argc, argv);
diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_utilities.h b/compiler-rt/lib/rtsan/tests/rtsan_test_utilities.h
index 4ba4fc5e5308..f0cf90e057e3 100644
--- a/compiler-rt/lib/rtsan/tests/rtsan_test_utilities.h
+++ b/compiler-rt/lib/rtsan/tests/rtsan_test_utilities.h
@@ -37,8 +37,8 @@ void ExpectRealtimeDeath(Function &&Func,
: "";
};
- EXPECT_EXIT(RealtimeInvoke(std::forward<Function>(Func)),
- ExitedWithCode(EXIT_FAILURE), GetExpectedErrorSubstring());
+ EXPECT_EXIT(RealtimeInvoke(std::forward<Function>(Func)), ExitedWithCode(43),
+ GetExpectedErrorSubstring());
}
template <typename Function> void ExpectNonRealtimeSurvival(Function &&Func) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp
index 7d88575160c6..e49285f22dff 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_nolibc.cpp
@@ -22,6 +22,7 @@ namespace __sanitizer {
#if !SANITIZER_WINDOWS
# if SANITIZER_LINUX
void LogMessageOnPrintf(const char *str) {}
+void InitTlsSize() {}
# endif
void WriteToSyslog(const char *buffer) {}
void Abort() { internal__exit(1); }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp
index a67b2a8725ec..75dcf546729f 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp
@@ -94,7 +94,6 @@ void DisableCoreDumperIfNecessary() {}
void InstallDeadlySignalHandlers(SignalHandlerType handler) {}
void SetAlternateSignalStack() {}
void UnsetAlternateSignalStack() {}
-void InitTlsSize() {}
bool SignalContext::IsStackOverflow() const { return false; }
void SignalContext::DumpAllRegisters(void *context) { UNIMPLEMENTED(); }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 6359f4348e3c..1c637d109649 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -2672,9 +2672,7 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) {
void SignalContext::InitPcSpBp() { GetPcSpBp(context, &pc, &sp, &bp); }
-void InitializePlatformEarly() {
- // Do nothing.
-}
+void InitializePlatformEarly() { InitTlsSize(); }
void CheckASLR() {
# if SANITIZER_NETBSD
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
index 4ecd47388c1d..93158b0d41aa 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
@@ -209,7 +209,6 @@ bool SetEnv(const char *name, const char *value) {
__attribute__((unused)) static int g_use_dlpi_tls_data;
# if SANITIZER_GLIBC && !SANITIZER_GO
-
static void GetGLibcVersion(int *major, int *minor, int *patch) {
const char *p = gnu_get_libc_version();
*major = internal_simple_strtoll(p, &p, 10);
@@ -218,23 +217,6 @@ static void GetGLibcVersion(int *major, int *minor, int *patch) {
*minor = (*p == '.') ? internal_simple_strtoll(p + 1, &p, 10) : 0;
*patch = (*p == '.') ? internal_simple_strtoll(p + 1, &p, 10) : 0;
}
-
-__attribute__((unused)) static size_t g_tls_size;
-
-void InitTlsSize() {
- int major, minor, patch;
- GetGLibcVersion(&major, &minor, &patch);
- g_use_dlpi_tls_data = major == 2 && minor >= 25;
-
-# if defined(__aarch64__) || defined(__x86_64__) || \
- defined(__powerpc64__) || defined(__loongarch__)
- void *get_tls_static_info = dlsym(RTLD_DEFAULT, "_dl_get_tls_static_info");
- size_t tls_align;
- ((void (*)(size_t *, size_t *))get_tls_static_info)(&g_tls_size, &tls_align);
-# endif
-}
-# else
-void InitTlsSize() {}
# endif // SANITIZER_GLIBC && !SANITIZER_GO
// On glibc x86_64, ThreadDescriptorSize() needs to be precise due to the usage
@@ -242,7 +224,7 @@ void InitTlsSize() {}
// to get the pointer to thread-specific data keys in the thread control block.
# if (SANITIZER_FREEBSD || SANITIZER_GLIBC) && !SANITIZER_GO
// sizeof(struct pthread) from glibc.
-static atomic_uintptr_t thread_descriptor_size;
+static uptr thread_descriptor_size;
// FIXME: Implementation is very GLIBC specific, but it's used by FREEBSD.
static uptr ThreadDescriptorSizeFallback() {
@@ -323,19 +305,41 @@ static uptr ThreadDescriptorSizeFallback() {
# endif
}
-uptr ThreadDescriptorSize() {
- uptr val = atomic_load_relaxed(&thread_descriptor_size);
- if (val)
- return val;
- // _thread_db_sizeof_pthread is a GLIBC_PRIVATE symbol that is exported in
- // glibc 2.34 and later.
- if (unsigned *psizeof = static_cast<unsigned *>(
- dlsym(RTLD_DEFAULT, "_thread_db_sizeof_pthread")))
- val = *psizeof;
- if (!val)
- val = ThreadDescriptorSizeFallback();
- atomic_store_relaxed(&thread_descriptor_size, val);
- return val;
+uptr ThreadDescriptorSize() { return thread_descriptor_size; }
+
+# if SANITIZER_GLIBC
+__attribute__((unused)) static size_t g_tls_size;
+# endif
+
+void InitTlsSize() {
+# if SANITIZER_GLIBC
+ int major, minor, patch;
+ GetGLibcVersion(&major, &minor, &patch);
+ g_use_dlpi_tls_data = major == 2 && minor >= 25;
+
+ if (major == 2 && minor >= 34) {
+ // _thread_db_sizeof_pthread is a GLIBC_PRIVATE symbol that is exported in
+ // glibc 2.34 and later.
+ if (unsigned *psizeof = static_cast<unsigned *>(
+ dlsym(RTLD_DEFAULT, "_thread_db_sizeof_pthread"))) {
+ thread_descriptor_size = *psizeof;
+ }
+ }
+
+# if defined(__aarch64__) || defined(__x86_64__) || \
+ defined(__powerpc64__) || defined(__loongarch__)
+ auto *get_tls_static_info = (void (*)(size_t *, size_t *))dlsym(
+ RTLD_DEFAULT, "_dl_get_tls_static_info");
+ size_t tls_align;
+ // Can be null if static link.
+ if (get_tls_static_info)
+ get_tls_static_info(&g_tls_size, &tls_align);
+# endif
+
+# endif // SANITIZER_GLIBC
+
+ if (!thread_descriptor_size)
+ thread_descriptor_size = ThreadDescriptorSizeFallback();
}
# if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 || \
@@ -358,8 +362,9 @@ static uptr TlsPreTcbSize() {
return kTlsPreTcbSize;
}
# endif
-
-# endif
+# else // (SANITIZER_FREEBSD || SANITIZER_GLIBC) && !SANITIZER_GO
+void InitTlsSize() {}
+# endif // (SANITIZER_FREEBSD || SANITIZER_GLIBC) && !SANITIZER_GO
# if (SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_SOLARIS) && \
!SANITIZER_ANDROID && !SANITIZER_GO
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
index 2a36104e6f9f..26d2e8d4ed76 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
@@ -545,9 +545,6 @@ uptr GetTlsSize() {
return 0;
}
-void InitTlsSize() {
-}
-
uptr TlsBaseAddr() {
uptr segbase = 0;
#if defined(__x86_64__)
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp
index a17a14882d0e..6f3b6af3c584 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp
@@ -14,6 +14,8 @@
#include "sanitizer_allocator_interface.h"
#include "sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
#include "sanitizer_flags.h"
#include "sanitizer_platform_interceptors.h"
@@ -115,11 +117,14 @@ SANITIZER_INTERFACE_WEAK_DEF(uptr, __sanitizer_get_dtls_size,
const void *start = __sanitizer_get_allocated_begin(tls_begin);
if (!start)
return 0;
- CHECK_EQ(start, tls_begin);
+ CHECK_LE(start, tls_begin);
uptr tls_size = __sanitizer_get_allocated_size(start);
VReport(2, "__tls_get_addr: glibc DTLS suspected; tls={%p,0x%zx}\n",
tls_begin, tls_size);
- return tls_size;
+ uptr offset =
+ (reinterpret_cast<uptr>(tls_begin) - reinterpret_cast<uptr>(start));
+ CHECK_LE(offset, tls_size);
+ return tls_size - offset;
}
DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res,
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
index 2c8f8343519e..7cee57131486 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
@@ -873,9 +873,6 @@ uptr GetTlsSize() {
return 0;
}
-void InitTlsSize() {
-}
-
void GetThreadStackAndTls(bool main, uptr *stk_begin, uptr *stk_end,
uptr *tls_begin, uptr *tls_end) {
# if SANITIZER_GO
diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp
index 338c4d3bab2b..b286ab72a5c7 100644
--- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp
+++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_linux_test.cpp
@@ -202,6 +202,8 @@ TEST(SanitizerLinux, ThreadDescriptorSize) {
void *result;
ASSERT_EQ(0, pthread_create(&tid, 0, thread_descriptor_size_test_func, 0));
ASSERT_EQ(0, pthread_join(tid, &result));
+ EXPECT_EQ(0u, ThreadDescriptorSize());
+ InitTlsSize();
EXPECT_EQ((uptr)result, ThreadDescriptorSize());
}
# endif
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
index 621c679a05db..3e08a1bece98 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
@@ -418,7 +418,6 @@ void InitializePlatform() {
Die();
}
- InitTlsSize();
#endif // !SANITIZER_GO
}
diff --git a/compiler-rt/lib/ubsan/ubsan_init.cpp b/compiler-rt/lib/ubsan/ubsan_init.cpp
index 5802d58896f0..aea7ca00e3cb 100644
--- a/compiler-rt/lib/ubsan/ubsan_init.cpp
+++ b/compiler-rt/lib/ubsan/ubsan_init.cpp
@@ -43,8 +43,8 @@ static void CommonStandaloneInit() {
SanitizerToolName = GetSanititizerToolName();
CacheBinaryName();
InitializeFlags();
- __sanitizer::InitializePlatformEarly();
__sanitizer_set_report_path(common_flags()->log_path);
+ __sanitizer::InitializePlatformEarly();
AndroidLogInit();
InitializeCoverage(common_flags()->coverage, common_flags()->coverage_dir);
CommonInit();
diff --git a/compiler-rt/test/rtsan/basic.cpp b/compiler-rt/test/rtsan/basic.cpp
index 607db90213a3..4edf32336720 100644
--- a/compiler-rt/test/rtsan/basic.cpp
+++ b/compiler-rt/test/rtsan/basic.cpp
@@ -1,5 +1,4 @@
// RUN: %clangxx -fsanitize=realtime %s -o %t
-// RUN: %clang -fsanitize=realtime %s -o %t
// RUN: not %run %t 2>&1 | FileCheck %s
// UNSUPPORTED: ios
diff --git a/compiler-rt/test/rtsan/lit.cfg.py b/compiler-rt/test/rtsan/lit.cfg.py
index b262ecfa7fb4..7c75515a7608 100644
--- a/compiler-rt/test/rtsan/lit.cfg.py
+++ b/compiler-rt/test/rtsan/lit.cfg.py
@@ -3,6 +3,22 @@ import os
# Setup config name.
config.name = "RTSAN" + config.name_suffix
+
+default_rtsan_opts = "atexit_sleep_ms=0"
+
+if config.host_os == "Darwin":
+ # On Darwin, we default to `abort_on_error=1`, which would make tests run
+ # much slower. Let's override this and run lit tests with 'abort_on_error=0'.
+ default_rtsan_opts += ":abort_on_error=0"
+
+if default_rtsan_opts:
+ config.environment["RTSAN_OPTIONS"] = default_rtsan_opts
+ default_rtsan_opts += ":"
+
+config.substitutions.append(
+ ("%env_rtsan_opts=", "env RTSAN_OPTIONS=" + default_rtsan_opts)
+)
+
# Setup source root.
config.test_source_root = os.path.dirname(__file__)
diff --git a/compiler-rt/test/rtsan/sanity_check_pure_c.c b/compiler-rt/test/rtsan/sanity_check_pure_c.c
new file mode 100644
index 000000000000..bdca6039d932
--- /dev/null
+++ b/compiler-rt/test/rtsan/sanity_check_pure_c.c
@@ -0,0 +1,28 @@
+// RUN: %clang -fsanitize=realtime %s -o %t
+// RUN: not %run %t 2>&1 | FileCheck %s
+// RUN: %clang %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NO-SANITIZE
+#ifdef __cplusplus
+# error "This test must be built in C mode"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// Check that we can build and run C code.
+
+void nonblocking_function(void) __attribute__((nonblocking));
+
+void nonblocking_function(void) __attribute__((nonblocking)) {
+ void *ptr = malloc(2);
+ printf("ptr: %p\n", ptr); // ensure we don't optimize out the malloc
+}
+
+int main() {
+ nonblocking_function();
+ printf("Done\n");
+ return 0;
+}
+
+// CHECK: ==ERROR: RealtimeSanitizer
+// CHECK-NO-SANITIZE: Done
diff --git a/flang/include/flang/Evaluate/traverse.h b/flang/include/flang/Evaluate/traverse.h
index 7f4a67d97e64..90b93f6afd35 100644
--- a/flang/include/flang/Evaluate/traverse.h
+++ b/flang/include/flang/Evaluate/traverse.h
@@ -217,7 +217,7 @@ public:
return CombineContents(x);
}
Result operator()(const semantics::DerivedTypeSpec &x) const {
- return Combine(x.typeSymbol(), x.parameters());
+ return Combine(x.originalTypeSymbol(), x.parameters());
}
Result operator()(const StructureConstructorValues::value_type &x) const {
return visitor_(x.second);
diff --git a/flang/include/flang/Semantics/type.h b/flang/include/flang/Semantics/type.h
index e2d47d38f927..e2131e7e160c 100644
--- a/flang/include/flang/Semantics/type.h
+++ b/flang/include/flang/Semantics/type.h
@@ -259,6 +259,7 @@ public:
DerivedTypeSpec(DerivedTypeSpec &&);
const SourceName &name() const { return name_; }
+ const Symbol &originalTypeSymbol() const { return originalTypeSymbol_; }
const Symbol &typeSymbol() const { return typeSymbol_; }
const Scope *scope() const { return scope_; }
// Return scope_ if it is set, or the typeSymbol_ scope otherwise.
@@ -319,7 +320,8 @@ public:
private:
SourceName name_;
- const Symbol &typeSymbol_;
+ const Symbol &originalTypeSymbol_;
+ const Symbol &typeSymbol_; // == originalTypeSymbol_.GetUltimate()
const Scope *scope_{nullptr}; // same as typeSymbol_.scope() unless PDT
bool cooked_{false};
bool evaluated_{false};
@@ -328,8 +330,9 @@ private:
ParameterMapType parameters_;
Category category_{Category::DerivedType};
bool RawEquals(const DerivedTypeSpec &that) const {
- return &typeSymbol_ == &that.typeSymbol_ && cooked_ == that.cooked_ &&
- rawParameters_ == that.rawParameters_;
+ return &typeSymbol_ == &that.typeSymbol_ &&
+ &originalTypeSymbol_ == &that.originalTypeSymbol_ &&
+ cooked_ == that.cooked_ && rawParameters_ == that.rawParameters_;
}
friend llvm::raw_ostream &operator<<(
llvm::raw_ostream &, const DerivedTypeSpec &);
diff --git a/flang/lib/Evaluate/characteristics.cpp b/flang/lib/Evaluate/characteristics.cpp
index 70e24d6e82eb..2496e4427fe7 100644
--- a/flang/lib/Evaluate/characteristics.cpp
+++ b/flang/lib/Evaluate/characteristics.cpp
@@ -66,8 +66,9 @@ bool ShapesAreCompatible(const std::optional<Shape> &x,
}
bool TypeAndShape::operator==(const TypeAndShape &that) const {
- return type_ == that.type_ && ShapesAreCompatible(shape_, that.shape_) &&
- attrs_ == that.attrs_ && corank_ == that.corank_;
+ return type_.IsEquivalentTo(that.type_) &&
+ ShapesAreCompatible(shape_, that.shape_) && attrs_ == that.attrs_ &&
+ corank_ == that.corank_;
}
TypeAndShape &TypeAndShape::Rewrite(FoldingContext &context) {
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index cda82bcb7ecc..267c3ceb44f3 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -425,7 +425,7 @@ void PrintPreprocessedAction::executeAction() {
// If a pre-defined output stream exists, dump the preprocessed content there
if (!ci.isOutputStreamNull()) {
// Send the output to the pre-defined output buffer.
- ci.writeOutputStream(outForPP.str());
+ ci.writeOutputStream(buf);
return;
}
@@ -436,7 +436,7 @@ void PrintPreprocessedAction::executeAction() {
return;
}
- (*os) << outForPP.str();
+ (*os) << buf;
}
void DebugDumpProvenanceAction::executeAction() {
@@ -756,7 +756,7 @@ getRISCVVScaleRange(CompilerInstance &ci) {
outputErrMsg << errMsg.getMessage();
});
ci.getDiagnostics().Report(clang::diag::err_invalid_feature_combination)
- << outputErrMsg.str();
+ << buffer;
return std::nullopt;
}
@@ -1091,8 +1091,7 @@ public:
msgStream << diagInfo.getMsg();
// Emit message.
- diags.Report(diagID) << clang::AddFlagValue(diagInfo.getPassName())
- << msgStream.str();
+ diags.Report(diagID) << clang::AddFlagValue(diagInfo.getPassName()) << msg;
}
void optimizationRemarkHandler(
diff --git a/flang/lib/Frontend/TextDiagnosticPrinter.cpp b/flang/lib/Frontend/TextDiagnosticPrinter.cpp
index 8b00fb69b3ce..dc182d68a1a9 100644
--- a/flang/lib/Frontend/TextDiagnosticPrinter.cpp
+++ b/flang/lib/Frontend/TextDiagnosticPrinter.cpp
@@ -45,7 +45,7 @@ static void printRemarkOption(llvm::raw_ostream &os,
// warning could be printed i.e. [-Wunknown-warning-option]
os << " [" << (level == clang::DiagnosticsEngine::Remark ? "-R" : "-W")
<< opt;
- llvm::StringRef optValue = info.getDiags()->getFlagValue();
+ llvm::StringRef optValue = info.getFlagValue();
if (!optValue.empty())
os << "=" << optValue;
os << ']';
diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp
index 05f644654efe..7a516298e5ef 100644
--- a/flang/lib/Optimizer/Dialect/FIRType.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRType.cpp
@@ -533,9 +533,8 @@ int getTypeCode(mlir::Type ty, const fir::KindMapping &kindMap) {
std::string getTypeAsString(mlir::Type ty, const fir::KindMapping &kindMap,
llvm::StringRef prefix) {
- std::string buf;
+ std::string buf = prefix.str();
llvm::raw_string_ostream name{buf};
- name << prefix.str();
if (!prefix.empty())
name << "_";
while (ty) {
@@ -606,7 +605,7 @@ std::string getTypeAsString(mlir::Type ty, const fir::KindMapping &kindMap,
llvm::report_fatal_error("unsupported type");
}
}
- return name.str();
+ return buf;
}
mlir::Type changeElementType(mlir::Type type, mlir::Type newElementType,
diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
index 82c6a6618e0e..1390fae062b9 100644
--- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
+++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
@@ -271,6 +271,7 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertCharacterType(
uint64_t sizeInBits = 0;
mlir::LLVM::DIExpressionAttr lenExpr = nullptr;
mlir::LLVM::DIExpressionAttr locExpr = nullptr;
+ mlir::LLVM::DIVariableAttr varAttr = nullptr;
if (hasDescriptor) {
llvm::SmallVector<mlir::LLVM::DIExpressionElemAttr> ops;
@@ -289,7 +290,29 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertCharacterType(
sizeInBits =
charTy.getLen() * kindMapping.getCharacterBitsize(charTy.getFKind());
} else {
- return genPlaceholderType(context);
+ // In assumed length string, the len of the character is not part of the
+ // type but can be found at the runtime. Here we create an artificial
+ // variable that will contain that length. This variable is used as
+ // 'stringLength' in DIStringTypeAttr.
+ if (declOp && !declOp.getTypeparams().empty()) {
+ mlir::Operation *op = declOp.getTypeparams()[0].getDefiningOp();
+ if (auto unbox = mlir::dyn_cast_or_null<fir::UnboxCharOp>(op)) {
+ auto name =
+ mlir::StringAttr::get(context, "." + declOp.getUniqName().str());
+ mlir::OpBuilder builder(context);
+ builder.setInsertionPoint(declOp);
+ mlir::Type i64Ty = builder.getIntegerType(64);
+ auto convOp = builder.create<fir::ConvertOp>(unbox.getLoc(), i64Ty,
+ unbox.getResult(1));
+ mlir::LLVM::DITypeAttr Ty = convertType(i64Ty, fileAttr, scope, declOp);
+ auto lvAttr = mlir::LLVM::DILocalVariableAttr::get(
+ context, scope, name, fileAttr, /*line=*/0, /*argNo=*/0,
+ /*alignInBits=*/0, Ty, mlir::LLVM::DIFlags::Artificial);
+ builder.create<mlir::LLVM::DbgValueOp>(convOp.getLoc(), convOp, lvAttr,
+ nullptr);
+ varAttr = mlir::cast<mlir::LLVM::DIVariableAttr>(lvAttr);
+ }
+ }
}
// FIXME: Currently the DIStringType in llvm does not have the option to set
@@ -299,7 +322,7 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertCharacterType(
return mlir::LLVM::DIStringTypeAttr::get(
context, llvm::dwarf::DW_TAG_string_type,
mlir::StringAttr::get(context, ""), sizeInBits, /*alignInBits=*/0,
- /*stringLength=*/nullptr, lenExpr, locExpr, encoding);
+ /*stringLength=*/varAttr, lenExpr, locExpr, encoding);
}
mlir::LLVM::DITypeAttr DebugTypeGenerator::convertPointerLikeType(
diff --git a/flang/lib/Parser/parsing.cpp b/flang/lib/Parser/parsing.cpp
index 43a898ff120c..37dc113436aa 100644
--- a/flang/lib/Parser/parsing.cpp
+++ b/flang/lib/Parser/parsing.cpp
@@ -42,9 +42,9 @@ const SourceFile *Parsing::Prescan(const std::string &path, Options options) {
sourceFile =
allSources.Open(path, fileError, "."s /*prepend to search path*/);
}
- if (!fileError.str().empty()) {
+ if (!buf.empty()) {
ProvenanceRange range{allSources.AddCompilerInsertion(path)};
- messages_.Say(range, "%s"_err_en_US, fileError.str());
+ messages_.Say(range, "%s"_err_en_US, buf);
return sourceFile;
}
CHECK(sourceFile);
diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp
index b852fbf12a6e..dfd49db74eea 100644
--- a/flang/lib/Semantics/check-declarations.cpp
+++ b/flang/lib/Semantics/check-declarations.cpp
@@ -2519,7 +2519,7 @@ void CheckHelper::CheckProcBinding(
? "A NOPASS type-bound procedure may not override a passed-argument procedure"_err_en_US
: "A passed-argument type-bound procedure may not override a NOPASS procedure"_err_en_US);
} else {
- const auto *bindingChars{Characterize(binding.symbol())};
+ const auto *bindingChars{Characterize(symbol)};
const auto *overriddenChars{Characterize(*overridden)};
if (bindingChars && overriddenChars) {
if (isNopass) {
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index b99f308e1c7f..5414787d85f7 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3053,11 +3053,16 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName,
const Symbol &useUltimate{useSymbol.GetUltimate()};
const auto *useGeneric{useUltimate.detailsIf<GenericDetails>()};
if (localSymbol->has<UnknownDetails>()) {
- if (useGeneric && useGeneric->specific() &&
- IsProcedurePointer(*useGeneric->specific())) {
- // We are use-associating a generic that shadows a procedure pointer.
- // Local references that might be made to that procedure pointer should
- // use a UseDetails symbol for proper data addressing. So create an
+ if (useGeneric &&
+ ((useGeneric->specific() &&
+ IsProcedurePointer(*useGeneric->specific())) ||
+ (useGeneric->derivedType() &&
+ useUltimate.name() != localSymbol->name()))) {
+ // We are use-associating a generic that either shadows a procedure
+ // pointer or shadows a derived type of the same name.
+ // Local references that might be made to the procedure pointer should
+ // use a UseDetails symbol for proper data addressing, and a derived
+ // type needs to be in scope with the renamed name. So create an
// empty local generic now into which the use-associated generic may
// be copied.
localSymbol->set_details(GenericDetails{});
@@ -3153,9 +3158,15 @@ void ModuleVisitor::DoAddUse(SourceName location, SourceName localName,
if (!useDerivedType) {
combinedDerivedType = localDerivedType;
} else if (!localDerivedType) {
- combinedDerivedType = useDerivedType;
+ if (useDerivedType->name() == localName) {
+ combinedDerivedType = useDerivedType;
+ } else {
+ Symbol &combined{currScope().MakeSymbol(localName,
+ useDerivedType->attrs(), UseDetails{localName, *useDerivedType})};
+ combinedDerivedType = &combined;
+ }
} else {
- const Scope *localScope{localDerivedType->scope()};
+ const Scope *localScope{localDerivedType->GetUltimate().scope()};
const Scope *useScope{useDerivedType->GetUltimate().scope()};
if (localScope && useScope && localScope->derivedTypeSpec() &&
useScope->derivedTypeSpec() &&
@@ -4351,15 +4362,18 @@ bool SubprogramVisitor::BeginSubprogram(const parser::Name &name,
Symbol::Flag subpFlag, bool hasModulePrefix,
const parser::LanguageBindingSpec *bindingSpec,
const ProgramTree::EntryStmtList *entryStmts) {
+ bool isValid{true};
if (hasModulePrefix && !currScope().IsModule() &&
!currScope().IsSubmodule()) { // C1547
Say(name,
"'%s' is a MODULE procedure which must be declared within a "
"MODULE or SUBMODULE"_err_en_US);
- return false;
+ // Don't return here because it can be useful to have the scope set for
+ // other semantic checks run before we print the errors
+ isValid = false;
}
Symbol *moduleInterface{nullptr};
- if (hasModulePrefix && !inInterfaceBlock()) {
+ if (isValid && hasModulePrefix && !inInterfaceBlock()) {
moduleInterface = FindSeparateModuleProcedureInterface(name);
if (moduleInterface && &moduleInterface->owner() == &currScope()) {
// Subprogram is MODULE FUNCTION or MODULE SUBROUTINE with an interface
@@ -6773,9 +6787,7 @@ std::optional<DerivedTypeSpec> DeclarationVisitor::ResolveDerivedType(
}
if (CheckUseError(name)) {
return std::nullopt;
- }
- symbol = &symbol->GetUltimate();
- if (symbol->has<DerivedTypeDetails>()) {
+ } else if (symbol->GetUltimate().has<DerivedTypeDetails>()) {
return DerivedTypeSpec{name.source, *symbol};
} else {
Say(name, "'%s' is not a derived type"_err_en_US);
@@ -7117,12 +7129,10 @@ bool ConstructVisitor::Pre(const parser::DataStmtValue &x) {
auto &mutableData{const_cast<parser::DataStmtConstant &>(data)};
if (auto *elem{parser::Unwrap<parser::ArrayElement>(mutableData)}) {
if (const auto *name{std::get_if<parser::Name>(&elem->base.u)}) {
- if (const Symbol * symbol{FindSymbol(*name)}) {
- const Symbol &ultimate{symbol->GetUltimate()};
- if (ultimate.has<DerivedTypeDetails>()) {
- mutableData.u = elem->ConvertToStructureConstructor(
- DerivedTypeSpec{name->source, ultimate});
- }
+ if (const Symbol * symbol{FindSymbol(*name)};
+ symbol && symbol->GetUltimate().has<DerivedTypeDetails>()) {
+ mutableData.u = elem->ConvertToStructureConstructor(
+ DerivedTypeSpec{name->source, *symbol});
}
}
}
@@ -8738,6 +8748,9 @@ void ResolveNamesVisitor::FinishSpecificationPart(
CheckImports();
for (auto &pair : currScope()) {
auto &symbol{*pair.second};
+ if (inInterfaceBlock()) {
+ ConvertToObjectEntity(symbol);
+ }
if (NeedsExplicitType(symbol)) {
ApplyImplicitRules(symbol);
}
diff --git a/flang/lib/Semantics/type.cpp b/flang/lib/Semantics/type.cpp
index 810b9829b0b8..e867d7ad6e25 100644
--- a/flang/lib/Semantics/type.cpp
+++ b/flang/lib/Semantics/type.cpp
@@ -22,8 +22,9 @@
namespace Fortran::semantics {
DerivedTypeSpec::DerivedTypeSpec(SourceName name, const Symbol &typeSymbol)
- : name_{name}, typeSymbol_{typeSymbol} {
- CHECK(typeSymbol.has<DerivedTypeDetails>());
+ : name_{name}, originalTypeSymbol_{typeSymbol},
+ typeSymbol_{typeSymbol.GetUltimate()} {
+ CHECK(typeSymbol_.has<DerivedTypeDetails>());
}
DerivedTypeSpec::DerivedTypeSpec(const DerivedTypeSpec &that) = default;
DerivedTypeSpec::DerivedTypeSpec(DerivedTypeSpec &&that) = default;
@@ -340,9 +341,7 @@ void DerivedTypeSpec::Instantiate(Scope &containingScope) {
const Scope &typeScope{DEREF(typeSymbol_.scope())};
if (!MightBeParameterized()) {
scope_ = &typeScope;
- if (typeScope.derivedTypeSpec()) {
- CHECK(*this == *typeScope.derivedTypeSpec());
- } else {
+ if (!typeScope.derivedTypeSpec() || *this != *typeScope.derivedTypeSpec()) {
Scope &mutableTypeScope{const_cast<Scope &>(typeScope)};
mutableTypeScope.set_derivedTypeSpec(*this);
InstantiateNonPDTScope(mutableTypeScope, containingScope);
@@ -664,7 +663,7 @@ std::string DerivedTypeSpec::VectorTypeAsFortran() const {
std::string DerivedTypeSpec::AsFortran() const {
std::string buf;
llvm::raw_string_ostream ss{buf};
- ss << name_;
+ ss << originalTypeSymbol_.name();
if (!rawParameters_.empty()) {
CHECK(parameters_.empty());
ss << '(';
diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp
index e3c6b9e5ca89..39ac8c9eb6de 100644
--- a/flang/runtime/io-api.cpp
+++ b/flang/runtime/io-api.cpp
@@ -948,7 +948,7 @@ bool IODEF(SetRecl)(Cookie cookie, std::size_t n) {
io.GetIoErrorHandler().Crash(
"SetRecl() called after GetNewUnit() for an OPEN statement");
}
- if (n <= 0) {
+ if (static_cast<std::int64_t>(n) <= 0) {
io.GetIoErrorHandler().SignalError("RECL= must be greater than zero");
return false;
} else if (open->wasExtant() &&
diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp
index 265bd0dc9d94..cd7a196335d3 100644
--- a/flang/runtime/io-stmt.cpp
+++ b/flang/runtime/io-stmt.cpp
@@ -329,8 +329,11 @@ void OpenStatementState::CompleteOperation() {
}
if (!wasExtant_ && InError()) {
// Release the new unit on failure
- unit().CloseUnit(CloseStatus::Delete, *this);
- unit().DestroyClosed();
+ if (ExternalFileUnit *
+ toClose{unit().LookUpForClose(unit().unitNumber())}) {
+ toClose->Close(CloseStatus::Delete, *this);
+ toClose->DestroyClosed();
+ }
}
IoStatementBase::CompleteOperation();
}
diff --git a/flang/test/Semantics/OpenMP/bad_module_subroutine.f90 b/flang/test/Semantics/OpenMP/bad_module_subroutine.f90
new file mode 100644
index 000000000000..339d6bf27e7d
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/bad_module_subroutine.f90
@@ -0,0 +1,6 @@
+! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp
+! Test that we don't crash on this code inside of openmp semantics checks
+
+!ERROR: 'e' is a MODULE procedure which must be declared within a MODULE or SUBMODULE
+impure elemental module subroutine e()
+end subroutine
diff --git a/flang/test/Semantics/get_team.f90 b/flang/test/Semantics/get_team.f90
index a28b0d72f23f..7e4886703d17 100644
--- a/flang/test/Semantics/get_team.f90
+++ b/flang/test/Semantics/get_team.f90
@@ -49,7 +49,7 @@ program get_team_test
!ERROR: repeated keyword argument to intrinsic 'get_team'
result_team = get_team(level=initial_team, level=parent_team)
- !ERROR: No intrinsic or user-defined ASSIGNMENT(=) matches operand types LOGICAL(4) and TYPE(__builtin_team_type)
+ !ERROR: No intrinsic or user-defined ASSIGNMENT(=) matches operand types LOGICAL(4) and TYPE(team_type)
wrong_result_type = get_team()
end program get_team_test
diff --git a/flang/test/Semantics/implicit16.f90 b/flang/test/Semantics/implicit16.f90
new file mode 100644
index 000000000000..4a03e0c15747
--- /dev/null
+++ b/flang/test/Semantics/implicit16.f90
@@ -0,0 +1,12 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+interface
+!ERROR: No explicit type declared for 'a'
+ subroutine s(a)
+ implicit none
+ end
+!ERROR: No explicit type declared for 'f'
+ function f()
+ implicit none
+ end
+end interface
+end
diff --git a/flang/test/Semantics/modfile68.f90 b/flang/test/Semantics/modfile68.f90
new file mode 100644
index 000000000000..550560303f08
--- /dev/null
+++ b/flang/test/Semantics/modfile68.f90
@@ -0,0 +1,42 @@
+! RUN: %python %S/test_modfile.py %s %flang_fc1
+module m1
+ use iso_c_binding, only : c_ptr, c_null_ptr
+ private
+ public :: t1
+ type :: t1
+ type(c_ptr) :: c_ptr = c_null_ptr
+ end type
+end
+
+!Expect: m1.mod
+!module m1
+!use,intrinsic::__fortran_builtins,only:__builtin_c_ptr
+!use,intrinsic::iso_c_binding,only:c_ptr
+!use,intrinsic::iso_c_binding,only:c_null_ptr
+!private::__builtin_c_ptr
+!private::c_ptr
+!private::c_null_ptr
+!type::t1
+!type(c_ptr)::c_ptr=__builtin_c_ptr(__address=0_8)
+!end type
+!end
+
+module m2
+ use m1, only : t1
+ private
+ public :: t2
+ type :: t2
+ type(t1) :: x = t1()
+ end type
+end
+
+!Expect: m2.mod
+!module m2
+!use,intrinsic::__fortran_builtins,only:__builtin_c_ptr
+!use m1,only:t1
+!private::__builtin_c_ptr
+!private::t1
+!type::t2
+!type(t1)::x=t1(c_ptr=__builtin_c_ptr(__address=0_8))
+!end type
+!end
diff --git a/flang/test/Semantics/modproc01.f90 b/flang/test/Semantics/modproc01.f90
index 5652e15750c7..5f45362e9509 100644
--- a/flang/test/Semantics/modproc01.f90
+++ b/flang/test/Semantics/modproc01.f90
@@ -144,8 +144,12 @@ end program
!CHECK: a1, ALLOCATABLE size=40 offset=0: ObjectEntity type: TYPE(pdt2(k2=1_4,l2=3_4))
!CHECK: k1: TypeParam type:INTEGER(4) Kind init:1_4
!CHECK: l1: TypeParam type:INTEGER(4) Len init:3_4
-!CHECK: DerivedType scope: size=1 alignment=1 instantiation of pdt2(k2=1_4,l2=3_4)
-!CHECK: a2: ObjectEntity type: TYPE(pdt1(k1=1_4,l1=3_4)) shape: 1_8:1_8
+!CHECK: DerivedType scope: size=48 alignment=8 instantiation of pdt2(k2=1_4,l2=3_4) sourceRange=0 bytes
+!CHECK: a2 size=40 offset=8: ObjectEntity type: TYPE(pdt1(k1=1_4,l1=3_4)) shape: 1_8:1_8
!CHECK: j2 size=1 offset=0: ObjectEntity type: INTEGER(1)
!CHECK: k2: TypeParam type:INTEGER(4) Kind init:1_4
!CHECK: l2: TypeParam type:INTEGER(4) Len init:3_4
+!CHECK: DerivedType scope: size=40 alignment=8 instantiation of pdt1(k1=1_4,l1=3_4) sourceRange=0 bytes
+!CHECK: a1, ALLOCATABLE size=40 offset=0: ObjectEntity type: TYPE(pdt2(k2=1_4,l2=3_4))
+!CHECK: k1: TypeParam type:INTEGER(4) Kind init:1_4
+!CHECK: l1: TypeParam type:INTEGER(4) Len init:3_4
diff --git a/flang/test/Transforms/debug-107988.fir b/flang/test/Transforms/debug-107988.fir
new file mode 100644
index 000000000000..308f78a86512
--- /dev/null
+++ b/flang/test/Transforms/debug-107988.fir
@@ -0,0 +1,23 @@
+// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s -o - | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<>} {
+ func.func @test(%arg0: !fir.ref<!fir.char<1,?>> {fir.bindc_name = "str"}, %arg1: i64) {
+ %0 = fir.emboxchar %arg0, %arg1 : (!fir.ref<!fir.char<1,?>>, i64) -> !fir.boxchar<1>
+ %1 = fir.undefined !fir.dscope
+ %2:2 = fir.unboxchar %0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) loc(#loc1)
+ %3 = fircg.ext_declare %2#0 typeparams %2#1 dummy_scope %1 {uniq_name = "_QFtestEstr"} : (!fir.ref<!fir.char<1,?>>, index, !fir.dscope) -> !fir.ref<!fir.char<1,?>> loc(#loc1)
+ return
+ } loc(#loc2)
+}
+
+#loc1 = loc("test.f90":5:1)
+#loc2 = loc("test.f90":15:1)
+
+// CHECK: #[[VAR:.*]] = #llvm.di_local_variable<{{.*}}name = "._QFtestEstr"{{.*}}flags = Artificial>
+// CHECK: func.func @test
+// CHECK: %[[V1:.*]]:2 = fir.unboxchar{{.*}}
+// CHECK: %[[V2:.*]] = fir.convert %[[V1]]#1 : (index) -> i64
+// CHECK: llvm.intr.dbg.value #di_local_variable = %[[V2]] : i64
+// CHECK: #[[STR_TY:.*]] = #llvm.di_string_type<tag = DW_TAG_string_type, name = "", stringLength = #[[VAR]], encoding = DW_ATE_ASCII>
+// CHECK: #llvm.di_local_variable<{{.*}}name = "str"{{.*}}type = #[[STR_TY]]>
+
diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake
index 14ae8f6e9eec..e20591b80e6f 100644
--- a/libc/cmake/modules/prepare_libc_gpu_build.cmake
+++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake
@@ -21,10 +21,10 @@ if(LIBC_TARGET_TRIPLE)
set(CMAKE_REQUIRED_FLAGS "--target=${LIBC_TARGET_TRIPLE}")
endif()
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
- set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib -nostdlib")
+ set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib")
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
set(CMAKE_REQUIRED_FLAGS
- "${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument -nostdlib")
+ "${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument")
endif()
# Optionally set up a job pool to limit the number of GPU tests run in parallel.
diff --git a/libcxx/cmake/caches/AMDGPU.cmake b/libcxx/cmake/caches/AMDGPU.cmake
index 0cd2eebfb9c1..7443470b2e8a 100644
--- a/libcxx/cmake/caches/AMDGPU.cmake
+++ b/libcxx/cmake/caches/AMDGPU.cmake
@@ -33,4 +33,4 @@ set(LIBCXX_ADDITIONAL_COMPILE_FLAGS
"-nogpulib;-flto;-fconvergent-functions;-Xclang;-mcode-object-version=none" CACHE STRING "")
set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS
"-nogpulib;-flto;-fconvergent-functions;-Xclang;-mcode-object-version=none" CACHE STRING "")
-set(CMAKE_REQUIRED_FLAGS "-nogpulib -nodefaultlibs" CACHE STRING "")
+set(CMAKE_REQUIRED_FLAGS "-nogpulib" CACHE STRING "")
diff --git a/libcxx/cmake/caches/NVPTX.cmake b/libcxx/cmake/caches/NVPTX.cmake
index 47a24a349e99..3685ddcbb666 100644
--- a/libcxx/cmake/caches/NVPTX.cmake
+++ b/libcxx/cmake/caches/NVPTX.cmake
@@ -33,4 +33,4 @@ set(LIBCXX_ADDITIONAL_COMPILE_FLAGS
"-nogpulib;-flto;-fconvergent-functions;--cuda-feature=+ptx63" CACHE STRING "")
set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS
"-nogpulib;-flto;-fconvergent-functions;--cuda-feature=+ptx63" CACHE STRING "")
-set(CMAKE_REQUIRED_FLAGS "-nogpulib -nodefaultlibs -flto -c" CACHE STRING "")
+set(CMAKE_REQUIRED_FLAGS "-nogpulib -flto -c" CACHE STRING "")
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index 006ddbd80c30..cb83347584b1 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -79,7 +79,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
}
for (size_t __i = 0; __i != __unroll_count; ++__i) {
- if (auto __cmp_res = std::__as_mask(__lhs[__i] == __rhs[__i]); !std::__all_of(__cmp_res)) {
+ if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of(__cmp_res)) {
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
}
@@ -91,7 +91,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
// check the remaining 0-3 vectors
while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
- if (auto __cmp_res = std::__as_mask(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
+ if (auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
!std::__all_of(__cmp_res)) {
auto __offset = std::__find_first_not_set(__cmp_res);
return {__first1 + __offset, __first2 + __offset};
@@ -108,8 +108,8 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
if (static_cast<size_t>(__first1 - __orig_first1) >= __vec_size) {
__first1 = __last1 - __vec_size;
__first2 = __last2 - __vec_size;
- auto __offset = std::__find_first_not_set(
- std::__as_mask(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2)));
+ auto __offset =
+ std::__find_first_not_set(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
return {__first1 + __offset, __first2 + __offset};
} // else loop over the elements individually
}
diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h
index ec9840f60d87..56518dafa319 100644
--- a/libcxx/include/__algorithm/simd_utils.h
+++ b/libcxx/include/__algorithm/simd_utils.h
@@ -116,65 +116,42 @@ template <class _VecT, class _Iter>
}(make_index_sequence<__simd_vector_size_v<_VecT>>{});
}
-template <size_t _Np>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<bool, _Np> __vec) noexcept {
- return __builtin_reduce_and(__vec);
-}
-
template <class _Tp, size_t _Np>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI auto __as_mask(__simd_vector<_Tp, _Np> __vec) noexcept {
- static_assert(!is_same<_Tp, bool>::value, "vector type should not be a bool!");
- return __builtin_convertvector(__vec, __simd_vector<bool, _Np>);
-}
-
-// This uses __builtin_convertvector around the __builtin_shufflevector to work around #107981.
-template <size_t _Np>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI __simd_vector<bool, 8>
-__extend_vector(__simd_vector<bool, _Np> __vec) noexcept {
- using _VecT = __simd_vector<bool, _Np>;
- if constexpr (_Np == 4) {
- return __builtin_convertvector(
- __builtin_shufflevector(__vec, _VecT{}, 0, 1, 2, 3, 4, 5, 6, 7), __simd_vector<bool, 8>);
- } else if constexpr (_Np == 2) {
- return std::__extend_vector(
- __builtin_convertvector(__builtin_shufflevector(__vec, _VecT{}, 0, 1, 2, 3), __simd_vector<bool, 4>));
- } else if constexpr (_Np == 1) {
- return std::__extend_vector(
- __builtin_convertvector(__builtin_shufflevector(__vec, _VecT{}, 0, 1), __simd_vector<bool, 2>));
- } else {
- static_assert(sizeof(_VecT) == 0, "Unexpected vector size");
- }
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept {
+ return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector<bool, _Np>));
}
-template <size_t _Np>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI auto __to_int_mask(__simd_vector<bool, _Np> __vec) {
- if constexpr (_Np < 8) {
- return std::__bit_cast<uint8_t>(std::__extend_vector(__vec));
- } else if constexpr (_Np == 8) {
- return std::__bit_cast<uint8_t>(__vec);
- } else if constexpr (_Np == 16) {
- return std::__bit_cast<uint16_t>(__vec);
- } else if constexpr (_Np == 32) {
- return std::__bit_cast<uint32_t>(__vec);
- } else if constexpr (_Np == 64) {
- return std::__bit_cast<uint64_t>(__vec);
- } else {
- static_assert(sizeof(__simd_vector<bool, _Np>) == 0, "Unexpected vector size");
- return 0;
- }
-}
+template <class _Tp, size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept {
+ using __mask_vec = __simd_vector<bool, _Np>;
-template <size_t _Np>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<bool, _Np> __vec) noexcept {
+ // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876
+ auto __impl = [&]<class _MaskT>(_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept {
# if defined(_LIBCPP_BIG_ENDIAN)
- return std::min<size_t>(_Np, std::__countl_zero(std::__to_int_mask(__vec)));
+ return std::min<size_t>(
+ _Np, std::__countl_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
# else
- return std::min<size_t>(_Np, std::__countr_zero(std::__to_int_mask(__vec)));
+ return std::min<size_t>(
+ _Np, std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
# endif
+ };
+
+ if constexpr (sizeof(__mask_vec) == sizeof(uint8_t)) {
+ return __impl(uint8_t{});
+ } else if constexpr (sizeof(__mask_vec) == sizeof(uint16_t)) {
+ return __impl(uint16_t{});
+ } else if constexpr (sizeof(__mask_vec) == sizeof(uint32_t)) {
+ return __impl(uint32_t{});
+ } else if constexpr (sizeof(__mask_vec) == sizeof(uint64_t)) {
+ return __impl(uint64_t{});
+ } else {
+ static_assert(sizeof(__mask_vec) == 0, "unexpected required size for mask integer type");
+ return 0;
+ }
}
-template <size_t _Np>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<bool, _Np> __vec) noexcept {
+template <class _Tp, size_t _Np>
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept {
return std::__find_first_set(~__vec);
}
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index 5fa93f57ef9e..a20b097cbe04 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -349,7 +349,7 @@ public:
MachineTypes getMachineType() const override;
DefinedImportData *impSym = nullptr;
- Symbol *thunkSym = nullptr;
+ Defined *thunkSym = nullptr;
ImportThunkChunkARM64EC *impchkThunk = nullptr;
std::string dllName;
@@ -365,7 +365,7 @@ public:
// Auxiliary IAT symbols and chunks on ARM64EC.
DefinedImportData *impECSym = nullptr;
Chunk *auxLocation = nullptr;
- Symbol *auxThunkSym = nullptr;
+ Defined *auxThunkSym = nullptr;
DefinedImportData *auxImpCopySym = nullptr;
Chunk *auxCopyLocation = nullptr;
diff --git a/lld/COFF/MapFile.cpp b/lld/COFF/MapFile.cpp
index 52e9ce996f23..751a2238e701 100644
--- a/lld/COFF/MapFile.cpp
+++ b/lld/COFF/MapFile.cpp
@@ -128,8 +128,7 @@ static void getSymbols(const COFFLinkerContext &ctx,
if (!file->thunkSym->isLive())
continue;
- if (auto *thunkSym = dyn_cast<Defined>(file->thunkSym))
- syms.push_back(thunkSym);
+ syms.push_back(file->thunkSym);
if (auto *impSym = dyn_cast_or_null<Defined>(file->impSym))
syms.push_back(impSym);
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index efea16ccbbfe..0ef58910151c 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -502,6 +502,14 @@ void SymbolTable::resolveRemainingUndefines() {
// This odd rule is for compatibility with MSVC linker.
if (name.starts_with("__imp_")) {
Symbol *imp = find(name.substr(strlen("__imp_")));
+ if (imp) {
+ // The unprefixed symbol might come later in symMap, so handle it now
+ // so that the condition below can be appropriately applied.
+ auto *undef = dyn_cast<Undefined>(imp);
+ if (undef) {
+ undef->resolveWeakAlias();
+ }
+ }
if (imp && isa<Defined>(imp)) {
auto *d = cast<Defined>(imp);
replaceSymbol<DefinedLocalImport>(sym, ctx, name, d);
@@ -815,13 +823,13 @@ DefinedImportData *SymbolTable::addImportData(StringRef n, ImportFile *f,
return nullptr;
}
-Symbol *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
- ImportThunkChunk *chunk) {
+Defined *SymbolTable::addImportThunk(StringRef name, DefinedImportData *id,
+ ImportThunkChunk *chunk) {
auto [s, wasInserted] = insert(name, nullptr);
s->isUsedInRegularObj = true;
if (wasInserted || isa<Undefined>(s) || s->isLazy()) {
replaceSymbol<DefinedImportThunk>(s, ctx, name, id, chunk);
- return s;
+ return cast<Defined>(s);
}
reportDuplicate(s, id->file);
diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index bf97cf442039..e3f674b8098f 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -105,8 +105,8 @@ public:
CommonChunk *c = nullptr);
DefinedImportData *addImportData(StringRef n, ImportFile *f,
Chunk *&location);
- Symbol *addImportThunk(StringRef name, DefinedImportData *s,
- ImportThunkChunk *chunk);
+ Defined *addImportThunk(StringRef name, DefinedImportData *s,
+ ImportThunkChunk *chunk);
void addLibcall(StringRef name);
void addEntryThunk(Symbol *from, Symbol *to);
void addExitThunk(Symbol *from, Symbol *to);
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index d50eae9c0144..e25db0e49512 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -82,7 +82,7 @@ ConfigWrapper elf::config;
Ctx elf::ctx;
static void setConfigs(Ctx &ctx, opt::InputArgList &args);
-static void readConfigs(opt::InputArgList &args);
+static void readConfigs(Ctx &ctx, opt::InputArgList &args);
void elf::errorOrWarn(const Twine &msg) {
if (config->noinhibitExec)
@@ -669,7 +669,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
}
}
- readConfigs(args);
+ readConfigs(ctx, args);
checkZOptions(args);
// The behavior of -v or --version is a bit strange, but this is
@@ -681,7 +681,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// Initialize time trace profiler.
if (ctx.arg.timeTraceEnabled)
- timeTraceProfilerInitialize(ctx.arg.timeTraceGranularity, config->progName);
+ timeTraceProfilerInitialize(ctx.arg.timeTraceGranularity, ctx.arg.progName);
{
llvm::TimeTraceScope timeScope("ExecuteLinker");
@@ -714,14 +714,14 @@ static std::string getRpath(opt::InputArgList &args) {
// Determines what we should do if there are remaining unresolved
// symbols after the name resolution.
-static void setUnresolvedSymbolPolicy(opt::InputArgList &args) {
+static void setUnresolvedSymbolPolicy(Ctx &ctx, opt::InputArgList &args) {
UnresolvedPolicy errorOrWarn = args.hasFlag(OPT_error_unresolved_symbols,
OPT_warn_unresolved_symbols, true)
? UnresolvedPolicy::ReportError
: UnresolvedPolicy::Warn;
// -shared implies --unresolved-symbols=ignore-all because missing
// symbols are likely to be resolved at runtime.
- bool diagRegular = !config->shared, diagShlib = !config->shared;
+ bool diagRegular = !ctx.arg.shared, diagShlib = !ctx.arg.shared;
for (const opt::Arg *arg : args) {
switch (arg->getOption().getID()) {
@@ -765,9 +765,9 @@ static void setUnresolvedSymbolPolicy(opt::InputArgList &args) {
}
}
- config->unresolvedSymbols =
+ ctx.arg.unresolvedSymbols =
diagRegular ? errorOrWarn : UnresolvedPolicy::Ignore;
- config->unresolvedSymbolsInShlib =
+ ctx.arg.unresolvedSymbolsInShlib =
diagShlib ? errorOrWarn : UnresolvedPolicy::Ignore;
}
@@ -804,25 +804,25 @@ static DiscardPolicy getDiscard(opt::InputArgList &args) {
return DiscardPolicy::None;
}
-static StringRef getDynamicLinker(opt::InputArgList &args) {
+static StringRef getDynamicLinker(Ctx &ctx, opt::InputArgList &args) {
auto *arg = args.getLastArg(OPT_dynamic_linker, OPT_no_dynamic_linker);
if (!arg)
return "";
if (arg->getOption().getID() == OPT_no_dynamic_linker) {
// --no-dynamic-linker suppresses undefined weak symbols in .dynsym
- config->noDynamicLinker = true;
+ ctx.arg.noDynamicLinker = true;
return "";
}
return arg->getValue();
}
-static int getMemtagMode(opt::InputArgList &args) {
+static int getMemtagMode(Ctx &ctx, opt::InputArgList &args) {
StringRef memtagModeArg = args.getLastArgValue(OPT_android_memtag_mode);
if (memtagModeArg.empty()) {
- if (config->androidMemtagStack)
+ if (ctx.arg.androidMemtagStack)
warn("--android-memtag-mode is unspecified, leaving "
"--android-memtag-stack a no-op");
- else if (config->androidMemtagHeap)
+ else if (ctx.arg.androidMemtagHeap)
warn("--android-memtag-mode is unspecified, leaving "
"--android-memtag-heap a no-op");
return ELF::NT_MEMTAG_LEVEL_NONE;
@@ -1247,140 +1247,141 @@ static bool remapInputs(StringRef line, const Twine &location) {
}
// Initializes Config members by the command line options.
-static void readConfigs(opt::InputArgList &args) {
+static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
errorHandler().verbose = args.hasArg(OPT_verbose);
errorHandler().vsDiagnostics =
args.hasArg(OPT_visual_studio_diagnostics_format, false);
- config->allowMultipleDefinition =
+ ctx.arg.allowMultipleDefinition =
hasZOption(args, "muldefs") ||
args.hasFlag(OPT_allow_multiple_definition,
OPT_no_allow_multiple_definition, false);
- config->androidMemtagHeap =
+ ctx.arg.androidMemtagHeap =
args.hasFlag(OPT_android_memtag_heap, OPT_no_android_memtag_heap, false);
- config->androidMemtagStack = args.hasFlag(OPT_android_memtag_stack,
+ ctx.arg.androidMemtagStack = args.hasFlag(OPT_android_memtag_stack,
OPT_no_android_memtag_stack, false);
- config->fatLTOObjects =
+ ctx.arg.fatLTOObjects =
args.hasFlag(OPT_fat_lto_objects, OPT_no_fat_lto_objects, false);
- config->androidMemtagMode = getMemtagMode(args);
- config->auxiliaryList = args::getStrings(args, OPT_auxiliary);
- config->armBe8 = args.hasArg(OPT_be8);
+ ctx.arg.androidMemtagMode = getMemtagMode(ctx, args);
+ ctx.arg.auxiliaryList = args::getStrings(args, OPT_auxiliary);
+ ctx.arg.armBe8 = args.hasArg(OPT_be8);
if (opt::Arg *arg = args.getLastArg(
OPT_Bno_symbolic, OPT_Bsymbolic_non_weak_functions,
OPT_Bsymbolic_functions, OPT_Bsymbolic_non_weak, OPT_Bsymbolic)) {
if (arg->getOption().matches(OPT_Bsymbolic_non_weak_functions))
- config->bsymbolic = BsymbolicKind::NonWeakFunctions;
+ ctx.arg.bsymbolic = BsymbolicKind::NonWeakFunctions;
else if (arg->getOption().matches(OPT_Bsymbolic_functions))
- config->bsymbolic = BsymbolicKind::Functions;
+ ctx.arg.bsymbolic = BsymbolicKind::Functions;
else if (arg->getOption().matches(OPT_Bsymbolic_non_weak))
- config->bsymbolic = BsymbolicKind::NonWeak;
+ ctx.arg.bsymbolic = BsymbolicKind::NonWeak;
else if (arg->getOption().matches(OPT_Bsymbolic))
- config->bsymbolic = BsymbolicKind::All;
+ ctx.arg.bsymbolic = BsymbolicKind::All;
}
- config->callGraphProfileSort = getCGProfileSortKind(args);
- config->checkSections =
+ ctx.arg.callGraphProfileSort = getCGProfileSortKind(args);
+ ctx.arg.checkSections =
args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
- config->chroot = args.getLastArgValue(OPT_chroot);
+ ctx.arg.chroot = args.getLastArgValue(OPT_chroot);
if (auto *arg = args.getLastArg(OPT_compress_debug_sections)) {
- config->compressDebugSections =
+ ctx.arg.compressDebugSections =
getCompressionType(arg->getValue(), "--compress-debug-sections");
}
- config->cref = args.hasArg(OPT_cref);
- config->optimizeBBJumps =
+ ctx.arg.cref = args.hasArg(OPT_cref);
+ ctx.arg.optimizeBBJumps =
args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false);
- config->debugNames = args.hasFlag(OPT_debug_names, OPT_no_debug_names, false);
- config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true);
- config->dependencyFile = args.getLastArgValue(OPT_dependency_file);
- config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true);
- config->disableVerify = args.hasArg(OPT_disable_verify);
- config->discard = getDiscard(args);
- config->dwoDir = args.getLastArgValue(OPT_plugin_opt_dwo_dir_eq);
- config->dynamicLinker = getDynamicLinker(args);
- config->ehFrameHdr =
+ ctx.arg.debugNames = args.hasFlag(OPT_debug_names, OPT_no_debug_names, false);
+ ctx.arg.demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true);
+ ctx.arg.dependencyFile = args.getLastArgValue(OPT_dependency_file);
+ ctx.arg.dependentLibraries =
+ args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true);
+ ctx.arg.disableVerify = args.hasArg(OPT_disable_verify);
+ ctx.arg.discard = getDiscard(args);
+ ctx.arg.dwoDir = args.getLastArgValue(OPT_plugin_opt_dwo_dir_eq);
+ ctx.arg.dynamicLinker = getDynamicLinker(ctx, args);
+ ctx.arg.ehFrameHdr =
args.hasFlag(OPT_eh_frame_hdr, OPT_no_eh_frame_hdr, false);
- config->emitLLVM = args.hasArg(OPT_lto_emit_llvm);
- config->emitRelocs = args.hasArg(OPT_emit_relocs);
- config->enableNewDtags =
+ ctx.arg.emitLLVM = args.hasArg(OPT_lto_emit_llvm);
+ ctx.arg.emitRelocs = args.hasArg(OPT_emit_relocs);
+ ctx.arg.enableNewDtags =
args.hasFlag(OPT_enable_new_dtags, OPT_disable_new_dtags, true);
- config->enableNonContiguousRegions =
+ ctx.arg.enableNonContiguousRegions =
args.hasArg(OPT_enable_non_contiguous_regions);
- config->entry = args.getLastArgValue(OPT_entry);
+ ctx.arg.entry = args.getLastArgValue(OPT_entry);
errorHandler().errorHandlingScript =
args.getLastArgValue(OPT_error_handling_script);
- config->executeOnly =
+ ctx.arg.executeOnly =
args.hasFlag(OPT_execute_only, OPT_no_execute_only, false);
- config->exportDynamic =
+ ctx.arg.exportDynamic =
args.hasFlag(OPT_export_dynamic, OPT_no_export_dynamic, false) ||
args.hasArg(OPT_shared);
- config->filterList = args::getStrings(args, OPT_filter);
- config->fini = args.getLastArgValue(OPT_fini, "_fini");
- config->fixCortexA53Errata843419 = args.hasArg(OPT_fix_cortex_a53_843419) &&
- !args.hasArg(OPT_relocatable);
- config->cmseImplib = args.hasArg(OPT_cmse_implib);
- config->cmseInputLib = args.getLastArgValue(OPT_in_implib);
- config->cmseOutputLib = args.getLastArgValue(OPT_out_implib);
- config->fixCortexA8 =
+ ctx.arg.filterList = args::getStrings(args, OPT_filter);
+ ctx.arg.fini = args.getLastArgValue(OPT_fini, "_fini");
+ ctx.arg.fixCortexA53Errata843419 =
+ args.hasArg(OPT_fix_cortex_a53_843419) && !args.hasArg(OPT_relocatable);
+ ctx.arg.cmseImplib = args.hasArg(OPT_cmse_implib);
+ ctx.arg.cmseInputLib = args.getLastArgValue(OPT_in_implib);
+ ctx.arg.cmseOutputLib = args.getLastArgValue(OPT_out_implib);
+ ctx.arg.fixCortexA8 =
args.hasArg(OPT_fix_cortex_a8) && !args.hasArg(OPT_relocatable);
- config->fortranCommon =
+ ctx.arg.fortranCommon =
args.hasFlag(OPT_fortran_common, OPT_no_fortran_common, false);
- config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
- config->gnuUnique = args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true);
- config->gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false);
- config->icf = getICF(args);
- config->ignoreDataAddressEquality =
+ ctx.arg.gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
+ ctx.arg.gnuUnique = args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true);
+ ctx.arg.gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false);
+ ctx.arg.icf = getICF(args);
+ ctx.arg.ignoreDataAddressEquality =
args.hasArg(OPT_ignore_data_address_equality);
- config->ignoreFunctionAddressEquality =
+ ctx.arg.ignoreFunctionAddressEquality =
args.hasArg(OPT_ignore_function_address_equality);
- config->init = args.getLastArgValue(OPT_init, "_init");
- config->ltoAAPipeline = args.getLastArgValue(OPT_lto_aa_pipeline);
- config->ltoCSProfileGenerate = args.hasArg(OPT_lto_cs_profile_generate);
- config->ltoCSProfileFile = args.getLastArgValue(OPT_lto_cs_profile_file);
- config->ltoPGOWarnMismatch = args.hasFlag(OPT_lto_pgo_warn_mismatch,
+ ctx.arg.init = args.getLastArgValue(OPT_init, "_init");
+ ctx.arg.ltoAAPipeline = args.getLastArgValue(OPT_lto_aa_pipeline);
+ ctx.arg.ltoCSProfileGenerate = args.hasArg(OPT_lto_cs_profile_generate);
+ ctx.arg.ltoCSProfileFile = args.getLastArgValue(OPT_lto_cs_profile_file);
+ ctx.arg.ltoPGOWarnMismatch = args.hasFlag(OPT_lto_pgo_warn_mismatch,
OPT_no_lto_pgo_warn_mismatch, true);
- config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager);
- config->ltoEmitAsm = args.hasArg(OPT_lto_emit_asm);
- config->ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes);
- config->ltoWholeProgramVisibility =
+ ctx.arg.ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager);
+ ctx.arg.ltoEmitAsm = args.hasArg(OPT_lto_emit_asm);
+ ctx.arg.ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes);
+ ctx.arg.ltoWholeProgramVisibility =
args.hasFlag(OPT_lto_whole_program_visibility,
OPT_no_lto_whole_program_visibility, false);
- config->ltoValidateAllVtablesHaveTypeInfos =
+ ctx.arg.ltoValidateAllVtablesHaveTypeInfos =
args.hasFlag(OPT_lto_validate_all_vtables_have_type_infos,
OPT_no_lto_validate_all_vtables_have_type_infos, false);
- config->ltoo = args::getInteger(args, OPT_lto_O, 2);
- if (config->ltoo > 3)
- error("invalid optimization level for LTO: " + Twine(config->ltoo));
+ ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2);
+ if (ctx.arg.ltoo > 3)
+ error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo));
unsigned ltoCgo =
- args::getInteger(args, OPT_lto_CGO, args::getCGOptLevel(config->ltoo));
+ args::getInteger(args, OPT_lto_CGO, args::getCGOptLevel(ctx.arg.ltoo));
if (auto level = CodeGenOpt::getLevel(ltoCgo))
- config->ltoCgo = *level;
+ ctx.arg.ltoCgo = *level;
else
error("invalid codegen optimization level for LTO: " + Twine(ltoCgo));
- config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq);
- config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1);
- config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile);
- config->ltoBBAddrMap =
+ ctx.arg.ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq);
+ ctx.arg.ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1);
+ ctx.arg.ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile);
+ ctx.arg.ltoBBAddrMap =
args.hasFlag(OPT_lto_basic_block_address_map,
OPT_no_lto_basic_block_address_map, false);
- config->ltoBasicBlockSections =
+ ctx.arg.ltoBasicBlockSections =
args.getLastArgValue(OPT_lto_basic_block_sections);
- config->ltoUniqueBasicBlockSectionNames =
+ ctx.arg.ltoUniqueBasicBlockSectionNames =
args.hasFlag(OPT_lto_unique_basic_block_section_names,
OPT_no_lto_unique_basic_block_section_names, false);
- config->mapFile = args.getLastArgValue(OPT_Map);
- config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0);
- config->mergeArmExidx =
+ ctx.arg.mapFile = args.getLastArgValue(OPT_Map);
+ ctx.arg.mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0);
+ ctx.arg.mergeArmExidx =
args.hasFlag(OPT_merge_exidx_entries, OPT_no_merge_exidx_entries, true);
- config->mmapOutputFile =
+ ctx.arg.mmapOutputFile =
args.hasFlag(OPT_mmap_output_file, OPT_no_mmap_output_file, true);
- config->nmagic = args.hasFlag(OPT_nmagic, OPT_no_nmagic, false);
- config->noinhibitExec = args.hasArg(OPT_noinhibit_exec);
- config->nostdlib = args.hasArg(OPT_nostdlib);
- config->oFormatBinary = isOutputFormatBinary(args);
- config->omagic = args.hasFlag(OPT_omagic, OPT_no_omagic, false);
- config->optRemarksFilename = args.getLastArgValue(OPT_opt_remarks_filename);
- config->optStatsFilename = args.getLastArgValue(OPT_plugin_opt_stats_file);
+ ctx.arg.nmagic = args.hasFlag(OPT_nmagic, OPT_no_nmagic, false);
+ ctx.arg.noinhibitExec = args.hasArg(OPT_noinhibit_exec);
+ ctx.arg.nostdlib = args.hasArg(OPT_nostdlib);
+ ctx.arg.oFormatBinary = isOutputFormatBinary(args);
+ ctx.arg.omagic = args.hasFlag(OPT_omagic, OPT_no_omagic, false);
+ ctx.arg.optRemarksFilename = args.getLastArgValue(OPT_opt_remarks_filename);
+ ctx.arg.optStatsFilename = args.getLastArgValue(OPT_plugin_opt_stats_file);
// Parse remarks hotness threshold. Valid value is either integer or 'auto'.
if (auto *arg = args.getLastArg(OPT_opt_remarks_hotness_threshold)) {
@@ -1389,77 +1390,77 @@ static void readConfigs(opt::InputArgList &args) {
error(arg->getSpelling() + ": invalid argument '" + arg->getValue() +
"', only integer or 'auto' is supported");
else
- config->optRemarksHotnessThreshold = *resultOrErr;
- }
-
- config->optRemarksPasses = args.getLastArgValue(OPT_opt_remarks_passes);
- config->optRemarksWithHotness = args.hasArg(OPT_opt_remarks_with_hotness);
- config->optRemarksFormat = args.getLastArgValue(OPT_opt_remarks_format);
- config->optimize = args::getInteger(args, OPT_O, 1);
- config->orphanHandling = getOrphanHandling(args);
- config->outputFile = args.getLastArgValue(OPT_o);
- config->packageMetadata = args.getLastArgValue(OPT_package_metadata);
- config->pie = args.hasFlag(OPT_pie, OPT_no_pie, false);
- config->printIcfSections =
+ ctx.arg.optRemarksHotnessThreshold = *resultOrErr;
+ }
+
+ ctx.arg.optRemarksPasses = args.getLastArgValue(OPT_opt_remarks_passes);
+ ctx.arg.optRemarksWithHotness = args.hasArg(OPT_opt_remarks_with_hotness);
+ ctx.arg.optRemarksFormat = args.getLastArgValue(OPT_opt_remarks_format);
+ ctx.arg.optimize = args::getInteger(args, OPT_O, 1);
+ ctx.arg.orphanHandling = getOrphanHandling(args);
+ ctx.arg.outputFile = args.getLastArgValue(OPT_o);
+ ctx.arg.packageMetadata = args.getLastArgValue(OPT_package_metadata);
+ ctx.arg.pie = args.hasFlag(OPT_pie, OPT_no_pie, false);
+ ctx.arg.printIcfSections =
args.hasFlag(OPT_print_icf_sections, OPT_no_print_icf_sections, false);
- config->printGcSections =
+ ctx.arg.printGcSections =
args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false);
- config->printMemoryUsage = args.hasArg(OPT_print_memory_usage);
- config->printArchiveStats = args.getLastArgValue(OPT_print_archive_stats);
- config->printSymbolOrder =
- args.getLastArgValue(OPT_print_symbol_order);
- config->rejectMismatch = !args.hasArg(OPT_no_warn_mismatch);
- config->relax = args.hasFlag(OPT_relax, OPT_no_relax, true);
- config->relaxGP = args.hasFlag(OPT_relax_gp, OPT_no_relax_gp, false);
- config->rpath = getRpath(args);
- config->relocatable = args.hasArg(OPT_relocatable);
- config->resolveGroups =
+ ctx.arg.printMemoryUsage = args.hasArg(OPT_print_memory_usage);
+ ctx.arg.printArchiveStats = args.getLastArgValue(OPT_print_archive_stats);
+ ctx.arg.printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order);
+ ctx.arg.rejectMismatch = !args.hasArg(OPT_no_warn_mismatch);
+ ctx.arg.relax = args.hasFlag(OPT_relax, OPT_no_relax, true);
+ ctx.arg.relaxGP = args.hasFlag(OPT_relax_gp, OPT_no_relax_gp, false);
+ ctx.arg.rpath = getRpath(args);
+ ctx.arg.relocatable = args.hasArg(OPT_relocatable);
+ ctx.arg.resolveGroups =
!args.hasArg(OPT_relocatable) || args.hasArg(OPT_force_group_allocation);
if (args.hasArg(OPT_save_temps)) {
// --save-temps implies saving all temps.
for (const char *s : saveTempsValues)
- config->saveTempsArgs.insert(s);
+ ctx.arg.saveTempsArgs.insert(s);
} else {
for (auto *arg : args.filtered(OPT_save_temps_eq)) {
StringRef s = arg->getValue();
if (llvm::is_contained(saveTempsValues, s))
- config->saveTempsArgs.insert(s);
+ ctx.arg.saveTempsArgs.insert(s);
else
error("unknown --save-temps value: " + s);
}
}
- config->searchPaths = args::getStrings(args, OPT_library_path);
- config->sectionStartMap = getSectionStartMap(args);
- config->shared = args.hasArg(OPT_shared);
- config->singleRoRx = !args.hasFlag(OPT_rosegment, OPT_no_rosegment, true);
- config->soName = args.getLastArgValue(OPT_soname);
- config->sortSection = getSortSection(args);
- config->splitStackAdjustSize = args::getInteger(args, OPT_split_stack_adjust_size, 16384);
- config->zSectionHeader =
+ ctx.arg.searchPaths = args::getStrings(args, OPT_library_path);
+ ctx.arg.sectionStartMap = getSectionStartMap(args);
+ ctx.arg.shared = args.hasArg(OPT_shared);
+ ctx.arg.singleRoRx = !args.hasFlag(OPT_rosegment, OPT_no_rosegment, true);
+ ctx.arg.soName = args.getLastArgValue(OPT_soname);
+ ctx.arg.sortSection = getSortSection(args);
+ ctx.arg.splitStackAdjustSize =
+ args::getInteger(args, OPT_split_stack_adjust_size, 16384);
+ ctx.arg.zSectionHeader =
getZFlag(args, "sectionheader", "nosectionheader", true);
- config->strip = getStrip(args); // needs zSectionHeader
- config->sysroot = args.getLastArgValue(OPT_sysroot);
- config->target1Rel = args.hasFlag(OPT_target1_rel, OPT_target1_abs, false);
- config->target2 = getTarget2(args);
- config->thinLTOCacheDir = args.getLastArgValue(OPT_thinlto_cache_dir);
- config->thinLTOCachePolicy = CHECK(
+ ctx.arg.strip = getStrip(args); // needs zSectionHeader
+ ctx.arg.sysroot = args.getLastArgValue(OPT_sysroot);
+ ctx.arg.target1Rel = args.hasFlag(OPT_target1_rel, OPT_target1_abs, false);
+ ctx.arg.target2 = getTarget2(args);
+ ctx.arg.thinLTOCacheDir = args.getLastArgValue(OPT_thinlto_cache_dir);
+ ctx.arg.thinLTOCachePolicy = CHECK(
parseCachePruningPolicy(args.getLastArgValue(OPT_thinlto_cache_policy)),
"--thinlto-cache-policy: invalid cache policy");
- config->thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files);
- config->thinLTOEmitIndexFiles = args.hasArg(OPT_thinlto_emit_index_files) ||
+ ctx.arg.thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files);
+ ctx.arg.thinLTOEmitIndexFiles = args.hasArg(OPT_thinlto_emit_index_files) ||
args.hasArg(OPT_thinlto_index_only) ||
args.hasArg(OPT_thinlto_index_only_eq);
- config->thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) ||
+ ctx.arg.thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) ||
args.hasArg(OPT_thinlto_index_only_eq);
- config->thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_eq);
- config->thinLTOObjectSuffixReplace =
+ ctx.arg.thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_eq);
+ ctx.arg.thinLTOObjectSuffixReplace =
getOldNewOptions(args, OPT_thinlto_object_suffix_replace_eq);
- std::tie(config->thinLTOPrefixReplaceOld, config->thinLTOPrefixReplaceNew,
- config->thinLTOPrefixReplaceNativeObject) =
+ std::tie(ctx.arg.thinLTOPrefixReplaceOld, ctx.arg.thinLTOPrefixReplaceNew,
+ ctx.arg.thinLTOPrefixReplaceNativeObject) =
getOldNewOptionsExtra(args, OPT_thinlto_prefix_replace_eq);
- if (config->thinLTOEmitIndexFiles && !config->thinLTOIndexOnly) {
+ if (ctx.arg.thinLTOEmitIndexFiles && !ctx.arg.thinLTOIndexOnly) {
if (args.hasArg(OPT_thinlto_object_suffix_replace_eq))
error("--thinlto-object-suffix-replace is not supported with "
"--thinlto-emit-index-files");
@@ -1467,69 +1468,69 @@ static void readConfigs(opt::InputArgList &args) {
error("--thinlto-prefix-replace is not supported with "
"--thinlto-emit-index-files");
}
- if (!config->thinLTOPrefixReplaceNativeObject.empty() &&
- config->thinLTOIndexOnlyArg.empty()) {
+ if (!ctx.arg.thinLTOPrefixReplaceNativeObject.empty() &&
+ ctx.arg.thinLTOIndexOnlyArg.empty()) {
error("--thinlto-prefix-replace=old_dir;new_dir;obj_dir must be used with "
"--thinlto-index-only=");
}
- config->thinLTOModulesToCompile =
+ ctx.arg.thinLTOModulesToCompile =
args::getStrings(args, OPT_thinlto_single_module_eq);
- config->timeTraceEnabled = args.hasArg(OPT_time_trace_eq);
- config->timeTraceGranularity =
+ ctx.arg.timeTraceEnabled = args.hasArg(OPT_time_trace_eq);
+ ctx.arg.timeTraceGranularity =
args::getInteger(args, OPT_time_trace_granularity, 500);
- config->trace = args.hasArg(OPT_trace);
- config->undefined = args::getStrings(args, OPT_undefined);
- config->undefinedVersion =
+ ctx.arg.trace = args.hasArg(OPT_trace);
+ ctx.arg.undefined = args::getStrings(args, OPT_undefined);
+ ctx.arg.undefinedVersion =
args.hasFlag(OPT_undefined_version, OPT_no_undefined_version, false);
- config->unique = args.hasArg(OPT_unique);
- config->useAndroidRelrTags = args.hasFlag(
+ ctx.arg.unique = args.hasArg(OPT_unique);
+ ctx.arg.useAndroidRelrTags = args.hasFlag(
OPT_use_android_relr_tags, OPT_no_use_android_relr_tags, false);
- config->warnBackrefs =
+ ctx.arg.warnBackrefs =
args.hasFlag(OPT_warn_backrefs, OPT_no_warn_backrefs, false);
- config->warnCommon = args.hasFlag(OPT_warn_common, OPT_no_warn_common, false);
- config->warnSymbolOrdering =
+ ctx.arg.warnCommon = args.hasFlag(OPT_warn_common, OPT_no_warn_common, false);
+ ctx.arg.warnSymbolOrdering =
args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
- config->whyExtract = args.getLastArgValue(OPT_why_extract);
- config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
- config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
- config->zForceBti = hasZOption(args, "force-bti");
- config->zForceIbt = hasZOption(args, "force-ibt");
- config->zGcs = getZGcs(args);
- config->zGlobal = hasZOption(args, "global");
- config->zGnustack = getZGnuStack(args);
- config->zHazardplt = hasZOption(args, "hazardplt");
- config->zIfuncNoplt = hasZOption(args, "ifunc-noplt");
- config->zInitfirst = hasZOption(args, "initfirst");
- config->zInterpose = hasZOption(args, "interpose");
- config->zKeepTextSectionPrefix = getZFlag(
+ ctx.arg.whyExtract = args.getLastArgValue(OPT_why_extract);
+ ctx.arg.zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
+ ctx.arg.zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
+ ctx.arg.zForceBti = hasZOption(args, "force-bti");
+ ctx.arg.zForceIbt = hasZOption(args, "force-ibt");
+ ctx.arg.zGcs = getZGcs(args);
+ ctx.arg.zGlobal = hasZOption(args, "global");
+ ctx.arg.zGnustack = getZGnuStack(args);
+ ctx.arg.zHazardplt = hasZOption(args, "hazardplt");
+ ctx.arg.zIfuncNoplt = hasZOption(args, "ifunc-noplt");
+ ctx.arg.zInitfirst = hasZOption(args, "initfirst");
+ ctx.arg.zInterpose = hasZOption(args, "interpose");
+ ctx.arg.zKeepTextSectionPrefix = getZFlag(
args, "keep-text-section-prefix", "nokeep-text-section-prefix", false);
- config->zLrodataAfterBss =
+ ctx.arg.zLrodataAfterBss =
getZFlag(args, "lrodata-after-bss", "nolrodata-after-bss", false);
- config->zNodefaultlib = hasZOption(args, "nodefaultlib");
- config->zNodelete = hasZOption(args, "nodelete");
- config->zNodlopen = hasZOption(args, "nodlopen");
- config->zNow = getZFlag(args, "now", "lazy", false);
- config->zOrigin = hasZOption(args, "origin");
- config->zPacPlt = hasZOption(args, "pac-plt");
- config->zRelro = getZFlag(args, "relro", "norelro", true);
- config->zRetpolineplt = hasZOption(args, "retpolineplt");
- config->zRodynamic = hasZOption(args, "rodynamic");
- config->zSeparate = getZSeparate(args);
- config->zShstk = hasZOption(args, "shstk");
- config->zStackSize = args::getZOptionValue(args, OPT_z, "stack-size", 0);
- config->zStartStopGC =
+ ctx.arg.zNodefaultlib = hasZOption(args, "nodefaultlib");
+ ctx.arg.zNodelete = hasZOption(args, "nodelete");
+ ctx.arg.zNodlopen = hasZOption(args, "nodlopen");
+ ctx.arg.zNow = getZFlag(args, "now", "lazy", false);
+ ctx.arg.zOrigin = hasZOption(args, "origin");
+ ctx.arg.zPacPlt = hasZOption(args, "pac-plt");
+ ctx.arg.zRelro = getZFlag(args, "relro", "norelro", true);
+ ctx.arg.zRetpolineplt = hasZOption(args, "retpolineplt");
+ ctx.arg.zRodynamic = hasZOption(args, "rodynamic");
+ ctx.arg.zSeparate = getZSeparate(args);
+ ctx.arg.zShstk = hasZOption(args, "shstk");
+ ctx.arg.zStackSize = args::getZOptionValue(args, OPT_z, "stack-size", 0);
+ ctx.arg.zStartStopGC =
getZFlag(args, "start-stop-gc", "nostart-stop-gc", true);
- config->zStartStopVisibility = getZStartStopVisibility(args);
- config->zText = getZFlag(args, "text", "notext", true);
- config->zWxneeded = hasZOption(args, "wxneeded");
- setUnresolvedSymbolPolicy(args);
- config->power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no";
+ ctx.arg.zStartStopVisibility = getZStartStopVisibility(args);
+ ctx.arg.zText = getZFlag(args, "text", "notext", true);
+ ctx.arg.zWxneeded = hasZOption(args, "wxneeded");
+ setUnresolvedSymbolPolicy(ctx, args);
+ ctx.arg.power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no";
if (opt::Arg *arg = args.getLastArg(OPT_eb, OPT_el)) {
if (arg->getOption().matches(OPT_eb))
- config->optEB = true;
+ ctx.arg.optEB = true;
else
- config->optEL = true;
+ ctx.arg.optEL = true;
}
for (opt::Arg *arg : args.filtered(OPT_remap_inputs)) {
@@ -1560,15 +1561,15 @@ static void readConfigs(opt::InputArgList &args) {
if (!to_integer(kv.second, v))
error(errPrefix + "expected an integer, but got '" + kv.second + "'");
else if (Expected<GlobPattern> pat = GlobPattern::create(kv.first))
- config->shuffleSections.emplace_back(std::move(*pat), uint32_t(v));
+ ctx.arg.shuffleSections.emplace_back(std::move(*pat), uint32_t(v));
else
error(errPrefix + toString(pat.takeError()) + ": " + kv.first);
}
- auto reports = {std::make_pair("bti-report", &config->zBtiReport),
- std::make_pair("cet-report", &config->zCetReport),
- std::make_pair("gcs-report", &config->zGcsReport),
- std::make_pair("pauth-report", &config->zPauthReport)};
+ auto reports = {std::make_pair("bti-report", &ctx.arg.zBtiReport),
+ std::make_pair("cet-report", &ctx.arg.zCetReport),
+ std::make_pair("gcs-report", &ctx.arg.zGcsReport),
+ std::make_pair("pauth-report", &ctx.arg.zPauthReport)};
for (opt::Arg *arg : args.filtered(OPT_z)) {
std::pair<StringRef, StringRef> option =
StringRef(arg->getValue()).split('=');
@@ -1603,7 +1604,7 @@ static void readConfigs(opt::InputArgList &args) {
levelStr + "'");
}
if (Expected<GlobPattern> pat = GlobPattern::create(fields[0])) {
- config->compressSections.emplace_back(std::move(*pat), type, level);
+ ctx.arg.compressSections.emplace_back(std::move(*pat), type, level);
} else {
error(arg->getSpelling() + ": " + toString(pat.takeError()));
continue;
@@ -1627,7 +1628,7 @@ static void readConfigs(opt::InputArgList &args) {
error(errPrefix + "expected a non-negative integer, but got '" +
kv.second + "'");
else if (Expected<GlobPattern> pat = GlobPattern::create(kv.first))
- config->deadRelocInNonAlloc.emplace_back(std::move(*pat), v);
+ ctx.arg.deadRelocInNonAlloc.emplace_back(std::move(*pat), v);
else
error(errPrefix + toString(pat.takeError()) + ": " + kv.first);
}
@@ -1653,23 +1654,23 @@ static void readConfigs(opt::InputArgList &args) {
"'");
}
- config->passPlugins = args::getStrings(args, OPT_load_pass_plugins);
+ ctx.arg.passPlugins = args::getStrings(args, OPT_load_pass_plugins);
// Parse -mllvm options.
for (const auto *arg : args.filtered(OPT_mllvm)) {
parseClangOption(arg->getValue(), arg->getSpelling());
- config->mllvmOpts.emplace_back(arg->getValue());
+ ctx.arg.mllvmOpts.emplace_back(arg->getValue());
}
- config->ltoKind = LtoKind::Default;
+ ctx.arg.ltoKind = LtoKind::Default;
if (auto *arg = args.getLastArg(OPT_lto)) {
StringRef s = arg->getValue();
if (s == "thin")
- config->ltoKind = LtoKind::UnifiedThin;
+ ctx.arg.ltoKind = LtoKind::UnifiedThin;
else if (s == "full")
- config->ltoKind = LtoKind::UnifiedRegular;
+ ctx.arg.ltoKind = LtoKind::UnifiedRegular;
else if (s == "default")
- config->ltoKind = LtoKind::Default;
+ ctx.arg.ltoKind = LtoKind::Default;
else
error("unknown LTO mode: " + s);
}
@@ -1685,21 +1686,21 @@ static void readConfigs(opt::InputArgList &args) {
error(arg->getSpelling() + ": expected a positive integer, but got '" +
arg->getValue() + "'");
parallel::strategy = hardware_concurrency(threads);
- config->thinLTOJobs = v;
+ ctx.arg.thinLTOJobs = v;
} else if (parallel::strategy.compute_thread_count() > 16) {
log("set maximum concurrency to 16, specify --threads= to change");
parallel::strategy = hardware_concurrency(16);
}
if (auto *arg = args.getLastArg(OPT_thinlto_jobs_eq))
- config->thinLTOJobs = arg->getValue();
- config->threadCount = parallel::strategy.compute_thread_count();
+ ctx.arg.thinLTOJobs = arg->getValue();
+ ctx.arg.threadCount = parallel::strategy.compute_thread_count();
- if (config->ltoPartitions == 0)
+ if (ctx.arg.ltoPartitions == 0)
error("--lto-partitions: number of threads must be > 0");
- if (!get_threadpool_strategy(config->thinLTOJobs))
- error("--thinlto-jobs: invalid job count: " + config->thinLTOJobs);
+ if (!get_threadpool_strategy(ctx.arg.thinLTOJobs))
+ error("--thinlto-jobs: invalid job count: " + ctx.arg.thinLTOJobs);
- if (config->splitStackAdjustSize < 0)
+ if (ctx.arg.splitStackAdjustSize < 0)
error("--split-stack-adjust-size: size must be >= 0");
// The text segment is traditionally the first segment, whose address equals
@@ -1713,42 +1714,42 @@ static void readConfigs(opt::InputArgList &args) {
// Parse ELF{32,64}{LE,BE} and CPU type.
if (auto *arg = args.getLastArg(OPT_m)) {
StringRef s = arg->getValue();
- std::tie(config->ekind, config->emachine, config->osabi) =
+ std::tie(ctx.arg.ekind, ctx.arg.emachine, ctx.arg.osabi) =
parseEmulation(s);
- config->mipsN32Abi =
+ ctx.arg.mipsN32Abi =
(s.starts_with("elf32btsmipn32") || s.starts_with("elf32ltsmipn32"));
- config->emulation = s;
+ ctx.arg.emulation = s;
}
// Parse --hash-style={sysv,gnu,both}.
if (auto *arg = args.getLastArg(OPT_hash_style)) {
StringRef s = arg->getValue();
if (s == "sysv")
- config->sysvHash = true;
+ ctx.arg.sysvHash = true;
else if (s == "gnu")
- config->gnuHash = true;
+ ctx.arg.gnuHash = true;
else if (s == "both")
- config->sysvHash = config->gnuHash = true;
+ ctx.arg.sysvHash = ctx.arg.gnuHash = true;
else
error("unknown --hash-style: " + s);
}
if (args.hasArg(OPT_print_map))
- config->mapFile = "-";
+ ctx.arg.mapFile = "-";
// Page alignment can be disabled by the -n (--nmagic) and -N (--omagic).
// As PT_GNU_RELRO relies on Paging, do not create it when we have disabled
// it. Also disable RELRO for -r.
- if (config->nmagic || config->omagic || config->relocatable)
- config->zRelro = false;
+ if (ctx.arg.nmagic || ctx.arg.omagic || ctx.arg.relocatable)
+ ctx.arg.zRelro = false;
- std::tie(config->buildId, config->buildIdVector) = getBuildId(args);
+ std::tie(ctx.arg.buildId, ctx.arg.buildIdVector) = getBuildId(args);
if (getZFlag(args, "pack-relative-relocs", "nopack-relative-relocs", false)) {
- config->relrGlibc = true;
- config->relrPackDynRelocs = true;
+ ctx.arg.relrGlibc = true;
+ ctx.arg.relrPackDynRelocs = true;
} else {
- std::tie(config->androidPackDynRelocs, config->relrPackDynRelocs) =
+ std::tie(ctx.arg.androidPackDynRelocs, ctx.arg.relrPackDynRelocs) =
getPackDynRelocs(args);
}
@@ -1757,34 +1758,34 @@ static void readConfigs(opt::InputArgList &args) {
error("--symbol-ordering-file and --call-graph-order-file "
"may not be used together");
if (std::optional<MemoryBufferRef> buffer = readFile(arg->getValue())) {
- config->symbolOrderingFile = getSymbolOrderingFile(*buffer);
+ ctx.arg.symbolOrderingFile = getSymbolOrderingFile(*buffer);
// Also need to disable CallGraphProfileSort to prevent
// LLD order symbols with CGProfile
- config->callGraphProfileSort = CGProfileSortKind::None;
+ ctx.arg.callGraphProfileSort = CGProfileSortKind::None;
}
}
- assert(config->versionDefinitions.empty());
- config->versionDefinitions.push_back(
+ assert(ctx.arg.versionDefinitions.empty());
+ ctx.arg.versionDefinitions.push_back(
{"local", (uint16_t)VER_NDX_LOCAL, {}, {}});
- config->versionDefinitions.push_back(
+ ctx.arg.versionDefinitions.push_back(
{"global", (uint16_t)VER_NDX_GLOBAL, {}, {}});
// If --retain-symbol-file is used, we'll keep only the symbols listed in
// the file and discard all others.
if (auto *arg = args.getLastArg(OPT_retain_symbols_file)) {
- config->versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns.push_back(
+ ctx.arg.versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns.push_back(
{"*", /*isExternCpp=*/false, /*hasWildcard=*/true});
if (std::optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
for (StringRef s : args::getLines(*buffer))
- config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(
+ ctx.arg.versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(
{s, /*isExternCpp=*/false, /*hasWildcard=*/false});
}
for (opt::Arg *arg : args.filtered(OPT_warn_backrefs_exclude)) {
StringRef pattern(arg->getValue());
if (Expected<GlobPattern> pat = GlobPattern::create(pattern))
- config->warnBackrefsExclude.push_back(std::move(*pat));
+ ctx.arg.warnBackrefsExclude.push_back(std::move(*pat));
else
error(arg->getSpelling() + ": " + toString(pat.takeError()) + ": " +
pattern);
@@ -1796,15 +1797,15 @@ static void readConfigs(opt::InputArgList &args) {
// even if other options express a symbolic intention: -Bsymbolic,
// -Bsymbolic-functions (if STT_FUNC), --dynamic-list.
for (auto *arg : args.filtered(OPT_export_dynamic_symbol))
- config->dynamicList.push_back(
+ ctx.arg.dynamicList.push_back(
{arg->getValue(), /*isExternCpp=*/false,
/*hasWildcard=*/hasWildcard(arg->getValue())});
// --export-dynamic-symbol-list specifies a list of --export-dynamic-symbol
// patterns. --dynamic-list is --export-dynamic-symbol-list plus -Bsymbolic
// like semantics.
- config->symbolic =
- config->bsymbolic == BsymbolicKind::All || args.hasArg(OPT_dynamic_list);
+ ctx.arg.symbolic =
+ ctx.arg.bsymbolic == BsymbolicKind::All || args.hasArg(OPT_dynamic_list);
for (auto *arg :
args.filtered(OPT_dynamic_list, OPT_export_dynamic_symbol_list))
if (std::optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
@@ -2067,14 +2068,14 @@ void LinkerDriver::inferMachineType() {
// Parse -z max-page-size=<value>. The default value is defined by
// each target.
-static uint64_t getMaxPageSize(opt::InputArgList &args) {
+static uint64_t getMaxPageSize(Ctx &ctx, opt::InputArgList &args) {
uint64_t val = args::getZOptionValue(args, OPT_z, "max-page-size",
ctx.target->defaultMaxPageSize);
if (!isPowerOf2_64(val)) {
error("max-page-size: value isn't a power of 2");
return ctx.target->defaultMaxPageSize;
}
- if (config->nmagic || config->omagic) {
+ if (ctx.arg.nmagic || ctx.arg.omagic) {
if (val != ctx.target->defaultMaxPageSize)
warn("-z max-page-size set, but paging disabled by omagic or nmagic");
return 1;
@@ -2084,7 +2085,7 @@ static uint64_t getMaxPageSize(opt::InputArgList &args) {
// Parse -z common-page-size=<value>. The default value is defined by
// each target.
-static uint64_t getCommonPageSize(opt::InputArgList &args) {
+static uint64_t getCommonPageSize(Ctx &ctx, opt::InputArgList &args) {
uint64_t val = args::getZOptionValue(args, OPT_z, "common-page-size",
ctx.target->defaultCommonPageSize);
if (!isPowerOf2_64(val)) {
@@ -2103,7 +2104,7 @@ static uint64_t getCommonPageSize(opt::InputArgList &args) {
}
// Parses --image-base option.
-static std::optional<uint64_t> getImageBase(opt::InputArgList &args) {
+static std::optional<uint64_t> getImageBase(Ctx &ctx, opt::InputArgList &args) {
// Because we are using "Config->maxPageSize" here, this function has to be
// called after the variable is initialized.
auto *arg = args.getLastArg(OPT_image_base);
@@ -2116,7 +2117,7 @@ static std::optional<uint64_t> getImageBase(opt::InputArgList &args) {
error("--image-base: number expected, but got " + s);
return 0;
}
- if ((v % config->maxPageSize) != 0)
+ if ((v % ctx.arg.maxPageSize) != 0)
warn("--image-base: address isn't multiple of page size: " + s);
return v;
}
@@ -2169,7 +2170,7 @@ static void excludeLibs(opt::InputArgList &args) {
}
// Force Sym to be entered in the output.
-static void handleUndefined(Symbol *sym, const char *option) {
+static void handleUndefined(Ctx &ctx, Symbol *sym, const char *option) {
// Since a symbol may not be used inside the program, LTO may
// eliminate it. Mark the symbol as "used" to prevent it.
sym->isUsedInRegularObj = true;
@@ -2177,14 +2178,14 @@ static void handleUndefined(Symbol *sym, const char *option) {
if (!sym->isLazy())
return;
sym->extract();
- if (!config->whyExtract.empty())
+ if (!ctx.arg.whyExtract.empty())
ctx.whyExtractRecords.emplace_back(option, sym->file, *sym);
}
// As an extension to GNU linkers, lld supports a variant of `-u`
// which accepts wildcard patterns. All symbols that match a given
// pattern are handled as if they were given by `-u`.
-static void handleUndefinedGlob(StringRef arg) {
+static void handleUndefinedGlob(Ctx &ctx, StringRef arg) {
Expected<GlobPattern> pat = GlobPattern::create(arg);
if (!pat) {
error("--undefined-glob: " + toString(pat.takeError()) + ": " + arg);
@@ -2199,26 +2200,26 @@ static void handleUndefinedGlob(StringRef arg) {
syms.push_back(sym);
for (Symbol *sym : syms)
- handleUndefined(sym, "--undefined-glob");
+ handleUndefined(ctx, sym, "--undefined-glob");
}
-static void handleLibcall(StringRef name) {
+static void handleLibcall(Ctx &ctx, StringRef name) {
Symbol *sym = symtab.find(name);
if (sym && sym->isLazy() && isa<BitcodeFile>(sym->file)) {
- if (!config->whyExtract.empty())
+ if (!ctx.arg.whyExtract.empty())
ctx.whyExtractRecords.emplace_back("<libcall>", sym->file, *sym);
sym->extract();
}
}
-static void writeArchiveStats() {
- if (config->printArchiveStats.empty())
+static void writeArchiveStats(Ctx &ctx) {
+ if (ctx.arg.printArchiveStats.empty())
return;
std::error_code ec;
- raw_fd_ostream os = ctx.openAuxiliaryFile(config->printArchiveStats, ec);
+ raw_fd_ostream os = ctx.openAuxiliaryFile(ctx.arg.printArchiveStats, ec);
if (ec) {
- error("--print-archive-stats=: cannot open " + config->printArchiveStats +
+ error("--print-archive-stats=: cannot open " + ctx.arg.printArchiveStats +
": " + ec.message());
return;
}
@@ -2241,14 +2242,14 @@ static void writeArchiveStats() {
}
}
-static void writeWhyExtract() {
- if (config->whyExtract.empty())
+static void writeWhyExtract(Ctx &ctx) {
+ if (ctx.arg.whyExtract.empty())
return;
std::error_code ec;
- raw_fd_ostream os = ctx.openAuxiliaryFile(config->whyExtract, ec);
+ raw_fd_ostream os = ctx.openAuxiliaryFile(ctx.arg.whyExtract, ec);
if (ec) {
- error("cannot open --why-extract= file " + config->whyExtract + ": " +
+ error("cannot open --why-extract= file " + ctx.arg.whyExtract + ": " +
ec.message());
return;
}
@@ -2260,7 +2261,7 @@ static void writeWhyExtract() {
}
}
-static void reportBackrefs() {
+static void reportBackrefs(Ctx &ctx) {
for (auto &ref : ctx.backwardReferences) {
const Symbol &sym = *ref.first;
std::string to = toString(ref.second.second);
@@ -2268,7 +2269,7 @@ static void reportBackrefs() {
// with --warn-backrefs-exclude=. The value may look like (for --start-lib)
// *.o or (archive member) *.a(*.o).
bool exclude = false;
- for (const llvm::GlobPattern &pat : config->warnBackrefsExclude)
+ for (const llvm::GlobPattern &pat : ctx.arg.warnBackrefsExclude)
if (pat.match(to)) {
exclude = true;
break;
@@ -2303,11 +2304,11 @@ static void reportBackrefs() {
// part of your program. By using --dependency-file option, you can make
// lld to dump dependency info so that you can maintain exact dependencies
// easily.
-static void writeDependencyFile() {
+static void writeDependencyFile(Ctx &ctx) {
std::error_code ec;
- raw_fd_ostream os = ctx.openAuxiliaryFile(config->dependencyFile, ec);
+ raw_fd_ostream os = ctx.openAuxiliaryFile(ctx.arg.dependencyFile, ec);
if (ec) {
- error("cannot open " + config->dependencyFile + ": " + ec.message());
+ error("cannot open " + ctx.arg.dependencyFile + ": " + ec.message());
return;
}
@@ -2334,14 +2335,14 @@ static void writeDependencyFile() {
}
};
- os << config->outputFile << ":";
- for (StringRef path : config->dependencyFiles) {
+ os << ctx.arg.outputFile << ":";
+ for (StringRef path : ctx.arg.dependencyFiles) {
os << " \\\n ";
printFilename(os, path);
}
os << "\n";
- for (StringRef path : config->dependencyFiles) {
+ for (StringRef path : ctx.arg.dependencyFiles) {
os << "\n";
printFilename(os, path);
os << ":\n";
@@ -2352,7 +2353,7 @@ static void writeDependencyFile() {
// This function is called after all symbol names are resolved. As a
// result, the passes after the symbol resolution won't see any
// symbols of type CommonSymbol.
-static void replaceCommonSymbols() {
+static void replaceCommonSymbols(Ctx &ctx) {
llvm::TimeTraceScope timeScope("Replace common symbols");
for (ELFFileBase *file : ctx.objectFiles) {
if (!file->hasCommonSyms)
@@ -2387,7 +2388,7 @@ static void markAddrsig(Symbol *s) {
// and symbols referred to by address-significance tables. These sections are
// ineligible for ICF.
template <class ELFT>
-static void findKeepUniqueSections(opt::InputArgList &args) {
+static void findKeepUniqueSections(Ctx &ctx, opt::InputArgList &args) {
for (auto *arg : args.filtered(OPT_keep_unique)) {
StringRef name = arg->getValue();
auto *d = dyn_cast_or_null<Defined>(symtab.find(name));
@@ -2400,7 +2401,7 @@ static void findKeepUniqueSections(opt::InputArgList &args) {
// --icf=all --ignore-data-address-equality means that we can ignore
// the dynsym and address-significance tables entirely.
- if (config->icf == ICFLevel::All && config->ignoreDataAddressEquality)
+ if (ctx.arg.icf == ICFLevel::All && ctx.arg.ignoreDataAddressEquality)
return;
// Symbols in the dynsym could be address-significant in other executables
@@ -2669,7 +2670,7 @@ static void combineVersionedSymbol(Symbol &sym,
// When this function is executed, only InputFiles and symbol table
// contain pointers to symbol objects. We visit them to replace pointers,
// so that wrapped symbols are swapped as instructed by the command line.
-static void redirectSymbols(ArrayRef<WrappedSymbol> wrapped) {
+static void redirectSymbols(Ctx &ctx, ArrayRef<WrappedSymbol> wrapped) {
llvm::TimeTraceScope timeScope("Redirect symbols");
DenseMap<Symbol *, Symbol *> map;
for (const WrappedSymbol &w : wrapped) {
@@ -2680,7 +2681,7 @@ static void redirectSymbols(ArrayRef<WrappedSymbol> wrapped) {
// If there are version definitions (versionDefinitions.size() > 2), enumerate
// symbols with a non-default version (foo@v1) and check whether it should be
// combined with foo or foo@@v1.
- if (config->versionDefinitions.size() > 2)
+ if (ctx.arg.versionDefinitions.size() > 2)
for (Symbol *sym : symtab.getSymbols())
if (sym->hasVersionSuffix)
combineVersionedSymbol(*sym, map);
@@ -2877,11 +2878,11 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
// If an entry symbol is in a static archive, pull out that file now.
if (Symbol *sym = symtab.find(ctx.arg.entry))
- handleUndefined(sym, "--entry");
+ handleUndefined(ctx, sym, "--entry");
// Handle the `--undefined-glob <pattern>` options.
for (StringRef pat : args::getStrings(args, OPT_undefined_glob))
- handleUndefinedGlob(pat);
+ handleUndefinedGlob(ctx, pat);
// After potential archive member extraction involving ENTRY and
// -u/--undefined-glob, check whether PROVIDE symbols should be defined (the
@@ -2919,7 +2920,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
if (!ctx.bitcodeFiles.empty()) {
llvm::Triple TT(ctx.bitcodeFiles.front()->obj->getTargetTriple());
for (auto *s : lto::LTO::getRuntimeLibcallSymbols(TT))
- handleLibcall(s);
+ handleLibcall(ctx, s);
}
// Archive members defining __wrap symbols may be extracted.
@@ -3006,9 +3007,9 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
// Symbol resolution finished. Report backward reference problems,
// --print-archive-stats=, and --why-extract=.
- reportBackrefs();
- writeArchiveStats();
- writeWhyExtract();
+ reportBackrefs(ctx);
+ writeArchiveStats(ctx);
+ writeWhyExtract(ctx);
if (errorCount())
return;
@@ -3050,10 +3051,10 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
processArmCmseSymbols();
// Apply symbol renames for --wrap and combine foo@v1 and foo@@v1.
- redirectSymbols(wrapped);
+ redirectSymbols(ctx, wrapped);
// Replace common symbols with regular symbols.
- replaceCommonSymbols();
+ replaceCommonSymbols(ctx);
{
llvm::TimeTraceScope timeScope("Aggregate sections");
@@ -3104,7 +3105,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
// Since we now have a complete set of input files, we can create
// a .d file to record build dependencies.
if (!ctx.arg.dependencyFile.empty())
- writeDependencyFile();
+ writeDependencyFile(ctx);
// Now that the number of partitions is fixed, save a pointer to the main
// partition.
@@ -3124,15 +3125,15 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
// the output can be run on. For example if the OS can use 4k or 64k page
// sizes then maxPageSize must be 64k for the output to be useable on both.
// All important alignment decisions must use this value.
- ctx.arg.maxPageSize = getMaxPageSize(args);
+ ctx.arg.maxPageSize = getMaxPageSize(ctx, args);
// commonPageSize is the most common page size that the output will be run on.
// For example if an OS can use 4k or 64k page sizes and 4k is more common
// than 64k then commonPageSize is set to 4k. commonPageSize can be used for
// optimizations such as DATA_SEGMENT_ALIGN in linker scripts. LLD's use of it
// is limited to writing trap instructions on the last executable segment.
- ctx.arg.commonPageSize = getCommonPageSize(args);
+ ctx.arg.commonPageSize = getCommonPageSize(ctx, args);
- ctx.arg.imageBase = getImageBase(args);
+ ctx.arg.imageBase = getImageBase(ctx, args);
// This adds a .comment section containing a version string.
if (!ctx.arg.relocatable)
@@ -3195,7 +3196,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
// Two input sections with different output sections should not be folded.
// ICF runs after processSectionCommands() so that we know the output sections.
if (ctx.arg.icf != ICFLevel::None) {
- findKeepUniqueSections<ELFT>(args);
+ findKeepUniqueSections<ELFT>(ctx, args);
doIcf<ELFT>();
}
diff --git a/lld/test/COFF/import_weak_alias.test b/lld/test/COFF/import_weak_alias.test
new file mode 100644
index 000000000000..ae1817c67a20
--- /dev/null
+++ b/lld/test/COFF/import_weak_alias.test
@@ -0,0 +1,20 @@
+# REQUIRES: x86
+
+# RUN: split-file %s %t.dir
+# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/foo.s -o %t.foo.obj
+# RUN: llvm-mc --filetype=obj -triple=x86_64-windows-msvc %t.dir/qux.s -o %t.qux.obj
+# RUN: lld-link %t.qux.obj %t.foo.obj -out:%t.dll -dll
+#
+#--- foo.s
+.text
+bar:
+ ret
+
+.weak foo
+.set foo, bar
+#--- qux.s
+.text
+.global _DllMainCRTStartup
+_DllMainCRTStartup:
+ call *__imp_foo(%rip)
+ ret
diff --git a/lld/test/wasm/unsupported-pic-relocations.s b/lld/test/wasm/unsupported-pic-relocations.s
new file mode 100644
index 000000000000..ea32e8468cdb
--- /dev/null
+++ b/lld/test/wasm/unsupported-pic-relocations.s
@@ -0,0 +1,39 @@
+# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
+
+# RUN: not wasm-ld --experimental-pic -shared %t.o -o /dev/null 2>&1 | \
+# RUN: FileCheck %s
+
+# RUN: not wasm-ld --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=report-all 2>&1 | \
+# RUN: FileCheck %s
+
+# RUN: not wasm-ld --experimental-pic -shared %t.o -o /dev/null --warn-unresolved-symbols 2>&1 | \
+# RUN: FileCheck %s
+
+# RUN: not wasm-ld --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=ignore-all 2>&1 | \
+# RUN: FileCheck %s
+
+# RUN: not wasm-ld --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=import-dynamic 2>&1 | \
+# RUN: FileCheck %s
+
+.functype external_func () -> ()
+
+use_undefined_function:
+ .functype use_undefined_function () -> ()
+ i32.const external_func@TBREL
+ # CHECK: error: {{.*}}.o: relocation R_WASM_TABLE_INDEX_REL_SLEB is not supported against an undefined symbol `external_func`
+ drop
+ end_function
+
+use_undefined_data:
+ .functype use_undefined_data () -> ()
+ i32.const external_data@MBREL
+ # CHECK: error: {{.*}}.o: relocation R_WASM_MEMORY_ADDR_REL_SLEB is not supported against an undefined symbol `external_data`
+ drop
+ end_function
+
+.globl _start
+_start:
+ .functype _start () -> ()
+ call use_undefined_function
+ call use_undefined_data
+ end_function
diff --git a/lld/test/wasm/unsupported-pic-relocations64.s b/lld/test/wasm/unsupported-pic-relocations64.s
new file mode 100644
index 000000000000..db9707b7fbac
--- /dev/null
+++ b/lld/test/wasm/unsupported-pic-relocations64.s
@@ -0,0 +1,39 @@
+# RUN: llvm-mc -filetype=obj -triple=wasm64-unknown-unknown -o %t.o %s
+
+# RUN: not wasm-ld -mwasm64 --experimental-pic -shared %t.o -o /dev/null 2>&1 | \
+# RUN: FileCheck %s
+
+# RUN: not wasm-ld -mwasm64 --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=report-all 2>&1 | \
+# RUN: FileCheck %s
+
+# RUN: not wasm-ld -mwasm64 --experimental-pic -shared %t.o -o /dev/null --warn-unresolved-symbols 2>&1 | \
+# RUN: FileCheck %s
+
+# RUN: not wasm-ld -mwasm64 --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=ignore-all 2>&1 | \
+# RUN: FileCheck %s
+
+# RUN: not wasm-ld -mwasm64 --experimental-pic -shared %t.o -o /dev/null --unresolved-symbols=import-dynamic 2>&1 | \
+# RUN: FileCheck %s
+
+.functype external_func () -> ()
+
+use_undefined_function:
+ .functype use_undefined_function () -> ()
+ i64.const external_func@TBREL
+ # CHECK: error: {{.*}}.o: relocation R_WASM_TABLE_INDEX_REL_SLEB64 is not supported against an undefined symbol `external_func`
+ drop
+ end_function
+
+use_undefined_data:
+ .functype use_undefined_data () -> ()
+ i64.const external_data@MBREL
+ # CHECK: error: {{.*}}.o: relocation R_WASM_MEMORY_ADDR_REL_SLEB64 is not supported against an undefined symbol `external_data`
+ drop
+ end_function
+
+.globl _start
+_start:
+ .functype _start () -> ()
+ call use_undefined_function
+ call use_undefined_data
+ end_function
diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp
index 6f33a4f28a9d..2dbfe3354947 100644
--- a/lld/wasm/Relocations.cpp
+++ b/lld/wasm/Relocations.cpp
@@ -173,6 +173,22 @@ void scanRelocations(InputChunk *chunk) {
}
}
+ if (sym->isUndefined()) {
+ switch (reloc.Type) {
+ case R_WASM_TABLE_INDEX_REL_SLEB:
+ case R_WASM_TABLE_INDEX_REL_SLEB64:
+ case R_WASM_MEMORY_ADDR_REL_SLEB:
+ case R_WASM_MEMORY_ADDR_REL_SLEB64:
+ // These relocation types are for symbols that exists relative to
+ // `__memory_base` or `__table_base` and as such only make sense for
+ // defined symbols.
+ error(toString(file) + ": relocation " + relocTypeToString(reloc.Type) +
+ " is not supported against an undefined symbol `" +
+ toString(*sym) + "`");
+ break;
+ }
+ }
+
if (sym->isUndefined() && !config->relocatable && !sym->isWeak()) {
// Report undefined symbols
reportUndefined(file, sym);
diff --git a/lldb/include/lldb/Utility/Scalar.h b/lldb/include/lldb/Utility/Scalar.h
index 0d8eba3c9726..b4b9c7e18958 100644
--- a/lldb/include/lldb/Utility/Scalar.h
+++ b/lldb/include/lldb/Utility/Scalar.h
@@ -210,6 +210,7 @@ protected:
static PromotionKey GetFloatPromoKey(const llvm::fltSemantics &semantics);
private:
+ friend llvm::APFloat::cmpResult compare(Scalar lhs, Scalar rhs);
friend const Scalar operator+(const Scalar &lhs, const Scalar &rhs);
friend const Scalar operator-(Scalar lhs, Scalar rhs);
friend const Scalar operator/(Scalar lhs, Scalar rhs);
@@ -220,9 +221,9 @@ private:
friend const Scalar operator^(Scalar lhs, Scalar rhs);
friend const Scalar operator<<(const Scalar &lhs, const Scalar &rhs);
friend const Scalar operator>>(const Scalar &lhs, const Scalar &rhs);
- friend bool operator==(Scalar lhs, Scalar rhs);
+ friend bool operator==(const Scalar &lhs, const Scalar &rhs);
friend bool operator!=(const Scalar &lhs, const Scalar &rhs);
- friend bool operator<(Scalar lhs, Scalar rhs);
+ friend bool operator<(const Scalar &lhs, const Scalar &rhs);
friend bool operator<=(const Scalar &lhs, const Scalar &rhs);
friend bool operator>(const Scalar &lhs, const Scalar &rhs);
friend bool operator>=(const Scalar &lhs, const Scalar &rhs);
@@ -241,6 +242,7 @@ private:
// Item 19 of "Effective C++ Second Edition" by Scott Meyers
// Differentiate among members functions, non-member functions, and
// friend functions
+llvm::APFloat::cmpResult compare(Scalar lhs, Scalar rhs);
const Scalar operator+(const Scalar &lhs, const Scalar &rhs);
const Scalar operator-(Scalar lhs, Scalar rhs);
const Scalar operator/(Scalar lhs, Scalar rhs);
@@ -251,9 +253,9 @@ const Scalar operator%(Scalar lhs, Scalar rhs);
const Scalar operator^(Scalar lhs, Scalar rhs);
const Scalar operator<<(const Scalar &lhs, const Scalar &rhs);
const Scalar operator>>(const Scalar &lhs, const Scalar &rhs);
-bool operator==(Scalar lhs, Scalar rhs);
+bool operator==(const Scalar &lhs, const Scalar &rhs);
bool operator!=(const Scalar &lhs, const Scalar &rhs);
-bool operator<(Scalar lhs, Scalar rhs);
+bool operator<(const Scalar &lhs, const Scalar &rhs);
bool operator<=(const Scalar &lhs, const Scalar &rhs);
bool operator>(const Scalar &lhs, const Scalar &rhs);
bool operator>=(const Scalar &lhs, const Scalar &rhs);
diff --git a/lldb/include/lldb/Utility/Status.h b/lldb/include/lldb/Utility/Status.h
index 795c830b9651..4a09c38ce62f 100644
--- a/lldb/include/lldb/Utility/Status.h
+++ b/lldb/include/lldb/Utility/Status.h
@@ -28,6 +28,69 @@ namespace lldb_private {
const char *ExpressionResultAsCString(lldb::ExpressionResults result);
+/// Going a bit against the spirit of llvm::Error,
+/// lldb_private::Status need to store errors long-term and sometimes
+/// copy them. This base class defines an interface for this
+/// operation.
+class CloneableError
+ : public llvm::ErrorInfo<CloneableError, llvm::ErrorInfoBase> {
+public:
+ using llvm::ErrorInfo<CloneableError, llvm::ErrorInfoBase>::ErrorInfo;
+ CloneableError() : ErrorInfo() {}
+ virtual std::unique_ptr<CloneableError> Clone() const = 0;
+ static char ID;
+};
+
+/// Common base class for all error-code errors.
+class CloneableECError
+ : public llvm::ErrorInfo<CloneableECError, CloneableError> {
+public:
+ using llvm::ErrorInfo<CloneableECError, CloneableError>::ErrorInfo;
+ CloneableECError() = delete;
+ CloneableECError(std::error_code ec) : ErrorInfo(), EC(ec) {}
+ std::error_code convertToErrorCode() const override { return EC; }
+ void log(llvm::raw_ostream &OS) const override { OS << EC.message(); }
+ std::unique_ptr<CloneableError> Clone() const override;
+ static char ID;
+
+protected:
+ std::error_code EC;
+};
+
+/// FIXME: Move these declarations closer to where they're used.
+class MachKernelError
+ : public llvm::ErrorInfo<MachKernelError, CloneableECError> {
+public:
+ using llvm::ErrorInfo<MachKernelError, CloneableECError>::ErrorInfo;
+ MachKernelError(std::error_code ec) : ErrorInfo(ec) {}
+ std::string message() const override;
+ std::unique_ptr<CloneableError> Clone() const override;
+ static char ID;
+};
+
+class Win32Error : public llvm::ErrorInfo<Win32Error, CloneableECError> {
+public:
+ using llvm::ErrorInfo<Win32Error, CloneableECError>::ErrorInfo;
+ Win32Error(std::error_code ec, const llvm::Twine &msg = {}) : ErrorInfo(ec) {}
+ std::string message() const override;
+ std::unique_ptr<CloneableError> Clone() const override;
+ static char ID;
+};
+
+class ExpressionError
+ : public llvm::ErrorInfo<ExpressionError, CloneableECError> {
+public:
+ using llvm::ErrorInfo<ExpressionError, CloneableECError>::ErrorInfo;
+ ExpressionError(std::error_code ec, std::string msg = {})
+ : ErrorInfo(ec), m_string(msg) {}
+ std::unique_ptr<CloneableError> Clone() const override;
+ std::string message() const override { return m_string; }
+ static char ID;
+
+protected:
+ std::string m_string;
+};
+
/// \class Status Status.h "lldb/Utility/Status.h" An error handling class.
///
/// This class is designed to be able to hold any error code that can be
@@ -100,9 +163,7 @@ public:
}
static Status FromExpressionError(lldb::ExpressionResults result,
- std::string msg) {
- return Status(result, lldb::eErrorTypeExpression, msg);
- }
+ std::string msg);
/// Set the current error to errno.
///
@@ -115,6 +176,7 @@ public:
const Status &operator=(Status &&);
/// Avoid using this in new code. Migrate APIs to llvm::Expected instead.
static Status FromError(llvm::Error error);
+
/// FIXME: Replace this with a takeError() method.
llvm::Error ToError() const;
/// Don't call this function in new code. Instead, redesign the API
@@ -149,12 +211,20 @@ public:
/// Access the error value.
///
+ /// If the internally stored \ref llvm::Error is an \ref
+ /// llvm::ErrorList then this returns the error value of the first
+ /// error.
+ ///
/// \return
/// The error value.
ValueType GetError() const;
/// Access the error type.
///
+ /// If the internally stored \ref llvm::Error is an \ref
+ /// llvm::ErrorList then this returns the error value of the first
+ /// error.
+ ///
/// \return
/// The error type enumeration value.
lldb::ErrorType GetType() const;
@@ -170,12 +240,9 @@ public:
bool Success() const;
protected:
- Status(llvm::Error error);
- /// Status code as an integer value.
- ValueType m_code = 0;
- /// The type of the above error code.
- lldb::ErrorType m_type = lldb::eErrorTypeInvalid;
- /// A string representation of the error code.
+ Status(llvm::Error error) : m_error(std::move(error)) {}
+ llvm::Error m_error;
+ /// TODO: Replace this with just callling toString(m_error).
mutable std::string m_string;
};
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp
index 9b72cb003528..d4aa90b2c773 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp
@@ -103,13 +103,14 @@ GDBRemoteCommunicationServer::SendErrorResponse(uint8_t err) {
GDBRemoteCommunication::PacketResult
GDBRemoteCommunicationServer::SendErrorResponse(const Status &error) {
+ uint8_t code = error.GetType() == eErrorTypePOSIX ? error.GetError() : 0xff;
if (m_send_error_strings) {
lldb_private::StreamString packet;
- packet.Printf("E%2.2x;", static_cast<uint8_t>(error.GetError()));
+ packet.Printf("E%2.2x;", code);
packet.PutStringAsRawHex8(error.AsCString());
return SendPacketNoLock(packet.GetString());
- } else
- return SendErrorResponse(error.GetError());
+ }
+ return SendErrorResponse(code);
}
GDBRemoteCommunication::PacketResult
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
index 24cf34300063..6ddd00df3a21 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
@@ -993,8 +993,8 @@ void PythonException::Restore() {
}
PythonException::~PythonException() {
- Py_XDECREF(m_exception_type);
Py_XDECREF(m_exception);
+ Py_XDECREF(m_exception_type);
Py_XDECREF(m_traceback);
Py_XDECREF(m_repr_bytes);
}
@@ -1108,9 +1108,10 @@ public:
py_error = Status::FromError(r.takeError());
}
base_error = Base::Close();
+ // Cloning since the wrapped exception may still reference the PyThread.
if (py_error.Fail())
- return py_error;
- return base_error;
+ return py_error.Clone();
+ return base_error.Clone();
};
PyObject *GetPythonObject() const {
@@ -1196,7 +1197,8 @@ public:
return Flush();
auto r = m_py_obj.CallMethod("close");
if (!r)
- return Status::FromError(r.takeError());
+ // Cloning since the wrapped exception may still reference the PyThread.
+ return Status::FromError(r.takeError()).Clone();
return Status();
}
@@ -1204,7 +1206,8 @@ public:
GIL takeGIL;
auto r = m_py_obj.CallMethod("flush");
if (!r)
- return Status::FromError(r.takeError());
+ // Cloning since the wrapped exception may still reference the PyThread.
+ return Status::FromError(r.takeError()).Clone();
return Status();
}
@@ -1240,7 +1243,8 @@ public:
PyObject *pybuffer_p = PyMemoryView_FromMemory(
const_cast<char *>((const char *)buf), num_bytes, PyBUF_READ);
if (!pybuffer_p)
- return Status::FromError(llvm::make_error<PythonException>());
+ // Cloning since the wrapped exception may still reference the PyThread.
+ return Status::FromError(llvm::make_error<PythonException>()).Clone();
auto pybuffer = Take<PythonObject>(pybuffer_p);
num_bytes = 0;
auto bytes_written = As<long long>(m_py_obj.CallMethod("write", pybuffer));
@@ -1260,7 +1264,8 @@ public:
auto pybuffer_obj =
m_py_obj.CallMethod("read", (unsigned long long)num_bytes);
if (!pybuffer_obj)
- return Status::FromError(pybuffer_obj.takeError());
+ // Cloning since the wrapped exception may still reference the PyThread.
+ return Status::FromError(pybuffer_obj.takeError()).Clone();
num_bytes = 0;
if (pybuffer_obj.get().IsNone()) {
// EOF
@@ -1269,7 +1274,8 @@ public:
}
auto pybuffer = PythonBuffer::Create(pybuffer_obj.get());
if (!pybuffer)
- return Status::FromError(pybuffer.takeError());
+ // Cloning since the wrapped exception may still reference the PyThread.
+ return Status::FromError(pybuffer.takeError()).Clone();
memcpy(buf, pybuffer.get().get().buf, pybuffer.get().get().len);
num_bytes = pybuffer.get().get().len;
return Status();
@@ -1300,7 +1306,8 @@ public:
auto bytes_written =
As<long long>(m_py_obj.CallMethod("write", pystring.get()));
if (!bytes_written)
- return Status::FromError(bytes_written.takeError());
+ // Cloning since the wrapped exception may still reference the PyThread.
+ return Status::FromError(bytes_written.takeError()).Clone();
if (bytes_written.get() < 0)
return Status::FromErrorString(
".write() method returned a negative number!");
@@ -1321,14 +1328,16 @@ public:
auto pystring = As<PythonString>(
m_py_obj.CallMethod("read", (unsigned long long)num_chars));
if (!pystring)
- return Status::FromError(pystring.takeError());
+ // Cloning since the wrapped exception may still reference the PyThread.
+ return Status::FromError(pystring.takeError()).Clone();
if (pystring.get().IsNone()) {
// EOF
return Status();
}
auto stringref = pystring.get().AsUTF8();
if (!stringref)
- return Status::FromError(stringref.takeError());
+ // Cloning since the wrapped exception may still reference the PyThread.
+ return Status::FromError(stringref.takeError()).Clone();
num_bytes = stringref.get().size();
memcpy(buf, stringref.get().begin(), num_bytes);
return Status();
diff --git a/lldb/source/Utility/Scalar.cpp b/lldb/source/Utility/Scalar.cpp
index 329f5b6e4b9a..f07a9f3bed00 100644
--- a/lldb/source/Utility/Scalar.cpp
+++ b/lldb/source/Utility/Scalar.cpp
@@ -852,57 +852,50 @@ llvm::APFloat Scalar::CreateAPFloatFromAPFloat(lldb::BasicType basic_type) {
}
}
-bool lldb_private::operator==(Scalar lhs, Scalar rhs) {
+APFloat::cmpResult lldb_private::compare(Scalar lhs, Scalar rhs) {
// If either entry is void then we can just compare the types
if (lhs.m_type == Scalar::e_void || rhs.m_type == Scalar::e_void)
- return lhs.m_type == rhs.m_type;
+ return lhs.m_type == rhs.m_type ? APFloat::cmpEqual : APFloat::cmpUnordered;
- llvm::APFloat::cmpResult result;
switch (Scalar::PromoteToMaxType(lhs, rhs)) {
case Scalar::e_void:
break;
case Scalar::e_int:
- return lhs.m_integer == rhs.m_integer;
+ if (lhs.m_integer < rhs.m_integer)
+ return APFloat::cmpLessThan;
+ if (lhs.m_integer > rhs.m_integer)
+ return APFloat::cmpGreaterThan;
+ return APFloat::cmpEqual;
case Scalar::e_float:
- result = lhs.m_float.compare(rhs.m_float);
- if (result == llvm::APFloat::cmpEqual)
- return true;
+ return lhs.m_float.compare(rhs.m_float);
}
- return false;
+ return APFloat::cmpUnordered;
}
-bool lldb_private::operator!=(const Scalar &lhs, const Scalar &rhs) {
- return !(lhs == rhs);
+bool lldb_private::operator==(const Scalar &lhs, const Scalar &rhs) {
+ return compare(lhs, rhs) == APFloat::cmpEqual;
}
-bool lldb_private::operator<(Scalar lhs, Scalar rhs) {
- if (lhs.m_type == Scalar::e_void || rhs.m_type == Scalar::e_void)
- return false;
+bool lldb_private::operator!=(const Scalar &lhs, const Scalar &rhs) {
+ return compare(lhs, rhs) != APFloat::cmpEqual;
+}
- llvm::APFloat::cmpResult result;
- switch (Scalar::PromoteToMaxType(lhs, rhs)) {
- case Scalar::e_void:
- break;
- case Scalar::e_int:
- return lhs.m_integer < rhs.m_integer;
- case Scalar::e_float:
- result = lhs.m_float.compare(rhs.m_float);
- if (result == llvm::APFloat::cmpLessThan)
- return true;
- }
- return false;
+bool lldb_private::operator<(const Scalar &lhs, const Scalar &rhs) {
+ return compare(lhs, rhs) == APFloat::cmpLessThan;
}
bool lldb_private::operator<=(const Scalar &lhs, const Scalar &rhs) {
- return !(rhs < lhs);
+ APFloat::cmpResult Res = compare(lhs, rhs);
+ return Res == APFloat::cmpLessThan || Res == APFloat::cmpEqual;
}
bool lldb_private::operator>(const Scalar &lhs, const Scalar &rhs) {
- return rhs < lhs;
+ return compare(lhs, rhs) == APFloat::cmpGreaterThan;
}
bool lldb_private::operator>=(const Scalar &lhs, const Scalar &rhs) {
- return !(lhs < rhs);
+ APFloat::cmpResult Res = compare(lhs, rhs);
+ return Res == APFloat::cmpGreaterThan || Res == APFloat::cmpEqual;
}
bool Scalar::ClearBit(uint32_t bit) {
diff --git a/lldb/source/Utility/Status.cpp b/lldb/source/Utility/Status.cpp
index 4af3af5fba01..a659456b9b1b 100644
--- a/lldb/source/Utility/Status.cpp
+++ b/lldb/source/Utility/Status.cpp
@@ -8,6 +8,8 @@
#include "lldb/Utility/Status.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "lldb/Utility/Log.h"
#include "lldb/Utility/VASPrintf.h"
#include "lldb/lldb-defines.h"
#include "lldb/lldb-enumerations.h"
@@ -37,48 +39,80 @@ class raw_ostream;
using namespace lldb;
using namespace lldb_private;
-Status::Status() {}
+char CloneableError::ID;
+char CloneableECError::ID;
+char MachKernelError::ID;
+char Win32Error::ID;
+char ExpressionError::ID;
+
+namespace {
+/// A std::error_code category for eErrorTypeGeneric.
+class LLDBGenericCategory : public std::error_category {
+ const char *name() const noexcept override { return "LLDBGenericCategory"; }
+ std::string message(int __ev) const override { return "generic LLDB error"; };
+};
+LLDBGenericCategory &lldb_generic_category() {
+ static LLDBGenericCategory g_generic_category;
+ return g_generic_category;
+}
+
+/// A std::error_code category for eErrorTypeExpression.
+class ExpressionCategory : public std::error_category {
+ const char *name() const noexcept override {
+ return "LLDBExpressionCategory";
+ }
+ std::string message(int __ev) const override {
+ return ExpressionResultAsCString(
+ static_cast<lldb::ExpressionResults>(__ev));
+ };
+};
+ExpressionCategory &expression_category() {
+ static ExpressionCategory g_expression_category;
+ return g_expression_category;
+}
+} // namespace
+
+Status::Status() : m_error(llvm::Error::success()) {}
+
+static llvm::Error ErrorFromEnums(Status::ValueType err, ErrorType type,
+ std::string msg) {
+ switch (type) {
+ case eErrorTypeMachKernel:
+ return llvm::make_error<MachKernelError>(
+ std::error_code(err, std::system_category()));
+ case eErrorTypeWin32:
+ return llvm::make_error<Win32Error>(
+ std::error_code(err, std::system_category()));
+ case eErrorTypePOSIX:
+ if (msg.empty())
+ return llvm::errorCodeToError(
+ std::error_code(err, std::generic_category()));
+ return llvm::createStringError(
+ std::move(msg), std::error_code(err, std::generic_category()));
+ default:
+ return llvm::createStringError(
+ std::move(msg), std::error_code(err, lldb_generic_category()));
+ }
+}
Status::Status(ValueType err, ErrorType type, std::string msg)
- : m_code(err), m_type(type), m_string(std::move(msg)) {}
+ : m_error(ErrorFromEnums(err, type, msg)) {}
-// This logic is confusing because c++ calls the traditional (posix) errno codes
+// This logic is confusing because C++ calls the traditional (posix) errno codes
// "generic errors", while we use the term "generic" to mean completely
// arbitrary (text-based) errors.
Status::Status(std::error_code EC)
- : m_code(EC.value()),
- m_type(EC.category() == std::generic_category() ? eErrorTypePOSIX
- : eErrorTypeGeneric),
- m_string(EC.message()) {}
+ : m_error(!EC ? llvm::Error::success() : llvm::errorCodeToError(EC)) {}
Status::Status(std::string err_str)
- : m_code(LLDB_GENERIC_ERROR), m_type(eErrorTypeGeneric),
- m_string(std::move(err_str)) {}
+ : m_error(
+ llvm::createStringError(llvm::inconvertibleErrorCode(), err_str)) {}
-Status::Status(llvm::Error error) {
- if (!error) {
- Clear();
- return;
- }
-
- // if the error happens to be a errno error, preserve the error code
- error = llvm::handleErrors(
- std::move(error), [&](std::unique_ptr<llvm::ECError> e) -> llvm::Error {
- std::error_code ec = e->convertToErrorCode();
- if (ec.category() == std::generic_category()) {
- m_code = ec.value();
- m_type = ErrorType::eErrorTypePOSIX;
- return llvm::Error::success();
- }
- return llvm::Error(std::move(e));
- });
-
- // Otherwise, just preserve the message
- if (error) {
- m_code = LLDB_GENERIC_ERROR;
- m_type = eErrorTypeGeneric;
- m_string = llvm::toString(std::move(error));
- }
+const Status &Status::operator=(Status &&other) {
+ Clear();
+ llvm::consumeError(std::move(m_error));
+ m_error = std::move(other.m_error);
+ return *this;
}
Status Status::FromErrorStringWithFormat(const char *format, ...) {
@@ -94,26 +128,40 @@ Status Status::FromErrorStringWithFormat(const char *format, ...) {
return Status(string);
}
-Status Status::FromError(llvm::Error error) { return Status(std::move(error)); }
-
-llvm::Error Status::ToError() const {
- if (Success())
- return llvm::Error::success();
- if (m_type == ErrorType::eErrorTypePOSIX)
- return llvm::errorCodeToError(
- std::error_code(m_code, std::generic_category()));
- return llvm::createStringError(AsCString());
+Status Status::FromExpressionError(lldb::ExpressionResults result,
+ std::string msg) {
+ return Status(llvm::make_error<ExpressionError>(
+ std::error_code(result, expression_category()), msg));
}
-Status::~Status() = default;
+/// Creates a deep copy of all known errors and converts all other
+/// errors to a new llvm::StringError.
+static llvm::Error CloneError(const llvm::Error &error) {
+ llvm::Error result = llvm::Error::success();
+ auto clone = [](const llvm::ErrorInfoBase &e) {
+ if (e.isA<CloneableError>())
+ return llvm::Error(static_cast<const CloneableError &>(e).Clone());
+ return llvm::make_error<llvm::StringError>(e.message(),
+ e.convertToErrorCode(), true);
+ };
+ llvm::visitErrors(error, [&](const llvm::ErrorInfoBase &e) {
+ result = joinErrors(std::move(result), clone(e));
+ });
+ return result;
+}
-const Status &Status::operator=(Status &&other) {
- m_code = other.m_code;
- m_type = other.m_type;
- m_string = std::move(other.m_string);
- return *this;
+Status Status::FromError(llvm::Error error) {
+ if (error.isA<llvm::ECError>()) {
+ std::error_code ec = llvm::errorToErrorCode(std::move(error));
+ return Status::FromError(llvm::make_error<CloneableECError>(ec));
+ }
+ return Status(std::move(error));
}
+llvm::Error Status::ToError() const { return CloneError(m_error); }
+
+Status::~Status() { llvm::consumeError(std::move(m_error)); }
+
#ifdef _WIN32
static std::string RetrieveWin32ErrorString(uint32_t error_code) {
char *buffer = nullptr;
@@ -140,6 +188,37 @@ static std::string RetrieveWin32ErrorString(uint32_t error_code) {
}
#endif
+std::string MachKernelError::message() const {
+#if defined(__APPLE__)
+ if (const char *s = ::mach_error_string(convertToErrorCode().value()))
+ return s;
+#endif
+ return "MachKernelError";
+}
+
+std::string Win32Error::message() const {
+#if defined(_WIN32)
+ return RetrieveWin32ErrorString(convertToErrorCode().value());
+#endif
+ return "Win32Error";
+}
+
+std::unique_ptr<CloneableError> CloneableECError::Clone() const {
+ return std::make_unique<CloneableECError>(convertToErrorCode());
+}
+
+std::unique_ptr<CloneableError> MachKernelError::Clone() const {
+ return std::make_unique<MachKernelError>(convertToErrorCode());
+}
+
+std::unique_ptr<CloneableError> Win32Error::Clone() const {
+ return std::make_unique<Win32Error>(convertToErrorCode());
+}
+
+std::unique_ptr<CloneableError> ExpressionError::Clone() const {
+ return std::make_unique<ExpressionError>(convertToErrorCode(), message());
+}
+
// Get the error value as a NULL C string. The error string will be fetched and
// cached on demand. The cached error string value will remain until the error
// value is changed or cleared.
@@ -147,29 +226,12 @@ const char *Status::AsCString(const char *default_error_str) const {
if (Success())
return nullptr;
- if (m_string.empty()) {
- switch (m_type) {
- case eErrorTypeMachKernel:
-#if defined(__APPLE__)
- if (const char *s = ::mach_error_string(m_code))
- m_string.assign(s);
-#endif
- break;
-
- case eErrorTypePOSIX:
- m_string = llvm::sys::StrError(m_code);
- break;
-
- case eErrorTypeWin32:
-#if defined(_WIN32)
- m_string = RetrieveWin32ErrorString(m_code);
-#endif
- break;
+ m_string = llvm::toStringWithoutConsuming(m_error);
+ // Backwards compatibility with older implementations of Status.
+ if (m_error.isA<llvm::ECError>())
+ if (!m_string.empty() && m_string[m_string.size() - 1] == '\n')
+ m_string.pop_back();
- default:
- break;
- }
- }
if (m_string.empty()) {
if (default_error_str)
m_string.assign(default_error_str);
@@ -181,29 +243,64 @@ const char *Status::AsCString(const char *default_error_str) const {
// Clear the error and any cached error string that it might contain.
void Status::Clear() {
- m_code = 0;
- m_type = eErrorTypeInvalid;
- m_string.clear();
+ if (m_error)
+ LLDB_LOG_ERRORV(GetLog(LLDBLog::API), std::move(m_error),
+ "dropping error {0}");
+ m_error = llvm::Error::success();
}
-// Access the error value.
-Status::ValueType Status::GetError() const { return m_code; }
+Status::ValueType Status::GetError() const {
+ Status::ValueType result = 0;
+ llvm::visitErrors(m_error, [&](const llvm::ErrorInfoBase &error) {
+ // Return the first only.
+ if (result)
+ return;
+ std::error_code ec = error.convertToErrorCode();
+ result = ec.value();
+ });
+ return result;
+}
// Access the error type.
-ErrorType Status::GetType() const { return m_type; }
+ErrorType Status::GetType() const {
+ ErrorType result = eErrorTypeInvalid;
+ llvm::visitErrors(m_error, [&](const llvm::ErrorInfoBase &error) {
+ // Return the first only.
+ if (result != eErrorTypeInvalid)
+ return;
+ if (error.isA<MachKernelError>())
+ result = eErrorTypeMachKernel;
+ else if (error.isA<Win32Error>())
+ result = eErrorTypeWin32;
+ else if (error.isA<ExpressionError>())
+ result = eErrorTypeExpression;
+ else if (error.convertToErrorCode().category() == std::generic_category())
+ result = eErrorTypePOSIX;
+ else if (error.convertToErrorCode().category() == lldb_generic_category() ||
+ error.convertToErrorCode() == llvm::inconvertibleErrorCode())
+ result = eErrorTypeGeneric;
+ else
+ result = eErrorTypeInvalid;
+ });
+ return result;
+}
-// Returns true if this object contains a value that describes an error or
-// otherwise non-success result.
-bool Status::Fail() const { return m_code != 0; }
+bool Status::Fail() const {
+ // Note that this does not clear the checked flag in
+ // m_error. Otherwise we'd need to make this thread-safe.
+ return m_error.isA<llvm::ErrorInfoBase>();
+}
Status Status::FromErrno() {
- // Update the error value to be "errno" and update the type to be "POSIX".
- return Status(errno, eErrorTypePOSIX);
+ std::error_code ec = llvm::errnoAsErrorCode();
+ if (ec)
+ return Status::FromError(llvm::make_error<CloneableECError>(ec));
+ return Status();
}
// Returns true if the error code in this object is considered a successful
// return value.
-bool Status::Success() const { return m_code == 0; }
+bool Status::Success() const { return !Fail(); }
void llvm::format_provider<lldb_private::Status>::format(
const lldb_private::Status &error, llvm::raw_ostream &OS,
diff --git a/lldb/test/API/lang/cpp/fpnan/Makefile b/lldb/test/API/lang/cpp/fpnan/Makefile
new file mode 100644
index 000000000000..99998b20bcb0
--- /dev/null
+++ b/lldb/test/API/lang/cpp/fpnan/Makefile
@@ -0,0 +1,3 @@
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/lang/cpp/fpnan/TestFPNaN.py b/lldb/test/API/lang/cpp/fpnan/TestFPNaN.py
new file mode 100644
index 000000000000..6093ef91ac1f
--- /dev/null
+++ b/lldb/test/API/lang/cpp/fpnan/TestFPNaN.py
@@ -0,0 +1,130 @@
+"""
+Test floating point expressions with zero, NaN, dernormalized and infinite
+numbers.
+"""
+
+import lldb
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class FPNaNTestCase(TestBase):
+ def setUp(self):
+ # Call super's setUp().
+ TestBase.setUp(self)
+ # Find the line number to break inside main().
+ self.line = line_number("main.cpp", "// Set break point at this line.")
+
+ def test(self):
+ self.build()
+ exe = self.getBuildArtifact("a.out")
+ self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+
+ # Break inside the main.
+ lldbutil.run_break_set_by_file_and_line(
+ self, "main.cpp", self.line, num_expected_locations=1
+ )
+
+ self.runCmd("run", RUN_SUCCEEDED)
+ # Zero and denorm
+ self.expect(
+ "expr +0.0",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["double", "0"],
+ )
+ self.expect(
+ "expr -0.0",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["double", "0"],
+ )
+ self.expect(
+ "expr 0.0 / 0",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["double", "NaN"],
+ )
+ self.expect(
+ "expr 0 / 0.0",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["double", "NaN"],
+ )
+ self.expect(
+ "expr 1 / +0.0",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["double", "+Inf"],
+ )
+ self.expect(
+ "expr 1 / -0.0",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["double", "-Inf"],
+ )
+ self.expect(
+ "expr +0.0 / +0.0 != +0.0 / +0.0",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["bool", "true"],
+ )
+ self.expect(
+ "expr -1.f * 0",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["float", "-0"],
+ )
+ self.expect(
+ "expr 0x0.123p-1",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["double", "0.0355224609375"],
+ )
+ # NaN
+ self.expect(
+ "expr fnan < fnan",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["bool", "false"],
+ )
+ self.expect(
+ "expr fnan <= fnan",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["bool", "false"],
+ )
+ self.expect(
+ "expr fnan > fnan",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["bool", "false"],
+ )
+ self.expect(
+ "expr fnan >= fnan",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["bool", "false"],
+ )
+ self.expect(
+ "expr fnan == fnan",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["bool", "false"],
+ )
+ self.expect(
+ "expr fnan != fnan",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["bool", "true"],
+ )
+ self.expect(
+ "expr 1.0 <= fnan",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["bool", "false"],
+ )
+ self.expect(
+ "expr 1.0f < fnan",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["bool", "false"],
+ )
+ self.expect(
+ "expr 1.0f != fnan",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["bool", "true"],
+ )
+ self.expect(
+ "expr (unsigned int) fdenorm",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["int", "0"],
+ )
+ self.expect(
+ "expr (unsigned int) (1.0f + fdenorm)",
+ VARIABLES_DISPLAYED_CORRECTLY,
+ substrs=["int", "1"],
+ )
diff --git a/lldb/test/API/lang/cpp/fpnan/main.cpp b/lldb/test/API/lang/cpp/fpnan/main.cpp
new file mode 100644
index 000000000000..8bcfebfaea8e
--- /dev/null
+++ b/lldb/test/API/lang/cpp/fpnan/main.cpp
@@ -0,0 +1,8 @@
+#include <limits>
+
+int main() {
+ float fnan = std::numeric_limits<float>::quiet_NaN();
+ float fdenorm = std::numeric_limits<float>::denorm_min();
+
+ // Set break point at this line.
+}
diff --git a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py
index 3d8aaeda7f4b..1082541aebcf 100644
--- a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py
+++ b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py
@@ -74,10 +74,6 @@ class TestDAP_memory(lldbdap_testcase.DAPTestCaseBase):
].keys(),
)
- # lldb-dap assumes that all reads will be within the same region. On Windows
- # the target string is at the very start of a region so the -1 offset causes
- # the read to only read from the previous region and only return 1 byte.
- @skipIfWindows
def test_readMemory(self):
"""
Tests the 'readMemory' request
@@ -104,10 +100,6 @@ class TestDAP_memory(lldbdap_testcase.DAPTestCaseBase):
mem = self.dap_server.request_readMemory(memref, 2, 3)["body"]
self.assertEqual(b64decode(mem["data"]), b"ad\0")
- # Use a negative offset
- mem = self.dap_server.request_readMemory(memref, -1, 6)["body"]
- self.assertEqual(b64decode(mem["data"])[1:], b"dead\0")
-
# Reads of size 0 are successful
# VS-Code sends those in order to check if a `memoryReference` can actually be dereferenced.
mem = self.dap_server.request_readMemory(memref, 0, 0)
diff --git a/lldb/tools/lldb-dap/package-lock.json b/lldb/tools/lldb-dap/package-lock.json
index 96570e42dbfd..866365971559 100644
--- a/lldb/tools/lldb-dap/package-lock.json
+++ b/lldb/tools/lldb-dap/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "lldb-dap",
- "version": "0.2.4",
+ "version": "0.2.6",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "lldb-dap",
- "version": "0.2.4",
+ "version": "0.2.6",
"license": "Apache 2.0 License with LLVM exceptions",
"devDependencies": {
"@types/node": "^18.11.18",
diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json
index d35accfb6ec4..33b09d56ab17 100644
--- a/lldb/tools/lldb-dap/package.json
+++ b/lldb/tools/lldb-dap/package.json
@@ -1,7 +1,7 @@
{
"name": "lldb-dap",
"displayName": "LLDB DAP",
- "version": "0.2.5",
+ "version": "0.2.6",
"publisher": "llvm-vs-code-extensions",
"homepage": "https://lldb.llvm.org",
"description": "LLDB debugging from VSCode",
@@ -78,6 +78,15 @@
"scope": "resource",
"type": "string",
"description": "The log path for lldb-dap (if any)"
+ },
+ "lldb-dap.environment": {
+ "scope": "resource",
+ "type": "object",
+ "default": {},
+ "description": "The environment of the lldb-dap process.",
+ "additionalProperties": {
+ "type": "string"
+ }
}
}
},
diff --git a/lldb/tools/lldb-dap/src-ts/extension.ts b/lldb/tools/lldb-dap/src-ts/extension.ts
index fdc4f47b238b..36d3dfba18c1 100644
--- a/lldb/tools/lldb-dap/src-ts/extension.ts
+++ b/lldb/tools/lldb-dap/src-ts/extension.ts
@@ -25,9 +25,15 @@ function createDefaultLLDBDapOptions(): LLDBDapOptions {
if (log_path) {
env["LLDBDAP_LOG"] = log_path;
}
-
+ const configEnvironment = config.get<{ [key: string]: string }>("environment") || {};
if (path) {
- return new vscode.DebugAdapterExecutable(path, [], { env });
+ const dbgOptions = {
+ env: {
+ ...configEnvironment,
+ ...env,
+ }
+ };
+ return new vscode.DebugAdapterExecutable(path, [], dbgOptions);
} else if (packageJSONExecutable) {
return new vscode.DebugAdapterExecutable(
packageJSONExecutable.command,
@@ -36,6 +42,7 @@ function createDefaultLLDBDapOptions(): LLDBDapOptions {
...packageJSONExecutable.options,
env: {
...packageJSONExecutable.options?.env,
+ ...configEnvironment,
...env,
},
},
diff --git a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerTest.cpp b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerTest.cpp
index 69ca1720c04f..ba9ca6ea73e3 100644
--- a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerTest.cpp
+++ b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerTest.cpp
@@ -12,6 +12,7 @@
#include "Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h"
#include "lldb/Utility/Connection.h"
#include "lldb/Utility/UnimplementedError.h"
+#include "lldb/lldb-enumerations.h"
namespace lldb_private {
namespace process_gdb_remote {
@@ -25,7 +26,7 @@ TEST(GDBRemoteCommunicationServerTest, SendErrorResponse_ErrorNumber) {
TEST(GDBRemoteCommunicationServerTest, SendErrorResponse_Status) {
MockServerWithMockConnection server;
- Status status(0x42, lldb::eErrorTypeGeneric, "Test error message");
+ Status status(0x42, lldb::eErrorTypePOSIX, "Test error message");
server.SendErrorResponse(status);
EXPECT_THAT(
diff --git a/lldb/unittests/TestingSupport/Host/SocketTestUtilities.cpp b/lldb/unittests/TestingSupport/Host/SocketTestUtilities.cpp
index 2455a4f6f5d4..9777555d57ff 100644
--- a/lldb/unittests/TestingSupport/Host/SocketTestUtilities.cpp
+++ b/lldb/unittests/TestingSupport/Host/SocketTestUtilities.cpp
@@ -102,12 +102,23 @@ static bool CheckIPSupport(llvm::StringRef Proto, llvm::StringRef Addr) {
Proto, Err)
.str();
bool HasProtocolError = false;
- handleAllErrors(std::move(Err), [&](std::unique_ptr<llvm::ECError> ECErr) {
- std::error_code ec = ECErr->convertToErrorCode();
- if (ec == std::make_error_code(std::errc::address_family_not_supported) ||
- ec == std::make_error_code(std::errc::address_not_available))
- HasProtocolError = true;
- });
+ handleAllErrors(
+ std::move(Err),
+ [&](std::unique_ptr<CloneableECError> ECErr) {
+ std::error_code ec = ECErr->convertToErrorCode();
+ if (ec ==
+ std::make_error_code(std::errc::address_family_not_supported) ||
+ ec == std::make_error_code(std::errc::address_not_available))
+ HasProtocolError = true;
+ },
+ [&](std::unique_ptr<llvm::ECError> ECErr) {
+ // FIXME: This code path should not be reachable.
+ std::error_code ec = ECErr->convertToErrorCode();
+ if (ec ==
+ std::make_error_code(std::errc::address_family_not_supported) ||
+ ec == std::make_error_code(std::errc::address_not_available))
+ HasProtocolError = true;
+ });
if (HasProtocolError) {
GTEST_LOG_(WARNING)
<< llvm::formatv(
diff --git a/lldb/unittests/Utility/StatusTest.cpp b/lldb/unittests/Utility/StatusTest.cpp
index be4f2beebcdb..e37c94ac17f2 100644
--- a/lldb/unittests/Utility/StatusTest.cpp
+++ b/lldb/unittests/Utility/StatusTest.cpp
@@ -70,6 +70,14 @@ TEST(StatusTest, ErrorConversion) {
llvm::Error foo = Status::FromErrorString("foo").ToError();
EXPECT_TRUE(bool(foo));
EXPECT_EQ("foo", llvm::toString(std::move(foo)));
+
+ llvm::Error eperm = llvm::errorCodeToError({EPERM, std::generic_category()});
+ llvm::Error eintr = llvm::errorCodeToError({EINTR, std::generic_category()});
+ llvm::Error elist = llvm::joinErrors(std::move(eperm), std::move(eintr));
+ elist = llvm::joinErrors(std::move(elist), llvm::createStringError("foo"));
+ Status list = Status::FromError(std::move(elist));
+ EXPECT_EQ((int)list.GetError(), EPERM);
+ EXPECT_EQ(list.GetType(), eErrorTypePOSIX);
}
#ifdef _WIN32
diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
index 3707ca824f6e..86f2bac7d23e 100644
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@@ -51,11 +51,9 @@ endif()
check_include_file(signal.h HAVE_SIGNAL_H)
check_include_file(sys/ioctl.h HAVE_SYS_IOCTL_H)
check_include_file(sys/mman.h HAVE_SYS_MMAN_H)
-check_include_file(sys/param.h HAVE_SYS_PARAM_H)
check_include_file(sys/resource.h HAVE_SYS_RESOURCE_H)
check_include_file(sys/stat.h HAVE_SYS_STAT_H)
check_include_file(sys/time.h HAVE_SYS_TIME_H)
-check_include_file(sys/types.h HAVE_SYS_TYPES_H)
check_include_file(sysexits.h HAVE_SYSEXITS_H)
check_include_file(termios.h HAVE_TERMIOS_H)
check_include_file(unistd.h HAVE_UNISTD_H)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 144b4497ca63..abeafb761620 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -354,7 +354,7 @@ added in the future:
not be used lightly but only for specific situations such as an
alternative to the *register pinning* performance technique often
used when implementing functional programming languages. At the
- moment only X86, AArch64, and RISCV support this convention. The
+ moment only X86, AArch64, and RISCV support this convention. The
following limitations exist:
- On *X86-32* only up to 4 bit type parameters are supported. No
@@ -685,10 +685,10 @@ implementation defined, the optimizer can't do the latter. The former is
challenging as many commonly expected properties, such as
``ptrtoint(v)-ptrtoint(v) == 0``, don't hold for non-integral types.
Similar restrictions apply to intrinsics that might examine the pointer bits,
-such as :ref:`llvm.ptrmask<int_ptrmask>`.
+such as :ref:`llvm.ptrmask<int_ptrmask>`.
The alignment information provided by the frontend for a non-integral pointer
-(typically using attributes or metadata) must be valid for every possible
+(typically using attributes or metadata) must be valid for every possible
representation of the pointer.
.. _globalvars:
@@ -1677,10 +1677,10 @@ Currently, only the following parameter attributes are defined:
- The range is allowed to wrap.
- The empty range is represented using ``0,0``.
- Otherwise, ``a`` and ``b`` are not allowed to be equal.
-
- This attribute may only be applied to parameters or return values with integer
+
+ This attribute may only be applied to parameters or return values with integer
or vector of integer types.
-
+
For vector-typed parameters, the range is applied element-wise.
.. _gc:
@@ -14346,7 +14346,7 @@ Arguments:
""""""""""
The first 4 arguments are similar to ``llvm.instrprof.increment``. The indexing
is specific to callsites, meaning callsites are indexed from 0, independent from
-the indexes used by the other intrinsics (such as
+the indexes used by the other intrinsics (such as
``llvm.instrprof.increment[.step]``).
The last argument is the called value of the callsite this intrinsic precedes.
@@ -14360,7 +14360,7 @@ a buffer LLVM can use to perform counter increments (i.e. the lowering of
``llvm.instrprof.increment[.step]``. The address range following the counter
buffer, ``<num-counters>`` x ``sizeof(ptr)`` - sized, is expected to contain
pointers to contexts of functions called from this function ("subcontexts").
-LLVM does not dereference into that memory region, just calculates GEPs.
+LLVM does not dereference into that memory region, just calculates GEPs.
The lowering of ``llvm.instrprof.callsite`` consists of:
@@ -14929,8 +14929,8 @@ integer bit width or any vector of integer elements.
Overview:
"""""""""
-Return ``-1`` if ``%a`` is signed less than ``%b``, ``0`` if they are equal, and
-``1`` if ``%a`` is signed greater than ``%b``. Vector intrinsics operate on a per-element basis.
+Return ``-1`` if ``%a`` is signed less than ``%b``, ``0`` if they are equal, and
+``1`` if ``%a`` is signed greater than ``%b``. Vector intrinsics operate on a per-element basis.
Arguments:
""""""""""
@@ -14958,8 +14958,8 @@ integer bit width or any vector of integer elements.
Overview:
"""""""""
-Return ``-1`` if ``%a`` is unsigned less than ``%b``, ``0`` if they are equal, and
-``1`` if ``%a`` is unsigned greater than ``%b``. Vector intrinsics operate on a per-element basis.
+Return ``-1`` if ``%a`` is unsigned less than ``%b``, ``0`` if they are equal, and
+``1`` if ``%a`` is unsigned greater than ``%b``. Vector intrinsics operate on a per-element basis.
Arguments:
""""""""""
@@ -21556,9 +21556,9 @@ Semantics:
""""""""""
The '``llvm.vp.minimum``' intrinsic performs floating-point minimum (:ref:`minimum <i_minimum>`)
-of the first and second vector arguments on each enabled lane, the result being
+of the first and second vector arguments on each enabled lane, the result being
NaN if either argument is a NaN. -0.0 is considered to be less than +0.0 for this
-intrinsic. The result on disabled lanes is a :ref:`poison value <poisonvalues>`.
+intrinsic. The result on disabled lanes is a :ref:`poison value <poisonvalues>`.
The operation is performed in the default floating-point environment.
Examples:
@@ -29191,7 +29191,7 @@ Semantics:
""""""""""
The intrinsic ``@llvm.allow.ubsan.check()`` returns either ``true`` or
-``false``, depending on compiler options.
+``false``, depending on compiler options.
For each evaluation of a call to this intrinsic, the program must be valid and
correct both if it returns ``true`` and if it returns ``false``.
@@ -29250,13 +29250,13 @@ Semantics:
""""""""""
The intrinsic ``@llvm.allow.runtime.check()`` returns either ``true`` or
-``false``, depending on compiler options.
+``false``, depending on compiler options.
For each evaluation of a call to this intrinsic, the program must be valid and
correct both if it returns ``true`` and if it returns ``false``.
When used in a branch condition, it allows us to choose between
-two alternative correct solutions for the same problem.
+two alternative correct solutions for the same problem.
If the intrinsic is evaluated as ``true``, program should execute a guarded
check. If the intrinsic is evaluated as ``false``, the program should avoid any
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 5914324b286c..ebc917ea53eb 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -2183,6 +2183,21 @@ TLI_DEFINE_ENUM_INTERNAL(sinl)
TLI_DEFINE_STRING_INTERNAL("sinl")
TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl)
+/// void sincos(double x, double *sin_out, double *cos_out);
+TLI_DEFINE_ENUM_INTERNAL(sincos)
+TLI_DEFINE_STRING_INTERNAL("sincos")
+TLI_DEFINE_SIG_INTERNAL(Void, Dbl, Ptr, Ptr)
+
+/// void sincosf(float x, float *sin_out, float *cos_out);
+TLI_DEFINE_ENUM_INTERNAL(sincosf)
+TLI_DEFINE_STRING_INTERNAL("sincosf")
+TLI_DEFINE_SIG_INTERNAL(Void, Flt, Ptr, Ptr)
+
+/// void sincosl(long double x, long double *sin_out, long double *cos_out);
+TLI_DEFINE_ENUM_INTERNAL(sincosl)
+TLI_DEFINE_STRING_INTERNAL("sincosl")
+TLI_DEFINE_SIG_INTERNAL(Void, LDbl, Ptr, Ptr)
+
/// int siprintf(char *str, const char *format, ...);
TLI_DEFINE_ENUM_INTERNAL(siprintf)
TLI_DEFINE_STRING_INTERNAL("siprintf")
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 7ee8ca18c2c1..d6c2c36a0d48 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -772,7 +772,7 @@ public:
SDValue getMCSymbol(MCSymbol *Sym, EVT VT);
SDValue getValueType(EVT);
- SDValue getRegister(unsigned Reg, EVT VT);
+ SDValue getRegister(Register Reg, EVT VT);
SDValue getRegisterMask(const uint32_t *RegMask);
SDValue getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label);
SDValue getLabelNode(unsigned Opcode, const SDLoc &dl, SDValue Root,
@@ -784,7 +784,7 @@ public:
return getBlockAddress(BA, VT, Offset, true, TargetFlags);
}
- SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg,
+ SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg,
SDValue N) {
return getNode(ISD::CopyToReg, dl, MVT::Other, Chain,
getRegister(Reg, N.getValueType()), N);
@@ -793,7 +793,7 @@ public:
// This version of the getCopyToReg method takes an extra operand, which
// indicates that there is potentially an incoming glue value (if Glue is not
// null) and that there should be a glue result.
- SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N,
+ SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N,
SDValue Glue) {
SDVTList VTs = getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, getRegister(Reg, N.getValueType()), N, Glue };
@@ -810,7 +810,7 @@ public:
ArrayRef(Ops, Glue.getNode() ? 4 : 3));
}
- SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT) {
+ SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT) {
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, getRegister(Reg, VT) };
return getNode(ISD::CopyFromReg, dl, VTs, Ops);
@@ -819,7 +819,7 @@ public:
// This version of the getCopyFromReg method takes an extra operand, which
// indicates that there is potentially an incoming glue value (if Glue is not
// null) and that there should be a glue result.
- SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT,
+ SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT,
SDValue Glue) {
SDVTList VTs = getVTList(VT, MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, getRegister(Reg, VT), Glue };
diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
index 9282c4a771af..9f1d6f7b4f95 100644
--- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -170,7 +170,7 @@ struct CaseBlock {
struct JumpTable {
/// The virtual register containing the index of the jump table entry
/// to jump to.
- unsigned Reg;
+ Register Reg;
/// The JumpTableIndex for this jump table in the function.
unsigned JTI;
/// The MBB into which to emit the code for the indirect jump.
@@ -182,7 +182,7 @@ struct JumpTable {
/// The debug location of the instruction this JumpTable was produced from.
std::optional<SDLoc> SL; // For SelectionDAG
- JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, MachineBasicBlock *D,
+ JumpTable(Register R, unsigned J, MachineBasicBlock *M, MachineBasicBlock *D,
std::optional<SDLoc> SL)
: Reg(R), JTI(J), MBB(M), Default(D), SL(SL) {}
};
@@ -218,7 +218,7 @@ struct BitTestBlock {
APInt First;
APInt Range;
const Value *SValue;
- unsigned Reg;
+ Register Reg;
MVT RegVT;
bool Emitted;
bool ContiguousRange;
@@ -229,7 +229,7 @@ struct BitTestBlock {
BranchProbability DefaultProb;
bool FallthroughUnreachable = false;
- BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E,
+ BitTestBlock(APInt F, APInt R, const Value *SV, Register Rg, MVT RgVT, bool E,
bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
BitTestInfo C, BranchProbability Pr)
: First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h
index dee462255b0b..52221762fed5 100644
--- a/llvm/include/llvm/CodeGen/VirtRegMap.h
+++ b/llvm/include/llvm/CodeGen/VirtRegMap.h
@@ -31,12 +31,6 @@ class raw_ostream;
class TargetInstrInfo;
class VirtRegMap : public MachineFunctionPass {
- public:
- enum {
- NO_STACK_SLOT = (1L << 30)-1,
- };
-
- private:
MachineRegisterInfo *MRI = nullptr;
const TargetInstrInfo *TII = nullptr;
const TargetRegisterInfo *TRI = nullptr;
@@ -69,6 +63,8 @@ class TargetInstrInfo;
public:
static char ID;
+ static constexpr int NO_STACK_SLOT = INT_MAX;
+
VirtRegMap() : MachineFunctionPass(ID), Virt2StackSlotMap(NO_STACK_SLOT) {}
VirtRegMap(const VirtRegMap &) = delete;
VirtRegMap &operator=(const VirtRegMap &) = delete;
diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake
index d71ff40144c0..4c9404d95daf 100644
--- a/llvm/include/llvm/Config/config.h.cmake
+++ b/llvm/include/llvm/Config/config.h.cmake
@@ -191,9 +191,6 @@
/* Define to 1 if you have the <sys/mman.h> header file. */
#cmakedefine HAVE_SYS_MMAN_H ${HAVE_SYS_MMAN_H}
-/* Define to 1 if you have the <sys/param.h> header file. */
-#cmakedefine HAVE_SYS_PARAM_H ${HAVE_SYS_PARAM_H}
-
/* Define to 1 if you have the <sys/resource.h> header file. */
#cmakedefine HAVE_SYS_RESOURCE_H ${HAVE_SYS_RESOURCE_H}
@@ -209,9 +206,6 @@
/* Define to 1 if stat struct has st_mtim member. */
#cmakedefine HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC ${HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC}
-/* Define to 1 if you have the <sys/types.h> header file. */
-#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}
-
/* Define to 1 if you have the <termios.h> header file. */
#cmakedefine HAVE_TERMIOS_H ${HAVE_TERMIOS_H}
diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
index 30ca43d2bde0..09b9d947464a 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -104,6 +104,8 @@ enum class PrimitiveKind {
Double,
Ldouble,
Nullptr,
+ Auto,
+ DecltypeAuto,
};
enum class CharKind {
diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
index 49ce417e6fbb..54ae436d90b2 100644
--- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
@@ -426,7 +426,7 @@ public:
virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) = 0;
- /// ParseInstruction - Parse one assembly instruction.
+ /// Parse one assembly instruction.
///
/// The parser is positioned following the instruction name. The target
/// specific instruction parser should parse the entire instruction and
@@ -439,11 +439,11 @@ public:
/// \param Operands [out] - The list of parsed operands, this returns
/// ownership of them to the caller.
/// \return True on failure.
- virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) = 0;
- virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
AsmToken Token, OperandVector &Operands) {
- return ParseInstruction(Info, Name, Token.getLoc(), Operands);
+ return parseInstruction(Info, Name, Token.getLoc(), Operands);
}
/// ParseDirective - Parse a target specific assembler directive
@@ -471,19 +471,19 @@ public:
/// \param DirectiveID - The token identifying the directive.
virtual ParseStatus parseDirective(AsmToken DirectiveID);
- /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
+ /// Recognize a series of operands of a parsed
/// instruction as an actual MCInst and emit it to the specified MCStreamer.
/// This returns false on success and returns true on failure to match.
///
/// On failure, the target parser is responsible for emitting a diagnostic
/// explaining the match failure.
- virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ virtual bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) = 0;
/// Allows targets to let registers opt out of clobber lists.
- virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
+ virtual bool omitRegisterFromClobberLists(unsigned RegNo) { return false; }
/// Allow a target to add special case operand matching for things that
/// tblgen doesn't/can't handle effectively. For example, literal
diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h
index 22b46bd8d7da..c01516aa9d31 100644
--- a/llvm/include/llvm/SandboxIR/SandboxIR.h
+++ b/llvm/include/llvm/SandboxIR/SandboxIR.h
@@ -130,6 +130,7 @@ class GlobalValue;
class GlobalObject;
class GlobalIFunc;
class GlobalVariable;
+class GlobalAlias;
class Context;
class Function;
class Instruction;
@@ -336,6 +337,7 @@ protected:
friend class GlobalObject; // For `Val`.
friend class GlobalIFunc; // For `Val`.
friend class GlobalVariable; // For `Val`.
+ friend class GlobalAlias; // For `Val`.
/// All values point to the context.
Context &Ctx;
@@ -1528,6 +1530,38 @@ public:
#endif
};
+class GlobalAlias final
+ : public GlobalWithNodeAPI<GlobalAlias, llvm::GlobalAlias, GlobalValue,
+ llvm::GlobalValue> {
+ GlobalAlias(llvm::GlobalAlias *C, Context &Ctx)
+ : GlobalWithNodeAPI(ClassID::GlobalAlias, C, Ctx) {}
+ friend class Context; // For constructor.
+
+public:
+ /// For isa/dyn_cast.
+ static bool classof(const sandboxir::Value *From) {
+ return From->getSubclassID() == ClassID::GlobalAlias;
+ }
+
+ // TODO: Missing create() due to unimplemented sandboxir::Module.
+
+ // TODO: Missing copyAttributresFrom().
+ // TODO: Missing removeFromParent(), eraseFromParent().
+
+ void setAliasee(Constant *Aliasee);
+ Constant *getAliasee() const;
+
+ const GlobalObject *getAliaseeObject() const;
+ GlobalObject *getAliaseeObject() {
+ return const_cast<GlobalObject *>(
+ static_cast<const GlobalAlias *>(this)->getAliaseeObject());
+ }
+
+ static bool isValidLinkage(LinkageTypes L) {
+ return llvm::GlobalAlias::isValidLinkage(L);
+ }
+};
+
class BlockAddress final : public Constant {
BlockAddress(llvm::BlockAddress *C, Context &Ctx)
: Constant(ClassID::BlockAddress, C, Ctx) {}
@@ -1659,6 +1693,8 @@ public:
/// \Returns the SBInstruction that corresponds to this iterator, or null if
/// the instruction is not found in the IR-to-SandboxIR tables.
pointer get() const { return getInstr(It); }
+ /// \Returns the parent BB.
+ BasicBlock *getNodeParent() const;
};
/// Contains a list of sandboxir::Instruction's.
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 1a722e65bed8..eff8957d1b68 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -34,8 +34,8 @@ namespace llvm {
/// expansions are not supported. If \p MaxSubPatterns is empty then
/// brace expansions are not supported and characters \p "{,}" are treated as
/// literals.
-/// * \p "\" escapes the next character so it is treated as a literal.
-///
+/// * \p "\\" (a single backslash) escapes the next character so it is treated
+/// as a literal.
///
/// Some known edge cases are:
/// * \p "]" is allowed as the first character in a character class, i.e.,
@@ -45,9 +45,8 @@ namespace llvm {
/// * \p "}" and \p "," that are not inside a brace expansion are taken as
/// literals, e.g., \p ",}" is valid but \p "{" is not.
///
-///
-/// For example, \p "*[/\\]foo.{c,cpp}" will match (unix or windows) paths to
-/// all files named \p "foo.c" or \p "foo.cpp".
+/// For example, \p "*[/\\\\]foo.{c,cpp}" (with two backslashes) will match
+/// (unix or windows) paths to all files named \p "foo.c" or \p "foo.cpp".
class GlobPattern {
public:
/// \param Pat the pattern to match against
diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h
index 2570c826502e..34f91cbe9551 100644
--- a/llvm/include/llvm/Support/raw_ostream.h
+++ b/llvm/include/llvm/Support/raw_ostream.h
@@ -769,6 +769,25 @@ public:
~buffer_unique_ostream() override { *OS << str(); }
};
+// Helper struct to add indentation to raw_ostream. Instead of
+// OS.indent(6) << "more stuff";
+// you can use
+// OS << indent(6) << "more stuff";
+// which has better ergonomics (and clang-formats better as well).
+struct indent {
+ unsigned NumSpaces;
+
+ explicit indent(unsigned NumSpaces) : NumSpaces(NumSpaces) {}
+ void operator+=(unsigned N) { NumSpaces += N; }
+ void operator-=(unsigned N) { NumSpaces -= N; }
+ indent operator+(unsigned N) const { return indent(NumSpaces + N); }
+ indent operator-(unsigned N) const { return indent(NumSpaces - N); }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const indent &Indent) {
+ return OS.indent(Indent.NumSpaces);
+}
+
class Error;
/// This helper creates an output stream and then passes it to \p Write.
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index c9e01e3f221b..5348c1177f63 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -603,6 +603,7 @@ public:
Init *convertInitializerTo(RecTy *Ty) const override;
Init *convertInitializerBitRange(ArrayRef<unsigned> Bits) const override;
+ std::optional<int64_t> convertInitializerToInt() const;
bool isComplete() const override {
for (unsigned i = 0; i != getNumBits(); ++i)
@@ -2035,7 +2036,7 @@ public:
}
/// Start timing a phase. Automatically stops any previous phase timer.
- void startTimer(StringRef Name);
+ void startTimer(StringRef Name) const;
/// Stop timing a phase.
void stopTimer();
@@ -2109,12 +2110,13 @@ private:
mutable std::map<std::string, std::vector<Record *>> ClassRecordsMap;
GlobalMap ExtraGlobals;
+ // TODO: Move timing related code out of RecordKeeper.
// These members are for the phase timing feature. We need a timer group,
// the last timer started, and a flag to say whether the last timer
// is the special "backend overall timer."
- TimerGroup *TimingGroup = nullptr;
- Timer *LastTimer = nullptr;
- bool BackendTimer = false;
+ mutable TimerGroup *TimingGroup = nullptr;
+ mutable Timer *LastTimer = nullptr;
+ mutable bool BackendTimer = false;
/// The internal uniquer implementation of the RecordKeeper.
std::unique_ptr<detail::RecordKeeperImpl> Impl;
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index 70739709a810..4b29d3f40ab7 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -270,13 +270,20 @@ public:
// A map from destination modules to lists of imports.
class ImportListsTy {
public:
- ImportListsTy() = default;
- ImportListsTy(size_t Size) : ListsImpl(Size) {}
+ ImportListsTy() : EmptyList(ImportIDs) {}
+ ImportListsTy(size_t Size) : EmptyList(ImportIDs), ListsImpl(Size) {}
ImportMapTy &operator[](StringRef DestMod) {
return ListsImpl.try_emplace(DestMod, ImportIDs).first->second;
}
+ const ImportMapTy &lookup(StringRef DestMod) const {
+ auto It = ListsImpl.find(DestMod);
+ if (It != ListsImpl.end())
+ return It->second;
+ return EmptyList;
+ }
+
size_t size() const { return ListsImpl.size(); }
using const_iterator = DenseMap<StringRef, ImportMapTy>::const_iterator;
@@ -284,6 +291,7 @@ public:
const_iterator end() const { return ListsImpl.end(); }
private:
+ ImportMapTy EmptyList;
DenseMap<StringRef, ImportMapTy> ListsImpl;
ImportIDTable ImportIDs;
};
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 2e7a0ec29ed9..2d3d2ada6183 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -206,6 +206,7 @@ private:
Value *optimizeFMinFMax(CallInst *CI, IRBuilderBase &B);
Value *optimizeLog(CallInst *CI, IRBuilderBase &B);
Value *optimizeSqrt(CallInst *CI, IRBuilderBase &B);
+ Value *optimizeFMod(CallInst *CI, IRBuilderBase &B);
Value *mergeSqrtToExp(CallInst *CI, IRBuilderBase &B);
Value *optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B);
Value *optimizeTrigInversionPairs(CallInst *CI, IRBuilderBase &B);
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
new file mode 100644
index 000000000000..78c1c0e4c046
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
@@ -0,0 +1,62 @@
+//===- Legality.h -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Legality checks for the Sandbox Vectorizer.
+//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_LEGALITY_H
+#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_LEGALITY_H
+
+#include "llvm/SandboxIR/SandboxIR.h"
+
+namespace llvm::sandboxir {
+
+class LegalityAnalysis;
+
+enum class LegalityResultID {
+ Widen, ///> Vectorize by combining scalars to a vector.
+};
+
+/// The legality outcome is represented by a class rather than an enum class
+/// because in some cases the legality checks are expensive and look for a
+/// particular instruction that can be passed along to the vectorizer to avoid
+/// repeating the same expensive computation.
+class LegalityResult {
+protected:
+ LegalityResultID ID;
+ /// Only Legality can create LegalityResults.
+ LegalityResult(LegalityResultID ID) : ID(ID) {}
+ friend class LegalityAnalysis;
+
+public:
+ LegalityResultID getSubclassID() const { return ID; }
+};
+
+class Widen final : public LegalityResult {
+ friend class LegalityAnalysis;
+ Widen() : LegalityResult(LegalityResultID::Widen) {}
+
+public:
+ static bool classof(const LegalityResult *From) {
+ return From->getSubclassID() == LegalityResultID::Widen;
+ }
+};
+
+/// Performs the legality analysis and returns a LegalityResult object.
+class LegalityAnalysis {
+public:
+ LegalityAnalysis() = default;
+ LegalityResult canVectorize(ArrayRef<Value *> Bndl) {
+ // TODO: For now everything is legal.
+ return Widen();
+ }
+};
+
+} // namespace llvm::sandboxir
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_LEGALITY_H
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
index 5b3d1a50aa1e..99582e3e0e02 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
@@ -12,11 +12,18 @@
#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_BOTTOMUPVEC_H
#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_BOTTOMUPVEC_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/SandboxIR/Pass.h"
+#include "llvm/SandboxIR/SandboxIR.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h"
namespace llvm::sandboxir {
class BottomUpVec final : public FunctionPass {
+ bool Change = false;
+ LegalityAnalysis Legality;
+ void vectorizeRec(ArrayRef<Value *> Bndl);
+ void tryVectorize(ArrayRef<Value *> Seeds);
public:
BottomUpVec() : FunctionPass("bottom-up-vec") {}
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h
new file mode 100644
index 000000000000..2f893bac213a
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Region.h
@@ -0,0 +1,104 @@
+//===- Region.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_REGION_H
+#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_REGION_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/SandboxIR/SandboxIR.h"
+#include "llvm/Support/InstructionCost.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm::sandboxir {
+
+/// The main job of the Region is to point to new instructions generated by
+/// vectorization passes. It is the unit that RegionPasses operate on with their
+/// runOnRegion() function.
+///
+/// The region allows us to stack transformations horizontally, meaning that
+/// each transformation operates on a single region and the resulting region is
+/// the input to the next transformation, as opposed to vertically, which is the
+/// common way of applying a transformation across the whole function. This
+/// enables us to check for profitability and decide whether we accept or
+/// rollback at a region granularity, which is much better than doing this at
+/// the function level.
+///
+// Traditional approach: transformations applied vertically for the whole
+// function
+// F
+// +----+
+// | |
+// | |
+// | | -> Transform1 -> ... -> TransformN -> Check Cost
+// | |
+// | |
+// +----+
+//
+// Region-based approach: transformations applied horizontally, for each Region
+// F
+// +----+
+// |Rgn1| -> Transform1 -> ... -> TransformN -> Check Cost
+// | |
+// |Rgn2| -> Transform1 -> ... -> TransformN -> Check Cost
+// | |
+// |Rgn3| -> Transform1 -> ... -> TransformN -> Check Cost
+// +----+
+
+class Region {
+ /// All the instructions in the Region. Only new instructions generated during
+ /// vectorization are part of the Region.
+ SetVector<Instruction *> Insts;
+
+ /// A unique ID, used for debugging.
+ unsigned RegionID = 0;
+
+ Context &Ctx;
+
+ // TODO: Add cost modeling.
+ // TODO: Add a way to encode/decode region info to/from metadata.
+
+public:
+ Region(Context &Ctx);
+ ~Region();
+
+ Context &getContext() const { return Ctx; }
+ /// Returns the region's unique ID.
+ unsigned getID() const { return RegionID; }
+
+ /// Adds I to the set.
+ void add(Instruction *I);
+ /// Removes I from the set.
+ void remove(Instruction *I);
+ /// Returns true if I is in the Region.
+ bool contains(Instruction *I) const { return Insts.contains(I); }
+ /// Returns true if the Region has no instructions.
+ bool empty() const { return Insts.empty(); }
+
+ using iterator = decltype(Insts.begin());
+ iterator begin() { return Insts.begin(); }
+ iterator end() { return Insts.end(); }
+ iterator_range<iterator> insts() { return make_range(begin(), end()); }
+
+#ifndef NDEBUG
+ /// This is an expensive check, meant for testing.
+ bool operator==(const Region &Other) const;
+ bool operator!=(const Region &other) const { return !(*this == other); }
+
+ void dump(raw_ostream &OS) const;
+ void dump() const;
+ friend raw_ostream &operator<<(raw_ostream &OS, const Region &Rgn) {
+ Rgn.dump(OS);
+ return OS;
+ }
+#endif
+};
+
+} // namespace llvm::sandboxir
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_REGION_H
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index a88469ab81a8..957ac883490c 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -93,20 +93,26 @@ static bool isDereferenceableAndAlignedPointer(
Visited, MaxDepth);
}
- bool CheckForNonNull, CheckForFreed;
- APInt KnownDerefBytes(Size.getBitWidth(),
- V->getPointerDereferenceableBytes(DL, CheckForNonNull,
- CheckForFreed));
- if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) &&
- !CheckForFreed)
- if (!CheckForNonNull ||
- isKnownNonZero(V, SimplifyQuery(DL, DT, AC, CtxI))) {
- // As we recursed through GEPs to get here, we've incrementally checked
- // that each step advanced by a multiple of the alignment. If our base is
- // properly aligned, then the original offset accessed must also be.
- APInt Offset(DL.getTypeStoreSizeInBits(V->getType()), 0);
- return isAligned(V, Offset, Alignment, DL);
- }
+ auto IsKnownDeref = [&]() {
+ bool CheckForNonNull, CheckForFreed;
+ APInt KnownDerefBytes(Size.getBitWidth(),
+ V->getPointerDereferenceableBytes(DL, CheckForNonNull,
+ CheckForFreed));
+ if (!KnownDerefBytes.getBoolValue() || !KnownDerefBytes.uge(Size) ||
+ CheckForFreed)
+ return false;
+ if (CheckForNonNull &&
+ !isKnownNonZero(V, SimplifyQuery(DL, DT, AC, CtxI)))
+ return false;
+ return true;
+ };
+ if (IsKnownDeref()) {
+ // As we recursed through GEPs to get here, we've incrementally checked
+ // that each step advanced by a multiple of the alignment. If our base is
+ // properly aligned, then the original offset accessed must also be.
+ APInt Offset(DL.getTypeStoreSizeInBits(V->getType()), 0);
+ return isAligned(V, Offset, Alignment, DL);
+ }
/// TODO refactor this function to be able to search independently for
/// Dereferencability and Alignment requirements.
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 5710bda2b2cf..07c189344c64 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -838,7 +838,7 @@ void IRTranslator::splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
MachineBasicBlock *MBB) {
// Emit the code for the jump table
- assert(JT.Reg != -1U && "Should lower JT Header first!");
+ assert(JT.Reg && "Should lower JT Header first!");
MachineIRBuilder MIB(*MBB->getParent());
MIB.setMBB(*MBB);
MIB.setDebugLoc(CurBuilder->getDebugLoc());
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 01e47bd2fb40..e64d3f51a011 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -485,6 +485,10 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(RINT_F);
case TargetOpcode::G_FNEARBYINT:
RTLIBCASE(NEARBYINT_F);
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ RTLIBCASE(TRUNC_F);
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ RTLIBCASE(ROUND_F);
case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
RTLIBCASE(ROUNDEVEN_F);
case TargetOpcode::G_INTRINSIC_LRINT:
@@ -1215,6 +1219,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FSQRT:
case TargetOpcode::G_FRINT:
case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ case TargetOpcode::G_INTRINSIC_ROUND:
case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = LLTy.getSizeInBits();
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 6eed73c15f09..1fcbeeec6f64 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1739,7 +1739,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
if (IndexOp.getImm() != 0 &&
- Src1Ty.getElementCount().getKnownMinValue() % IndexOp.getImm() != 0) {
+ IndexOp.getImm() % Src1Ty.getElementCount().getKnownMinValue() != 0) {
report("Index must be a multiple of the second source vector's "
"minimum vector length",
MI);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 325aba461e80..2c81c829e75c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -3474,7 +3474,7 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
assert(Res.getNode() != N && "Expected a new node!");
- assert(Res.getValueType() == N->getValueType(0) &&
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
@@ -3544,7 +3544,8 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
Op = DAG.getNode(N->getOpcode(), dl, {RVT, MVT::Other},
{Op.getValue(1), Op});
ReplaceValueWith(SDValue(N, 1), Op.getValue(1));
- return Op;
+ ReplaceValueWith(SDValue(N, 0), Op);
+ return SDValue();
}
SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 44ec6f7cab14..3918da3ef031 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -760,7 +760,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
break;
case ISD::Register:
- ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+ ID.AddInteger(cast<RegisterSDNode>(N)->getReg().id());
break;
case ISD::RegisterMask:
ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
@@ -2292,16 +2292,16 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec);
}
-SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
+SDValue SelectionDAG::getRegister(Register Reg, EVT VT) {
SDVTList VTs = getVTList(VT);
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::Register, VTs, std::nullopt);
- ID.AddInteger(RegNo);
+ ID.AddInteger(Reg.id());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<RegisterSDNode>(RegNo, VTs);
+ auto *N = newSDNode<RegisterSDNode>(Reg, VTs);
N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA);
CSEMap.InsertNode(N, IP);
InsertNode(N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 1dbcf8fd7651..eec89f04c635 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -845,13 +845,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
}
}
-RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
+RegsForValue::RegsForValue(const SmallVector<Register, 4> &regs, MVT regvt,
EVT valuevt, std::optional<CallingConv::ID> CC)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
RegCount(1, regs.size()), CallConv(CC) {}
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
- const DataLayout &DL, unsigned Reg, Type *Ty,
+ const DataLayout &DL, Register Reg, Type *Ty,
std::optional<CallingConv::ID> CC) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
@@ -870,7 +870,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
RegCount.push_back(NumRegs);
- Reg += NumRegs;
+ Reg = Reg.id() + NumRegs;
}
}
@@ -1070,9 +1070,9 @@ void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching,
}
}
-SmallVector<std::pair<unsigned, TypeSize>, 4>
+SmallVector<std::pair<Register, TypeSize>, 4>
RegsForValue::getRegsAndSizes() const {
- SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
+ SmallVector<std::pair<Register, TypeSize>, 4> OutVec;
unsigned I = 0;
for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
unsigned RegCount = std::get<0>(CountAndVT);
@@ -2183,7 +2183,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
}
if (!FuncInfo.CanLowerReturn) {
- unsigned DemoteReg = FuncInfo.DemoteRegister;
+ Register DemoteReg = FuncInfo.DemoteRegister;
const Function *F = I.getParent()->getParent();
// Emit a store of the return value through the virtual register.
@@ -2981,7 +2981,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
// Emit the code for the jump table
assert(JT.SL && "Should set SDLoc for SelectionDAG!");
- assert(JT.Reg != -1U && "Should lower JT Header first!");
+ assert(JT.Reg && "Should lower JT Header first!");
EVT PTy = DAG.getTargetLoweringInfo().getJumpTableRegTy(DAG.getDataLayout());
SDValue Index = DAG.getCopyFromReg(getControlRoot(), *JT.SL, JT.Reg, PTy);
SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
@@ -3013,7 +3013,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
SwitchOp =
DAG.getZExtOrTrunc(Sub, dl, TLI.getJumpTableRegTy(DAG.getDataLayout()));
- unsigned JumpTableReg =
+ Register JumpTableReg =
FuncInfo.CreateReg(TLI.getJumpTableRegTy(DAG.getDataLayout()));
SDValue CopyTo =
DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp);
@@ -3261,10 +3261,9 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
/// visitBitTestCase - this function produces one "bit test"
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
- MachineBasicBlock* NextMBB,
+ MachineBasicBlock *NextMBB,
BranchProbability BranchProbToNext,
- unsigned Reg,
- BitTestCase &B,
+ Register Reg, BitTestCase &B,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
MVT VT = BB.RegVT;
@@ -5956,7 +5955,7 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
// getUnderlyingArgRegs - Find underlying registers used for a truncated,
// bitcasted, or split argument. Returns a list of <Register, size in bits>
static void
-getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
+getUnderlyingArgRegs(SmallVectorImpl<std::pair<Register, TypeSize>> &Regs,
const SDValue &N) {
switch (N.getOpcode()) {
case ISD::CopyFromReg: {
@@ -6101,7 +6100,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (FI != std::numeric_limits<int>::max())
Op = MachineOperand::CreateFI(FI);
- SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes;
+ SmallVector<std::pair<Register, TypeSize>, 8> ArgRegsAndSizes;
if (!Op && N.getNode()) {
getUnderlyingArgRegs(ArgRegsAndSizes, N);
Register Reg;
@@ -6131,7 +6130,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!Op) {
// Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
- auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
+ auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<Register, TypeSize>>
SplitRegs) {
unsigned Offset = 0;
for (const auto &RegAndSize : SplitRegs) {
@@ -7748,7 +7747,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
- unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ Register VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT);
if (Intrinsic == Intrinsic::eh_exceptioncode)
N = DAG.getZExtOrTrunc(N, sdl, MVT::i32);
@@ -9653,7 +9652,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MachineFunction &MF = DAG.getMachineFunction();
- SmallVector<unsigned, 4> Regs;
+ SmallVector<Register, 4> Regs;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// No work to do for memory/address operands.
@@ -10078,7 +10077,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
return;
}
- SmallVector<unsigned, 4> Regs;
+ SmallVector<Register, 4> Regs;
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
@@ -11817,8 +11816,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Update the SwiftErrorVRegDefMap.
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
- unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
- if (Register::isVirtualRegister(Reg))
+ Register Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (Reg.isVirtual())
SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
Reg);
}
@@ -11829,8 +11828,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
// general.
- unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
- if (Register::isVirtualRegister(Reg)) {
+ Register Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (Reg.isVirtual()) {
FuncInfo->ValueMap[&Arg] = Reg;
continue;
}
@@ -12654,7 +12653,7 @@ void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) {
// getRegistersForValue may produce 1 to many registers based on whether
// the OpInfo.ConstraintVT is legal on the target or not.
- for (unsigned &Reg : OpInfo.AssignedRegs.Regs) {
+ for (Register &Reg : OpInfo.AssignedRegs.Regs) {
Register OriginalDef = FollowCopyChain(MRI, InitialDef++);
if (Register::isPhysicalRegister(OriginalDef))
FuncInfo.MBB->addLiveIn(OriginalDef);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index b13a2df7b48e..3f8a3e7ffb65 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -526,7 +526,7 @@ public:
void visitBitTestHeader(SwitchCG::BitTestBlock &B,
MachineBasicBlock *SwitchBB);
void visitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB,
- BranchProbability BranchProbToNext, unsigned Reg,
+ BranchProbability BranchProbToNext, Register Reg,
SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB);
void visitJumpTable(SwitchCG::JumpTable &JT);
void visitJumpTableHeader(SwitchCG::JumpTable &JT,
@@ -740,7 +740,7 @@ struct RegsForValue {
/// This list holds the registers assigned to the values.
/// Each legal or promoted value requires one register, and each
/// expanded value requires multiple registers.
- SmallVector<unsigned, 4> Regs;
+ SmallVector<Register, 4> Regs;
/// This list holds the number of registers for each value.
SmallVector<unsigned, 4> RegCount;
@@ -750,10 +750,10 @@ struct RegsForValue {
std::optional<CallingConv::ID> CallConv;
RegsForValue() = default;
- RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
+ RegsForValue(const SmallVector<Register, 4> &regs, MVT regvt, EVT valuevt,
std::optional<CallingConv::ID> CC = std::nullopt);
RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
- const DataLayout &DL, unsigned Reg, Type *Ty,
+ const DataLayout &DL, Register Reg, Type *Ty,
std::optional<CallingConv::ID> CC);
bool isABIMangled() const { return CallConv.has_value(); }
@@ -796,7 +796,7 @@ struct RegsForValue {
}
/// Return a list of registers and their sizes.
- SmallVector<std::pair<unsigned, TypeSize>, 4> getRegsAndSizes() const;
+ SmallVector<std::pair<Register, TypeSize>, 4> getRegsAndSizes() const;
};
} // end namespace llvm
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 263a213bd4f6..2a97580942df 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -895,8 +895,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
if (N->getOpcode() != ISD::CopyToReg)
continue;
- unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
- if (!Register::isVirtualRegister(DestReg))
+ Register DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (!DestReg.isVirtual())
continue;
// Ignore non-integer values.
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index e741a0fc49fb..038c499fe236 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -254,7 +254,7 @@ bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
->createJumpTableIndex(Table);
// Set up the jump table info.
- JumpTable JT(-1U, JTI, JumpTableMBB, nullptr, SL);
+ JumpTable JT(Register(), JTI, JumpTableMBB, nullptr, SL);
JumpTableHeader JTH(Clusters[First].Low->getValue(),
Clusters[Last].High->getValue(), SI->getCondition(),
nullptr, false);
@@ -455,7 +455,7 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
}
BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
- SI->getCondition(), -1U, MVT::Other, false,
+ SI->getCondition(), Register(), MVT::Other, false,
ContiguousRange, nullptr, nullptr, std::move(BTI),
TotalProb);
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index c5835e8c2e98..aa65f3be29da 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -2035,6 +2035,10 @@ Demangler::demanglePrimitiveType(std::string_view &MangledName) {
return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char16);
case 'U':
return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Char32);
+ case 'P':
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Auto);
+ case 'T':
+ return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::DecltypeAuto);
}
break;
}
diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
index 9a9c34ec6d34..ec6e67058c68 100644
--- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -149,6 +149,8 @@ void PrimitiveTypeNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Double, "double");
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Ldouble, "long double");
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Nullptr, "std::nullptr_t");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Auto, "auto");
+ OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, DecltypeAuto, "decltype(auto)");
}
outputQualifiers(OB, Quals, true, false);
}
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 66e52fe2d08f..9eff35642c9f 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -2322,7 +2322,7 @@ bool AsmParser::parseAndMatchAndEmitTargetInstruction(ParseStatementInfo &Info,
// Canonicalize the opcode to lower case.
std::string OpcodeStr = IDVal.lower();
ParseInstructionInfo IInfo(Info.AsmRewrites);
- bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
+ bool ParseHadError = getTargetParser().parseInstruction(IInfo, OpcodeStr, ID,
Info.ParsedOperands);
Info.ParseError = ParseHadError;
@@ -2379,7 +2379,7 @@ bool AsmParser::parseAndMatchAndEmitTargetInstruction(ParseStatementInfo &Info,
// If parsing succeeded, match the instruction.
if (!ParseHadError) {
uint64_t ErrorInfo;
- if (getTargetParser().MatchAndEmitInstruction(
+ if (getTargetParser().matchAndEmitInstruction(
IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
getTargetParser().isParsingMSInlineAsm()))
return true;
@@ -6029,7 +6029,7 @@ bool AsmParser::parseMSInlineAsm(
// Register operand.
if (Operand.isReg() && !Operand.needAddressOf() &&
- !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) {
+ !getTargetParser().omitRegisterFromClobberLists(Operand.getReg())) {
unsigned NumDefs = Desc.getNumDefs();
// Clobber.
if (NumDefs && Operand.getMCOperandNum() < NumDefs)
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 9f619c5018b5..0c64af9e460e 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -2657,7 +2657,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
// Canonicalize the opcode to lower case.
std::string OpcodeStr = IDVal.lower();
ParseInstructionInfo IInfo(Info.AsmRewrites);
- bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
+ bool ParseHadError = getTargetParser().parseInstruction(IInfo, OpcodeStr, ID,
Info.ParsedOperands);
Info.ParseError = ParseHadError;
@@ -2714,7 +2714,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
// If parsing succeeded, match the instruction.
if (!ParseHadError) {
uint64_t ErrorInfo;
- if (getTargetParser().MatchAndEmitInstruction(
+ if (getTargetParser().matchAndEmitInstruction(
IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
getTargetParser().isParsingMSInlineAsm()))
return true;
@@ -7389,7 +7389,7 @@ bool MasmParser::parseMSInlineAsm(
// Register operand.
if (Operand.isReg() && !Operand.needAddressOf() &&
- !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) {
+ !getTargetParser().omitRegisterFromClobberLists(Operand.getReg())) {
unsigned NumDefs = Desc.getNumDefs();
// Clobber.
if (NumDefs && Operand.getMCOperandNum() < NumDefs)
diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp
index c26ba1983db9..f8faef5a386a 100644
--- a/llvm/lib/SandboxIR/SandboxIR.cpp
+++ b/llvm/lib/SandboxIR/SandboxIR.cpp
@@ -306,6 +306,11 @@ BBIterator &BBIterator::operator--() {
return *this;
}
+BasicBlock *BBIterator::getNodeParent() const {
+ llvm::BasicBlock *Parent = const_cast<BBIterator *>(this)->It.getNodeParent();
+ return cast<BasicBlock>(Ctx->getValue(Parent));
+}
+
const char *Instruction::getOpcodeName(Opcode Opc) {
switch (Opc) {
#define OP(OPC) \
@@ -2534,6 +2539,8 @@ template class GlobalWithNodeAPI<Function, llvm::Function, GlobalObject,
llvm::GlobalObject>;
template class GlobalWithNodeAPI<GlobalVariable, llvm::GlobalVariable,
GlobalObject, llvm::GlobalObject>;
+template class GlobalWithNodeAPI<GlobalAlias, llvm::GlobalAlias, GlobalValue,
+ llvm::GlobalValue>;
} // namespace llvm::sandboxir
void GlobalIFunc::setResolver(Constant *Resolver) {
@@ -2587,6 +2594,24 @@ void GlobalVariable::setExternallyInitialized(bool V) {
cast<llvm::GlobalVariable>(Val)->setExternallyInitialized(V);
}
+void GlobalAlias::setAliasee(Constant *Aliasee) {
+ Ctx.getTracker()
+ .emplaceIfTracking<
+ GenericSetter<&GlobalAlias::getAliasee, &GlobalAlias::setAliasee>>(
+ this);
+ cast<llvm::GlobalAlias>(Val)->setAliasee(cast<llvm::Constant>(Aliasee->Val));
+}
+
+Constant *GlobalAlias::getAliasee() const {
+ return cast<Constant>(
+ Ctx.getOrCreateConstant(cast<llvm::GlobalAlias>(Val)->getAliasee()));
+}
+
+const GlobalObject *GlobalAlias::getAliaseeObject() const {
+ return cast<GlobalObject>(Ctx.getOrCreateConstant(
+ cast<llvm::GlobalAlias>(Val)->getAliaseeObject()));
+}
+
void GlobalValue::setUnnamedAddr(UnnamedAddr V) {
Ctx.getTracker()
.emplaceIfTracking<GenericSetter<&GlobalValue::getUnnamedAddr,
@@ -2803,6 +2828,10 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) {
It->second = std::unique_ptr<GlobalVariable>(
new GlobalVariable(cast<llvm::GlobalVariable>(C), *this));
break;
+ case llvm::Value::GlobalAliasVal:
+ It->second = std::unique_ptr<GlobalAlias>(
+ new GlobalAlias(cast<llvm::GlobalAlias>(C), *this));
+ break;
default:
It->second = std::unique_ptr<Constant>(new Constant(C, *this));
break;
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 97ae0b092b81..ff2da3badb36 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -497,18 +497,24 @@ Init *BitsInit::convertInitializerTo(RecTy *Ty) const {
}
if (isa<IntRecTy>(Ty)) {
- int64_t Result = 0;
- for (unsigned i = 0, e = getNumBits(); i != e; ++i)
- if (auto *Bit = dyn_cast<BitInit>(getBit(i)))
- Result |= static_cast<int64_t>(Bit->getValue()) << i;
- else
- return nullptr;
- return IntInit::get(getRecordKeeper(), Result);
+ std::optional<int64_t> Result = convertInitializerToInt();
+ if (Result)
+ return IntInit::get(getRecordKeeper(), *Result);
}
return nullptr;
}
+std::optional<int64_t> BitsInit::convertInitializerToInt() const {
+ int64_t Result = 0;
+ for (unsigned i = 0, e = getNumBits(); i != e; ++i)
+ if (auto *Bit = dyn_cast<BitInit>(getBit(i)))
+ Result |= static_cast<int64_t>(Bit->getValue()) << i;
+ else
+ return std::nullopt;
+ return Result;
+}
+
Init *
BitsInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
SmallVector<Init *, 16> NewBits(Bits.size());
@@ -3219,7 +3225,7 @@ Init *RecordKeeper::getNewAnonymousName() {
// These functions implement the phase timing facility. Starting a timer
// when one is already running stops the running one.
-void RecordKeeper::startTimer(StringRef Name) {
+void RecordKeeper::startTimer(StringRef Name) const {
if (TimingGroup) {
if (LastTimer && LastTimer->isRunning()) {
LastTimer->stopTimer();
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index 8a7d2af34498..737fc7390455 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -836,9 +836,11 @@ def : InstRW<[N2Write_3c_1V], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
def : SchedAlias<WriteFCvt, N2Write_3c_1V0>;
// FP move, immed
-// FP move, register
def : SchedAlias<WriteFImm, N2Write_2c_1V>;
+// FP move, register
+def : InstRW<[N2Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
+
// FP transfer, from gen to low half of vec reg
def : InstRW<[N2Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;
@@ -858,9 +860,8 @@ def : InstRW<[N2Write_6c_1L], (instregex "^LDR[SDQ]l$",
"^LDUR[BHSDQ]i$")>;
// Load vector reg, immed post-index
-def : InstRW<[N2Write_6c_1I_1L, WriteI], (instregex "^LDR[BHSDQ]post$")>;
// Load vector reg, immed pre-index
-def : InstRW<[WriteAdr, N2Write_6c_1I_1L], (instregex "^LDR[BHSDQ]pre$")>;
+def : InstRW<[WriteAdr, N2Write_6c_1I_1L], (instregex "^LDR[BHSDQ](post|pre)$")>;
// Load vector reg, unsigned immed
def : InstRW<[N2Write_6c_1L], (instregex "^LDR[BHSDQ]ui$")>;
@@ -1119,7 +1120,7 @@ def : InstRW<[N2Write_5c_1V], (instregex "^FMLALv", "^FMLSLv")>;
// ASIMD FP round, D-form F32 and Q-form F64
def : InstRW<[N2Write_3c_1V0],
(instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
- "^FRINT[32|64)[XZ]v2f(32|64)$")>;
+ "^FRINT(32|64)[XZ]v2f(32|64)$")>;
// ASIMD FP round, D-form F16 and Q-form F32
def : InstRW<[N2Write_4c_2V0],
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 13a7eef47885..4f6131fd8355 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -231,12 +231,12 @@ private:
bool validateInstruction(MCInst &Inst, SMLoc &IDLoc,
SmallVectorImpl<SMLoc> &Loc);
unsigned getNumRegsForRegKind(RegKind K);
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
-/// @name Auto-generated Match Functions
-/// {
+ /// @name Auto-generated Match Functions
+ /// {
#define GET_ASSEMBLER_HEADER
#include "AArch64GenAsmMatcher.inc"
@@ -321,7 +321,7 @@ public:
bool areEqualRegs(const MCParsedAsmOperand &Op1,
const MCParsedAsmOperand &Op2) const override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
@@ -5086,9 +5086,8 @@ bool AArch64AsmParser::areEqualRegs(const MCParsedAsmOperand &Op1,
return false;
}
-/// ParseInstruction - Parse an AArch64 instruction mnemonic followed by its
-/// operands.
-bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
+/// Parse an AArch64 instruction mnemonic followed by its operands.
+bool AArch64AsmParser::parseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
Name = StringSwitch<StringRef>(Name.lower())
@@ -6205,7 +6204,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
static const char *getSubtargetFeatureName(uint64_t Val);
-bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool AArch64AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 9f8926432d00..e8674c4c7759 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -640,27 +640,38 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
case Intrinsic::amdgcn_cvt_pkrtz: {
- Value *Src0 = II.getArgOperand(0);
- Value *Src1 = II.getArgOperand(1);
- if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
- if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
- const fltSemantics &HalfSem =
- II.getType()->getScalarType()->getFltSemantics();
+ auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
+ Type *HalfTy = Type::getHalfTy(Arg->getContext());
+
+ if (isa<PoisonValue>(Arg))
+ return PoisonValue::get(HalfTy);
+ if (isa<UndefValue>(Arg))
+ return UndefValue::get(HalfTy);
+
+ ConstantFP *CFP = nullptr;
+ if (match(Arg, m_ConstantFP(CFP))) {
bool LosesInfo;
- APFloat Val0 = C0->getValueAPF();
- APFloat Val1 = C1->getValueAPF();
- Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
- Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
-
- Constant *Folded =
- ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
- ConstantFP::get(II.getContext(), Val1)});
- return IC.replaceInstUsesWith(II, Folded);
+ APFloat Val(CFP->getValueAPF());
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
+ return ConstantFP::get(HalfTy, Val);
}
- }
- if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
- return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+ Value *Src = nullptr;
+ if (match(Arg, m_FPExt(m_Value(Src)))) {
+ if (Src->getType()->isHalfTy())
+ return Src;
+ }
+
+ return nullptr;
+ };
+
+ if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
+ if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
+ Value *V = PoisonValue::get(II.getType());
+ V = IC.Builder.CreateInsertElement(V, Src0, (uint64_t)0);
+ V = IC.Builder.CreateInsertElement(V, Src1, (uint64_t)1);
+ return IC.replaceInstUsesWith(II, V);
+ }
}
break;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index f2c9619cb827..bc771d4ef6c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3076,11 +3076,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
- case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
- applyDefaultMapping(OpdMapper);
- executeInWaterfallLoop(B, MI, {2, 5});
- return;
- }
+ case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 5db6c52d189e..bab3f8a08781 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1256,7 +1256,7 @@ class KernelScopeInfo {
}
void usesAgprAt(int i) {
- // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
+ // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
if (!hasMAIInsts(*MSTI))
return;
@@ -1597,7 +1597,7 @@ public:
unsigned checkTargetMatchPredicate(MCInst &Inst) override;
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -1605,7 +1605,7 @@ public:
ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
OperandMode Mode = OperandMode_Default);
StringRef parseMnemonicSuffix(StringRef Name);
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
//bool ProcessInstruction(MCInst &Inst);
@@ -5288,7 +5288,7 @@ static bool isInvalidVOPDY(const OperandVector &Operands,
return false;
}
-bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -6393,9 +6393,9 @@ static void applyMnemonicAliases(StringRef &Mnemonic,
const FeatureBitset &Features,
unsigned VariantID);
-bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
- StringRef Name,
- SMLoc NameLoc, OperandVector &Operands) {
+bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
// Add the instruction mnemonic
Name = parseMnemonicSuffix(Name);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4a861f0c03a0..b197f38d054f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7443,98 +7443,49 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
SDLoc SL(Op);
EVT VT = Op.getValueType();
- if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
- VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
- EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
- VT.getVectorNumElements() / 2);
- MVT HalfIntVT = MVT::getIntegerVT(HalfVT.getSizeInBits());
+ if (VT == MVT::v2f16 || VT == MVT::v2i16 || VT == MVT::v2bf16) {
+ assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
- // Turn into pair of packed build_vectors.
- // TODO: Special case for constants that can be materialized with s_mov_b64.
- SmallVector<SDValue, 4> LoOps, HiOps;
- for (unsigned I = 0, E = VT.getVectorNumElements() / 2; I != E; ++I) {
- LoOps.push_back(Op.getOperand(I));
- HiOps.push_back(Op.getOperand(I + E));
- }
- SDValue Lo = DAG.getBuildVector(HalfVT, SL, LoOps);
- SDValue Hi = DAG.getBuildVector(HalfVT, SL, HiOps);
-
- SDValue CastLo = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Lo);
- SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Hi);
-
- SDValue Blend = DAG.getBuildVector(MVT::getVectorVT(HalfIntVT, 2), SL,
- { CastLo, CastHi });
- return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
- }
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
- if (VT == MVT::v16i16 || VT == MVT::v16f16 || VT == MVT::v16bf16) {
- EVT QuarterVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
- VT.getVectorNumElements() / 4);
- MVT QuarterIntVT = MVT::getIntegerVT(QuarterVT.getSizeInBits());
-
- SmallVector<SDValue, 4> Parts[4];
- for (unsigned I = 0, E = VT.getVectorNumElements() / 4; I != E; ++I) {
- for (unsigned P = 0; P < 4; ++P)
- Parts[P].push_back(Op.getOperand(I + P * E));
- }
- SDValue Casts[4];
- for (unsigned P = 0; P < 4; ++P) {
- SDValue Vec = DAG.getBuildVector(QuarterVT, SL, Parts[P]);
- Casts[P] = DAG.getNode(ISD::BITCAST, SL, QuarterIntVT, Vec);
+ // Avoid adding defined bits with the zero_extend.
+ if (Hi.isUndef()) {
+ Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
+ SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo);
+ return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo);
}
- SDValue Blend =
- DAG.getBuildVector(MVT::getVectorVT(QuarterIntVT, 4), SL, Casts);
- return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
- }
+ Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi);
- if (VT == MVT::v32i16 || VT == MVT::v32f16 || VT == MVT::v32bf16) {
- EVT QuarterVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
- VT.getVectorNumElements() / 8);
- MVT QuarterIntVT = MVT::getIntegerVT(QuarterVT.getSizeInBits());
+ SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi,
+ DAG.getConstant(16, SL, MVT::i32));
+ if (Lo.isUndef())
+ return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi);
- SmallVector<SDValue, 8> Parts[8];
- for (unsigned I = 0, E = VT.getVectorNumElements() / 8; I != E; ++I) {
- for (unsigned P = 0; P < 8; ++P)
- Parts[P].push_back(Op.getOperand(I + P * E));
- }
- SDValue Casts[8];
- for (unsigned P = 0; P < 8; ++P) {
- SDValue Vec = DAG.getBuildVector(QuarterVT, SL, Parts[P]);
- Casts[P] = DAG.getNode(ISD::BITCAST, SL, QuarterIntVT, Vec);
- }
+ Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
- SDValue Blend =
- DAG.getBuildVector(MVT::getVectorVT(QuarterIntVT, 8), SL, Casts);
- return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
+ SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
+ return DAG.getNode(ISD::BITCAST, SL, VT, Or);
}
- assert(VT == MVT::v2f16 || VT == MVT::v2i16 || VT == MVT::v2bf16);
- assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
+ // Split into 2-element chunks.
+ const unsigned NumParts = VT.getVectorNumElements() / 2;
+ EVT PartVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), 2);
+ MVT PartIntVT = MVT::getIntegerVT(PartVT.getSizeInBits());
- SDValue Lo = Op.getOperand(0);
- SDValue Hi = Op.getOperand(1);
-
- // Avoid adding defined bits with the zero_extend.
- if (Hi.isUndef()) {
- Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
- SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo);
- return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo);
+ SmallVector<SDValue> Casts;
+ for (unsigned P = 0; P < NumParts; ++P) {
+ SDValue Vec = DAG.getBuildVector(
+ PartVT, SL, {Op.getOperand(P * 2), Op.getOperand(P * 2 + 1)});
+ Casts.push_back(DAG.getNode(ISD::BITCAST, SL, PartIntVT, Vec));
}
- Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
- Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi);
-
- SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi,
- DAG.getConstant(16, SL, MVT::i32));
- if (Lo.isUndef())
- return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi);
-
- Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
- Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
-
- SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
- return DAG.getNode(ISD::BITCAST, SL, VT, Or);
+ SDValue Blend =
+ DAG.getBuildVector(MVT::getVectorVT(PartIntVT, NumParts), SL, Casts);
+ return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
}
bool
@@ -10062,8 +10013,6 @@ SDValue SITargetLowering::bufferRsrcPtrToVector(SDValue MaybePointer,
if (!MaybePointer.getValueType().isScalarInteger())
return MaybePointer;
- SDLoc DL(MaybePointer);
-
SDValue Rsrc = DAG.getBitcast(MVT::v4i32, MaybePointer);
return Rsrc;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index fd9fe1196b78..a56682726013 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -820,7 +820,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
Inst.getOpcode() != AMDGPU::DS_CONSUME &&
Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
for (const MachineOperand &Op : Inst.all_uses()) {
- if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
+ if (TRI->isVectorRegister(*MRI, Op.getReg()))
setExpScore(&Inst, TRI, MRI, Op, CurrScore);
}
}
@@ -872,7 +872,7 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
}
}
for (const MachineOperand &Op : Inst.all_uses()) {
- if (Op.isReg() && TRI->isVectorRegister(*MRI, Op.getReg()))
+ if (TRI->isVectorRegister(*MRI, Op.getReg()))
setExpScore(&Inst, TRI, MRI, Op, CurrScore);
}
}
@@ -2327,7 +2327,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
HasVMemStore = true;
}
for (const MachineOperand &Op : MI.all_uses()) {
- if (!Op.isReg() || !TRI->isVectorRegister(*MRI, Op.getReg()))
+ if (!TRI->isVectorRegister(*MRI, Op.getReg()))
continue;
RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
// Vgpr use
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e4a679f6a3ef..30aa36be99c9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6231,10 +6231,9 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
return;
Register DstReg = MRI.createVirtualRegister(DstRC);
- auto Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
-
+ auto Copy =
+ BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).addReg(OpReg);
Op.setReg(DstReg);
- Op.setSubReg(0);
MachineInstr *Def = MRI.getVRegDef(OpReg);
if (!Def)
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 42440bc36f24..fe26d6c2dd09 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1589,7 +1589,7 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP)
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
- for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
+ for (unsigned Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
VPUSH.addReg(Reg);
// Clear FP registers with a VSCCLRM.
@@ -1794,7 +1794,7 @@ void ARMExpandPseudo::CMSERestoreFPRegsV81(
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP)
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
- for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
+ for (unsigned Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
VPOP.addReg(Reg, RegState::Define);
}
}
@@ -2044,13 +2044,14 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, int JumpReg,
- const LivePhysRegs &LiveRegs, bool Thumb1Only) {
+ MachineBasicBlock::iterator MBBI,
+ Register JumpReg, const LivePhysRegs &LiveRegs,
+ bool Thumb1Only) {
const DebugLoc &DL = MBBI->getDebugLoc();
if (Thumb1Only) { // push Lo and Hi regs separately
MachineInstrBuilder PushMIB =
BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
- for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
+ for (unsigned Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
PushMIB.addReg(
Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
}
@@ -2062,7 +2063,8 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
// memory, and allow us to later pop them with a single instructions.
// FIXME: Could also use any of r0-r3 that are free (including in the
// first PUSH above).
- for (int LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; --LoReg) {
+ for (unsigned LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4;
+ --LoReg) {
if (JumpReg == LoReg)
continue;
BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
@@ -2072,7 +2074,7 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
}
MachineInstrBuilder PushMIB2 =
BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
- for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
+ for (unsigned Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
if (Reg == JumpReg)
continue;
PushMIB2.addReg(Reg, RegState::Kill);
@@ -2082,7 +2084,7 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
// the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been
// saved.
if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
- int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
+ Register LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
.addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
.add(predOps(ARMCC::AL));
@@ -2095,7 +2097,7 @@ static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
- for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
+ for (unsigned Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
PushMIB.addReg(
Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
}
@@ -2125,7 +2127,7 @@ static void CMSEPopCalleeSaves(const TargetInstrInfo &TII,
BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP)
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
- for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
+ for (unsigned Reg = ARM::R4; Reg < ARM::R12; ++Reg)
PopMIB.addReg(Reg, RegState::Define);
}
}
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 3e3f134d3470..7d74f86c164f 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -712,7 +712,7 @@ public:
bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
@@ -723,7 +723,7 @@ public:
checkEarlyTargetMatchPredicate(MCInst &Inst,
const OperandVector &Operands) override;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -7051,7 +7051,7 @@ void removeVPTCondCode(OperandVector &Operands, unsigned &MnemonicOpsEndInd) {
}
/// Parse an arm instruction mnemonic followed by its operands.
-bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+bool ARMAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
MCAsmParser &Parser = getParser();
@@ -11350,7 +11350,7 @@ static std::string ARMMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
unsigned VariantID = 0);
static const char *getSubtargetFeatureName(uint64_t Val);
-bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool ARMAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
@@ -11427,7 +11427,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Implement any new match types added!");
}
-/// parseDirective parses the arm specific directives
+/// ParseDirective parses the arm specific directives
bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
const MCContext::Environment Format = getContext().getObjectFileType();
bool IsMachO = Format == MCContext::IsMachO;
@@ -12120,7 +12120,7 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
return false;
}
-/// parseDirective
+/// parseDirectivePad
/// ::= .pad offset
bool ARMAsmParser::parseDirectivePad(SMLoc L) {
MCAsmParser &Parser = getParser();
diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index 193722fa3561..b4971e43b48e 100644
--- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -51,7 +51,7 @@ class AVRAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "AVRGenAsmMatcher.inc"
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -60,7 +60,7 @@ class AVRAsmParser : public MCTargetAsmParser {
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;
@@ -320,7 +320,7 @@ bool AVRAsmParser::emit(MCInst &Inst, SMLoc const &Loc, MCStreamer &Out) const {
return false;
}
-bool AVRAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
+bool AVRAsmParser::matchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
@@ -623,7 +623,7 @@ void AVRAsmParser::eatComma() {
}
}
-bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+bool AVRAsmParser::parseInstruction(ParseInstructionInfo &Info,
StringRef Mnemonic, SMLoc NameLoc,
OperandVector &Operands) {
Operands.push_back(AVROperand::CreateToken(Mnemonic, NameLoc));
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 9672ed009e9b..06b7743e0cd3 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -34,7 +34,7 @@ class BPFAsmParser : public MCTargetAsmParser {
bool PreMatchCheck(OperandVector &Operands);
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -43,7 +43,7 @@ class BPFAsmParser : public MCTargetAsmParser {
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
// "=" is used as assignment operator for assembly statment, so can't be used
@@ -304,7 +304,7 @@ bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
return false;
}
-bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool BPFAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
@@ -483,9 +483,8 @@ ParseStatus BPFAsmParser::parseImmediate(OperandVector &Operands) {
return ParseStatus::Success;
}
-/// ParseInstruction - Parse an BPF instruction which is in BPF verifier
-/// format.
-bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+/// Parse an BPF instruction which is in BPF verifier format.
+bool BPFAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
// The first operand could be either register or actually an operator.
unsigned RegNo = MatchRegisterName(Name);
diff --git a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
index 30bd3dcefa60..d923c96bc008 100644
--- a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
+++ b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
@@ -67,14 +67,14 @@ class CSKYAsmParser : public MCTargetAsmParser {
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;
@@ -656,7 +656,7 @@ bool CSKYAsmParser::generateImmOutOfRangeError(
return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
}
-bool CSKYAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool CSKYAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -1485,7 +1485,7 @@ ParseStatus CSKYAsmParser::parseRegList(OperandVector &Operands) {
return ParseStatus::Success;
}
-bool CSKYAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+bool CSKYAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
// First operand is token for instruction.
Operands.push_back(CSKYOperand::createToken(Name, NameLoc));
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 902ab37bf741..9aa0af3e3a6b 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -553,6 +553,17 @@ def Rbits : DXILOp<30, unary> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}
+def CBits : DXILOp<31, unary> {
+ let Doc = "Returns the number of 1 bits in the specified value.";
+ let LLVMIntrinsic = int_ctpop;
+ let arguments = [OverloadTy];
+ let result = OverloadTy;
+ let overloads =
+ [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+ let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
def FMax : DXILOp<35, binary> {
let Doc = "Float maximum. FMax(a,b) = a > b ? a : b";
let LLVMIntrinsic = int_maxnum;
diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 2fb1c484fc8a..62f188957ccc 100644
--- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -134,7 +134,7 @@ class HexagonAsmParser : public MCTargetAsmParser {
OperandVector &InstOperands, uint64_t &ErrorInfo,
bool MatchingInlineAsm);
void eatToEndOfPacket();
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -180,12 +180,12 @@ public:
bool parseExpressionOrOperand(OperandVector &Operands);
bool parseExpression(MCExpr const *&Expr);
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override {
llvm_unreachable("Unimplemented");
}
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, AsmToken ID,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name, AsmToken ID,
OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
@@ -614,7 +614,7 @@ void HexagonAsmParser::eatToEndOfPacket() {
InBrackets = false;
}
-bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool HexagonAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -1278,7 +1278,7 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
}
}
-bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+bool HexagonAsmParser::parseInstruction(ParseInstructionInfo &Info,
StringRef Name, AsmToken ID,
OperandVector &Operands) {
getLexer().UnLex(ID);
diff --git a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 6ab1375b974e..280f1f3ddbb6 100644
--- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -62,14 +62,14 @@ class LanaiAsmParser : public MCTargetAsmParser {
bool parsePrePost(StringRef Type, int *OffsetValue);
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) override;
- bool MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -645,7 +645,7 @@ public:
} // end anonymous namespace
-bool LanaiAsmParser::MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
+bool LanaiAsmParser::matchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -1161,7 +1161,7 @@ static bool MaybePredicatedInst(const OperandVector &Operands) {
.Default(false);
}
-bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/,
+bool LanaiAsmParser::parseInstruction(ParseInstructionInfo & /*Info*/,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
// First operand is token for instruction
diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index b8f1cdfd2cb3..57c42024b4d2 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -47,10 +47,10 @@ class LoongArchAsmParser : public MCTargetAsmParser {
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -65,7 +65,7 @@ class LoongArchAsmParser : public MCTargetAsmParser {
const Twine &Msg);
/// Helper for processing MC instructions that have been successfully matched
- /// by MatchAndEmitInstruction.
+ /// by matchAndEmitInstruction.
bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands,
MCStreamer &Out);
@@ -793,7 +793,7 @@ bool LoongArchAsmParser::parseOperand(OperandVector &Operands,
return Error(getLoc(), "unknown operand");
}
-bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+bool LoongArchAsmParser::parseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
// First operand in MCInst is instruction mnemonic.
@@ -1506,7 +1506,7 @@ bool LoongArchAsmParser::generateImmOutOfRangeError(
return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
}
-bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool LoongArchAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
index 126176133dc0..3a0d9dd316d8 100644
--- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
+++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
@@ -69,9 +69,9 @@ public:
bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -959,7 +959,7 @@ void M68kAsmParser::eatComma() {
}
}
-bool M68kAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+bool M68kAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
SMLoc Start = getLexer().getLoc();
Operands.push_back(M68kOperand::createToken(Name, Start, Start));
@@ -1024,7 +1024,7 @@ bool M68kAsmParser::emit(MCInst &Inst, SMLoc const &Loc,
return false;
}
-bool M68kAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
+bool M68kAsmParser::matchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
diff --git a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index 2bc1a89ef59c..34ae80669f2c 100644
--- a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -40,7 +40,7 @@ class MSP430AsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
const MCRegisterInfo *MRI;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -49,7 +49,7 @@ class MSP430AsmParser : public MCTargetAsmParser {
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;
@@ -252,7 +252,7 @@ public:
};
} // end anonymous namespace
-bool MSP430AsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
+bool MSP430AsmParser::matchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -385,7 +385,7 @@ bool MSP430AsmParser::parseJccInstruction(ParseInstructionInfo &Info,
return false;
}
-bool MSP430AsmParser::ParseInstruction(ParseInstructionInfo &Info,
+bool MSP430AsmParser::parseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
// Drop .w suffix
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 8ab435c6c6fd..7888c57363ed 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -174,7 +174,7 @@ class MipsAsmParser : public MCTargetAsmParser {
const OperandVector &Operands) override;
unsigned checkTargetMatchPredicate(MCInst &Inst) override;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -190,7 +190,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID);
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
@@ -5992,7 +5992,7 @@ static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands,
return Loc;
}
-bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool MipsAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -6997,10 +6997,10 @@ bool MipsAsmParser::areEqualRegs(const MCParsedAsmOperand &Op1,
return Op1.getReg() == Op2.getReg();
}
-bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+bool MipsAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
MCAsmParser &Parser = getParser();
- LLVM_DEBUG(dbgs() << "ParseInstruction\n");
+ LLVM_DEBUG(dbgs() << "parseInstruction\n");
// We have reached first instruction, module directive are now forbidden.
getTargetStreamer().forbidModuleDirective();
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index c5a40e430886..31a5e937adae 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -6179,6 +6179,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
case MVT::v4i16:
case MVT::v4i32:
case MVT::v4f16:
+ case MVT::v4bf16:
case MVT::v4f32:
case MVT::v8f16: // <4 x f16x2>
case MVT::v8bf16: // <4 x bf16x2>
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 59ad995b44b0..597a976b076a 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -103,32 +103,32 @@ class PPCAsmParser : public MCTargetAsmParser {
bool isPPC64() const { return IsPPC64; }
- bool MatchRegisterName(MCRegister &RegNo, int64_t &IntVal);
+ bool matchRegisterName(MCRegister &RegNo, int64_t &IntVal);
bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) override;
- const MCExpr *ExtractModifierFromExpr(const MCExpr *E,
+ const MCExpr *extractModifierFromExpr(const MCExpr *E,
PPCMCExpr::VariantKind &Variant);
- const MCExpr *FixupVariantKind(const MCExpr *E);
- bool ParseExpression(const MCExpr *&EVal);
+ const MCExpr *fixupVariantKind(const MCExpr *E);
+ bool parseExpression(const MCExpr *&EVal);
- bool ParseOperand(OperandVector &Operands);
+ bool parseOperand(OperandVector &Operands);
- bool ParseDirectiveWord(unsigned Size, AsmToken ID);
- bool ParseDirectiveTC(unsigned Size, AsmToken ID);
- bool ParseDirectiveMachine(SMLoc L);
- bool ParseDirectiveAbiVersion(SMLoc L);
- bool ParseDirectiveLocalEntry(SMLoc L);
- bool ParseGNUAttribute(SMLoc L);
+ bool parseDirectiveWord(unsigned Size, AsmToken ID);
+ bool parseDirectiveTC(unsigned Size, AsmToken ID);
+ bool parseDirectiveMachine(SMLoc L);
+ bool parseDirectiveAbiVersion(SMLoc L);
+ bool parseDirectiveLocalEntry(SMLoc L);
+ bool parseGNUAttribute(SMLoc L);
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
- void ProcessInstruction(MCInst &Inst, const OperandVector &Ops);
+ void processInstruction(MCInst &Inst, const OperandVector &Ops);
/// @name Auto-generated Match Functions
/// {
@@ -150,7 +150,7 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
@@ -818,7 +818,7 @@ addNegOperand(MCInst &Inst, MCOperand &Op, MCContext &Ctx) {
Inst.addOperand(MCOperand::createExpr(MCUnaryExpr::createMinus(Expr, Ctx)));
}
-void PPCAsmParser::ProcessInstruction(MCInst &Inst,
+void PPCAsmParser::processInstruction(MCInst &Inst,
const OperandVector &Operands) {
int Opcode = Inst.getOpcode();
switch (Opcode) {
@@ -1252,7 +1252,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
static std::string PPCMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
unsigned VariantID = 0);
-bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool PPCAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
@@ -1261,7 +1261,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
case Match_Success:
// Post-process instructions (typically extended mnemonics)
- ProcessInstruction(Inst, Operands);
+ processInstruction(Inst, Operands);
Inst.setLoc(IDLoc);
Out.emitInstruction(Inst, getSTI());
return false;
@@ -1291,7 +1291,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Implement any new match types added!");
}
-bool PPCAsmParser::MatchRegisterName(MCRegister &RegNo, int64_t &IntVal) {
+bool PPCAsmParser::matchRegisterName(MCRegister &RegNo, int64_t &IntVal) {
if (getParser().getTok().is(AsmToken::Percent))
getParser().Lex(); // Eat the '%'.
@@ -1364,7 +1364,7 @@ ParseStatus PPCAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
EndLoc = Tok.getEndLoc();
Reg = PPC::NoRegister;
int64_t IntVal;
- if (MatchRegisterName(Reg, IntVal))
+ if (matchRegisterName(Reg, IntVal))
return ParseStatus::NoMatch;
return ParseStatus::Success;
}
@@ -1375,9 +1375,9 @@ ParseStatus PPCAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
/// variant, return the corresponding PPCMCExpr::VariantKind,
/// and a modified expression using the default symbol variant.
/// Otherwise, return NULL.
-const MCExpr *PPCAsmParser::
-ExtractModifierFromExpr(const MCExpr *E,
- PPCMCExpr::VariantKind &Variant) {
+const MCExpr *
+PPCAsmParser::extractModifierFromExpr(const MCExpr *E,
+ PPCMCExpr::VariantKind &Variant) {
MCContext &Context = getParser().getContext();
Variant = PPCMCExpr::VK_PPC_None;
@@ -1426,7 +1426,7 @@ ExtractModifierFromExpr(const MCExpr *E,
case MCExpr::Unary: {
const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
- const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant);
+ const MCExpr *Sub = extractModifierFromExpr(UE->getSubExpr(), Variant);
if (!Sub)
return nullptr;
return MCUnaryExpr::create(UE->getOpcode(), Sub, Context);
@@ -1435,8 +1435,8 @@ ExtractModifierFromExpr(const MCExpr *E,
case MCExpr::Binary: {
const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
PPCMCExpr::VariantKind LHSVariant, RHSVariant;
- const MCExpr *LHS = ExtractModifierFromExpr(BE->getLHS(), LHSVariant);
- const MCExpr *RHS = ExtractModifierFromExpr(BE->getRHS(), RHSVariant);
+ const MCExpr *LHS = extractModifierFromExpr(BE->getLHS(), LHSVariant);
+ const MCExpr *RHS = extractModifierFromExpr(BE->getRHS(), RHSVariant);
if (!LHS && !RHS)
return nullptr;
@@ -1464,8 +1464,7 @@ ExtractModifierFromExpr(const MCExpr *E,
/// them by VK_PPC_TLSGD/VK_PPC_TLSLD. This is necessary to avoid having
/// _GLOBAL_OFFSET_TABLE_ created via ELFObjectWriter::RelocNeedsGOT.
/// FIXME: This is a hack.
-const MCExpr *PPCAsmParser::
-FixupVariantKind(const MCExpr *E) {
+const MCExpr *PPCAsmParser::fixupVariantKind(const MCExpr *E) {
MCContext &Context = getParser().getContext();
switch (E->getKind()) {
@@ -1492,7 +1491,7 @@ FixupVariantKind(const MCExpr *E) {
case MCExpr::Unary: {
const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
- const MCExpr *Sub = FixupVariantKind(UE->getSubExpr());
+ const MCExpr *Sub = fixupVariantKind(UE->getSubExpr());
if (Sub == UE->getSubExpr())
return E;
return MCUnaryExpr::create(UE->getOpcode(), Sub, Context);
@@ -1500,8 +1499,8 @@ FixupVariantKind(const MCExpr *E) {
case MCExpr::Binary: {
const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
- const MCExpr *LHS = FixupVariantKind(BE->getLHS());
- const MCExpr *RHS = FixupVariantKind(BE->getRHS());
+ const MCExpr *LHS = fixupVariantKind(BE->getLHS());
+ const MCExpr *RHS = fixupVariantKind(BE->getRHS());
if (LHS == BE->getLHS() && RHS == BE->getRHS())
return E;
return MCBinaryExpr::create(BE->getOpcode(), LHS, RHS, Context);
@@ -1511,29 +1510,27 @@ FixupVariantKind(const MCExpr *E) {
llvm_unreachable("Invalid expression kind!");
}
-/// ParseExpression. This differs from the default "parseExpression" in that
-/// it handles modifiers.
-bool PPCAsmParser::
-ParseExpression(const MCExpr *&EVal) {
+/// This differs from the default "parseExpression" in that it handles
+/// modifiers.
+bool PPCAsmParser::parseExpression(const MCExpr *&EVal) {
// (ELF Platforms)
// Handle \code @l/@ha \endcode
if (getParser().parseExpression(EVal))
return true;
- EVal = FixupVariantKind(EVal);
+ EVal = fixupVariantKind(EVal);
PPCMCExpr::VariantKind Variant;
- const MCExpr *E = ExtractModifierFromExpr(EVal, Variant);
+ const MCExpr *E = extractModifierFromExpr(EVal, Variant);
if (E)
EVal = PPCMCExpr::create(Variant, E, getParser().getContext());
return false;
}
-/// ParseOperand
/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
/// rNN for MachO.
-bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
+bool PPCAsmParser::parseOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
@@ -1546,7 +1543,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
case AsmToken::Percent: {
MCRegister RegNo;
int64_t IntVal;
- if (MatchRegisterName(RegNo, IntVal))
+ if (matchRegisterName(RegNo, IntVal))
return Error(S, "invalid register name");
Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
@@ -1561,7 +1558,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
case AsmToken::Dollar:
case AsmToken::Exclaim:
case AsmToken::Tilde:
- if (!ParseExpression(EVal))
+ if (!parseExpression(EVal))
break;
// Fall-through
[[fallthrough]];
@@ -1589,7 +1586,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
if (TlsCall && parseOptionalToken(AsmToken::LParen)) {
const MCExpr *TLSSym;
const SMLoc S2 = Parser.getTok().getLoc();
- if (ParseExpression(TLSSym))
+ if (parseExpression(TLSSym))
return Error(S2, "invalid TLS call expression");
E = Parser.getTok().getLoc();
if (parseToken(AsmToken::RParen, "expected ')'"))
@@ -1631,7 +1628,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
switch (getLexer().getKind()) {
case AsmToken::Percent: {
MCRegister RegNo;
- if (MatchRegisterName(RegNo, IntVal))
+ if (matchRegisterName(RegNo, IntVal))
return Error(S, "invalid register name");
break;
}
@@ -1655,7 +1652,7 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
}
/// Parse an instruction mnemonic followed by its operands.
-bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+bool PPCAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
// The first operand is the token for the instruction name.
// If the next character is a '+' or '-', we need to add it to the
@@ -1695,11 +1692,11 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return false;
// Parse the first operand
- if (ParseOperand(Operands))
+ if (parseOperand(Operands))
return true;
while (!parseOptionalToken(AsmToken::EndOfStatement)) {
- if (parseToken(AsmToken::Comma) || ParseOperand(Operands))
+ if (parseToken(AsmToken::Comma) || parseOperand(Operands))
return true;
}
@@ -1731,31 +1728,30 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return false;
}
-/// ParseDirective parses the PPC specific directives
+/// Parses the PPC specific directives
bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
- ParseDirectiveWord(2, DirectiveID);
+ parseDirectiveWord(2, DirectiveID);
else if (IDVal == ".llong")
- ParseDirectiveWord(8, DirectiveID);
+ parseDirectiveWord(8, DirectiveID);
else if (IDVal == ".tc")
- ParseDirectiveTC(isPPC64() ? 8 : 4, DirectiveID);
+ parseDirectiveTC(isPPC64() ? 8 : 4, DirectiveID);
else if (IDVal == ".machine")
- ParseDirectiveMachine(DirectiveID.getLoc());
+ parseDirectiveMachine(DirectiveID.getLoc());
else if (IDVal == ".abiversion")
- ParseDirectiveAbiVersion(DirectiveID.getLoc());
+ parseDirectiveAbiVersion(DirectiveID.getLoc());
else if (IDVal == ".localentry")
- ParseDirectiveLocalEntry(DirectiveID.getLoc());
+ parseDirectiveLocalEntry(DirectiveID.getLoc());
else if (IDVal.starts_with(".gnu_attribute"))
- ParseGNUAttribute(DirectiveID.getLoc());
+ parseGNUAttribute(DirectiveID.getLoc());
else
return true;
return false;
}
-/// ParseDirectiveWord
/// ::= .word [ expression (, expression)* ]
-bool PPCAsmParser::ParseDirectiveWord(unsigned Size, AsmToken ID) {
+bool PPCAsmParser::parseDirectiveWord(unsigned Size, AsmToken ID) {
auto parseOp = [&]() -> bool {
const MCExpr *Value;
SMLoc ExprLoc = getParser().getTok().getLoc();
@@ -1778,9 +1774,8 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, AsmToken ID) {
return false;
}
-/// ParseDirectiveTC
/// ::= .tc [ symbol (, expression)* ]
-bool PPCAsmParser::ParseDirectiveTC(unsigned Size, AsmToken ID) {
+bool PPCAsmParser::parseDirectiveTC(unsigned Size, AsmToken ID) {
MCAsmParser &Parser = getParser();
// Skip TC symbol, which is only used with XCOFF.
while (getLexer().isNot(AsmToken::EndOfStatement)
@@ -1793,12 +1788,12 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, AsmToken ID) {
getParser().getStreamer().emitValueToAlignment(Align(Size));
// Emit expressions.
- return ParseDirectiveWord(Size, ID);
+ return parseDirectiveWord(Size, ID);
}
-/// ParseDirectiveMachine (ELF platforms)
+/// ELF platforms.
/// ::= .machine [ cpu | "push" | "pop" ]
-bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
+bool PPCAsmParser::parseDirectiveMachine(SMLoc L) {
MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::Identifier) &&
Parser.getTok().isNot(AsmToken::String))
@@ -1823,9 +1818,8 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
return false;
}
-/// ParseDirectiveAbiVersion
/// ::= .abiversion constant-expression
-bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
+bool PPCAsmParser::parseDirectiveAbiVersion(SMLoc L) {
int64_t AbiVersion;
if (check(getParser().parseAbsoluteExpression(AbiVersion), L,
"expected constant expression") ||
@@ -1840,9 +1834,8 @@ bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
return false;
}
-/// ParseDirectiveLocalEntry
/// ::= .localentry symbol, expression
-bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
+bool PPCAsmParser::parseDirectiveLocalEntry(SMLoc L) {
StringRef Name;
if (getParser().parseIdentifier(Name))
return Error(L, "expected identifier in '.localentry' directive");
@@ -1863,7 +1856,7 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
return false;
}
-bool PPCAsmParser::ParseGNUAttribute(SMLoc L) {
+bool PPCAsmParser::parseGNUAttribute(SMLoc L) {
int64_t Tag;
int64_t IntegerValue;
if (!getParser().parseGNUAttribute(L, Tag, IntegerValue))
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 6eb205810761..9600293d3da7 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -104,7 +104,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
bool generateImmOutOfRangeError(SMLoc ErrorLoc, int64_t Lower, int64_t Upper,
const Twine &Msg);
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -114,7 +114,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;
@@ -182,7 +182,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
bool validateInstruction(MCInst &Inst, OperandVector &Operands);
/// Helper for processing MC instructions that have been successfully matched
- /// by MatchAndEmitInstruction. Modifications to the emitted instructions,
+ /// by matchAndEmitInstruction. Modifications to the emitted instructions,
/// like the expansion of pseudo instructions (e.g., "li"), can be performed
/// in this method.
bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands,
@@ -1376,7 +1376,7 @@ bool RISCVAsmParser::generateImmOutOfRangeError(
return generateImmOutOfRangeError(ErrorLoc, Lower, Upper, Msg);
}
-bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -2732,7 +2732,7 @@ bool RISCVAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return true;
}
-bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+bool RISCVAsmParser::parseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
// Ensure that if the instruction occurs when relaxation is enabled,
@@ -3186,12 +3186,12 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
ParseInstructionInfo Info;
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> Operands;
- if (ParseInstruction(Info, FormatName, L, Operands))
+ if (parseInstruction(Info, FormatName, L, Operands))
return true;
unsigned Opcode;
uint64_t ErrorInfo;
- return MatchAndEmitInstruction(L, Opcode, Operands, Parser.getStreamer(),
+ return matchAndEmitInstruction(L, Opcode, Operands, Parser.getStreamer(),
ErrorInfo,
/*MatchingInlineAsm=*/false);
}
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index c204683f4e79..055193bcc2c8 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -287,34 +287,48 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
+ auto &ExtLoadActions = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD});
- LoadActions
- .legalForTypesWithMemDesc({{s32, p0, s8, 8},
- {s32, p0, s16, 16},
- {s32, p0, s32, 32},
- {p0, p0, sXLen, XLen}});
- StoreActions
- .legalForTypesWithMemDesc({{s32, p0, s8, 8},
- {s32, p0, s16, 16},
- {s32, p0, s32, 32},
- {p0, p0, sXLen, XLen}});
- auto &ExtLoadActions =
- getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
- .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 16}});
+ // Return the alignment needed for scalar memory ops. If unaligned scalar mem
+ // is supported, we only require byte alignment. Otherwise, we need the memory
+ // op to be natively aligned.
+ auto getScalarMemAlign = [&ST](unsigned Size) {
+ return ST.enableUnalignedScalarMem() ? 8 : Size;
+ };
+
+ LoadActions.legalForTypesWithMemDesc(
+ {{s32, p0, s8, getScalarMemAlign(8)},
+ {s32, p0, s16, getScalarMemAlign(16)},
+ {s32, p0, s32, getScalarMemAlign(32)},
+ {p0, p0, sXLen, getScalarMemAlign(XLen)}});
+ StoreActions.legalForTypesWithMemDesc(
+ {{s32, p0, s8, getScalarMemAlign(8)},
+ {s32, p0, s16, getScalarMemAlign(16)},
+ {s32, p0, s32, getScalarMemAlign(32)},
+ {p0, p0, sXLen, getScalarMemAlign(XLen)}});
+ ExtLoadActions.legalForTypesWithMemDesc(
+ {{s32, p0, s8, getScalarMemAlign(8)},
+ {s32, p0, s16, getScalarMemAlign(16)}});
if (XLen == 64) {
- LoadActions.legalForTypesWithMemDesc({{s64, p0, s8, 8},
- {s64, p0, s16, 16},
- {s64, p0, s32, 32},
- {s64, p0, s64, 64}});
- StoreActions.legalForTypesWithMemDesc({{s64, p0, s8, 8},
- {s64, p0, s16, 16},
- {s64, p0, s32, 32},
- {s64, p0, s64, 64}});
+ LoadActions.legalForTypesWithMemDesc(
+ {{s64, p0, s8, getScalarMemAlign(8)},
+ {s64, p0, s16, getScalarMemAlign(16)},
+ {s64, p0, s32, getScalarMemAlign(32)},
+ {s64, p0, s64, getScalarMemAlign(64)}});
+ StoreActions.legalForTypesWithMemDesc(
+ {{s64, p0, s8, getScalarMemAlign(8)},
+ {s64, p0, s16, getScalarMemAlign(16)},
+ {s64, p0, s32, getScalarMemAlign(32)},
+ {s64, p0, s64, getScalarMemAlign(64)}});
ExtLoadActions.legalForTypesWithMemDesc(
- {{s64, p0, s8, 8}, {s64, p0, s16, 16}, {s64, p0, s32, 32}});
+ {{s64, p0, s8, getScalarMemAlign(8)},
+ {s64, p0, s16, getScalarMemAlign(16)},
+ {s64, p0, s32, getScalarMemAlign(32)}});
} else if (ST.hasStdExtD()) {
- LoadActions.legalForTypesWithMemDesc({{s64, p0, s64, 64}});
- StoreActions.legalForTypesWithMemDesc({{s64, p0, s64, 64}});
+ LoadActions.legalForTypesWithMemDesc(
+ {{s64, p0, s64, getScalarMemAlign(64)}});
+ StoreActions.legalForTypesWithMemDesc(
+ {{s64, p0, s64, getScalarMemAlign(64)}});
}
// Vector loads/stores.
@@ -541,7 +555,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
// FIXME: We can do custom inline expansion like SelectionDAG.
// FIXME: Legal with Zfa.
- getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR})
+ getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
+ G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
+ G_INTRINSIC_ROUNDEVEN})
.libcallFor({s32, s64});
getActionDefinitionsBuilder(G_VASTART).customFor({p0});
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d5b3cccda02d..189fb741f34c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -941,39 +941,41 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
};
// TODO: support more ops.
- static const unsigned ZvfhminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN,
- ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC,
- ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
- ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
+ static const unsigned ZvfhminZvfbfminPromoteOps[] = {
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
+ ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND,
+ ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS,
+ ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
+ ISD::STRICT_FMA};
// TODO: support more vp ops.
- static const unsigned ZvfhminPromoteVPOps[] = {ISD::VP_FADD,
- ISD::VP_FSUB,
- ISD::VP_FMUL,
- ISD::VP_FDIV,
- ISD::VP_FMA,
- ISD::VP_REDUCE_FADD,
- ISD::VP_REDUCE_SEQ_FADD,
- ISD::VP_REDUCE_FMIN,
- ISD::VP_REDUCE_FMAX,
- ISD::VP_SQRT,
- ISD::VP_FMINNUM,
- ISD::VP_FMAXNUM,
- ISD::VP_FCEIL,
- ISD::VP_FFLOOR,
- ISD::VP_FROUND,
- ISD::VP_FROUNDEVEN,
- ISD::VP_FROUNDTOZERO,
- ISD::VP_FRINT,
- ISD::VP_FNEARBYINT,
- ISD::VP_SETCC,
- ISD::VP_FMINIMUM,
- ISD::VP_FMAXIMUM,
- ISD::VP_REDUCE_FMINIMUM,
- ISD::VP_REDUCE_FMAXIMUM};
+ static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
+ ISD::VP_FADD,
+ ISD::VP_FSUB,
+ ISD::VP_FMUL,
+ ISD::VP_FDIV,
+ ISD::VP_FMA,
+ ISD::VP_REDUCE_FADD,
+ ISD::VP_REDUCE_SEQ_FADD,
+ ISD::VP_REDUCE_FMIN,
+ ISD::VP_REDUCE_FMAX,
+ ISD::VP_SQRT,
+ ISD::VP_FMINNUM,
+ ISD::VP_FMAXNUM,
+ ISD::VP_FCEIL,
+ ISD::VP_FFLOOR,
+ ISD::VP_FROUND,
+ ISD::VP_FROUNDEVEN,
+ ISD::VP_FROUNDTOZERO,
+ ISD::VP_FRINT,
+ ISD::VP_FNEARBYINT,
+ ISD::VP_SETCC,
+ ISD::VP_FMINIMUM,
+ ISD::VP_FMAXIMUM,
+ ISD::VP_REDUCE_FMINIMUM,
+ ISD::VP_REDUCE_FMAXIMUM};
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
@@ -1061,6 +1063,45 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
};
+ // Sets common actions for f16 and bf16 for when there's only
+ // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
+ const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+ Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
+ Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::VP_SINT_TO_FP,
+ ISD::VP_UINT_TO_FP},
+ VT, Custom);
+ setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_INTERLEAVE,
+ ISD::VECTOR_DEINTERLEAVE},
+ VT, Custom);
+ MVT EltVT = VT.getVectorElementType();
+ if (isTypeLegal(EltVT))
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ else
+ setOperationAction(ISD::SPLAT_VECTOR, EltVT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+
+ // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
+ if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
+ setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
+ setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
+ } else {
+ MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
+ }
+ };
+
if (Subtarget.hasVInstructionsF16()) {
for (MVT VT : F16VecVTs) {
if (!isTypeLegal(VT))
@@ -1071,74 +1112,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (MVT VT : F16VecVTs) {
if (!isTypeLegal(VT))
continue;
- setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
- setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
- Custom);
- setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
- setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
- Custom);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
- ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
- VT, Custom);
- setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
- ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_INTERLEAVE,
- ISD::VECTOR_DEINTERLEAVE},
- VT, Custom);
- if (Subtarget.hasStdExtZfhmin())
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
- else
- setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom);
- // load/store
- setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
-
- setOperationAction(ISD::FNEG, VT, Expand);
- setOperationAction(ISD::FABS, VT, Expand);
- setOperationAction(ISD::FCOPYSIGN, VT, Expand);
-
- // Custom split nxv32f16 since nxv32f32 if not legal.
- if (VT == MVT::nxv32f16) {
- setOperationAction(ZvfhminPromoteOps, VT, Custom);
- setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
- continue;
- }
- // Add more promote ops.
- MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
- setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
- setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
+ SetCommonPromoteToF32Actions(VT);
}
}
- // TODO: Could we merge some code with zvfhmin?
if (Subtarget.hasVInstructionsBF16Minimal()) {
for (MVT VT : BF16VecVTs) {
if (!isTypeLegal(VT))
continue;
- setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
- setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
- Custom);
- setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
- setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
- Custom);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
- ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
- VT, Custom);
- setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
- ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_INTERLEAVE,
- ISD::VECTOR_DEINTERLEAVE},
- VT, Custom);
- if (Subtarget.hasStdExtZfbfmin())
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
- else
- setOperationAction(ISD::SPLAT_VECTOR, MVT::bf16, Custom);
- setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
-
- setOperationAction(ISD::FNEG, VT, Expand);
- setOperationAction(ISD::FABS, VT, Expand);
- setOperationAction(ISD::FCOPYSIGN, VT, Expand);
-
- // TODO: Promote to fp32.
+ SetCommonPromoteToF32Actions(VT);
}
}
@@ -1374,8 +1356,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: could split the f16 vector into two vectors and do promotion.
if (!isTypeLegal(F32VecVT))
continue;
- setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
- setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
continue;
}
@@ -6332,6 +6314,17 @@ static bool hasMaskOp(unsigned Opcode) {
return false;
}
+static bool isPromotedOpNeedingSplit(SDValue Op,
+ const RISCVSubtarget &Subtarget) {
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return true;
+ if (Op.getValueType() == MVT::nxv32bf16)
+ return true;
+ return false;
+}
+
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
SDLoc DL(Op);
@@ -6669,9 +6662,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
case ISD::FMAXIMUM:
case ISD::FMINIMUM:
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVectorOp(Op, DAG);
return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
case ISD::FP_EXTEND:
@@ -6687,8 +6678,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16())) ||
Op.getValueType().getScalarType() == MVT::bf16)) {
- if (Op.getValueType() == MVT::nxv32f16 ||
- Op.getValueType() == MVT::nxv32bf16)
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVectorOp(Op, DAG);
// int -> f32
SDLoc DL(Op);
@@ -6708,8 +6698,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16())) ||
Op1.getValueType().getScalarType() == MVT::bf16)) {
- if (Op1.getValueType() == MVT::nxv32f16 ||
- Op1.getValueType() == MVT::nxv32bf16)
+ if (isPromotedOpNeedingSplit(Op1, Subtarget))
return SplitVectorOp(Op, DAG);
// [b]f16 -> f32
SDLoc DL(Op);
@@ -6941,6 +6930,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FRINT:
case ISD::FROUND:
case ISD::FROUNDEVEN:
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
+ return SplitVectorOp(Op, DAG);
return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
case ISD::LRINT:
case ISD::LLRINT:
@@ -6997,9 +6988,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_REDUCE_FMAX:
case ISD::VP_REDUCE_FMINIMUM:
case ISD::VP_REDUCE_FMAXIMUM:
- if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
return SplitVectorReductionOp(Op, DAG);
return lowerVPREDUCE(Op, DAG);
case ISD::VP_REDUCE_AND:
@@ -7246,9 +7235,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
}
- if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
return SplitVectorOp(Op, DAG);
return lowerFixedLengthVectorSetccToRVV(Op, DAG);
@@ -7290,9 +7277,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FMA:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVectorOp(Op, DAG);
[[fallthrough]];
case ISD::AVGFLOORS:
@@ -7340,9 +7325,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FCOPYSIGN:
if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
return lowerFCOPYSIGN(Op, DAG, Subtarget);
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVectorOp(Op, DAG);
return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
case ISD::STRICT_FADD:
@@ -7351,9 +7334,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::STRICT_FDIV:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitStrictFPVectorOp(Op, DAG);
return lowerToScalableOp(Op, DAG);
case ISD::STRICT_FSETCC:
@@ -7410,9 +7391,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_FMINNUM:
case ISD::VP_FMAXNUM:
case ISD::VP_FCOPYSIGN:
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVPOp(Op, DAG);
[[fallthrough]];
case ISD::VP_SRA:
@@ -7438,8 +7417,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16())) ||
Op.getValueType().getScalarType() == MVT::bf16)) {
- if (Op.getValueType() == MVT::nxv32f16 ||
- Op.getValueType() == MVT::nxv32bf16)
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVectorOp(Op, DAG);
// int -> f32
SDLoc DL(Op);
@@ -7459,8 +7437,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16())) ||
Op1.getValueType().getScalarType() == MVT::bf16)) {
- if (Op1.getValueType() == MVT::nxv32f16 ||
- Op1.getValueType() == MVT::nxv32bf16)
+ if (isPromotedOpNeedingSplit(Op1, Subtarget))
return SplitVectorOp(Op, DAG);
// [b]f16 -> f32
SDLoc DL(Op);
@@ -7473,9 +7450,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
return lowerVPFPIntConvOp(Op, DAG);
case ISD::VP_SETCC:
- if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
return SplitVPOp(Op, DAG);
if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
return lowerVPSetCCMaskOp(Op, DAG);
@@ -7510,16 +7485,12 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VP_FROUND:
case ISD::VP_FROUNDEVEN:
case ISD::VP_FROUNDTOZERO:
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVPOp(Op, DAG);
return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
case ISD::VP_FMAXIMUM:
case ISD::VP_FMINIMUM:
- if (Op.getValueType() == MVT::nxv32f16 &&
- (Subtarget.hasVInstructionsF16Minimal() &&
- !Subtarget.hasVInstructionsF16()))
+ if (isPromotedOpNeedingSplit(Op, Subtarget))
return SplitVPOp(Op, DAG);
return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
case ISD::EXPERIMENTAL_VP_SPLICE:
@@ -10804,7 +10775,8 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
- return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
+ return DAG.getSetCC(DL, VecVT, Op2,
+ DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
}
MVT ContainerVT = VecVT;
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 553d86efa3df..ca3e47a4b78f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -496,7 +496,7 @@ Register SPIRVGlobalRegistry::getOrCreateIntCompositeOrNull(
assignSPIRVTypeToVReg(SpvType, SpvVecConst, *CurMF);
DT.add(CA, CurMF, SpvVecConst);
if (EmitIR) {
- MIRBuilder.buildSplatVector(SpvVecConst, SpvScalConst);
+ MIRBuilder.buildSplatBuildVector(SpvVecConst, SpvScalConst);
} else {
if (Val) {
auto MIB = MIRBuilder.buildInstr(SPIRV::OpConstantComposite)
diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index c1200df5d44d..c7a0bebea969 100644
--- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -70,14 +70,14 @@ class SparcAsmParser : public MCTargetAsmParser {
/// }
// public interface of the MCTargetAsmParser.
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;
@@ -789,7 +789,7 @@ bool SparcAsmParser::expandSETX(MCInst &Inst, SMLoc IDLoc,
return false;
}
-bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool SparcAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -871,14 +871,14 @@ ParseStatus SparcAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
return ParseStatus::NoMatch;
}
-bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+bool SparcAsmParser::parseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
// Validate and reject unavailable mnemonics early before
// running any operand parsing.
// This is needed because some operands (mainly memory ones)
// differ between V8 and V9 ISA and so any operand parsing errors
- // will cause IAS to bail out before it reaches MatchAndEmitInstruction
+ // will cause IAS to bail out before it reaches matchAndEmitInstruction
// (where the instruction as a whole, including the mnemonic, is validated
// once again just before emission).
// As a nice side effect this also allows us to reject unknown
diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 7c3898ac6731..5b26ba08dbdb 100644
--- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -499,9 +499,9 @@ public:
bool RequirePercent, bool RestoreOnFailure);
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -1401,7 +1401,7 @@ ParseStatus SystemZAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
return ParseStatus::Success;
}
-bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+bool SystemZAsmParser::parseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
@@ -1526,7 +1526,7 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
return false;
}
-bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool SystemZAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
index 691fe8fe3aa4..5073894cc7fb 100644
--- a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
+++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
@@ -51,7 +51,7 @@ class VEAsmParser : public MCTargetAsmParser {
/// }
// public interface of the MCTargetAsmParser.
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -59,7 +59,7 @@ class VEAsmParser : public MCTargetAsmParser {
int parseRegisterName(MCRegister (*matchFn)(StringRef));
ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
ParseStatus parseDirective(AsmToken DirectiveID) override;
@@ -760,7 +760,7 @@ public:
} // end anonymous namespace
-bool VEAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool VEAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
@@ -965,7 +965,7 @@ static void applyMnemonicAliases(StringRef &Mnemonic,
const FeatureBitset &Features,
unsigned VariantID);
-bool VEAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+bool VEAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
// If the target architecture uses MnemonicAlias, call it here to parse
// operands correctly.
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 4fef5fa0ef22..8e8d08f77563 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -562,7 +562,7 @@ public:
}
}
- bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override {
// Note: Name does NOT point into the sourcecode, but to a local, so
// use NameLoc instead.
@@ -1127,7 +1127,7 @@ public:
}
}
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override {
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index 215722204ba4..4c29b59b3302 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -110,6 +110,20 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address,
// Print any added annotation.
printAnnotation(OS, Annot);
+ auto PrintBranchAnnotation = [&](const MCOperand &Op,
+ SmallSet<uint64_t, 8> &Printed) {
+ uint64_t Depth = Op.getImm();
+ if (!Printed.insert(Depth).second)
+ return;
+ if (Depth >= ControlFlowStack.size()) {
+ printAnnotation(OS, "Invalid depth argument!");
+ } else {
+ const auto &Pair = ControlFlowStack.rbegin()[Depth];
+ printAnnotation(OS, utostr(Depth) + ": " + (Pair.second ? "up" : "down") +
+ " to label" + utostr(Pair.first));
+ }
+ };
+
if (CommentStream) {
// Observe any effects on the control flow stack, for use in annotating
// control flow label references.
@@ -136,6 +150,23 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address,
EHInstStack.push_back(TRY);
return;
+ case WebAssembly::TRY_TABLE:
+ case WebAssembly::TRY_TABLE_S: {
+ SmallSet<uint64_t, 8> Printed;
+ unsigned OpIdx = 1;
+ const MCOperand &Op = MI->getOperand(OpIdx++);
+ unsigned NumCatches = Op.getImm();
+ for (unsigned I = 0; I < NumCatches; I++) {
+ int64_t CatchOpcode = MI->getOperand(OpIdx++).getImm();
+ if (CatchOpcode == wasm::WASM_OPCODE_CATCH ||
+ CatchOpcode == wasm::WASM_OPCODE_CATCH_REF)
+ OpIdx++; // Skip tag
+ PrintBranchAnnotation(MI->getOperand(OpIdx++), Printed);
+ }
+ ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
+ return;
+ }
+
case WebAssembly::END_LOOP:
case WebAssembly::END_LOOP_S:
if (ControlFlowStack.empty()) {
@@ -147,6 +178,8 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address,
case WebAssembly::END_BLOCK:
case WebAssembly::END_BLOCK_S:
+ case WebAssembly::END_TRY_TABLE:
+ case WebAssembly::END_TRY_TABLE_S:
if (ControlFlowStack.empty()) {
printAnnotation(OS, "End marker mismatch!");
} else {
@@ -251,17 +284,7 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, uint64_t Address,
if (!MI->getOperand(I).isImm())
continue;
}
- uint64_t Depth = MI->getOperand(I).getImm();
- if (!Printed.insert(Depth).second)
- continue;
- if (Depth >= ControlFlowStack.size()) {
- printAnnotation(OS, "Invalid depth argument!");
- } else {
- const auto &Pair = ControlFlowStack.rbegin()[Depth];
- printAnnotation(OS, utostr(Depth) + ": " +
- (Pair.second ? "up" : "down") + " to label" +
- utostr(Pair.first));
- }
+ PrintBranchAnnotation(MI->getOperand(I), Printed);
}
}
}
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index aaca213c4afe..3e7487dbd8f5 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -82,14 +82,15 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
// For br_table instructions, encode the size of the table. In the MCInst,
// there's an index operand (if not a stack instruction), one operand for
// each table entry, and the default operand.
- if (MI.getOpcode() == WebAssembly::BR_TABLE_I32_S ||
- MI.getOpcode() == WebAssembly::BR_TABLE_I64_S)
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == WebAssembly::BR_TABLE_I32_S ||
+ Opcode == WebAssembly::BR_TABLE_I64_S)
encodeULEB128(MI.getNumOperands() - 1, OS);
- if (MI.getOpcode() == WebAssembly::BR_TABLE_I32 ||
- MI.getOpcode() == WebAssembly::BR_TABLE_I64)
+ if (Opcode == WebAssembly::BR_TABLE_I32 ||
+ Opcode == WebAssembly::BR_TABLE_I64)
encodeULEB128(MI.getNumOperands() - 2, OS);
- const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ const MCInstrDesc &Desc = MCII.get(Opcode);
for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
const MCOperand &MO = MI.getOperand(I);
if (MO.isReg()) {
@@ -136,7 +137,12 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
encodeULEB128(uint64_t(MO.getImm()), OS);
}
} else {
- encodeULEB128(uint64_t(MO.getImm()), OS);
+ // Variadic immediate operands are br_table's destination operands or
+ // try_table's operands (# of catch clauses, catch sub-opcodes, or catch
+ // clause destinations)
+ assert(WebAssembly::isBrTable(Opcode) ||
+ Opcode == WebAssembly::TRY_TABLE_S);
+ encodeULEB128(uint32_t(MO.getImm()), OS);
}
} else if (MO.isSFPImm()) {
@@ -146,32 +152,38 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
uint64_t D = MO.getDFPImm();
support::endian::write<uint64_t>(OS, D, llvm::endianness::little);
} else if (MO.isExpr()) {
- const MCOperandInfo &Info = Desc.operands()[I];
llvm::MCFixupKind FixupKind;
size_t PaddedSize = 5;
- switch (Info.OperandType) {
- case WebAssembly::OPERAND_I32IMM:
- FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i32);
- break;
- case WebAssembly::OPERAND_I64IMM:
- FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i64);
- PaddedSize = 10;
- break;
- case WebAssembly::OPERAND_FUNCTION32:
- case WebAssembly::OPERAND_TABLE:
- case WebAssembly::OPERAND_OFFSET32:
- case WebAssembly::OPERAND_SIGNATURE:
- case WebAssembly::OPERAND_TYPEINDEX:
- case WebAssembly::OPERAND_GLOBAL:
- case WebAssembly::OPERAND_TAG:
+ if (I < Desc.getNumOperands()) {
+ const MCOperandInfo &Info = Desc.operands()[I];
+ switch (Info.OperandType) {
+ case WebAssembly::OPERAND_I32IMM:
+ FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i32);
+ break;
+ case WebAssembly::OPERAND_I64IMM:
+ FixupKind = MCFixupKind(WebAssembly::fixup_sleb128_i64);
+ PaddedSize = 10;
+ break;
+ case WebAssembly::OPERAND_FUNCTION32:
+ case WebAssembly::OPERAND_TABLE:
+ case WebAssembly::OPERAND_OFFSET32:
+ case WebAssembly::OPERAND_SIGNATURE:
+ case WebAssembly::OPERAND_TYPEINDEX:
+ case WebAssembly::OPERAND_GLOBAL:
+ case WebAssembly::OPERAND_TAG:
+ FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i32);
+ break;
+ case WebAssembly::OPERAND_OFFSET64:
+ FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i64);
+ PaddedSize = 10;
+ break;
+ default:
+ llvm_unreachable("unexpected symbolic operand kind");
+ }
+ } else {
+ // Variadic expr operands are try_table's catch/catch_ref clauses' tags.
+ assert(Opcode == WebAssembly::TRY_TABLE_S);
FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i32);
- break;
- case WebAssembly::OPERAND_OFFSET64:
- FixupKind = MCFixupKind(WebAssembly::fixup_uleb128_i64);
- PaddedSize = 10;
- break;
- default:
- llvm_unreachable("unexpected symbolic operand kind");
}
Fixups.push_back(MCFixup::create(OS.tell() - Start, MO.getExpr(),
FixupKind, MI.getLoc()));
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 735f9dcefb97..97079cba143a 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1195,7 +1195,7 @@ private:
/// instrumentation around Inst.
void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -1215,7 +1215,7 @@ private:
uint64_t &ErrorInfo,
bool MatchingInlineAsm);
- bool OmitRegisterFromClobberLists(unsigned RegNo) override;
+ bool omitRegisterFromClobberLists(unsigned RegNo) override;
/// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
/// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
@@ -1290,7 +1290,7 @@ public:
bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
@@ -3186,7 +3186,7 @@ bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
}
-bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
MCAsmParser &Parser = getParser();
InstInfo = &Info;
@@ -4121,7 +4121,7 @@ static unsigned getPrefixes(OperandVector &Operands) {
return Result;
}
-bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool X86AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out, uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
@@ -4659,7 +4659,7 @@ bool X86AsmParser::matchAndEmitIntelInstruction(
MatchingInlineAsm);
}
-bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
+bool X86AsmParser::omitRegisterFromClobberLists(unsigned RegNo) {
return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 95038ccf63b8..a4b72515252a 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -1249,18 +1249,18 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VBROADCASTF128rm:
case X86::VBROADCASTI128rm:
- CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z128, rm)
- CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z128, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z256, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z256, rm)
DecodeSubVectorBroadcast(4, 2, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTF64X2, , rm)
- CASE_AVX512_INS_COMMON(BROADCASTI64X2, , rm)
+ CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z, rm)
DecodeSubVectorBroadcast(8, 2, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTF64X4, , rm)
- CASE_AVX512_INS_COMMON(BROADCASTI64X4, , rm)
+ CASE_AVX512_INS_COMMON(BROADCASTF64X4, Z, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI64X4, Z, rm)
DecodeSubVectorBroadcast(8, 4, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1269,13 +1269,13 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeSubVectorBroadcast(8, 4, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTF32X4, , rm)
- CASE_AVX512_INS_COMMON(BROADCASTI32X4, , rm)
+ CASE_AVX512_INS_COMMON(BROADCASTF32X4, Z, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X4, Z, rm)
DecodeSubVectorBroadcast(16, 4, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- CASE_AVX512_INS_COMMON(BROADCASTF32X8, , rm)
- CASE_AVX512_INS_COMMON(BROADCASTI32X8, , rm)
+ CASE_AVX512_INS_COMMON(BROADCASTF32X8, Z, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X8, Z, rm)
DecodeSubVectorBroadcast(16, 8, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index 482318311398..9c667f5036dd 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -367,7 +367,7 @@ class X86DomainReassignment : public MachineFunctionPass {
const X86InstrInfo *TII = nullptr;
/// All edges that are included in some closure
- BitVector EnclosedEdges{8, false};
+ DenseMap<Register, unsigned> EnclosedEdges;
/// All instructions that are included in some closure.
DenseMap<MachineInstr *, unsigned> EnclosedInstrs;
@@ -399,14 +399,16 @@ private:
void buildClosure(Closure &, Register Reg);
/// Enqueue \p Reg to be considered for addition to the closure.
- void visitRegister(Closure &, Register Reg, RegDomain &Domain,
+ /// Return false if the closure becomes invalid.
+ bool visitRegister(Closure &, Register Reg, RegDomain &Domain,
SmallVectorImpl<unsigned> &Worklist);
/// Reassign the closure to \p Domain.
void reassign(const Closure &C, RegDomain Domain) const;
/// Add \p MI to the closure.
- void encloseInstr(Closure &C, MachineInstr *MI);
+ /// Return false if the closure becomes invalid.
+ bool encloseInstr(Closure &C, MachineInstr *MI);
/// /returns true if it is profitable to reassign the closure to \p Domain.
bool isReassignmentProfitable(const Closure &C, RegDomain Domain) const;
@@ -419,17 +421,23 @@ char X86DomainReassignment::ID = 0;
} // End anonymous namespace.
-void X86DomainReassignment::visitRegister(Closure &C, Register Reg,
+bool X86DomainReassignment::visitRegister(Closure &C, Register Reg,
RegDomain &Domain,
SmallVectorImpl<unsigned> &Worklist) {
if (!Reg.isVirtual())
- return;
+ return true;
- if (EnclosedEdges.test(Register::virtReg2Index(Reg)))
- return;
+ auto I = EnclosedEdges.find(Reg);
+ if (I != EnclosedEdges.end()) {
+ if (I->second != C.getID()) {
+ C.setAllIllegal();
+ return false;
+ }
+ return true;
+ }
if (!MRI->hasOneDef(Reg))
- return;
+ return true;
RegDomain RD = getDomain(MRI->getRegClass(Reg), MRI->getTargetRegisterInfo());
// First edge in closure sets the domain.
@@ -437,19 +445,22 @@ void X86DomainReassignment::visitRegister(Closure &C, Register Reg,
Domain = RD;
if (Domain != RD)
- return;
+ return true;
Worklist.push_back(Reg);
+ return true;
}
-void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) {
+bool X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) {
auto I = EnclosedInstrs.find(MI);
if (I != EnclosedInstrs.end()) {
- if (I->second != C.getID())
+ if (I->second != C.getID()) {
// Instruction already belongs to another closure, avoid conflicts between
// closure and mark this closure as illegal.
C.setAllIllegal();
- return;
+ return false;
+ }
+ return true;
}
EnclosedInstrs[MI] = C.getID();
@@ -465,6 +476,7 @@ void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) {
C.setIllegal((RegDomain)i);
}
}
+ return C.hasLegalDstDomain();
}
double X86DomainReassignment::calculateCost(const Closure &C,
@@ -543,10 +555,11 @@ void X86DomainReassignment::buildClosure(Closure &C, Register Reg) {
// Register already in this closure.
if (!C.insertEdge(CurReg))
continue;
- EnclosedEdges.set(Register::virtReg2Index(Reg));
+ EnclosedEdges[Reg] = C.getID();
MachineInstr *DefMI = MRI->getVRegDef(CurReg);
- encloseInstr(C, DefMI);
+ if (!encloseInstr(C, DefMI))
+ return;
// Add register used by the defining MI to the worklist.
// Do not add registers which are used in address calculation, they will be
@@ -565,7 +578,8 @@ void X86DomainReassignment::buildClosure(Closure &C, Register Reg) {
auto &Op = DefMI->getOperand(OpIdx);
if (!Op.isReg() || !Op.isUse())
continue;
- visitRegister(C, Op.getReg(), Domain, Worklist);
+ if (!visitRegister(C, Op.getReg(), Domain, Worklist))
+ return;
}
// Expand closure through register uses.
@@ -574,9 +588,10 @@ void X86DomainReassignment::buildClosure(Closure &C, Register Reg) {
// as this should remain in GPRs.
if (usedAsAddr(UseMI, CurReg, TII)) {
C.setAllIllegal();
- continue;
+ return;
}
- encloseInstr(C, &UseMI);
+ if (!encloseInstr(C, &UseMI))
+ return;
for (auto &DefOp : UseMI.defs()) {
if (!DefOp.isReg())
@@ -585,9 +600,10 @@ void X86DomainReassignment::buildClosure(Closure &C, Register Reg) {
Register DefReg = DefOp.getReg();
if (!DefReg.isVirtual()) {
C.setAllIllegal();
- continue;
+ return;
}
- visitRegister(C, DefReg, Domain, Worklist);
+ if (!visitRegister(C, DefReg, Domain, Worklist))
+ return;
}
}
}
@@ -775,7 +791,6 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
EnclosedEdges.clear();
- EnclosedEdges.resize(MRI->getNumVirtRegs());
EnclosedInstrs.clear();
std::vector<Closure> Closures;
@@ -795,7 +810,7 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
continue;
// Register already in closure.
- if (EnclosedEdges.test(Idx))
+ if (EnclosedEdges.contains(Reg))
continue;
// Calculate closure starting with Reg.
diff --git a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
index c9f79e1645f5..68a4a0be3a1d 100644
--- a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
+++ b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
@@ -439,8 +439,8 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
case X86::VMOVUPSZrm:
return FixupConstant({{X86::VBROADCASTSSZrm, 1, 32, rebuildSplatCst},
{X86::VBROADCASTSDZrm, 1, 64, rebuildSplatCst},
- {X86::VBROADCASTF32X4rm, 1, 128, rebuildSplatCst},
- {X86::VBROADCASTF64X4rm, 1, 256, rebuildSplatCst}},
+ {X86::VBROADCASTF32X4Zrm, 1, 128, rebuildSplatCst},
+ {X86::VBROADCASTF64X4Zrm, 1, 256, rebuildSplatCst}},
512, 1);
/* Integer Loads */
case X86::MOVDQArm:
@@ -572,12 +572,12 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
{X86::VPBROADCASTQZrm, 1, 64, rebuildSplatCst},
{X86::VPMOVSXBQZrm, 8, 8, rebuildSExtCst},
{X86::VPMOVZXBQZrm, 8, 8, rebuildZExtCst},
- {X86::VBROADCASTI32X4rm, 1, 128, rebuildSplatCst},
+ {X86::VBROADCASTI32X4Zrm, 1, 128, rebuildSplatCst},
{X86::VPMOVSXBDZrm, 16, 8, rebuildSExtCst},
{X86::VPMOVZXBDZrm, 16, 8, rebuildZExtCst},
{X86::VPMOVSXWQZrm, 8, 16, rebuildSExtCst},
{X86::VPMOVZXWQZrm, 8, 16, rebuildZExtCst},
- {X86::VBROADCASTI64X4rm, 1, 256, rebuildSplatCst},
+ {X86::VBROADCASTI64X4Zrm, 1, 256, rebuildSplatCst},
{HasBWI ? X86::VPMOVSXBWZrm : 0, 32, 8, rebuildSExtCst},
{HasBWI ? X86::VPMOVZXBWZrm : 0, 32, 8, rebuildZExtCst},
{X86::VPMOVSXWDZrm, 16, 16, rebuildSExtCst},
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f6d42ade6008..68563f556ecf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12348,7 +12348,7 @@ static SDValue lowerShuffleAsElementInsertion(
}
V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
} else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
- EltVT == MVT::i16) {
+ (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) {
// Either not inserting from the low element of the input or the input
// element size is too small to use VZEXT_MOVL to clear the high bits.
return SDValue();
@@ -26159,22 +26159,43 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (CC == ISD::SETLT || CC == ISD::SETLE)
std::swap(LHS, RHS);
- SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
+ // For AVX10.2, Support EQ and NE.
+ bool HasAVX10_2_COMX =
+ Subtarget.hasAVX10_2() && (CC == ISD::SETEQ || CC == ISD::SETNE);
+
+ // AVX10.2 COMPARE supports only v2f64, v4f32 or v8f16.
+ // For BF type we need to fall back.
+ bool HasAVX10_2_COMX_Ty = (LHS.getSimpleValueType() != MVT::v8bf16);
+
+ auto ComiOpCode = IntrData->Opc0;
+ auto isUnordered = (ComiOpCode == X86ISD::UCOMI);
+
+ if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
+ ComiOpCode = isUnordered ? X86ISD::UCOMX : X86ISD::COMX;
+
+ SDValue Comi = DAG.getNode(ComiOpCode, dl, MVT::i32, LHS, RHS);
+
SDValue SetCC;
switch (CC) {
- case ISD::SETEQ: { // (ZF = 0 and PF = 0)
+ case ISD::SETEQ: {
SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
+ if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) // ZF == 1
+ break;
+ // (ZF = 1 and PF = 0)
SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
break;
}
- case ISD::SETNE: { // (ZF = 1 or PF = 1)
+ case ISD::SETNE: {
SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
+ if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) // ZF == 0
+ break;
+ // (ZF = 0 or PF = 1)
SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
break;
}
- case ISD::SETGT: // (CF = 0 and ZF = 0)
+ case ISD::SETGT: // (CF = 0 and ZF = 0)
case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
break;
@@ -34083,6 +34104,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(STRICT_FCMPS)
NODE_NAME_CASE(COMI)
NODE_NAME_CASE(UCOMI)
+ NODE_NAME_CASE(COMX)
+ NODE_NAME_CASE(UCOMX)
NODE_NAME_CASE(CMPM)
NODE_NAME_CASE(CMPMM)
NODE_NAME_CASE(STRICT_CMPM)
@@ -57768,6 +57791,19 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
DAG.getTargetConstant(M, DL, MVT::i8));
}
break;
+ case X86ISD::VPERMV3:
+ if (IdxVal != 0) {
+ SDValue Src0 = InVec.getOperand(0);
+ SDValue Mask = InVec.getOperand(1);
+ SDValue Src1 = InVec.getOperand(2);
+ Mask = extractSubVector(Mask, IdxVal, DAG, DL, SizeInBits);
+ Mask = widenSubVector(Mask, /*ZeroNewElements=*/false, Subtarget, DAG,
+ DL, InSizeInBits);
+ SDValue Shuffle =
+ DAG.getNode(InOpcode, DL, InVecVT, Src0, Mask, Src1);
+ return extractSubVector(Shuffle, 0, DAG, DL, SizeInBits);
+ }
+ break;
}
}
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 5fb588675680..ae7da8efb5f9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -87,6 +87,10 @@ namespace llvm {
COMI,
UCOMI,
+ // X86 compare with Intrinsics similar to COMI.
+ COMX,
+ UCOMX,
+
/// X86 bit-test instructions.
BT,
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index ada2bbaffd66..625f2e01d472 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1537,3 +1537,113 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
X86Fnmsub, SchedWriteFMA>;
}
+
+//-------------------------------------------------
+// AVX10 COMEF instructions
+//-------------------------------------------------
+multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
+ string OpcodeStr,
+ Domain d,
+ X86FoldableSchedWrite sched = WriteFComX> {
+ let ExeDomain = d, mayRaiseFPException = 1 in {
+ def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>,
+ EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+ let mayLoad = 1 in {
+ def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>,
+ EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+ }
+ def rrb_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
+ !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
+ []>,
+ EVEX, EVEX_V128, EVEX_B, Sched<[sched]>, SIMD_EXC;
+ }
+}
+
+let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
+ defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
+ "vcomxsd", SSEPackedDouble>,
+ TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+ defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512,
+ "vcomxsh", SSEPackedSingle>,
+ T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
+ defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512,
+ "vcomxss", SSEPackedSingle>,
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512,
+ "vucomxsd", SSEPackedDouble>,
+ TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+ defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512,
+ "vucomxsh", SSEPackedSingle>,
+ T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
+ defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512,
+ "vucomxss", SSEPackedSingle>,
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+}
+
+//-------------------------------------------------
+// AVX10 MOVZXC (COPY) instructions
+//-------------------------------------------------
+let Predicates = [HasAVX10_2] in {
+ def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v4i32 (X86vzmovl
+ (v4i32 VR128X:$src))))]>, EVEX,
+ Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+ def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i32mem:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<32, CD8VT1>,
+ Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+ def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<32, CD8VT1>,
+ Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ Sched<[WriteVecMoveFromGpr]>;
+ def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
+ (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;
+
+def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v8i16 (X86vzmovl
+ (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
+ Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+ def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i16mem:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<16, CD8VT1>, T_MAP5,
+ Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+ def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<16, CD8VT1>, T_MAP5,
+ Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ []>, EVEX, T_MAP5,
+ Sched<[WriteVecMoveFromGpr]>;
+ def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
+ (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
+}
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 9ed59803c1f9..928abac46da8 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1337,84 +1337,84 @@ let Predicates = [HasVLX, HasBWI] in {
// AVX-512 BROADCAST SUBVECTORS
//
-defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
- X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
- EVEX_V512, EVEX_CD8<32, CD8VT4>;
-defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
- X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
- EVEX_V512, EVEX_CD8<32, CD8VT4>;
-defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
- X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
- EVEX_V512, EVEX_CD8<64, CD8VT4>;
-defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
- X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
- EVEX_V512, EVEX_CD8<64, CD8VT4>;
+defm VBROADCASTI32X4Z : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
+ X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VT4>;
+defm VBROADCASTF32X4Z : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
+ X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VT4>;
+defm VBROADCASTI64X4Z : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
+ X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT4>;
+defm VBROADCASTF64X4Z : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
+ X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT4>;
let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTF64X4rm addr:$src)>;
+ (VBROADCASTF64X4Zrm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTF64X4rm addr:$src)>;
+ (VBROADCASTF64X4Zrm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTF64X4rm addr:$src)>;
+ (VBROADCASTF64X4Zrm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTI64X4rm addr:$src)>;
+ (VBROADCASTI64X4Zrm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTI64X4rm addr:$src)>;
+ (VBROADCASTI64X4Zrm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTI64X4rm addr:$src)>;
+ (VBROADCASTI64X4Zrm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTI64X4rm addr:$src)>;
+ (VBROADCASTI64X4Zrm addr:$src)>;
def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF32X4rm addr:$src)>;
+ (VBROADCASTF32X4Zrm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF32X4rm addr:$src)>;
+ (VBROADCASTF32X4Zrm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF32X4rm addr:$src)>;
+ (VBROADCASTF32X4Zrm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTI32X4rm addr:$src)>;
+ (VBROADCASTI32X4Zrm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTI32X4rm addr:$src)>;
+ (VBROADCASTI32X4Zrm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTI32X4rm addr:$src)>;
+ (VBROADCASTI32X4Zrm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTI32X4rm addr:$src)>;
+ (VBROADCASTI32X4Zrm addr:$src)>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
(v16f32 immAllZerosV)),
- (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
+ (VBROADCASTF32X4Zrmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
- (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+ (VBROADCASTF32X4Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
(v16i32 immAllZerosV)),
- (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
+ (VBROADCASTI32X4Zrmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
- (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+ (VBROADCASTI32X4Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
(v8f64 immAllZerosV)),
- (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
+ (VBROADCASTF64X4Zrmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
- (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+ (VBROADCASTF64X4Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
(v8i64 immAllZerosV)),
- (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
+ (VBROADCASTI64X4Zrmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
- (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+ (VBROADCASTI64X4Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
let Predicates = [HasVLX] in {
@@ -1461,9 +1461,9 @@ def : Pat<(vselect_mask VK8WM:$mask,
let Predicates = [HasBF16] in {
def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
- (VBROADCASTF64X4rm addr:$src)>;
+ (VBROADCASTF64X4Zrm addr:$src)>;
def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
- (VBROADCASTF32X4rm addr:$src)>;
+ (VBROADCASTF32X4Zrm addr:$src)>;
}
let Predicates = [HasBF16, HasVLX] in
@@ -1471,10 +1471,10 @@ let Predicates = [HasBF16, HasVLX] in
(VBROADCASTF32X4Z256rm addr:$src)>;
let Predicates = [HasVLX, HasDQI] in {
-defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
+defm VBROADCASTI64X2Z256 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
-defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
+defm VBROADCASTF64X2Z256 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
@@ -1482,69 +1482,69 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2"
def : Pat<(vselect_mask VK4WM:$mask,
(bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
(v4f64 immAllZerosV)),
- (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
+ (VBROADCASTF64X2Z256rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
(bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
VR256X:$src0),
- (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
+ (VBROADCASTF64X2Z256rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
(bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
(v4i64 immAllZerosV)),
- (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
+ (VBROADCASTI64X2Z256rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
(bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
VR256X:$src0),
- (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
+ (VBROADCASTI64X2Z256rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
let Predicates = [HasDQI] in {
-defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
- X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
- EVEX_V512, EVEX_CD8<64, CD8VT2>;
-defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
- X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
- EVEX_V512, EVEX_CD8<32, CD8VT8>;
-defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
- X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
- EVEX_V512, EVEX_CD8<64, CD8VT2>;
-defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
- X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
- EVEX_V512, EVEX_CD8<32, CD8VT8>;
+defm VBROADCASTI64X2Z : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
+ X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT2>;
+defm VBROADCASTI32X8Z : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
+ X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VT8>;
+defm VBROADCASTF64X2Z : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
+ X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT2>;
+defm VBROADCASTF32X8Z : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
+ X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
+ EVEX_V512, EVEX_CD8<32, CD8VT8>;
// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
(v16f32 immAllZerosV)),
- (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
+ (VBROADCASTF32X8Zrmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
- (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+ (VBROADCASTF32X8Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
(v16i32 immAllZerosV)),
- (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
+ (VBROADCASTI32X8Zrmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
(bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
VR512:$src0),
- (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+ (VBROADCASTI32X8Zrmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
(v8f64 immAllZerosV)),
- (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
+ (VBROADCASTF64X2Zrmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
- (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+ (VBROADCASTF64X2Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
(v8i64 immAllZerosV)),
- (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
+ (VBROADCASTI64X2Zrmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
(bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
VR512:$src0),
- (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+ (VBROADCASTI64X2Zrmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index af39b1ab82d6..ed1bff05b731 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -61,7 +61,8 @@ def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86FCmp>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86FCmp>;
-
+def X86comi512 : SDNode<"X86ISD::COMX", SDTX86FCmp>;
+def X86ucomi512 : SDNode<"X86ISD::UCOMX", SDTX86FCmp>;
def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 57a894b09e04..38ea1f35be2b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6246,16 +6246,16 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
case X86::VMOVAPSZ128rm_NOVLX:
return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
- get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
+ get(X86::VBROADCASTF32X4Zrm), X86::sub_xmm);
case X86::VMOVUPSZ128rm_NOVLX:
return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
- get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
+ get(X86::VBROADCASTF32X4Zrm), X86::sub_xmm);
case X86::VMOVAPSZ256rm_NOVLX:
return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
- get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
+ get(X86::VBROADCASTF64X4Zrm), X86::sub_ymm);
case X86::VMOVUPSZ256rm_NOVLX:
return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
- get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
+ get(X86::VBROADCASTF64X4Zrm), X86::sub_ymm);
case X86::VMOVAPSZ128mr_NOVLX:
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
get(X86::VEXTRACTF32x4Zmri), X86::sub_xmm);
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 77ddd2366e62..55c237e2df2d 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2051,21 +2051,21 @@ static void addConstantComments(const MachineInstr *MI,
case X86::VBROADCASTF128rm:
case X86::VBROADCASTI128rm:
MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm)
- MASK_AVX512_CASE(X86::VBROADCASTF64X2Z128rm)
+ MASK_AVX512_CASE(X86::VBROADCASTF64X2Z256rm)
MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm)
- MASK_AVX512_CASE(X86::VBROADCASTI64X2Z128rm)
+ MASK_AVX512_CASE(X86::VBROADCASTI64X2Z256rm)
printBroadcast(MI, OutStreamer, 2, 128);
break;
- MASK_AVX512_CASE(X86::VBROADCASTF32X4rm)
- MASK_AVX512_CASE(X86::VBROADCASTF64X2rm)
- MASK_AVX512_CASE(X86::VBROADCASTI32X4rm)
- MASK_AVX512_CASE(X86::VBROADCASTI64X2rm)
+ MASK_AVX512_CASE(X86::VBROADCASTF32X4Zrm)
+ MASK_AVX512_CASE(X86::VBROADCASTF64X2Zrm)
+ MASK_AVX512_CASE(X86::VBROADCASTI32X4Zrm)
+ MASK_AVX512_CASE(X86::VBROADCASTI64X2Zrm)
printBroadcast(MI, OutStreamer, 4, 128);
break;
- MASK_AVX512_CASE(X86::VBROADCASTF32X8rm)
- MASK_AVX512_CASE(X86::VBROADCASTF64X4rm)
- MASK_AVX512_CASE(X86::VBROADCASTI32X8rm)
- MASK_AVX512_CASE(X86::VBROADCASTI64X4rm)
+ MASK_AVX512_CASE(X86::VBROADCASTF32X8Zrm)
+ MASK_AVX512_CASE(X86::VBROADCASTF64X4Zrm)
+ MASK_AVX512_CASE(X86::VBROADCASTI32X8Zrm)
+ MASK_AVX512_CASE(X86::VBROADCASTI64X4Zrm)
printBroadcast(MI, OutStreamer, 2, 256);
break;
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 72fbcc559810..a5051d932d4e 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -1576,19 +1576,19 @@ def: InstRW<[ICXWriteResGroup121, ReadAfterVecYLd],
"VBROADCASTF32X2Z256rm(b?)",
"VBROADCASTF32X2Zrm(b?)",
"VBROADCASTF32X4Z256rm(b?)",
- "VBROADCASTF32X4rm(b?)",
- "VBROADCASTF32X8rm(b?)",
- "VBROADCASTF64X2Z128rm(b?)",
- "VBROADCASTF64X2rm(b?)",
- "VBROADCASTF64X4rm(b?)",
+ "VBROADCASTF32X4Zrm(b?)",
+ "VBROADCASTF32X8Zrm(b?)",
+ "VBROADCASTF64X2Z256rm(b?)",
+ "VBROADCASTF64X2Zrm(b?)",
+ "VBROADCASTF64X4Zrm(b?)",
"VBROADCASTI32X2Z256rm(b?)",
"VBROADCASTI32X2Zrm(b?)",
"VBROADCASTI32X4Z256rm(b?)",
- "VBROADCASTI32X4rm(b?)",
- "VBROADCASTI32X8rm(b?)",
- "VBROADCASTI64X2Z128rm(b?)",
- "VBROADCASTI64X2rm(b?)",
- "VBROADCASTI64X4rm(b?)",
+ "VBROADCASTI32X4Zrm(b?)",
+ "VBROADCASTI32X8Zrm(b?)",
+ "VBROADCASTI64X2Z256rm(b?)",
+ "VBROADCASTI64X2Zrm(b?)",
+ "VBROADCASTI64X4Zrm(b?)",
"VBROADCASTSD(Z|Z256)rm(b?)",
"VBROADCASTSS(Z|Z256)rm(b?)",
"VINSERTF32x4(Z|Z256)rm(b?)",
diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
index 9818f4c01ea6..6e292da4e293 100644
--- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td
+++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -1601,9 +1601,9 @@ def SPRWriteResGroup126 : SchedWriteRes<[SPRPort02_03_11]> {
def : InstRW<[SPRWriteResGroup126], (instregex "^MMX_MOV(D|Q)64rm$",
"^VBROADCAST(F|I)128rm$",
"^VBROADCAST(F|I)32X(2|4)Z256rm$",
- "^VBROADCAST(F|I)32X(8|2Z)rm$",
- "^VBROADCAST(F|I)(32|64)X4rm$",
- "^VBROADCAST(F|I)64X2((Z128)?)rm$",
+ "^VBROADCAST(F|I)32X(8|2)Zrm$",
+ "^VBROADCAST(F|I)(32|64)X4Zrm$",
+ "^VBROADCAST(F|I)64X2(Z|Z256)rm$",
"^VBROADCASTS(DY|SZ)rm$",
"^VBROADCASTS(D|S)Z256rm$",
"^VBROADCASTS(DZ|SY)rm$",
@@ -1652,9 +1652,9 @@ def SPRWriteResGroup131 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11]> {
let Latency = 9;
let NumMicroOps = 2;
}
-def : InstRW<[SPRWriteResGroup131], (instregex "^VBROADCAST(F|I)32X(8|2Z)rmk(z?)$",
- "^VBROADCAST(F|I)(32|64)X4rmk(z?)$",
- "^VBROADCAST(F|I)64X2rmk(z?)$",
+def : InstRW<[SPRWriteResGroup131], (instregex "^VBROADCAST(F|I)32X(8|2)Zrmk(z?)$",
+ "^VBROADCAST(F|I)(32|64)X4Zrmk(z?)$",
+ "^VBROADCAST(F|I)64X2Zrmk(z?)$",
"^VBROADCASTS(D|S)Zrmk(z?)$",
"^VMOV(A|U)P(D|S)Zrmk(z?)$",
"^VMOV(D|SH|SL)DUPZrmk(z?)$",
@@ -2698,7 +2698,7 @@ def SPRWriteResGroup262 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
let NumMicroOps = 2;
}
def : InstRW<[SPRWriteResGroup262], (instregex "^VBROADCAST(F|I)32X(2|4)Z256rmk(z?)$",
- "^VBROADCAST(F|I)64X2Z128rmk(z?)$",
+ "^VBROADCAST(F|I)64X2Z256rmk(z?)$",
"^VBROADCASTS(D|S)Z256rmk(z?)$",
"^VMOV(A|U)P(D|S)Z256rmk(z?)$",
"^VMOV(D|SH|SL)DUPZ256rmk(z?)$",
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 26e290a2250c..e733d9ac74dd 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -1547,19 +1547,19 @@ def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
"VBROADCASTF32X2Z256rm(b?)",
"VBROADCASTF32X2Zrm(b?)",
"VBROADCASTF32X4Z256rm(b?)",
- "VBROADCASTF32X4rm(b?)",
- "VBROADCASTF32X8rm(b?)",
- "VBROADCASTF64X2Z128rm(b?)",
- "VBROADCASTF64X2rm(b?)",
- "VBROADCASTF64X4rm(b?)",
+ "VBROADCASTF32X4Zrm(b?)",
+ "VBROADCASTF32X8Zrm(b?)",
+ "VBROADCASTF64X2Z256rm(b?)",
+ "VBROADCASTF64X2Zrm(b?)",
+ "VBROADCASTF64X4Zrm(b?)",
"VBROADCASTI32X2Z256rm(b?)",
"VBROADCASTI32X2Zrm(b?)",
"VBROADCASTI32X4Z256rm(b?)",
- "VBROADCASTI32X4rm(b?)",
- "VBROADCASTI32X8rm(b?)",
- "VBROADCASTI64X2Z128rm(b?)",
- "VBROADCASTI64X2rm(b?)",
- "VBROADCASTI64X4rm(b?)",
+ "VBROADCASTI32X4Zrm(b?)",
+ "VBROADCASTI32X8Zrm(b?)",
+ "VBROADCASTI64X2Z256rm(b?)",
+ "VBROADCASTI64X2Zrm(b?)",
+ "VBROADCASTI64X4Zrm(b?)",
"VBROADCASTSD(Z|Z256)rm(b?)",
"VBROADCASTSS(Z|Z256)rm(b?)",
"VINSERTF32x4(Z|Z256)rm(b?)",
diff --git a/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp b/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp
index b0ce624a495f..83b1cfca529b 100644
--- a/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp
+++ b/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp
@@ -45,9 +45,9 @@ class XtensaAsmParser : public MCTargetAsmParser {
ParseStatus parseDirective(AsmToken DirectiveID) override;
bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
@@ -425,7 +425,7 @@ bool XtensaAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
return true;
}
-bool XtensaAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+bool XtensaAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -730,7 +730,7 @@ bool XtensaAsmParser::ParseInstructionWithSR(ParseInstructionInfo &Info,
return false;
}
-bool XtensaAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+bool XtensaAsmParser::parseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
if (Name.starts_with("wsr") || Name.starts_with("rsr") ||
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 5c9faa9449f5..ea51d7790457 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2323,6 +2323,11 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
auto *DestVecTy = cast<FixedVectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
+ // if the int input is just an undef value do not try to optimize to vector
+ // insertions as it will prevent undef propagation
+ if (isa<UndefValue>(IntInput))
+ return nullptr;
+
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
if (!collectInsertionElements(IntInput, 0, Elements,
DestVecTy->getElementType(),
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 5cdfeada7f0a..80d6ceca094d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2227,18 +2227,24 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
return NewC ? new ICmpInst(Pred, X, NewC) : nullptr;
}
-/// Fold icmp (shl 1, Y), C.
-static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
- const APInt &C) {
+/// Fold icmp (shl nuw C2, Y), C.
+static Instruction *foldICmpShlLHSC(ICmpInst &Cmp, Instruction *Shl,
+ const APInt &C) {
Value *Y;
- if (!match(Shl, m_Shl(m_One(), m_Value(Y))))
+ const APInt *C2;
+ if (!match(Shl, m_NUWShl(m_APInt(C2), m_Value(Y))))
return nullptr;
Type *ShiftType = Shl->getType();
unsigned TypeBits = C.getBitWidth();
- bool CIsPowerOf2 = C.isPowerOf2();
ICmpInst::Predicate Pred = Cmp.getPredicate();
if (Cmp.isUnsigned()) {
+ if (C2->isZero() || C2->ugt(C))
+ return nullptr;
+ APInt Div, Rem;
+ APInt::udivrem(C, *C2, Div, Rem);
+ bool CIsPowerOf2 = Rem.isZero() && Div.isPowerOf2();
+
// (1 << Y) pred C -> Y pred Log2(C)
if (!CIsPowerOf2) {
// (1 << Y) < 30 -> Y <= 4
@@ -2251,9 +2257,9 @@ static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
Pred = ICmpInst::ICMP_UGT;
}
- unsigned CLog2 = C.logBase2();
+ unsigned CLog2 = Div.logBase2();
return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
- } else if (Cmp.isSigned()) {
+ } else if (Cmp.isSigned() && C2->isOne()) {
Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
// (1 << Y) > 0 -> Y != 31
// (1 << Y) > C -> Y != 31 if C is negative.
@@ -2307,7 +2313,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
const APInt *ShiftAmt;
if (!match(Shl->getOperand(1), m_APInt(ShiftAmt)))
- return foldICmpShlOne(Cmp, Shl, C);
+ return foldICmpShlLHSC(Cmp, Shl, C);
// Check that the shift amount is in range. If not, don't perform undefined
// shifts. When the shift is visited, it will be simplified.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index a051a568bfd6..da6f991ad4cd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -634,6 +634,11 @@ public:
Instruction *foldPHIArgZextsIntoPHI(PHINode &PN);
Instruction *foldPHIArgIntToPtrToPHI(PHINode &PN);
+ /// If the phi is within a phi web, which is formed by the def-use chain
+ /// of phis and all the phis in the web are only used in the other phis.
+ /// In this case, these phis are dead and we will remove all of them.
+ bool foldDeadPhiWeb(PHINode &PN);
+
/// If an integer typed PHI has only one use which is an IntToPtr operation,
/// replace the PHI with an existing pointer typed PHI if it exists. Otherwise
/// insert a new pointer typed PHI and replace the original one.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index bcff9a72b657..cb5c44730512 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -53,6 +53,34 @@ void InstCombinerImpl::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) {
}
}
+/// If the phi is within a phi web, which is formed by the def-use chain
+/// of phis and all the phis in the web are only used in the other phis.
+/// In this case, these phis are dead and we will remove all of them.
+bool InstCombinerImpl::foldDeadPhiWeb(PHINode &PN) {
+ SmallVector<PHINode *, 16> Stack;
+ SmallPtrSet<PHINode *, 16> Visited;
+ Stack.push_back(&PN);
+ while (!Stack.empty()) {
+ PHINode *Phi = Stack.pop_back_val();
+ if (!Visited.insert(Phi).second)
+ continue;
+ // Early stop if the set of PHIs is large
+ if (Visited.size() == 16)
+ return false;
+ for (User *Use : Phi->users()) {
+ if (PHINode *PhiUse = dyn_cast<PHINode>(Use))
+ Stack.push_back(PhiUse);
+ else
+ return false;
+ }
+ }
+ for (PHINode *Phi : Visited)
+ replaceInstUsesWith(*Phi, PoisonValue::get(Phi->getType()));
+ for (PHINode *Phi : Visited)
+ eraseInstFromFunction(*Phi);
+ return true;
+}
+
// Replace Integer typed PHI PN if the PHI's value is used as a pointer value.
// If there is an existing pointer typed PHI that produces the same value as PN,
// replace PN and the IntToPtr operation with it. Otherwise, synthesize a new
@@ -976,26 +1004,6 @@ Instruction *InstCombinerImpl::foldPHIArgOpIntoPHI(PHINode &PN) {
return NewCI;
}
-/// Return true if this PHI node is only used by a PHI node cycle that is dead.
-static bool isDeadPHICycle(PHINode *PN,
- SmallPtrSetImpl<PHINode *> &PotentiallyDeadPHIs) {
- if (PN->use_empty()) return true;
- if (!PN->hasOneUse()) return false;
-
- // Remember this node, and if we find the cycle, return.
- if (!PotentiallyDeadPHIs.insert(PN).second)
- return true;
-
- // Don't scan crazily complex things.
- if (PotentiallyDeadPHIs.size() == 16)
- return false;
-
- if (PHINode *PU = dyn_cast<PHINode>(PN->user_back()))
- return isDeadPHICycle(PU, PotentiallyDeadPHIs);
-
- return false;
-}
-
/// Return true if this phi node is always equal to NonPhiInVal.
/// This happens with mutually cyclic phi nodes like:
/// z = some value; x = phi (y, z); y = phi (x, z)
@@ -1474,27 +1482,21 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
}
}
- // If this is a trivial cycle in the PHI node graph, remove it. Basically, if
- // this PHI only has a single use (a PHI), and if that PHI only has one use (a
- // PHI)... break the cycle.
+ if (foldDeadPhiWeb(PN))
+ return nullptr;
+
+ // Optimization when the phi only has one use
if (PN.hasOneUse()) {
if (foldIntegerTypedPHI(PN))
return nullptr;
- Instruction *PHIUser = cast<Instruction>(PN.user_back());
- if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) {
- SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;
- PotentiallyDeadPHIs.insert(&PN);
- if (isDeadPHICycle(PU, PotentiallyDeadPHIs))
- return replaceInstUsesWith(PN, PoisonValue::get(PN.getType()));
- }
-
// If this phi has a single use, and if that use just computes a value for
// the next iteration of a loop, delete the phi. This occurs with unused
// induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
// common case here is good because the only other things that catch this
// are induction variable analysis (sometimes) and ADCE, which is only run
// late.
+ Instruction *PHIUser = cast<Instruction>(PN.user_back());
if (PHIUser->hasOneUse() &&
(isa<BinaryOperator>(PHIUser) || isa<UnaryOperator>(PHIUser) ||
isa<GetElementPtrInst>(PHIUser)) &&
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index fc78d8c60ec0..e76689e2f5f0 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Analysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
@@ -34,6 +35,7 @@
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <deque>
using namespace llvm;
@@ -51,6 +53,11 @@ class ProfileAnnotator final {
class BBInfo {
std::optional<uint64_t> Count;
+ // OutEdges is dimensioned to match the number of terminator operands.
+ // Entries in the vector match the index in the terminator operand list. In
+ // some cases - see `shouldExcludeEdge` and its implementation - an entry
+ // will be nullptr.
+ // InEdges doesn't have the above constraint.
SmallVector<EdgeInfo *> OutEdges;
SmallVector<EdgeInfo *> InEdges;
size_t UnknownCountOutEdges = 0;
@@ -58,22 +65,30 @@ class ProfileAnnotator final {
// Pass AssumeAllKnown when we try to propagate counts from edges to BBs -
// because all the edge counters must be known.
- uint64_t getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
- bool AssumeAllKnown) const {
- uint64_t Sum = 0;
- for (const auto *E : Edges)
- if (E)
- Sum += AssumeAllKnown ? *E->Count : E->Count.value_or(0U);
+ // Return std::nullopt if there were no edges to sum. The user can decide
+ // how to interpret that.
+ std::optional<uint64_t> getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
+ bool AssumeAllKnown) const {
+ std::optional<uint64_t> Sum;
+ for (const auto *E : Edges) {
+ // `Edges` may be `OutEdges`, case in which `E` could be nullptr.
+ if (E) {
+ if (!Sum.has_value())
+ Sum = 0;
+ *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U));
+ }
+ }
return Sum;
}
- void computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
+ bool computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
assert(!Count.has_value());
Count = getEdgeSum(Edges, true);
+ return Count.has_value();
}
void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
- uint64_t KnownSum = getEdgeSum(Edges, false);
+ uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U);
uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;
EdgeInfo *E = nullptr;
for (auto *I : Edges)
@@ -110,17 +125,15 @@ class ProfileAnnotator final {
}
bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {
- if (!succ_empty(&BB) && !UnknownCountOutEdges) {
- computeCountFrom(OutEdges);
- return true;
+ if (!UnknownCountOutEdges) {
+ return computeCountFrom(OutEdges);
}
return false;
}
bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {
- if (!BB.isEntryBlock() && !UnknownCountInEdges) {
- computeCountFrom(InEdges);
- return true;
+ if (!UnknownCountInEdges) {
+ return computeCountFrom(InEdges);
}
return false;
}
@@ -178,7 +191,7 @@ class ProfileAnnotator final {
bool KeepGoing = true;
while (KeepGoing) {
KeepGoing = false;
- for (const auto &BB : reverse(F)) {
+ for (const auto &BB : F) {
auto &Info = getBBInfo(BB);
if (!Info.hasCount())
KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) ||
@@ -198,6 +211,52 @@ class ProfileAnnotator final {
BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }
+ const BBInfo &getBBInfo(const BasicBlock &BB) const {
+ return BBInfos.find(&BB)->second;
+ }
+
+ // validation function after we propagate the counters: all BBs and edges'
+ // counters must have a value.
+ bool allCountersAreAssigned() const {
+ for (const auto &BBInfo : BBInfos)
+ if (!BBInfo.second.hasCount())
+ return false;
+ for (const auto &EdgeInfo : EdgeInfos)
+ if (!EdgeInfo.Count.has_value())
+ return false;
+ return true;
+ }
+
+ /// Check that all paths from the entry basic block that use edges with
+ /// non-zero counts arrive at a basic block with no successors (i.e. "exit")
+ bool allTakenPathsExit() const {
+ std::deque<const BasicBlock *> Worklist;
+ DenseSet<const BasicBlock *> Visited;
+ Worklist.push_back(&F.getEntryBlock());
+ Visited.insert(&F.getEntryBlock());
+ while (!Worklist.empty()) {
+ const auto *BB = Worklist.front();
+ Worklist.pop_front();
+ if (succ_size(BB) <= 1)
+ continue;
+ const auto &BBInfo = getBBInfo(*BB);
+ bool Inserted = false;
+ for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) {
+ const auto *Succ = BB->getTerminator()->getSuccessor(I);
+ if (!shouldExcludeEdge(*BB, *Succ)) {
+ if (BBInfo.getEdgeCount(I) > 0)
+ if (Visited.insert(Succ).second) {
+ Worklist.push_back(Succ);
+ Inserted = true;
+ }
+ }
+ }
+ if (!Inserted)
+ return false;
+ }
+ return true;
+ }
+
public:
ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
InstrProfSummaryBuilder &PB)
@@ -216,6 +275,9 @@ public:
"profile is managed by IPO transforms");
(void)Index;
Count = Counters[Ins->getIndex()->getZExtValue()];
+ } else if (isa<UnreachableInst>(BB.getTerminator())) {
+ // The program presumably didn't crash.
+ Count = 0;
}
auto [It, Ins] =
BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});
@@ -268,14 +330,16 @@ public:
PB.addInternalCount(EdgeCount);
}
- if (MaxCount == 0)
- F.getContext().emitError(
- "[ctx-prof] Encountered a BB with more than one successor, where "
- "all outgoing edges have a 0 count. This occurs in non-exiting "
- "functions (message pumps, usually) which are not supported in the "
- "contextual profiling case");
- setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
+ if (MaxCount != 0)
+ setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
}
+ assert(allCountersAreAssigned() &&
+ "Expected all counters have been assigned.");
+ assert(allTakenPathsExit() &&
+ "[ctx-prof] Encountered a BB with more than one successor, where "
+ "all outgoing edges have a 0 count. This occurs in non-exiting "
+ "functions (message pumps, usually) which are not supported in the "
+ "contextual profiling case");
}
};
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index b0da19813f0a..f64488832875 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1270,6 +1270,18 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
Changed |= setOnlyWritesMemory(F);
Changed |= setWillReturn(F);
break;
+ case LibFunc_sincos:
+ case LibFunc_sincosf:
+ case LibFunc_sincosl:
+ Changed |= setDoesNotThrow(F);
+ Changed |= setDoesNotFreeMemory(F);
+ Changed |= setOnlyWritesMemory(F);
+ Changed |= setOnlyWritesMemory(F, 1);
+ Changed |= setOnlyWritesMemory(F, 2);
+ Changed |= setDoesNotCapture(F, 1);
+ Changed |= setDoesNotCapture(F, 2);
+ Changed |= setWillReturn(F);
+ break;
default:
// FIXME: It'd be really nice to cover all the library functions we're
// aware of here.
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index c63618d9dd12..09461e65e2dc 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7885,6 +7885,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
case Instruction::Call:
case Instruction::CallBr:
case Instruction::Invoke:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
+ // implemented to avoid code complexity as it is unclear how useful such
+ // logic is.
+ case Instruction::SDiv:
+ case Instruction::SRem:
return true;
}
});
@@ -7986,6 +7993,9 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
}
}
}
+ // Div/Rem by zero is immediate UB
+ if (match(Use, m_BinOp(m_Value(), m_Specific(I))) && Use->isIntDivRem())
+ return true;
}
return false;
}
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 917f81863cf6..6799d333fb28 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2796,6 +2796,35 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
return copyFlags(*CI, FabsCall);
}
+Value *LibCallSimplifier::optimizeFMod(CallInst *CI, IRBuilderBase &B) {
+ SimplifyQuery SQ(DL, TLI, DT, AC, CI, true, true, DC);
+
+ // fmod(x,y) can set errno if y == 0 or x == +/-inf, and returns Nan in those
+ // case. If we know those do not happen, then we can convert the fmod into
+ // frem.
+ bool IsNoNan = CI->hasNoNaNs();
+ if (!IsNoNan) {
+ KnownFPClass Known0 = computeKnownFPClass(CI->getOperand(0), fcInf,
+ /*Depth=*/0, SQ);
+ if (Known0.isKnownNeverInfinity()) {
+ KnownFPClass Known1 =
+ computeKnownFPClass(CI->getOperand(1), fcZero | fcSubnormal,
+ /*Depth=*/0, SQ);
+ Function *F = CI->getParent()->getParent();
+ if (Known1.isKnownNeverLogicalZero(*F, CI->getType()))
+ IsNoNan = true;
+ }
+ }
+
+ if (IsNoNan) {
+ Value *FRem = B.CreateFRemFMF(CI->getOperand(0), CI->getOperand(1), CI);
+ if (auto *FRemI = dyn_cast<Instruction>(FRem))
+ FRemI->setHasNoNaNs(true);
+ return FRem;
+ }
+ return nullptr;
+}
+
Value *LibCallSimplifier::optimizeTrigInversionPairs(CallInst *CI,
IRBuilderBase &B) {
Module *M = CI->getModule();
@@ -3945,6 +3974,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
case LibFunc_sqrt:
case LibFunc_sqrtl:
return optimizeSqrt(CI, Builder);
+ case LibFunc_fmod:
+ case LibFunc_fmodf:
+ case LibFunc_fmodl:
+ return optimizeFMod(CI, Builder);
case LibFunc_logf:
case LibFunc_log:
case LibFunc_logl:
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index 59d04ac3cecd..f33906b05fed 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_component_library(LLVMVectorize
LoopVectorize.cpp
SandboxVectorizer/DependencyGraph.cpp
SandboxVectorizer/Passes/BottomUpVec.cpp
+ SandboxVectorizer/Region.cpp
SandboxVectorizer/SandboxVectorizer.cpp
SLPVectorizer.cpp
Vectorize.cpp
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0fa7c2add1fa..9fb684427cfe 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -905,15 +905,6 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF) {
return B.CreateElementCount(Ty, VF);
}
-const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
- Loop *OrigLoop) {
- const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
- assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && "Invalid loop count");
-
- ScalarEvolution &SE = *PSE.getSE();
- return SE.getTripCountFromExitCount(BackedgeTakenCount, IdxTy, OrigLoop);
-}
-
void reportVectorizationFailure(const StringRef DebugMsg,
const StringRef OREMsg, const StringRef ORETag,
OptimizationRemarkEmitter *ORE, Loop *TheLoop,
@@ -4750,7 +4741,10 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
if (!MainLoopVF.isScalable() && !NextVF.Width.isScalable()) {
// TODO: extend to support scalable VFs.
if (!RemainingIterations) {
- const SCEV *TC = createTripCountSCEV(TCType, PSE, OrigLoop);
+ const SCEV *TC = vputils::getSCEVExprForVPValue(
+ getPlanFor(NextVF.Width).getTripCount(), SE);
+ assert(!isa<SCEVCouldNotCompute>(TC) &&
+ "Trip count SCEV must be computable");
RemainingIterations = SE.getURemExpr(
TC, SE.getConstant(TCType, MainLoopVF.getKnownMinValue() * IC));
}
@@ -8863,10 +8857,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
return !CM.requiresScalarEpilogue(VF.isVector());
},
Range);
- VPlanPtr Plan = VPlan::createInitialVPlan(
- createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop),
- *PSE.getSE(), RequiresScalarEpilogueCheck, CM.foldTailByMasking(),
- OrigLoop);
+ VPlanPtr Plan = VPlan::createInitialVPlan(Legal->getWidestInductionType(),
+ PSE, RequiresScalarEpilogueCheck,
+ CM.foldTailByMasking(), OrigLoop);
// Don't use getDecisionAndClampRange here, because we don't know the UF
// so this function is better to be conservative, rather than to split
@@ -9081,9 +9074,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
// Create new empty VPlan
- auto Plan = VPlan::createInitialVPlan(
- createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop),
- *PSE.getSE(), true, false, OrigLoop);
+ auto Plan = VPlan::createInitialVPlan(Legal->getWidestInductionType(), PSE,
+ true, false, OrigLoop);
// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index c4870b70fd52..0c44d05f0474 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -7,7 +7,58 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h"
+#include "llvm/ADT/SmallVector.h"
using namespace llvm::sandboxir;
-bool BottomUpVec::runOnFunction(Function &F) { return false; }
+namespace llvm::sandboxir {
+// TODO: This is a temporary function that returns some seeds.
+// Replace this with SeedCollector's function when it lands.
+static llvm::SmallVector<Value *, 4> collectSeeds(BasicBlock &BB) {
+ llvm::SmallVector<Value *, 4> Seeds;
+ for (auto &I : BB)
+ if (auto *SI = llvm::dyn_cast<StoreInst>(&I))
+ Seeds.push_back(SI);
+ return Seeds;
+}
+
+static SmallVector<Value *, 4> getOperand(ArrayRef<Value *> Bndl,
+ unsigned OpIdx) {
+ SmallVector<Value *, 4> Operands;
+ for (Value *BndlV : Bndl) {
+ auto *BndlI = cast<Instruction>(BndlV);
+ Operands.push_back(BndlI->getOperand(OpIdx));
+ }
+ return Operands;
+}
+
+} // namespace llvm::sandboxir
+
+void BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl) {
+ auto LegalityRes = Legality.canVectorize(Bndl);
+ switch (LegalityRes.getSubclassID()) {
+ case LegalityResultID::Widen: {
+ auto *I = cast<Instruction>(Bndl[0]);
+ for (auto OpIdx : seq<unsigned>(I->getNumOperands())) {
+ auto OperandBndl = getOperand(Bndl, OpIdx);
+ vectorizeRec(OperandBndl);
+ }
+ break;
+ }
+ }
+}
+
+void BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) { vectorizeRec(Bndl); }
+
+bool BottomUpVec::runOnFunction(Function &F) {
+ Change = false;
+ // TODO: Start from innermost BBs first
+ for (auto &BB : F) {
+ // TODO: Replace with proper SeedCollector function.
+ auto Seeds = collectSeeds(BB);
+ // TODO: Slice Seeds into smaller chunks.
+ if (Seeds.size() >= 2)
+ tryVectorize(Seeds);
+ }
+ return Change;
+}
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp
new file mode 100644
index 000000000000..34aa9f3786f3
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Region.cpp
@@ -0,0 +1,45 @@
+//===- Region.cpp ---------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Region.h"
+
+namespace llvm::sandboxir {
+
+Region::Region(Context &Ctx) : Ctx(Ctx) {
+ static unsigned StaticRegionID;
+ RegionID = StaticRegionID++;
+}
+
+Region::~Region() {}
+
+void Region::add(Instruction *I) { Insts.insert(I); }
+
+void Region::remove(Instruction *I) { Insts.remove(I); }
+
+#ifndef NDEBUG
+bool Region::operator==(const Region &Other) const {
+ if (Insts.size() != Other.Insts.size())
+ return false;
+ if (!std::is_permutation(Insts.begin(), Insts.end(), Other.Insts.begin()))
+ return false;
+ return true;
+}
+
+void Region::dump(raw_ostream &OS) const {
+ OS << "RegionID: " << getID() << "\n";
+ for (auto *I : Insts)
+ OS << *I << "\n";
+}
+
+void Region::dump() const {
+ dump(dbgs());
+ dbgs() << "\n";
+}
+#endif // NDEBUG
+
+} // namespace llvm::sandboxir
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 41e281f3fa99..2169d78542cb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -869,14 +869,23 @@ static VPIRBasicBlock *createVPIRBasicBlockFor(BasicBlock *BB) {
return VPIRBB;
}
-VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE,
+VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
+ PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck,
bool TailFolded, Loop *TheLoop) {
VPIRBasicBlock *Entry = createVPIRBasicBlockFor(TheLoop->getLoopPreheader());
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
+
+ // Create SCEV and VPValue for the trip count.
+ const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
+ assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && "Invalid loop count");
+ ScalarEvolution &SE = *PSE.getSE();
+ const SCEV *TripCount =
+ SE.getTripCountFromExitCount(BackedgeTakenCount, InductionTy, TheLoop);
Plan->TripCount =
vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE);
+
// Create VPRegionBlock, with empty header and latch blocks, to be filled
// during processing later.
VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index eac4fe8ce8b0..9b9e710ddc88 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -83,9 +83,6 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
int64_t Step);
-const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
- Loop *CurLoop = nullptr);
-
/// A helper function that returns the reciprocal of the block probability of
/// predicated blocks. If we return X, we are assuming the predicated block
/// will execute once for every X iterations of the loop header.
@@ -3477,8 +3474,10 @@ public:
/// middle VPBasicBlock. If a check is needed to guard executing the scalar
/// epilogue loop, it will be added to the middle block, together with
/// VPBasicBlocks for the scalar preheader and exit blocks.
- static VPlanPtr createInitialVPlan(const SCEV *TripCount,
- ScalarEvolution &PSE,
+ /// \p InductionTy is the type of the canonical induction and used for related
+ /// values, like the trip count expression.
+ static VPlanPtr createInitialVPlan(Type *InductionTy,
+ PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck,
bool TailFolded, Loop *TheLoop);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index f091ee5a71b2..277df0637372 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -11,6 +11,7 @@
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/GenericDomTreeConstruction.h"
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9068ccf519c5..c077e2b4eac5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1353,9 +1353,9 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << Indent << "WIDEN-VP ";
+ O << Indent << "WIDEN ";
printAsOperand(O, SlotTracker);
- O << " = " << Instruction::getOpcodeName(getOpcode());
+ O << " = vp." << Instruction::getOpcodeName(getOpcode());
printFlags(O);
printOperands(O, SlotTracker);
}
@@ -2941,10 +2941,9 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
"scalar step must be the same across all parts");
Value *GEP = State.Builder.CreateGEP(
State.Builder.getInt8Ty(), NewPointerPhi,
- State.Builder.CreateMul(
- StartOffset,
- State.Builder.CreateVectorSplat(State.VF, ScalarStepValue),
- "vector.gep"));
+ State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
+ State.VF, ScalarStepValue)),
+ "vector.gep");
State.set(this, GEP, Part);
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 1d8455001001..edcd7d26e60d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -685,10 +685,11 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
m_BranchOnCond(m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue())))))
return;
- Type *IdxTy =
- Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType();
- const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE);
ScalarEvolution &SE = *PSE.getSE();
+ const SCEV *TripCount =
+ vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
+ assert(!isa<SCEVCouldNotCompute>(TripCount) &&
+ "Trip count SCEV must be computable");
ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
if (TripCount->isZero() ||
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index c18bea4f4c59..414f8866d24f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -8,6 +8,7 @@
#include "VPlanUtils.h"
#include "VPlanPatternMatch.h"
+#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;
@@ -60,3 +61,14 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
return match(V, m_Binary<Instruction::ICmp>(m_VPValue(A), m_VPValue(B))) &&
IsWideCanonicalIV(A) && B == Plan.getOrCreateBackedgeTakenCount();
}
+
+const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
+ if (V->isLiveIn())
+ return SE.getSCEV(V->getLiveInIRValue());
+
+ // TODO: Support constructing SCEVs for more recipes as needed.
+ return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe())
+ .Case<VPExpandSCEVRecipe>(
+ [](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
+ .Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index fc11208a4339..7b5d4300655f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -11,6 +11,11 @@
#include "VPlan.h"
+namespace llvm {
+class ScalarEvolution;
+class SCEV;
+} // namespace llvm
+
namespace llvm::vputils {
/// Returns true if only the first lane of \p Def is used.
bool onlyFirstLaneUsed(const VPValue *Def);
@@ -26,6 +31,10 @@ bool onlyFirstPartUsed(const VPValue *Def);
VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
ScalarEvolution &SE);
+/// Return the SCEV expression for \p V. Returns SCEVCouldNotCompute if no
+/// SCEV expression could be constructed.
+const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
+
/// Returns true if \p VPV is uniform after vectorization.
inline bool isUniformAfterVectorization(const VPValue *VPV) {
// A value defined outside the vector region must be uniform after
diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll
index dc501b82417d..b4740f223eca 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fround.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll
@@ -233,10 +233,10 @@ define void @trunc_fp16() {
;
; ZVFHMIN-LABEL: 'trunc_fp16'
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call half @llvm.trunc.f16(half undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %2 = call <2 x half> @llvm.trunc.v2f16(<2 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %3 = call <4 x half> @llvm.trunc.v4f16(<4 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %4 = call <8 x half> @llvm.trunc.v8f16(<8 x half> undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %5 = call <16 x half> @llvm.trunc.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.trunc.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.trunc.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.trunc.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.trunc.v16f16(<16 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> undef)
@@ -1108,10 +1108,10 @@ define void @vp_roundtozero_f16() {
; ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVFHMIN-LABEL: 'vp_roundtozero_f16'
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %1 = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %3 = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
-; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %4 = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> undef, <2 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> undef, <4 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> undef, <8 x i1> undef, i32 undef)
+; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> undef, <16 x i1> undef, i32 undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x i1> undef, i32 undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.vp.roundtozero.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x i1> undef, i32 undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.vp.roundtozero.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> undef, i32 undef)
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
new file mode 100644
index 000000000000..7eea1c36afc3
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
@@ -0,0 +1,55 @@
+; Check that flattened profile lowering handles cold subgraphs that end in "unreachable"
+; RUN: split-file %s %t
+; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
+; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s
+
+; CHECK-LABEL: entry:
+; CHECK: br i1 %t, label %yes, label %no, !prof ![[C1:[0-9]+]]
+; CHECK-LABEL: no:
+; CHECK-NOT: !prof
+; CHECK-LABEL: no1:
+; CHECK-NOT: !prof
+; CHECK-LABEL: no2:
+; CHECK-NOT: !prof
+; CHECK-LABEL: yes:
+; CHECK: br i1 %t3, label %yes1, label %yes2, !prof ![[C1]]
+; CHECK-NOT: !prof
+; CHECK: ![[C1]] = !{!"branch_weights", i32 6, i32 0}
+
+;--- example.ll
+define void @f1(i32 %cond) !guid !0 {
+entry:
+ call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 0)
+ %t = icmp eq i32 %cond, 1
+ br i1 %t, label %yes, label %no
+
+no:
+ %t2 = icmp eq i32 %cond, 2
+ br i1 %t2, label %no1, label %no2
+no1:
+ unreachable
+no2:
+ call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 1)
+ unreachable
+yes:
+ %t3 = icmp eq i32 %cond, 3
+ br i1 %t3, label %yes1, label %yes2
+yes1:
+ br label %exit
+yes2:
+ call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 2)
+ %t4 = icmp eq i32 %cond, 4
+ br i1 %t4, label %yes3, label %yes4
+yes3:
+ br label %exit
+yes4:
+ call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 3)
+ unreachable
+exit:
+ ret void
+}
+
+!0 = !{i64 1234}
+
+;--- profile.json
+[{"Guid":1234, "Counters":[6,0,0,0]}]
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll
index 3135addec161..c68138acc9b2 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll
@@ -965,11 +965,7 @@ define amdgpu_kernel void @v_insertelement_v8bf16_3(ptr addrspace(1) %out, ptr a
; GFX900-NEXT: v_mov_b32_e32 v5, 0x5040100
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3]
-; GFX900-NEXT: s_mov_b32 s2, 0xffff
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_bfi_b32 v3, s2, v3, v3
-; GFX900-NEXT: v_bfi_b32 v2, s2, v2, v2
-; GFX900-NEXT: v_bfi_b32 v0, s2, v0, v0
; GFX900-NEXT: v_perm_b32 v1, s4, v1, v5
; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX900-NEXT: s_endpgm
@@ -980,14 +976,10 @@ define amdgpu_kernel void @v_insertelement_v8bf16_3(ptr addrspace(1) %out, ptr a
; GFX940-NEXT: s_load_dword s0, s[2:3], 0x10
; GFX940-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX940-NEXT: v_lshlrev_b32_e32 v4, 4, v0
-; GFX940-NEXT: s_mov_b32 s1, 0xffff
+; GFX940-NEXT: v_mov_b32_e32 v5, 0x5040100
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: global_load_dwordx4 v[0:3], v4, s[6:7]
-; GFX940-NEXT: v_mov_b32_e32 v5, 0x5040100
; GFX940-NEXT: s_waitcnt vmcnt(0)
-; GFX940-NEXT: v_bfi_b32 v3, s1, v3, v3
-; GFX940-NEXT: v_bfi_b32 v2, s1, v2, v2
-; GFX940-NEXT: v_bfi_b32 v0, s1, v0, v0
; GFX940-NEXT: v_perm_b32 v1, s0, v1, v5
; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] sc0 sc1
; GFX940-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
index dab4c9d40140..d21dbd290acc 100644
--- a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
+++ b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
@@ -73,13 +73,13 @@ body: |
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[S_ADD_U:%[0-9]+]]:sreg_64 = S_ADD_U64_PSEUDO [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_ADD_U]].sub0, implicit $exec
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_ADD_U]], implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: successors: %bb.3(0x80000000)
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B64_e32_]].sub0, %bb.3, [[COPY2]], %bb.1
+ ; GCN-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B64_e32_]].sub0, %bb.3, [[COPY2]].sub0, %bb.1
; GCN-NEXT: S_BRANCH %bb.3
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.3:
diff --git a/llvm/test/CodeGen/DirectX/countbits.ll b/llvm/test/CodeGen/DirectX/countbits.ll
new file mode 100644
index 000000000000..c6bc2b679094
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/countbits.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Make sure dxil operation function calls for countbits are generated for all integer types.
+
+define noundef i16 @test_countbits_short(i16 noundef %a) {
+entry:
+; CHECK: call i16 @dx.op.unary.i16(i32 31, i16 %{{.*}})
+ %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+ ret i16 %elt.ctpop
+}
+
+define noundef i32 @test_countbits_int(i32 noundef %a) {
+entry:
+; CHECK: call i32 @dx.op.unary.i32(i32 31, i32 %{{.*}})
+ %elt.ctpop = call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %elt.ctpop
+}
+
+define noundef i64 @test_countbits_long(i64 noundef %a) {
+entry:
+; CHECK: call i64 @dx.op.unary.i64(i32 31, i64 %{{.*}})
+ %elt.ctpop = call i64 @llvm.ctpop.i64(i64 %a)
+ ret i64 %elt.ctpop
+}
+
+define noundef <4 x i32> @countbits_vec4_i32(<4 x i32> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee3]])
+ ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3
+ %2 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+ ret <4 x i32> %2
+}
+
+declare i16 @llvm.ctpop.i16(i16)
+declare i32 @llvm.ctpop.i32(i32)
+declare i64 @llvm.ctpop.i64(i64)
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
diff --git a/llvm/test/CodeGen/NVPTX/vector-loads.ll b/llvm/test/CodeGen/NVPTX/vector-loads.ll
index 9322b9e0fe6c..f582ebc166dd 100644
--- a/llvm/test/CodeGen/NVPTX/vector-loads.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-loads.ll
@@ -198,3 +198,12 @@ define void @extv8f16_generic_a4(ptr noalias readonly align 16 %dst, ptr noalias
!1 = !{i32 0, i32 64}
+
+; CHECK-LABEL: bf16_v4_align_load_store
+define dso_local void @bf16_v4_align_load_store(ptr noundef %0, ptr noundef %1) #0 {
+ ; CHECK: ld.v4.b16
+ ; CHECK: st.v4.b16
+ %3 = load <4 x bfloat>, ptr %1, align 8
+ store <4 x bfloat> %3, ptr %0, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll
new file mode 100644
index 000000000000..ad461f8f24b9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-intrinsics.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel -mattr=+d \
+; RUN: -verify-machineinstrs -target-abi=ilp32d \
+; RUN: | FileCheck -check-prefixes=CHECKIFD,RV32IFD %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -mattr=+d \
+; RUN: -verify-machineinstrs -target-abi=lp64d \
+; RUN: | FileCheck -check-prefixes=CHECKIFD,RV64IFD %s
+
+declare double @llvm.sqrt.f64(double)
+
+define double @sqrt_f64(double %a) nounwind {
+; CHECKIFD-LABEL: sqrt_f64:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fsqrt.d fa0, fa0
+; CHECKIFD-NEXT: ret
+ %1 = call double @llvm.sqrt.f64(double %a)
+ ret double %1
+}
+
+declare double @llvm.fma.f64(double, double, double)
+
+define double @fma_f64(double %a, double %b, double %c) nounwind {
+; CHECKIFD-LABEL: fma_f64:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2
+; CHECKIFD-NEXT: ret
+ %1 = call double @llvm.fma.f64(double %a, double %b, double %c)
+ ret double %1
+}
+
+declare double @llvm.fmuladd.f64(double, double, double)
+
+define double @fmuladd_f64(double %a, double %b, double %c) nounwind {
+; CHECKIFD-LABEL: fmuladd_f64:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fmadd.d fa0, fa0, fa1, fa2
+; CHECKIFD-NEXT: ret
+ %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
+ ret double %1
+}
+
+declare double @llvm.fabs.f64(double)
+
+define double @fabs_f64(double %a) nounwind {
+; CHECKIFD-LABEL: fabs_f64:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fabs.d fa0, fa0
+; CHECKIFD-NEXT: ret
+ %1 = call double @llvm.fabs.f64(double %a)
+ ret double %1
+}
+
+declare double @llvm.minnum.f64(double, double)
+
+define double @minnum_f64(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: minnum_f64:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fmin.d fa0, fa0, fa1
+; CHECKIFD-NEXT: ret
+ %1 = call double @llvm.minnum.f64(double %a, double %b)
+ ret double %1
+}
+
+declare double @llvm.maxnum.f64(double, double)
+
+define double @maxnum_f64(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: maxnum_f64:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fmax.d fa0, fa0, fa1
+; CHECKIFD-NEXT: ret
+ %1 = call double @llvm.maxnum.f64(double %a, double %b)
+ ret double %1
+}
+
+declare double @llvm.copysign.f64(double, double)
+
+define double @copysign_f64(double %a, double %b) nounwind {
+; CHECKIFD-LABEL: copysign_f64:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fsgnj.d fa0, fa0, fa1
+; CHECKIFD-NEXT: ret
+ %1 = call double @llvm.copysign.f64(double %a, double %b)
+ ret double %1
+}
+
+declare double @llvm.floor.f64(double)
+
+define double @floor_f64(double %a) nounwind {
+; RV32IFD-LABEL: floor_f64:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: call floor
+; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: floor_f64:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: addi sp, sp, -16
+; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IFD-NEXT: call floor
+; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IFD-NEXT: addi sp, sp, 16
+; RV64IFD-NEXT: ret
+ %1 = call double @llvm.floor.f64(double %a)
+ ret double %1
+}
+
+declare double @llvm.ceil.f64(double)
+
+define double @ceil_f64(double %a) nounwind {
+; RV32IFD-LABEL: ceil_f64:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: call ceil
+; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: ceil_f64:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: addi sp, sp, -16
+; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IFD-NEXT: call ceil
+; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IFD-NEXT: addi sp, sp, 16
+; RV64IFD-NEXT: ret
+ %1 = call double @llvm.ceil.f64(double %a)
+ ret double %1
+}
+
+declare double @llvm.trunc.f64(double)
+
+define double @trunc_f64(double %a) nounwind {
+; RV32IFD-LABEL: trunc_f64:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: call trunc
+; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: trunc_f64:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: addi sp, sp, -16
+; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IFD-NEXT: call trunc
+; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IFD-NEXT: addi sp, sp, 16
+; RV64IFD-NEXT: ret
+ %1 = call double @llvm.trunc.f64(double %a)
+ ret double %1
+}
+
+declare double @llvm.rint.f64(double)
+
+define double @rint_f64(double %a) nounwind {
+; RV32IFD-LABEL: rint_f64:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: call rint
+; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: rint_f64:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: addi sp, sp, -16
+; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IFD-NEXT: call rint
+; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IFD-NEXT: addi sp, sp, 16
+; RV64IFD-NEXT: ret
+ %1 = call double @llvm.rint.f64(double %a)
+ ret double %1
+}
+
+declare double @llvm.nearbyint.f64(double)
+
+define double @nearbyint_f64(double %a) nounwind {
+; RV32IFD-LABEL: nearbyint_f64:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: call nearbyint
+; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: nearbyint_f64:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: addi sp, sp, -16
+; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IFD-NEXT: call nearbyint
+; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IFD-NEXT: addi sp, sp, 16
+; RV64IFD-NEXT: ret
+ %1 = call double @llvm.nearbyint.f64(double %a)
+ ret double %1
+}
+
+declare double @llvm.round.f64(double)
+
+define double @round_f64(double %a) nounwind {
+; RV32IFD-LABEL: round_f64:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: call round
+; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: round_f64:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: addi sp, sp, -16
+; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IFD-NEXT: call round
+; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IFD-NEXT: addi sp, sp, 16
+; RV64IFD-NEXT: ret
+ %1 = call double @llvm.round.f64(double %a)
+ ret double %1
+}
+
+declare double @llvm.roundeven.f64(double)
+
+define double @roundeven_f64(double %a) nounwind {
+; RV32IFD-LABEL: roundeven_f64:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: call roundeven
+; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: roundeven_f64:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: addi sp, sp, -16
+; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IFD-NEXT: call roundeven
+; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IFD-NEXT: addi sp, sp, 16
+; RV64IFD-NEXT: ret
+ %1 = call double @llvm.roundeven.f64(double %a)
+ ret double %1
+}
+
+declare i1 @llvm.is.fpclass.f64(double, i32)
+define i1 @isnan_d_fpclass(double %x) {
+; CHECKIFD-LABEL: isnan_d_fpclass:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: fclass.d a0, fa0
+; CHECKIFD-NEXT: andi a0, a0, 768
+; CHECKIFD-NEXT: snez a0, a0
+; CHECKIFD-NEXT: ret
+ %1 = call i1 @llvm.is.fpclass.f64(double %x, i32 3) ; nan
+ ret i1 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll
new file mode 100644
index 000000000000..39a5beb317ab
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-intrinsics.ll
@@ -0,0 +1,441 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel -mattr=+f \
+; RUN: -verify-machineinstrs -target-abi=ilp32f \
+; RUN: | FileCheck -check-prefix=RV32IF %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel -mattr=+d \
+; RUN: -verify-machineinstrs -target-abi=ilp32f \
+; RUN: | FileCheck -check-prefix=RV32IF %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -mattr=+f \
+; RUN: -verify-machineinstrs -target-abi=lp64f \
+; RUN: | FileCheck -check-prefix=RV64IF %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -mattr=+d \
+; RUN: -verify-machineinstrs -target-abi=lp64d \
+; RUN: | FileCheck -check-prefix=RV64IF %s
+
+define float @sqrt_f32(float %a) nounwind {
+; RV32IF-LABEL: sqrt_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fsqrt.s fa0, fa0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: sqrt_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fsqrt.s fa0, fa0
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.sqrt.f32(float %a)
+ ret float %1
+}
+
+define float @fma_f32(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fma_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fmadd.s fa0, fa0, fa1, fa2
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: fma_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fmadd.s fa0, fa0, fa1, fa2
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
+ ret float %1
+}
+
+define float @fmuladd_f32(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fmuladd_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fmadd.s fa0, fa0, fa1, fa2
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: fmuladd_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fmadd.s fa0, fa0, fa1, fa2
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
+ ret float %1
+}
+
+define float @fabs_f32(float %a) nounwind {
+; RV32IF-LABEL: fabs_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fabs.s fa0, fa0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: fabs_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fabs.s fa0, fa0
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.fabs.f32(float %a)
+ ret float %1
+}
+
+define float @minnum_f32(float %a, float %b) nounwind {
+; RV32IF-LABEL: minnum_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fmin.s fa0, fa0, fa1
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: minnum_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fmin.s fa0, fa0, fa1
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.minnum.f32(float %a, float %b)
+ ret float %1
+}
+
+define float @maxnum_f32(float %a, float %b) nounwind {
+; RV32IF-LABEL: maxnum_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fmax.s fa0, fa0, fa1
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: maxnum_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fmax.s fa0, fa0, fa1
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.maxnum.f32(float %a, float %b)
+ ret float %1
+}
+
+define float @copysign_f32(float %a, float %b) nounwind {
+; RV32IF-LABEL: copysign_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fsgnj.s fa0, fa0, fa1
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: copysign_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fsgnj.s fa0, fa0, fa1
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.copysign.f32(float %a, float %b)
+ ret float %1
+}
+
+define float @ceil_f32(float %a) nounwind {
+; RV32IF-LABEL: ceil_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: call ceilf
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: ceil_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: addi sp, sp, -16
+; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT: call ceilf
+; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: addi sp, sp, 16
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.ceil.f32(float %a)
+ ret float %1
+}
+
+define float @trunc_f32(float %a) nounwind {
+; RV32IF-LABEL: trunc_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: call truncf
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: trunc_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: addi sp, sp, -16
+; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT: call truncf
+; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: addi sp, sp, 16
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.trunc.f32(float %a)
+ ret float %1
+}
+
+define float @rint_f32(float %a) nounwind {
+; RV32IF-LABEL: rint_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: call rintf
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: rint_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: addi sp, sp, -16
+; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT: call rintf
+; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: addi sp, sp, 16
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.rint.f32(float %a)
+ ret float %1
+}
+
+define float @nearbyint_f32(float %a) nounwind {
+; RV32IF-LABEL: nearbyint_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: call nearbyintf
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: nearbyint_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: addi sp, sp, -16
+; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT: call nearbyintf
+; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: addi sp, sp, 16
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.nearbyint.f32(float %a)
+ ret float %1
+}
+
+define float @round_f32(float %a) nounwind {
+; RV32IF-LABEL: round_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: call roundf
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: round_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: addi sp, sp, -16
+; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT: call roundf
+; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: addi sp, sp, 16
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.round.f32(float %a)
+ ret float %1
+}
+
+define float @roundeven_f32(float %a) nounwind {
+; RV32IF-LABEL: roundeven_f32:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: call roundevenf
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: roundeven_f32:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: addi sp, sp, -16
+; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT: call roundevenf
+; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: addi sp, sp, 16
+; RV64IF-NEXT: ret
+ %1 = call float @llvm.roundeven.f32(float %a)
+ ret float %1
+}
+
+define i1 @fpclass(float %x) {
+; RV32IF-LABEL: fpclass:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fclass.s a0, fa0
+; RV32IF-NEXT: andi a0, a0, 927
+; RV32IF-NEXT: snez a0, a0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: fpclass:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fclass.s a0, fa0
+; RV64IF-NEXT: andi a0, a0, 927
+; RV64IF-NEXT: snez a0, a0
+; RV64IF-NEXT: ret
+ %cmp = call i1 @llvm.is.fpclass.f32(float %x, i32 639)
+ ret i1 %cmp
+}
+
+define i1 @isnan_fpclass(float %x) {
+; RV32IF-LABEL: isnan_fpclass:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fclass.s a0, fa0
+; RV32IF-NEXT: andi a0, a0, 768
+; RV32IF-NEXT: snez a0, a0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: isnan_fpclass:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fclass.s a0, fa0
+; RV64IF-NEXT: andi a0, a0, 768
+; RV64IF-NEXT: snez a0, a0
+; RV64IF-NEXT: ret
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan
+ ret i1 %1
+}
+
+define i1 @isqnan_fpclass(float %x) {
+; RV32IF-LABEL: isqnan_fpclass:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fclass.s a0, fa0
+; RV32IF-NEXT: andi a0, a0, 512
+; RV32IF-NEXT: snez a0, a0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: isqnan_fpclass:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fclass.s a0, fa0
+; RV64IF-NEXT: andi a0, a0, 512
+; RV64IF-NEXT: snez a0, a0
+; RV64IF-NEXT: ret
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; qnan
+ ret i1 %1
+}
+
+define i1 @issnan_fpclass(float %x) {
+; RV32IF-LABEL: issnan_fpclass:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fclass.s a0, fa0
+; RV32IF-NEXT: andi a0, a0, 256
+; RV32IF-NEXT: snez a0, a0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: issnan_fpclass:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fclass.s a0, fa0
+; RV64IF-NEXT: andi a0, a0, 256
+; RV64IF-NEXT: snez a0, a0
+; RV64IF-NEXT: ret
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; snan
+ ret i1 %1
+}
+
+define i1 @isinf_fpclass(float %x) {
+; RV32IF-LABEL: isinf_fpclass:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fclass.s a0, fa0
+; RV32IF-NEXT: andi a0, a0, 129
+; RV32IF-NEXT: snez a0, a0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: isinf_fpclass:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fclass.s a0, fa0
+; RV64IF-NEXT: andi a0, a0, 129
+; RV64IF-NEXT: snez a0, a0
+; RV64IF-NEXT: ret
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf"
+ ret i1 %1
+}
+
+define i1 @isposinf_fpclass(float %x) {
+; RV32IF-LABEL: isposinf_fpclass:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fclass.s a0, fa0
+; RV32IF-NEXT: andi a0, a0, 128
+; RV32IF-NEXT: snez a0, a0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: isposinf_fpclass:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fclass.s a0, fa0
+; RV64IF-NEXT: andi a0, a0, 128
+; RV64IF-NEXT: snez a0, a0
+; RV64IF-NEXT: ret
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf"
+ ret i1 %1
+}
+
+define i1 @isneginf_fpclass(float %x) {
+; RV32IF-LABEL: isneginf_fpclass:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fclass.s a0, fa0
+; RV32IF-NEXT: andi a0, a0, 1
+; RV32IF-NEXT: snez a0, a0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: isneginf_fpclass:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fclass.s a0, fa0
+; RV64IF-NEXT: andi a0, a0, 1
+; RV64IF-NEXT: snez a0, a0
+; RV64IF-NEXT: ret
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf"
+ ret i1 %1
+}
+
+define i1 @isfinite_fpclass(float %x) {
+; RV32IF-LABEL: isfinite_fpclass:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fclass.s a0, fa0
+; RV32IF-NEXT: andi a0, a0, 126
+; RV32IF-NEXT: snez a0, a0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: isfinite_fpclass:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fclass.s a0, fa0
+; RV64IF-NEXT: andi a0, a0, 126
+; RV64IF-NEXT: snez a0, a0
+; RV64IF-NEXT: ret
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
+ ret i1 %1
+}
+
+define i1 @isposfinite_fpclass(float %x) {
+; RV32IF-LABEL: isposfinite_fpclass:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fclass.s a0, fa0
+; RV32IF-NEXT: andi a0, a0, 112
+; RV32IF-NEXT: snez a0, a0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: isposfinite_fpclass:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fclass.s a0, fa0
+; RV64IF-NEXT: andi a0, a0, 112
+; RV64IF-NEXT: snez a0, a0
+; RV64IF-NEXT: ret
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite"
+ ret i1 %1
+}
+
+define i1 @isnegfinite_fpclass(float %x) {
+; RV32IF-LABEL: isnegfinite_fpclass:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fclass.s a0, fa0
+; RV32IF-NEXT: andi a0, a0, 14
+; RV32IF-NEXT: snez a0, a0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: isnegfinite_fpclass:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fclass.s a0, fa0
+; RV64IF-NEXT: andi a0, a0, 14
+; RV64IF-NEXT: snez a0, a0
+; RV64IF-NEXT: ret
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 56) ; 0x38 = "-finite"
+ ret i1 %1
+}
+
+define i1 @isnotfinite_fpclass(float %x) {
+; RV32IF-LABEL: isnotfinite_fpclass:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fclass.s a0, fa0
+; RV32IF-NEXT: andi a0, a0, 897
+; RV32IF-NEXT: snez a0, a0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: isnotfinite_fpclass:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fclass.s a0, fa0
+; RV64IF-NEXT: andi a0, a0, 897
+; RV64IF-NEXT: snez a0, a0
+; RV64IF-NEXT: ret
+ %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ox207 = "inf|nan"
+ ret i1 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-fp-ceil-floor.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-fp-ceil-floor.mir
deleted file mode 100644
index 1e184bd0c112..000000000000
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-fp-ceil-floor.mir
+++ /dev/null
@@ -1,98 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=riscv32 -mattr=+d -run-pass=legalizer %s -o - \
-# RUN: | FileCheck %s
-# RUN: llc -mtriple=riscv64 -mattr=+d -run-pass=legalizer %s -o - \
-# RUN: | FileCheck %s
-
----
-name: ceil_f32
-body: |
- bb.1:
- liveins: $f10_f
-
- ; CHECK-LABEL: name: ceil_f32
- ; CHECK: liveins: $f10_f
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f
- ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
- ; CHECK-NEXT: $f10_f = COPY [[COPY]](s32)
- ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &ceilf, csr_ilp32d_lp64d, implicit-def $x1, implicit $f10_f, implicit-def $f10_f
- ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f10_f
- ; CHECK-NEXT: $f10_f = COPY [[COPY1]](s32)
- ; CHECK-NEXT: PseudoRET implicit $f10_f
- %0:_(s32) = COPY $f10_f
- %1:_(s32) = G_FCEIL %0
- $f10_f = COPY %1(s32)
- PseudoRET implicit $f10_f
-
-...
----
-name: floor_f32
-body: |
- bb.1:
- liveins: $f10_f
-
- ; CHECK-LABEL: name: floor_f32
- ; CHECK: liveins: $f10_f
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $f10_f
- ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
- ; CHECK-NEXT: $f10_f = COPY [[COPY]](s32)
- ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &floorf, csr_ilp32d_lp64d, implicit-def $x1, implicit $f10_f, implicit-def $f10_f
- ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $f10_f
- ; CHECK-NEXT: $f10_f = COPY [[COPY1]](s32)
- ; CHECK-NEXT: PseudoRET implicit $f10_f
- %0:_(s32) = COPY $f10_f
- %1:_(s32) = G_FFLOOR %0
- $f10_f = COPY %1(s32)
- PseudoRET implicit $f10_f
-
-...
----
-name: ceil_f64
-body: |
- bb.1:
- liveins: $f10_d
-
- ; CHECK-LABEL: name: ceil_f64
- ; CHECK: liveins: $f10_d
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $f10_d
- ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
- ; CHECK-NEXT: $f10_d = COPY [[COPY]](s64)
- ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &ceil, csr_ilp32d_lp64d, implicit-def $x1, implicit $f10_d, implicit-def $f10_d
- ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $f10_d
- ; CHECK-NEXT: $f10_d = COPY [[COPY1]](s64)
- ; CHECK-NEXT: PseudoRET implicit $f10_d
- %0:_(s64) = COPY $f10_d
- %1:_(s64) = G_FCEIL %0
- $f10_d = COPY %1(s64)
- PseudoRET implicit $f10_d
-
-...
----
-name: floor_f64
-body: |
- bb.1:
- liveins: $f10_d
-
- ; CHECK-LABEL: name: floor_f64
- ; CHECK: liveins: $f10_d
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $f10_d
- ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
- ; CHECK-NEXT: $f10_d = COPY [[COPY]](s64)
- ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &floor, csr_ilp32d_lp64d, implicit-def $x1, implicit $f10_d, implicit-def $f10_d
- ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $f10_d
- ; CHECK-NEXT: $f10_d = COPY [[COPY1]](s64)
- ; CHECK-NEXT: PseudoRET implicit $f10_d
- %0:_(s64) = COPY $f10_d
- %1:_(s64) = G_FFLOOR %0
- $f10_d = COPY %1(s64)
- PseudoRET implicit $f10_d
-
-...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir
index f925d2451508..bed44eb657da 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv32.mir
@@ -1,6 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - \
-# RUN: | FileCheck %s
+# RUN: | FileCheck %s
+# RUN: llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -run-pass=legalizer %s -o - \
+# RUN: | FileCheck %s --check-prefix=UNALIGNED
---
name: load_i8
@@ -26,6 +28,14 @@ body: |
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
; CHECK-NEXT: $x10 = COPY [[LOAD]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i8
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s8) = G_LOAD %0(p0) :: (load (s8))
%2:_(s32) = G_ANYEXT %1(s8)
@@ -57,6 +67,14 @@ body: |
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
; CHECK-NEXT: $x10 = COPY [[LOAD]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i16
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s16) = G_LOAD %0(p0) :: (load (s16))
%2:_(s32) = G_ANYEXT %1(s16)
@@ -87,6 +105,14 @@ body: |
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
; CHECK-NEXT: $x10 = COPY [[LOAD]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i32
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s32) = G_LOAD %0(p0) :: (load (s32))
$x10 = COPY %1(s32)
@@ -122,6 +148,18 @@ body: |
; CHECK-NEXT: $x10 = COPY [[LOAD]](s32)
; CHECK-NEXT: $x11 = COPY [[LOAD1]](s32)
; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
+ ;
+ ; UNALIGNED-LABEL: name: load_i64
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 8)
+ ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4)
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32)
+ ; UNALIGNED-NEXT: $x11 = COPY [[LOAD1]](s32)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10, implicit $x11
%0:_(p0) = COPY $x10
%1:_(s64) = G_LOAD %0(p0) :: (load (s64))
%2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64)
@@ -153,6 +191,14 @@ body: |
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0), align 8)
; CHECK-NEXT: $x10 = COPY [[LOAD]](p0)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_ptr
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0), align 8)
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](p0)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(p0) = G_LOAD %0(p0) :: (load (p0), align 8)
$x10 = COPY %1(p0)
@@ -189,6 +235,14 @@ body: |
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; CHECK-NEXT: $x10 = COPY [[OR]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i16_unaligned
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1)
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s16) = G_LOAD %0(p0) :: (load (s16), align 1)
%2:_(s32) = G_ANYEXT %1(s16)
@@ -237,6 +291,14 @@ body: |
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
; CHECK-NEXT: $x10 = COPY [[OR2]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i32_unaligned
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s32) = G_LOAD %0(p0) :: (load (s32), align 1)
$x10 = COPY %1(s32)
@@ -272,6 +334,14 @@ body: |
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; CHECK-NEXT: $x10 = COPY [[OR]](s32)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i32_align2
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2)
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s32) = G_LOAD %0(p0) :: (load (s32), align 2)
$x10 = COPY %1(s32)
@@ -343,6 +413,18 @@ body: |
; CHECK-NEXT: $x10 = COPY [[OR2]](s32)
; CHECK-NEXT: $x11 = COPY [[OR5]](s32)
; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
+ ;
+ ; UNALIGNED-LABEL: name: load_i64_unaligned
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+ ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from unknown-address + 4, align 1)
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s32)
+ ; UNALIGNED-NEXT: $x11 = COPY [[LOAD1]](s32)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10, implicit $x11
%0:_(p0) = COPY $x10
%1:_(s64) = G_LOAD %0(p0) :: (load (s64), align 1)
%2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %1(s64)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir
index 933bc589f601..491e4a358b1a 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-load-rv64.mir
@@ -1,6 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=riscv64 -run-pass=legalizer %s -o - \
-# RUN: | FileCheck %s
+# RUN: | FileCheck %s
+# RUN: llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -run-pass=legalizer %s -o - \
+# RUN: | FileCheck %s --check-prefix=UNALIGNED
---
name: load_i8
@@ -27,6 +29,15 @@ body: |
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i8
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
+ ; UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+ ; UNALIGNED-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s8) = G_LOAD %0(p0) :: (load (s8))
%2:_(s64) = G_ANYEXT %1(s8)
@@ -59,6 +70,15 @@ body: |
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i16
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16))
+ ; UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+ ; UNALIGNED-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s16) = G_LOAD %0(p0) :: (load (s16))
%2:_(s64) = G_ANYEXT %1(s16)
@@ -91,6 +111,15 @@ body: |
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i32
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
+ ; UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+ ; UNALIGNED-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s32) = G_LOAD %0(p0) :: (load (s32))
%2:_(s64) = G_ANYEXT %1(s32)
@@ -121,6 +150,14 @@ body: |
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
; CHECK-NEXT: $x10 = COPY [[LOAD]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i64
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s64)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s64) = G_LOAD %0(p0) :: (load (s64))
$x10 = COPY %1(s64)
@@ -156,6 +193,18 @@ body: |
; CHECK-NEXT: $x10 = COPY [[LOAD]](s64)
; CHECK-NEXT: $x11 = COPY [[LOAD1]](s64)
; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
+ ;
+ ; UNALIGNED-LABEL: name: load_i128
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64))
+ ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 8)
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s64)
+ ; UNALIGNED-NEXT: $x11 = COPY [[LOAD1]](s64)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10, implicit $x11
%0:_(p0) = COPY $x10
%1:_(s128) = G_LOAD %0(p0) :: (load (s128), align 8)
%2:_(s64), %3:_(s64) = G_UNMERGE_VALUES %1(s128)
@@ -187,6 +236,14 @@ body: |
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0))
; CHECK-NEXT: $x10 = COPY [[LOAD]](p0)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_ptr
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0))
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](p0)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(p0) = G_LOAD %0(p0) :: (load (p0))
$x10 = COPY %1(p0)
@@ -224,6 +281,15 @@ body: |
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32)
; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i16_unaligned
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1)
+ ; UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+ ; UNALIGNED-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s16) = G_LOAD %0(p0) :: (load (s16), align 1)
%2:_(s64) = G_ANYEXT %1(s16)
@@ -274,6 +340,15 @@ body: |
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR2]](s32)
; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i32_unaligned
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+ ; UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+ ; UNALIGNED-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s32) = G_LOAD %0(p0) :: (load (s32), align 1)
%2:_(s64) = G_ANYEXT %1(s32)
@@ -312,6 +387,15 @@ body: |
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32)
; CHECK-NEXT: $x10 = COPY [[ANYEXT]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i32_align2
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2)
+ ; UNALIGNED-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32)
+ ; UNALIGNED-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s32) = G_LOAD %0(p0) :: (load (s32), align 2)
%2:_(s64) = G_ANYEXT %1(s32)
@@ -384,6 +468,14 @@ body: |
; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[OR2]]
; CHECK-NEXT: $x10 = COPY [[OR6]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i64_unaligned
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 1)
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s64)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s64) = G_LOAD %0(p0) :: (load (s64), align 1)
$x10 = COPY %1(s64)
@@ -431,6 +523,14 @@ body: |
; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[OR]]
; CHECK-NEXT: $x10 = COPY [[OR2]](s64)
; CHECK-NEXT: PseudoRET implicit $x10
+ ;
+ ; UNALIGNED-LABEL: name: load_i64_align2
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 2)
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s64)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10
%0:_(p0) = COPY $x10
%1:_(s64) = G_LOAD %0(p0) :: (load (s64), align 2)
$x10 = COPY %1(s64)
@@ -550,6 +650,18 @@ body: |
; CHECK-NEXT: $x10 = COPY [[OR6]](s64)
; CHECK-NEXT: $x11 = COPY [[OR13]](s64)
; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11
+ ;
+ ; UNALIGNED-LABEL: name: load_i128_unaligned
+ ; UNALIGNED: liveins: $x10
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 1)
+ ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 8, align 1)
+ ; UNALIGNED-NEXT: $x10 = COPY [[LOAD]](s64)
+ ; UNALIGNED-NEXT: $x11 = COPY [[LOAD1]](s64)
+ ; UNALIGNED-NEXT: PseudoRET implicit $x10, implicit $x11
%0:_(p0) = COPY $x10
%1:_(s128) = G_LOAD %0(p0) :: (load (s128), align 1)
%2:_(s64), %3:_(s64) = G_UNMERGE_VALUES %1(s128)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir
index 2ece5a8c9d41..791bdb30c490 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv32.mir
@@ -1,6 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - \
-# RUN: | FileCheck %s
+# RUN: | FileCheck %s
+# RUN: llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -run-pass=legalizer %s -o - \
+# RUN: | FileCheck %s --check-prefix=UNALIGNED
---
name: store_i8
@@ -26,6 +28,14 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
; CHECK-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s8))
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i8
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s8))
+ ; UNALIGNED-NEXT: PseudoRET
%2:_(s32) = COPY $x10
%0:_(s8) = G_TRUNC %2(s32)
%1:_(p0) = COPY $x11
@@ -57,6 +67,14 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
; CHECK-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s16))
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i16
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s16))
+ ; UNALIGNED-NEXT: PseudoRET
%2:_(s32) = COPY $x10
%0:_(s16) = G_TRUNC %2(s32)
%1:_(p0) = COPY $x11
@@ -87,6 +105,14 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
; CHECK-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32))
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i32
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32))
+ ; UNALIGNED-NEXT: PseudoRET
%0:_(s32) = COPY $x10
%1:_(p0) = COPY $x11
G_STORE %0(s32), %1(p0) :: (store (s32))
@@ -122,6 +148,18 @@ body: |
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s32)
; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i64
+ ; UNALIGNED: liveins: $x10, $x11, $x12
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; UNALIGNED-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x12
+ ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY2]](p0) :: (store (s32), align 8)
+ ; UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s32)
+ ; UNALIGNED-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
+ ; UNALIGNED-NEXT: PseudoRET
%2:_(s32) = COPY $x10
%3:_(s32) = COPY $x11
%0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
@@ -153,6 +191,14 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
; CHECK-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store (p0), align 8)
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_ptr
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store (p0), align 8)
+ ; UNALIGNED-NEXT: PseudoRET
%0:_(p0) = COPY $x10
%1:_(p0) = COPY $x11
G_STORE %0(p0), %1(p0) :: (store (p0), align 8)
@@ -190,6 +236,14 @@ body: |
; CHECK-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s8))
; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 1)
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i16_unaligned
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s16), align 1)
+ ; UNALIGNED-NEXT: PseudoRET
%2:_(s32) = COPY $x10
%0:_(s16) = G_TRUNC %2(s32)
%1:_(p0) = COPY $x11
@@ -238,6 +292,14 @@ body: |
; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 2)
; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p0) :: (store (s8) into unknown-address + 3)
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i32_unaligned
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32), align 1)
+ ; UNALIGNED-NEXT: PseudoRET
%0:_(s32) = COPY $x10
%1:_(p0) = COPY $x11
G_STORE %0(s32), %1(p0) :: (store (s32), align 1)
@@ -273,6 +335,14 @@ body: |
; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s16))
; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 2)
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i32_align2
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[COPY]](s32), [[COPY1]](p0) :: (store (s32), align 2)
+ ; UNALIGNED-NEXT: PseudoRET
%0:_(s32) = COPY $x10
%1:_(p0) = COPY $x11
G_STORE %0(s32), %1(p0) :: (store (s32), align 2)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir
index 85055561c4f9..860bc932d856 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-store-rv64.mir
@@ -1,6 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=riscv64 -run-pass=legalizer %s -o - \
-# RUN: | FileCheck %s
+# RUN: | FileCheck %s
+# RUN: llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -run-pass=legalizer %s -o - \
+# RUN: | FileCheck %s --check-prefix=UNALIGNED
---
name: store_i8
@@ -27,6 +29,15 @@ body: |
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s8))
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i8
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; UNALIGNED-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s8))
+ ; UNALIGNED-NEXT: PseudoRET
%2:_(s64) = COPY $x10
%0:_(s8) = G_TRUNC %2(s64)
%1:_(p0) = COPY $x11
@@ -59,6 +70,15 @@ body: |
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s16))
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i16
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; UNALIGNED-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s16))
+ ; UNALIGNED-NEXT: PseudoRET
%2:_(s64) = COPY $x10
%0:_(s16) = G_TRUNC %2(s64)
%1:_(p0) = COPY $x11
@@ -91,6 +111,15 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
; CHECK-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s32))
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i32
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s32))
+ ; UNALIGNED-NEXT: PseudoRET
%2:_(s64) = COPY $x10
%0:_(s32) = G_TRUNC %2(s64)
%1:_(p0) = COPY $x11
@@ -121,6 +150,14 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
; CHECK-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64))
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i64
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64))
+ ; UNALIGNED-NEXT: PseudoRET
%0:_(s64) = COPY $x10
%1:_(p0) = COPY $x11
G_STORE %0(s64), %1(p0) :: (store (s64))
@@ -150,6 +187,14 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
; CHECK-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64))
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i128
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64))
+ ; UNALIGNED-NEXT: PseudoRET
%0:_(s64) = COPY $x10
%1:_(p0) = COPY $x11
G_STORE %0(s64), %1(p0) :: (store (s64))
@@ -179,6 +224,14 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
; CHECK-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store (p0))
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_ptr
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store (p0))
+ ; UNALIGNED-NEXT: PseudoRET
%0:_(p0) = COPY $x10
%1:_(p0) = COPY $x11
G_STORE %0(p0), %1(p0) :: (store (p0))
@@ -217,6 +270,15 @@ body: |
; CHECK-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s8))
; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 1)
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i16_unaligned
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; UNALIGNED-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s16), align 1)
+ ; UNALIGNED-NEXT: PseudoRET
%2:_(s64) = COPY $x10
%0:_(s16) = G_TRUNC %2(s64)
%1:_(p0) = COPY $x11
@@ -267,6 +329,15 @@ body: |
; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s8) into unknown-address + 2)
; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p0) :: (store (s8) into unknown-address + 3)
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i32_unaligned
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s32), align 1)
+ ; UNALIGNED-NEXT: PseudoRET
%2:_(s64) = COPY $x10
%0:_(s32) = G_TRUNC %2(s64)
%1:_(p0) = COPY $x11
@@ -305,6 +376,15 @@ body: |
; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY1]](p0) :: (store (s16))
; CHECK-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 2)
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i32_align2
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; UNALIGNED-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[TRUNC]](s32), [[COPY1]](p0) :: (store (s32), align 2)
+ ; UNALIGNED-NEXT: PseudoRET
%2:_(s64) = COPY $x10
%0:_(s32) = G_TRUNC %2(s64)
%1:_(p0) = COPY $x11
@@ -353,6 +433,14 @@ body: |
; CHECK-NEXT: G_STORE [[TRUNC1]](s32), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 4)
; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[PTR_ADD2]](p0) :: (store (s16) into unknown-address + 6)
; CHECK-NEXT: PseudoRET
+ ;
+ ; UNALIGNED-LABEL: name: store_i64_align2
+ ; UNALIGNED: liveins: $x10, $x11
+ ; UNALIGNED-NEXT: {{ $}}
+ ; UNALIGNED-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; UNALIGNED-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; UNALIGNED-NEXT: G_STORE [[COPY]](s64), [[COPY1]](p0) :: (store (s64), align 2)
+ ; UNALIGNED-NEXT: PseudoRET
%0:_(s64) = COPY $x10
%1:_(p0) = COPY $x11
G_STORE %0(s64), %1(p0) :: (store (s64), align 2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
index d613e4ee0bc2..15cff650765e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
@@ -1,22 +1,428 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v12, v10, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v16, v12, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vp_ceil_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vmv1r.v v16, v0
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v17, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a2, 3
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: vmv1r.v v8, v16
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v16, v24, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vp_ceil_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ceil_vv_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v16
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v16, v16, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a2, 3
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.ceil.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vp_ceil_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv1f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI12_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
@@ -57,8 +463,8 @@ define <vscale x 1 x half> @vp_ceil_vv_nxv1f16(<vscale x 1 x half> %va, <vscale
define <vscale x 1 x half> @vp_ceil_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv1f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI13_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -97,8 +503,8 @@ declare <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half>, <vscale x
define <vscale x 2 x half> @vp_ceil_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv2f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI14_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
@@ -139,8 +545,8 @@ define <vscale x 2 x half> @vp_ceil_vv_nxv2f16(<vscale x 2 x half> %va, <vscale
define <vscale x 2 x half> @vp_ceil_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv2f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI15_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -179,8 +585,8 @@ declare <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half>, <vscale x
define <vscale x 4 x half> @vp_ceil_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv4f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI16_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
@@ -223,8 +629,8 @@ define <vscale x 4 x half> @vp_ceil_vv_nxv4f16(<vscale x 4 x half> %va, <vscale
define <vscale x 4 x half> @vp_ceil_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv4f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI17_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -263,8 +669,8 @@ declare <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half>, <vscale x
define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv8f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
@@ -309,8 +715,8 @@ define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale
define <vscale x 8 x half> @vp_ceil_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv8f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI19_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v10, v8
; ZVFH-NEXT: vmflt.vf v0, v10, fa5
@@ -349,8 +755,8 @@ declare <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half>, <vscal
define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv16f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI8_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
@@ -395,8 +801,8 @@ define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vsca
define <vscale x 16 x half> @vp_ceil_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv16f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI9_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI21_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8
; ZVFH-NEXT: vmflt.vf v0, v12, fa5
@@ -435,8 +841,8 @@ declare <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half>, <vscal
define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv32f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI10_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
@@ -491,10 +897,10 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: .LBB22_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
@@ -533,8 +939,8 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
define <vscale x 32 x half> @vp_ceil_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv32f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI11_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI23_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8
; ZVFH-NEXT: vmflt.vf v0, v16, fa5
@@ -586,10 +992,10 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: .LBB23_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -834,8 +1240,8 @@ declare <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double>, <vsca
define <vscale x 1 x double> @vp_ceil_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI34_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
@@ -855,8 +1261,8 @@ define <vscale x 1 x double> @vp_ceil_vv_nxv1f64(<vscale x 1 x double> %va, <vsc
define <vscale x 1 x double> @vp_ceil_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv1f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI35_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -876,8 +1282,8 @@ declare <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double>, <vsca
define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -899,8 +1305,8 @@ define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vsc
define <vscale x 2 x double> @vp_ceil_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv2f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI37_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -920,8 +1326,8 @@ declare <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double>, <vsca
define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -943,8 +1349,8 @@ define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vsc
define <vscale x 4 x double> @vp_ceil_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv4f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI39_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -964,8 +1370,8 @@ declare <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double>, <vsca
define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv7f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -987,8 +1393,8 @@ define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vsc
define <vscale x 7 x double> @vp_ceil_vv_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv7f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI41_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -1008,8 +1414,8 @@ declare <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double>, <vsca
define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1031,8 +1437,8 @@ define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vsc
define <vscale x 8 x double> @vp_ceil_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI43_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -1065,8 +1471,8 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v0, a2
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI44_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1087,10 +1493,10 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1118,8 +1524,8 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64_unmasked(<vscale x 16 x doubl
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI45_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1132,10 +1538,10 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64_unmasked(<vscale x 16 x doubl
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: bltu a0, a1, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
index 9efc3183f15a..ee16b476dc84 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
@@ -1,124 +1,408 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
-define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
-; CHECK-LABEL: ceil_nxv1f16:
+define <vscale x 1 x bfloat> @ceil_nxv1bf16(<vscale x 1 x bfloat> %x) {
+; CHECK-LABEL: ceil_nxv1bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
- %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
- ret <vscale x 1 x half> %a
+ %a = call <vscale x 1 x bfloat> @llvm.ceil.nxv1bf16(<vscale x 1 x bfloat> %x)
+ ret <vscale x 1 x bfloat> %a
}
-declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>)
-define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) {
-; CHECK-LABEL: ceil_nxv2f16:
+define <vscale x 2 x bfloat> @ceil_nxv2bf16(<vscale x 2 x bfloat> %x) {
+; CHECK-LABEL: ceil_nxv2bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
- %a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
- ret <vscale x 2 x half> %a
+ %a = call <vscale x 2 x bfloat> @llvm.ceil.nxv2bf16(<vscale x 2 x bfloat> %x)
+ ret <vscale x 2 x bfloat> %a
}
-declare <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half>)
-define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) {
-; CHECK-LABEL: ceil_nxv4f16:
+define <vscale x 4 x bfloat> @ceil_nxv4bf16(<vscale x 4 x bfloat> %x) {
+; CHECK-LABEL: ceil_nxv4bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
- %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
- ret <vscale x 4 x half> %a
+ %a = call <vscale x 4 x bfloat> @llvm.ceil.nxv4bf16(<vscale x 4 x bfloat> %x)
+ ret <vscale x 4 x bfloat> %a
}
-declare <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half>)
-define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) {
-; CHECK-LABEL: ceil_nxv8f16:
+define <vscale x 8 x bfloat> @ceil_nxv8bf16(<vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: ceil_nxv8bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfabs.v v10, v8
-; CHECK-NEXT: vmflt.vf v0, v10, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
- %a = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %x)
- ret <vscale x 8 x half> %a
+ %a = call <vscale x 8 x bfloat> @llvm.ceil.nxv8bf16(<vscale x 8 x bfloat> %x)
+ ret <vscale x 8 x bfloat> %a
}
-declare <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half>)
-define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) {
-; CHECK-LABEL: ceil_nxv16f16:
+define <vscale x 16 x bfloat> @ceil_nxv16bf16(<vscale x 16 x bfloat> %x) {
+; CHECK-LABEL: ceil_nxv16bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v0, v12, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: ret
- %a = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> %x)
- ret <vscale x 16 x half> %a
+ %a = call <vscale x 16 x bfloat> @llvm.ceil.nxv16bf16(<vscale x 16 x bfloat> %x)
+ ret <vscale x 16 x bfloat> %a
}
-declare <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half>)
-define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) {
-; CHECK-LABEL: ceil_nxv32f16:
+define <vscale x 32 x bfloat> @ceil_nxv32bf16(<vscale x 32 x bfloat> %x) {
+; CHECK-LABEL: ceil_nxv32bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v0, v16, fa5
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT: ret
+ %a = call <vscale x 32 x bfloat> @llvm.ceil.nxv32bf16(<vscale x 32 x bfloat> %x)
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
+; ZVFH-LABEL: ceil_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI6_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 3
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
+ ret <vscale x 1 x half> %a
+}
+declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>)
+
+define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) {
+; ZVFH-LABEL: ceil_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI7_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 3
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
+ ret <vscale x 2 x half> %a
+}
+declare <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half>)
+
+define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) {
+; ZVFH-LABEL: ceil_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI8_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 3
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v10
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
+ ret <vscale x 4 x half> %a
+}
+declare <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half>)
+
+define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) {
+; ZVFH-LABEL: ceil_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI9_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfabs.v v10, v8
+; ZVFH-NEXT: vmflt.vf v0, v10, fa5
+; ZVFH-NEXT: fsrmi a0, 3
+; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v12
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %a
+}
+declare <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half>)
+
+define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) {
+; ZVFH-LABEL: ceil_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI10_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfabs.v v12, v8
+; ZVFH-NEXT: vmflt.vf v0, v12, fa5
+; ZVFH-NEXT: fsrmi a0, 3
+; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> %x)
+ ret <vscale x 16 x half> %a
+}
+declare <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half>)
+
+define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) {
+; ZVFH-LABEL: ceil_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI11_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfabs.v v16, v8
+; ZVFH-NEXT: vmflt.vf v0, v16, fa5
+; ZVFH-NEXT: fsrmi a0, 3
+; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: ceil_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half> %x)
ret <vscale x 32 x half> %a
}
@@ -227,8 +511,8 @@ declare <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float>)
define <vscale x 1 x double> @ceil_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: ceil_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI17_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -247,8 +531,8 @@ declare <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double>)
define <vscale x 2 x double> @ceil_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: ceil_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI18_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -267,8 +551,8 @@ declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)
define <vscale x 4 x double> @ceil_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: ceil_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -287,8 +571,8 @@ declare <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double>)
define <vscale x 8 x double> @ceil_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: ceil_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI20_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
index ec60b3ed3e0c..00e21ce8992b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
@@ -1,124 +1,414 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
-define <vscale x 1 x half> @floor_nxv1f16(<vscale x 1 x half> %x) {
-; CHECK-LABEL: floor_nxv1f16:
+define <vscale x 1 x bfloat> @floor_nxv1bf16(<vscale x 1 x bfloat> %x) {
+; CHECK-LABEL: floor_nxv1bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 2
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
- %a = call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> %x)
- ret <vscale x 1 x half> %a
+ %a = call <vscale x 1 x bfloat> @llvm.floor.nxv1bf16(<vscale x 1 x bfloat> %x)
+ ret <vscale x 1 x bfloat> %a
}
-declare <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half>)
+declare <vscale x 1 x bfloat> @llvm.floor.nxv1bf16(<vscale x 1 x bfloat>)
-define <vscale x 2 x half> @floor_nxv2f16(<vscale x 2 x half> %x) {
-; CHECK-LABEL: floor_nxv2f16:
+define <vscale x 2 x bfloat> @floor_nxv2bf16(<vscale x 2 x bfloat> %x) {
+; CHECK-LABEL: floor_nxv2bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 2
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
- %a = call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> %x)
- ret <vscale x 2 x half> %a
+ %a = call <vscale x 2 x bfloat> @llvm.floor.nxv2bf16(<vscale x 2 x bfloat> %x)
+ ret <vscale x 2 x bfloat> %a
}
-declare <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half>)
+declare <vscale x 2 x bfloat> @llvm.floor.nxv2bf16(<vscale x 2 x bfloat>)
-define <vscale x 4 x half> @floor_nxv4f16(<vscale x 4 x half> %x) {
-; CHECK-LABEL: floor_nxv4f16:
+define <vscale x 4 x bfloat> @floor_nxv4bf16(<vscale x 4 x bfloat> %x) {
+; CHECK-LABEL: floor_nxv4bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 2
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
- %a = call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> %x)
- ret <vscale x 4 x half> %a
+ %a = call <vscale x 4 x bfloat> @llvm.floor.nxv4bf16(<vscale x 4 x bfloat> %x)
+ ret <vscale x 4 x bfloat> %a
}
-declare <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half>)
+declare <vscale x 4 x bfloat> @llvm.floor.nxv4bf16(<vscale x 4 x bfloat>)
-define <vscale x 8 x half> @floor_nxv8f16(<vscale x 8 x half> %x) {
-; CHECK-LABEL: floor_nxv8f16:
+define <vscale x 8 x bfloat> @floor_nxv8bf16(<vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: floor_nxv8bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfabs.v v10, v8
-; CHECK-NEXT: vmflt.vf v0, v10, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 2
-; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
- %a = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> %x)
- ret <vscale x 8 x half> %a
+ %a = call <vscale x 8 x bfloat> @llvm.floor.nxv8bf16(<vscale x 8 x bfloat> %x)
+ ret <vscale x 8 x bfloat> %a
}
-declare <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half>)
+declare <vscale x 8 x bfloat> @llvm.floor.nxv8bf16(<vscale x 8 x bfloat>)
-define <vscale x 16 x half> @floor_nxv16f16(<vscale x 16 x half> %x) {
-; CHECK-LABEL: floor_nxv16f16:
+define <vscale x 16 x bfloat> @floor_nxv16bf16(<vscale x 16 x bfloat> %x) {
+; CHECK-LABEL: floor_nxv16bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v0, v12, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 2
-; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: ret
- %a = call <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half> %x)
- ret <vscale x 16 x half> %a
+ %a = call <vscale x 16 x bfloat> @llvm.floor.nxv16bf16(<vscale x 16 x bfloat> %x)
+ ret <vscale x 16 x bfloat> %a
}
-declare <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half>)
+declare <vscale x 16 x bfloat> @llvm.floor.nxv16bf16(<vscale x 16 x bfloat>)
-define <vscale x 32 x half> @floor_nxv32f16(<vscale x 32 x half> %x) {
-; CHECK-LABEL: floor_nxv32f16:
+define <vscale x 32 x bfloat> @floor_nxv32bf16(<vscale x 32 x bfloat> %x) {
+; CHECK-LABEL: floor_nxv32bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v0, v16, fa5
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
; CHECK-NEXT: fsrmi a0, 2
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT: ret
+ %a = call <vscale x 32 x bfloat> @llvm.floor.nxv32bf16(<vscale x 32 x bfloat> %x)
+ ret <vscale x 32 x bfloat> %a
+}
+declare <vscale x 32 x bfloat> @llvm.floor.nxv32bf16(<vscale x 32 x bfloat>)
+
+define <vscale x 1 x half> @floor_nxv1f16(<vscale x 1 x half> %x) {
+; ZVFH-LABEL: floor_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI6_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 2
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: floor_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> %x)
+ ret <vscale x 1 x half> %a
+}
+declare <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half>)
+
+define <vscale x 2 x half> @floor_nxv2f16(<vscale x 2 x half> %x) {
+; ZVFH-LABEL: floor_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI7_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 2
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: floor_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> %x)
+ ret <vscale x 2 x half> %a
+}
+declare <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half>)
+
+define <vscale x 4 x half> @floor_nxv4f16(<vscale x 4 x half> %x) {
+; ZVFH-LABEL: floor_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI8_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 2
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: floor_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v10
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> %x)
+ ret <vscale x 4 x half> %a
+}
+declare <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half>)
+
+define <vscale x 8 x half> @floor_nxv8f16(<vscale x 8 x half> %x) {
+; ZVFH-LABEL: floor_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI9_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfabs.v v10, v8
+; ZVFH-NEXT: vmflt.vf v0, v10, fa5
+; ZVFH-NEXT: fsrmi a0, 2
+; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: floor_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v12
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %a
+}
+declare <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half>)
+
+define <vscale x 16 x half> @floor_nxv16f16(<vscale x 16 x half> %x) {
+; ZVFH-LABEL: floor_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI10_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfabs.v v12, v8
+; ZVFH-NEXT: vmflt.vf v0, v12, fa5
+; ZVFH-NEXT: fsrmi a0, 2
+; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: floor_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half> %x)
+ ret <vscale x 16 x half> %a
+}
+declare <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half>)
+
+define <vscale x 32 x half> @floor_nxv32f16(<vscale x 32 x half> %x) {
+; ZVFH-LABEL: floor_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI11_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfabs.v v16, v8
+; ZVFH-NEXT: vmflt.vf v0, v16, fa5
+; ZVFH-NEXT: fsrmi a0, 2
+; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: floor_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 32 x half> @llvm.floor.nxv32f16(<vscale x 32 x half> %x)
ret <vscale x 32 x half> %a
}
@@ -227,8 +517,8 @@ declare <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float>)
define <vscale x 1 x double> @floor_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: floor_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI17_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -247,8 +537,8 @@ declare <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double>)
define <vscale x 2 x double> @floor_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: floor_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI18_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -267,8 +557,8 @@ declare <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double>)
define <vscale x 4 x double> @floor_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: floor_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -287,8 +577,8 @@ declare <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double>)
define <vscale x 8 x double> @floor_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: floor_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI20_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index d996a9c05aca..b5c40fbfaac6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -5545,457 +5545,24 @@ define void @trunc_v8f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-ZFH-RV32-LABEL: trunc_v8f16:
-; ZVFHMIN-ZFH-RV32: # %bb.0:
-; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16
-; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: lui a1, %hi(.LCPI115_0)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, %lo(.LCPI115_0)(a1)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_2
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.1:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_2:
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 0(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_4
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.3:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa3, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_4:
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 4(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_6
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.5:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa3, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_6:
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 6(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa0, fa3
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa0, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB115_8
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.7:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa3, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa1, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_8:
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB115_10
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.9:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a2, fa1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a2, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa4, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_10:
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa2, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB115_12
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.11:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a3, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa2, a3, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa2, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_12:
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 12(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa3
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a3
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB115_14
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.13:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa4, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_14:
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a2, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a2, .LBB115_16
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.15:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa5, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa5, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB115_16:
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16
-; ZVFHMIN-ZFH-RV32-NEXT: ret
-;
-; ZVFHMIN-ZFH-RV64-LABEL: trunc_v8f16:
-; ZVFHMIN-ZFH-RV64: # %bb.0:
-; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16
-; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
-; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: lui a1, %hi(.LCPI115_0)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, %lo(.LCPI115_0)(a1)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_2
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.1:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_2:
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 0(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa1
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_4
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.3:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa1, fa3, fa1
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_4:
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 4(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa2
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_6
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.5:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa2, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa2, fa3, fa2
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_6:
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 6(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa0, fa3
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa0, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa1
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB115_8
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.7:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa3, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa1, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_8:
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa4, fa1
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa4, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB115_10
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.9:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a2, fa1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa4, a2, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa1, fa4, fa1
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_10:
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa2
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa2, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB115_12
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.11:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a3, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa2, a3, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa2, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_12:
-; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 12(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa3
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a3
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa4, fa2
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa4, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB115_14
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.13:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa2, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa4, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa2, fa4, fa2
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_14:
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa2
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a2, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a2, .LBB115_16
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.15:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa5, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa5, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB115_16:
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16
-; ZVFHMIN-ZFH-RV64-NEXT: ret
-;
-; ZVFHMIN-ZFHIN-RV32-LABEL: trunc_v8f16:
-; ZVFHMIN-ZFHIN-RV32: # %bb.0:
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16
-; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 307200
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_2
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.1:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_2:
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_4
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.3:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa2, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa3, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_4:
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa3, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa1, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_6
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.5:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa3, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa3, fa2, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_6:
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa0, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa0
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa0, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa1
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB115_8
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.7:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa1, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_8:
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB115_10
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.9:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa2, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a2, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa1, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_10:
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB115_12
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.11:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a3, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a3, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa2, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_12:
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB115_14
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.13:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a2, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_14:
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB115_16
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.15:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa5, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa5, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB115_16:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa5, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16
-; ZVFHMIN-ZFHIN-RV32-NEXT: ret
-;
-; ZVFHMIN-ZFHIN-RV64-LABEL: trunc_v8f16:
-; ZVFHMIN-ZFHIN-RV64: # %bb.0:
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16
-; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 307200
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_2
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.1:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_2:
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa2, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_4
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.3:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa2, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa2, fa3, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_4:
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa3, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa1, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_6
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.5:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa3, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa2, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa3, fa2, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_6:
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa0, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa2, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa0
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa0, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa1
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB115_8
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.7:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa1, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa1, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_8:
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa2, fa1
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a2, fa1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa3, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a2, .LBB115_10
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.9:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a2, fa2, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa1, a2, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa2, fa1, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_10:
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa2, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa1
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a3, fa1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a3, .LBB115_12
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.11:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a3, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa2, a3, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa2, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_12:
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a3, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a3, .LBB115_14
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.13:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a2, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a2, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_14:
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a2, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a2, .LBB115_16
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.15:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa5, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa5, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB115_16:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa5, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16
-; ZVFHMIN-ZFHIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: trunc_v8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a1, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
store <8 x half> %b, ptr %x
@@ -6020,461 +5587,25 @@ define void @trunc_v6f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-ZFH-RV32-LABEL: trunc_v6f16:
-; ZVFHMIN-ZFH-RV32: # %bb.0:
-; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16
-; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: lui a1, %hi(.LCPI116_0)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, %lo(.LCPI116_0)(a1)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_2
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.1:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_2:
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 0(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_4
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.3:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa3, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_4:
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 4(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_6
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.5:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa3, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_6:
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 6(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa0, fa3
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa0, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_8
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.7:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa3, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa1, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_8:
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB116_10
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.9:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a2, fa1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a2, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa4, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_10:
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa2, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB116_12
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.11:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a3, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa2, a3, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa2, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_12:
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 12(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa3
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a3
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB116_14
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.13:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa4, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_14:
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a2, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a2, .LBB116_16
-; ZVFHMIN-ZFH-RV32-NEXT: # %bb.15:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa5, a1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa5, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_16:
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
-; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16
-; ZVFHMIN-ZFH-RV32-NEXT: ret
-;
-; ZVFHMIN-ZFH-RV64-LABEL: trunc_v6f16:
-; ZVFHMIN-ZFH-RV64: # %bb.0:
-; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16
-; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: lui a1, %hi(.LCPI116_0)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, %lo(.LCPI116_0)(a1)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_2
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.1:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_2:
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 0(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa1
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_4
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.3:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa1, fa3, fa1
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_4:
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 4(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa2
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_6
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.5:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa2, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa3, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa2, fa3, fa2
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_6:
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 6(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa0, fa3
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a1, fa0, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa1
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a1, .LBB116_8
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.7:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa3, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa1, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_8:
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa4, fa1
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa4, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB116_10
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.9:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a2, fa1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa4, a2, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa1, fa4, fa1
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_10:
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa2
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa2, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB116_12
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.11:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a3, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa2, a3, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa2, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_12:
-; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 12(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa3
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a3
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa4, fa2
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a3, fa4, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a3, .LBB116_14
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.13:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa2, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa4, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa2, fa4, fa2
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_14:
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa2
-; ZVFHMIN-ZFH-RV64-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: flt.h a2, fa3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT: beqz a2, .LBB116_16
-; ZVFHMIN-ZFH-RV64-NEXT: # %bb.15:
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.w.h a1, fa4, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fcvt.h.w fa5, a1, rtz
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa5, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: .LBB116_16:
-; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
-; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16
-; ZVFHMIN-ZFH-RV64-NEXT: ret
-;
-; ZVFHMIN-ZFHIN-RV32-LABEL: trunc_v6f16:
-; ZVFHMIN-ZFHIN-RV32: # %bb.0:
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16
-; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 307200
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_2
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.1:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_2:
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_4
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.3:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa2, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa3, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_4:
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa3, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa1, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_6
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.5:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa3, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa3, fa2, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_6:
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa0, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa0
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa0, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa1
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_8
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.7:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa1, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_8:
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB116_10
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.9:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa2, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a2, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa1, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_10:
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB116_12
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.11:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a3, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a3, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa2, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_12:
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB116_14
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.13:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a2, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_14:
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB116_16
-; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.15:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa5, a1, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa5, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_16:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa5, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16
-; ZVFHMIN-ZFHIN-RV32-NEXT: ret
-;
-; ZVFHMIN-ZFHIN-RV64-LABEL: trunc_v6f16:
-; ZVFHMIN-ZFHIN-RV64: # %bb.0:
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16
-; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 307200
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_2
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.1:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_2:
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa2, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_4
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.3:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa2, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa2, fa3, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_4:
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa3, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa1, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_6
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.5:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa3, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa2, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa3, fa2, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_6:
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa0, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa2, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa0
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa0, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a1, fa0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa1
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a1, .LBB116_8
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.7:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa1, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa1, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_8:
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa2, fa1
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a2, fa1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa3, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a2, .LBB116_10
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.9:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a2, fa2, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa1, a2, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa2, fa1, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_10:
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa1, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa2, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa1
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa1, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a3, fa1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa2
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a3, .LBB116_12
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.11:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a3, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa2, a3, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa2, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_12:
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a3, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a3, .LBB116_14
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.13:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a2, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa3, a2, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_14:
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa3, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flt.s a2, fa3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: beqz a2, .LBB116_16
-; ZVFHMIN-ZFHIN-RV64-NEXT: # %bb.15:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.w.s a1, fa4, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.w fa5, a1, rtz
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsgnj.s fa4, fa5, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: .LBB116_16:
-; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.h.s fa5, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16
-; ZVFHMIN-ZFHIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: trunc_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a1, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
store <6 x half> %b, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
index 45334ea8648f..03d1fb6c8d29 100644
--- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
@@ -1,22 +1,428 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.vp.floor.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vp_floor_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_floor_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.floor.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vp_floor_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_floor_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.floor.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.floor.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vp_floor_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_floor_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.floor.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vp_floor_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_floor_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.floor.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.floor.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vp_floor_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_floor_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v12, v10, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.floor.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vp_floor_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_floor_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.floor.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.floor.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vp_floor_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_floor_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v16, v12, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.floor.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vp_floor_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_floor_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.floor.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.floor.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vp_floor_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_floor_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.floor.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vp_floor_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_floor_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.floor.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.floor.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vp_floor_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_floor_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vmv1r.v v16, v0
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v17, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a2, 2
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: vmv1r.v v8, v16
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v16, v24, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.floor.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vp_floor_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_floor_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v16
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v16, v16, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a2, 2
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: fsrmi a0, 2
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.floor.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vp_floor_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv1f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI12_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
@@ -57,8 +463,8 @@ define <vscale x 1 x half> @vp_floor_nxv1f16(<vscale x 1 x half> %va, <vscale x
define <vscale x 1 x half> @vp_floor_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv1f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI13_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -97,8 +503,8 @@ declare <vscale x 2 x half> @llvm.vp.floor.nxv2f16(<vscale x 2 x half>, <vscale
define <vscale x 2 x half> @vp_floor_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv2f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI14_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
@@ -139,8 +545,8 @@ define <vscale x 2 x half> @vp_floor_nxv2f16(<vscale x 2 x half> %va, <vscale x
define <vscale x 2 x half> @vp_floor_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv2f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI15_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -179,8 +585,8 @@ declare <vscale x 4 x half> @llvm.vp.floor.nxv4f16(<vscale x 4 x half>, <vscale
define <vscale x 4 x half> @vp_floor_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv4f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI16_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
@@ -223,8 +629,8 @@ define <vscale x 4 x half> @vp_floor_nxv4f16(<vscale x 4 x half> %va, <vscale x
define <vscale x 4 x half> @vp_floor_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv4f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI17_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -263,8 +669,8 @@ declare <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half>, <vscale
define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv8f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
@@ -309,8 +715,8 @@ define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x
define <vscale x 8 x half> @vp_floor_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv8f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI19_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v10, v8
; ZVFH-NEXT: vmflt.vf v0, v10, fa5
@@ -349,8 +755,8 @@ declare <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half>, <vsca
define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv16f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI8_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
@@ -395,8 +801,8 @@ define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale
define <vscale x 16 x half> @vp_floor_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv16f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI9_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI21_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8
; ZVFH-NEXT: vmflt.vf v0, v12, fa5
@@ -435,8 +841,8 @@ declare <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half>, <vsca
define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv32f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI10_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
@@ -491,10 +897,10 @@ define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: .LBB22_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
@@ -533,8 +939,8 @@ define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale
define <vscale x 32 x half> @vp_floor_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv32f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI11_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI23_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8
; ZVFH-NEXT: vmflt.vf v0, v16, fa5
@@ -586,10 +992,10 @@ define <vscale x 32 x half> @vp_floor_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: .LBB23_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -834,8 +1240,8 @@ declare <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double>, <vsc
define <vscale x 1 x double> @vp_floor_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI34_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
@@ -855,8 +1261,8 @@ define <vscale x 1 x double> @vp_floor_nxv1f64(<vscale x 1 x double> %va, <vscal
define <vscale x 1 x double> @vp_floor_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv1f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI35_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -876,8 +1282,8 @@ declare <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double>, <vsc
define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -899,8 +1305,8 @@ define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscal
define <vscale x 2 x double> @vp_floor_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv2f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI37_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -920,8 +1326,8 @@ declare <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double>, <vsc
define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -943,8 +1349,8 @@ define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscal
define <vscale x 4 x double> @vp_floor_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv4f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI39_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -964,8 +1370,8 @@ declare <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double>, <vsc
define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv7f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -987,8 +1393,8 @@ define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscal
define <vscale x 7 x double> @vp_floor_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv7f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI41_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -1008,8 +1414,8 @@ declare <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double>, <vsc
define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1031,8 +1437,8 @@ define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscal
define <vscale x 8 x double> @vp_floor_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv8f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI43_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -1065,8 +1471,8 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v0, a2
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI44_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1087,10 +1493,10 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1118,8 +1524,8 @@ define <vscale x 16 x double> @vp_floor_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI45_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1132,10 +1538,10 @@ define <vscale x 16 x double> @vp_floor_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: bltu a0, a1, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
index 05896d8ef6ff..d8c3ab27cfad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
@@ -1,21 +1,200 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.maximum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>)
+
+define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
+; CHECK-LABEL: vfmax_nxv1bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v9, v9
+; CHECK-NEXT: vmfeq.vv v8, v10, v10
+; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0
+; CHECK-NEXT: vfmax.vv v9, v8, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.maximum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.maximum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>)
+
+define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
+; CHECK-LABEL: vfmax_nxv2bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v9, v9
+; CHECK-NEXT: vmfeq.vv v8, v10, v10
+; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0
+; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0
+; CHECK-NEXT: vfmax.vv v9, v8, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.maximum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.maximum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>)
+
+define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
+; CHECK-LABEL: vfmax_nxv4bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v12, v12
+; CHECK-NEXT: vmfeq.vv v8, v10, v10
+; CHECK-NEXT: vmerge.vvm v14, v12, v10, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
+; CHECK-NEXT: vfmax.vv v10, v8, v14
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.maximum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.maximum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+
+define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: vfmax_nxv8bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v16, v16
+; CHECK-NEXT: vmfeq.vv v8, v12, v12
+; CHECK-NEXT: vmerge.vvm v20, v16, v12, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
+; CHECK-NEXT: vfmax.vv v12, v8, v20
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.maximum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.maximum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>)
+
+define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
+; CHECK-LABEL: vfmax_nxv16bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v24, v24
+; CHECK-NEXT: vmfeq.vv v7, v16, v16
+; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmax.vv v16, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.maximum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.maximum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>)
+
+define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) nounwind {
+; CHECK-LABEL: vfmax_nxv32bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v0, v8
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v0
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v8
+; CHECK-NEXT: vmfeq.vv v3, v24, v24
+; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0
+; CHECK-NEXT: vmv1r.v v0, v3
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v4
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v16, v16
+; CHECK-NEXT: vmfeq.vv v7, v8, v8
+; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vfmax.vv v16, v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.maximum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
+ ret <vscale x 32 x bfloat> %v
+}
+
declare <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
index ab07fff59b21..320db35770cb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
@@ -1,13 +1,541 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v,+m \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v,+m \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 \
+; RUN: -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 \
+; RUN: -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.vp.maximum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmerge.vvm v9, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vfmax.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.maximum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v10, v10
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v11, v11
+; CHECK-NEXT: vmerge.vvm v9, v10, v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0
+; CHECK-NEXT: vfmax.vv v9, v8, v9
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.maximum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.maximum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmerge.vvm v9, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vfmax.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.maximum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v10, v10
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v11, v11
+; CHECK-NEXT: vmerge.vvm v9, v10, v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0
+; CHECK-NEXT: vfmax.vv v9, v8, v9
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.maximum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.maximum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmerge.vvm v16, v12, v14, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfeq.vv v8, v14, v14, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v14, v12, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vfmax.vv v10, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.maximum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v10, v10
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v12
+; CHECK-NEXT: vmerge.vvm v14, v10, v12, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v12, v10, v0
+; CHECK-NEXT: vfmax.vv v10, v8, v14
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.maximum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.maximum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmerge.vvm v24, v16, v20, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfeq.vv v8, v20, v20, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v20, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vfmax.vv v12, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.maximum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v12, v12
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v16, v16
+; CHECK-NEXT: vmerge.vvm v20, v12, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v16, v12, v0
+; CHECK-NEXT: vfmax.vv v12, v8, v20
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.maximum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.maximum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v24, v24, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmax.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.maximum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v16, v16
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v7, v24, v24
+; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmax.vv v16, v8, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.maximum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.maximum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 34
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vmv8r.v v0, v8
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs1r.v v24, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v24, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v4
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v12, v24, v24, v0.t
+; CHECK-NEXT: vmv8r.v v0, v16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a4, 24
+; CHECK-NEXT: mul a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v4
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmfeq.vv v12, v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a0, a1, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v24, v24, v0.t
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 34
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.maximum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v7, v24, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v12, v24, v24, v0.t
+; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a4, 24
+; CHECK-NEXT: mul a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmfeq.vv v12, v24, v24, v0.t
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmax.vv v16, v16, v8, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v4, v16
+; CHECK-NEXT: bltu a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v8
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v3, v16, v16
+; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v3
+; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT: vfmax.vv v16, v16, v24
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v0, v16
+; CHECK-NEXT: vmv8r.v v8, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.maximum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.maximum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -509,10 +1037,10 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: .LBB22_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -656,10 +1184,10 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: .LBB23_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -1093,10 +1621,10 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a2, a1, .LBB28_2
+; CHECK-NEXT: bltu a2, a1, .LBB40_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB28_2:
+; CHECK-NEXT: .LBB40_2:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 18
; CHECK-NEXT: mul a0, a0, a1
@@ -1202,10 +1730,10 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK-NEXT: vfmax.vv v8, v16, v8
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a2, a1, .LBB29_2
+; CHECK-NEXT: bltu a2, a1, .LBB41_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB29_2:
+; CHECK-NEXT: .LBB41_2:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
index e94259392498..2371840002f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
@@ -1,21 +1,200 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.minimum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>)
+
+define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
+; CHECK-LABEL: vfmin_nxv1bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v9, v9
+; CHECK-NEXT: vmfeq.vv v8, v10, v10
+; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0
+; CHECK-NEXT: vfmin.vv v9, v8, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.minimum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.minimum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>)
+
+define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
+; CHECK-LABEL: vfmin_nxv2bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v9, v9
+; CHECK-NEXT: vmfeq.vv v8, v10, v10
+; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0
+; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0
+; CHECK-NEXT: vfmin.vv v9, v8, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.minimum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.minimum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>)
+
+define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
+; CHECK-LABEL: vfmin_nxv4bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v12, v12
+; CHECK-NEXT: vmfeq.vv v8, v10, v10
+; CHECK-NEXT: vmerge.vvm v14, v12, v10, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
+; CHECK-NEXT: vfmin.vv v10, v8, v14
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.minimum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.minimum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+
+define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: vfmin_nxv8bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v16, v16
+; CHECK-NEXT: vmfeq.vv v8, v12, v12
+; CHECK-NEXT: vmerge.vvm v20, v16, v12, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
+; CHECK-NEXT: vfmin.vv v12, v8, v20
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.minimum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.minimum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>)
+
+define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
+; CHECK-LABEL: vfmin_nxv16bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v24, v24
+; CHECK-NEXT: vmfeq.vv v7, v16, v16
+; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmin.vv v16, v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.minimum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.minimum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>)
+
+define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) nounwind {
+; CHECK-LABEL: vfmin_nxv32bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v0, v8
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v0
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v8
+; CHECK-NEXT: vmfeq.vv v3, v24, v24
+; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0
+; CHECK-NEXT: vmv1r.v v0, v3
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v4
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v16, v16
+; CHECK-NEXT: vmfeq.vv v7, v8, v8
+; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vfmin.vv v16, v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.minimum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
+ ret <vscale x 32 x bfloat> %v
+}
+
declare <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
define <vscale x 1 x half> @vfmin_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
index fc5b11284dab..03e3969f9141 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
@@ -1,13 +1,541 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v,+m \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v,+m \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 \
+; RUN: -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 \
+; RUN: -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.vp.minimum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmerge.vvm v9, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vfmin.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.minimum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v10, v10
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v11, v11
+; CHECK-NEXT: vmerge.vvm v9, v10, v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0
+; CHECK-NEXT: vfmin.vv v9, v8, v9
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.minimum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.minimum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmerge.vvm v9, v11, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t
+; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vfmin.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.minimum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v10, v10
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v11, v11
+; CHECK-NEXT: vmerge.vvm v9, v10, v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0
+; CHECK-NEXT: vfmin.vv v9, v8, v9
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.minimum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.minimum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmerge.vvm v16, v12, v14, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfeq.vv v8, v14, v14, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v14, v12, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vfmin.vv v10, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.minimum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v10, v10
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v12
+; CHECK-NEXT: vmerge.vvm v14, v10, v12, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v12, v10, v0
+; CHECK-NEXT: vfmin.vv v10, v8, v14
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.minimum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.minimum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmerge.vvm v24, v16, v20, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfeq.vv v8, v20, v20, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v20, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vfmin.vv v12, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.minimum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v12, v12
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v16, v16
+; CHECK-NEXT: vmerge.vvm v20, v12, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v16, v12, v0
+; CHECK-NEXT: vfmin.vv v12, v8, v20
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.minimum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v24, v24, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmin.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v16, v16
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v7, v24, v24
+; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmin.vv v16, v8, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.minimum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.minimum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 34
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vmv8r.v v0, v8
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs1r.v v24, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v24, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v4
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v12, v24, v24, v0.t
+; CHECK-NEXT: vmv8r.v v0, v16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a4, 24
+; CHECK-NEXT: mul a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v4
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmfeq.vv v12, v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a0, a1, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v24, v24, v0.t
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 34
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.minimum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v7, v24, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v12, v24, v24, v0.t
+; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a4, 24
+; CHECK-NEXT: mul a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmfeq.vv v12, v24, v24, v0.t
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v4, v16
+; CHECK-NEXT: bltu a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v8
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v3, v16, v16
+; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v3
+; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT: vfmin.vv v16, v16, v24
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v0, v16
+; CHECK-NEXT: vmv8r.v v8, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.minimum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.minimum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vfmin_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -509,10 +1037,10 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: .LBB22_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -656,10 +1184,10 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: .LBB23_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -1093,10 +1621,10 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a2, a1, .LBB28_2
+; CHECK-NEXT: bltu a2, a1, .LBB40_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB28_2:
+; CHECK-NEXT: .LBB40_2:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 18
; CHECK-NEXT: mul a0, a0, a1
@@ -1202,10 +1730,10 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK-NEXT: vfmin.vv v8, v16, v8
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a2, a1, .LBB29_2
+; CHECK-NEXT: bltu a2, a1, .LBB41_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB29_2:
+; CHECK-NEXT: .LBB41_2:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll
index 9e14852305ca..9498c65ba9a1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll
@@ -1,124 +1,438 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
-define <vscale x 1 x half> @nearbyint_nxv1f16(<vscale x 1 x half> %x) {
-; CHECK-LABEL: nearbyint_nxv1f16:
+define <vscale x 1 x bfloat> @nearbyint_nxv1bf16(<vscale x 1 x bfloat> %x) {
+; CHECK-LABEL: nearbyint_nxv1bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: frflags a0
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
- %a = call <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half> %x)
- ret <vscale x 1 x half> %a
+ %a = call <vscale x 1 x bfloat> @llvm.nearbyint.nxv1bf16(<vscale x 1 x bfloat> %x)
+ ret <vscale x 1 x bfloat> %a
}
-declare <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half>)
-define <vscale x 2 x half> @nearbyint_nxv2f16(<vscale x 2 x half> %x) {
-; CHECK-LABEL: nearbyint_nxv2f16:
+define <vscale x 2 x bfloat> @nearbyint_nxv2bf16(<vscale x 2 x bfloat> %x) {
+; CHECK-LABEL: nearbyint_nxv2bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: frflags a0
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
- %a = call <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half> %x)
- ret <vscale x 2 x half> %a
+ %a = call <vscale x 2 x bfloat> @llvm.nearbyint.nxv2bf16(<vscale x 2 x bfloat> %x)
+ ret <vscale x 2 x bfloat> %a
}
-declare <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half>)
-define <vscale x 4 x half> @nearbyint_nxv4f16(<vscale x 4 x half> %x) {
-; CHECK-LABEL: nearbyint_nxv4f16:
+define <vscale x 4 x bfloat> @nearbyint_nxv4bf16(<vscale x 4 x bfloat> %x) {
+; CHECK-LABEL: nearbyint_nxv4bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: frflags a0
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
- %a = call <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half> %x)
- ret <vscale x 4 x half> %a
+ %a = call <vscale x 4 x bfloat> @llvm.nearbyint.nxv4bf16(<vscale x 4 x bfloat> %x)
+ ret <vscale x 4 x bfloat> %a
}
-declare <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half>)
-define <vscale x 8 x half> @nearbyint_nxv8f16(<vscale x 8 x half> %x) {
-; CHECK-LABEL: nearbyint_nxv8f16:
+define <vscale x 8 x bfloat> @nearbyint_nxv8bf16(<vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: nearbyint_nxv8bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfabs.v v10, v8
-; CHECK-NEXT: vmflt.vf v0, v10, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: frflags a0
-; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
- %a = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> %x)
- ret <vscale x 8 x half> %a
+ %a = call <vscale x 8 x bfloat> @llvm.nearbyint.nxv8bf16(<vscale x 8 x bfloat> %x)
+ ret <vscale x 8 x bfloat> %a
}
-declare <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half>)
-define <vscale x 16 x half> @nearbyint_nxv16f16(<vscale x 16 x half> %x) {
-; CHECK-LABEL: nearbyint_nxv16f16:
+define <vscale x 16 x bfloat> @nearbyint_nxv16bf16(<vscale x 16 x bfloat> %x) {
+; CHECK-LABEL: nearbyint_nxv16bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v0, v12, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: frflags a0
-; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
- %a = call <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half> %x)
- ret <vscale x 16 x half> %a
+ %a = call <vscale x 16 x bfloat> @llvm.nearbyint.nxv16bf16(<vscale x 16 x bfloat> %x)
+ ret <vscale x 16 x bfloat> %a
}
-declare <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half>)
-define <vscale x 32 x half> @nearbyint_nxv32f16(<vscale x 32 x half> %x) {
-; CHECK-LABEL: nearbyint_nxv32f16:
+define <vscale x 32 x bfloat> @nearbyint_nxv32bf16(<vscale x 32 x bfloat> %x) {
+; CHECK-LABEL: nearbyint_nxv32bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v0, v16, fa5
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
; CHECK-NEXT: frflags a0
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
+ %a = call <vscale x 32 x bfloat> @llvm.nearbyint.nxv32bf16(<vscale x 32 x bfloat> %x)
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x half> @nearbyint_nxv1f16(<vscale x 1 x half> %x) {
+; ZVFH-LABEL: nearbyint_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI6_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: frflags a0
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: fsflags a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nearbyint_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: frflags a0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: fsflags a0
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half> %x)
+ ret <vscale x 1 x half> %a
+}
+declare <vscale x 1 x half> @llvm.nearbyint.nxv1f16(<vscale x 1 x half>)
+
+define <vscale x 2 x half> @nearbyint_nxv2f16(<vscale x 2 x half> %x) {
+; ZVFH-LABEL: nearbyint_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI7_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: frflags a0
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: fsflags a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nearbyint_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: frflags a0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: fsflags a0
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half> %x)
+ ret <vscale x 2 x half> %a
+}
+declare <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half>)
+
+define <vscale x 4 x half> @nearbyint_nxv4f16(<vscale x 4 x half> %x) {
+; ZVFH-LABEL: nearbyint_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI8_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: frflags a0
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: fsflags a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nearbyint_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v10
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: frflags a0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: fsflags a0
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half> %x)
+ ret <vscale x 4 x half> %a
+}
+declare <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half>)
+
+define <vscale x 8 x half> @nearbyint_nxv8f16(<vscale x 8 x half> %x) {
+; ZVFH-LABEL: nearbyint_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI9_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfabs.v v10, v8
+; ZVFH-NEXT: vmflt.vf v0, v10, fa5
+; ZVFH-NEXT: frflags a0
+; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-NEXT: fsflags a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nearbyint_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v12
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: frflags a0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: fsflags a0
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %a
+}
+declare <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half>)
+
+define <vscale x 16 x half> @nearbyint_nxv16f16(<vscale x 16 x half> %x) {
+; ZVFH-LABEL: nearbyint_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI10_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfabs.v v12, v8
+; ZVFH-NEXT: vmflt.vf v0, v12, fa5
+; ZVFH-NEXT: frflags a0
+; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT: fsflags a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nearbyint_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: frflags a0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: fsflags a0
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half> %x)
+ ret <vscale x 16 x half> %a
+}
+declare <vscale x 16 x half> @llvm.nearbyint.nxv16f16(<vscale x 16 x half>)
+
+define <vscale x 32 x half> @nearbyint_nxv32f16(<vscale x 32 x half> %x) {
+; ZVFH-LABEL: nearbyint_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI11_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfabs.v v16, v8
+; ZVFH-NEXT: vmflt.vf v0, v16, fa5
+; ZVFH-NEXT: frflags a0
+; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT: fsflags a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nearbyint_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: frflags a0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: fsflags a0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v24
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: frflags a0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; ZVFHMIN-NEXT: addi a1, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: fsflags a0
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 32 x half> @llvm.nearbyint.nxv32f16(<vscale x 32 x half> %x)
ret <vscale x 32 x half> %a
}
@@ -227,8 +541,8 @@ declare <vscale x 16 x float> @llvm.nearbyint.nxv16f32(<vscale x 16 x float>)
define <vscale x 1 x double> @nearbyint_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: nearbyint_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI17_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -247,8 +561,8 @@ declare <vscale x 1 x double> @llvm.nearbyint.nxv1f64(<vscale x 1 x double>)
define <vscale x 2 x double> @nearbyint_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: nearbyint_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI18_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -267,8 +581,8 @@ declare <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double>)
define <vscale x 4 x double> @nearbyint_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: nearbyint_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -287,8 +601,8 @@ declare <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double>)
define <vscale x 8 x double> @nearbyint_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: nearbyint_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI20_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll
index fb77b7465494..7fac8949c551 100644
--- a/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/frint-sdnode.ll
@@ -1,112 +1,372 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
-define <vscale x 1 x half> @rint_nxv1f16(<vscale x 1 x half> %x) {
-; CHECK-LABEL: rint_nxv1f16:
+define <vscale x 1 x bfloat> @rint_nxv1bf16(<vscale x 1 x bfloat> %x) {
+; CHECK-LABEL: rint_nxv1bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %a = call <vscale x 1 x bfloat> @llvm.rint.nxv1bf16(<vscale x 1 x bfloat> %x)
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @rint_nxv2bf16(<vscale x 2 x bfloat> %x) {
+; CHECK-LABEL: rint_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
+ %a = call <vscale x 2 x bfloat> @llvm.rint.nxv2bf16(<vscale x 2 x bfloat> %x)
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @rint_nxv4bf16(<vscale x 4 x bfloat> %x) {
+; CHECK-LABEL: rint_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %a = call <vscale x 4 x bfloat> @llvm.rint.nxv4bf16(<vscale x 4 x bfloat> %x)
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @rint_nxv8bf16(<vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: rint_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %a = call <vscale x 8 x bfloat> @llvm.rint.nxv8bf16(<vscale x 8 x bfloat> %x)
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @rint_nxv16bf16(<vscale x 16 x bfloat> %x) {
+; CHECK-LABEL: rint_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %a = call <vscale x 16 x bfloat> @llvm.rint.nxv16bf16(<vscale x 16 x bfloat> %x)
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @rint_nxv32bf16(<vscale x 32 x bfloat> %x) {
+; CHECK-LABEL: rint_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: ret
+ %a = call <vscale x 32 x bfloat> @llvm.rint.nxv32bf16(<vscale x 32 x bfloat> %x)
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x half> @rint_nxv1f16(<vscale x 1 x half> %x) {
+; ZVFH-LABEL: rint_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI6_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: rint_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half> %x)
ret <vscale x 1 x half> %a
}
declare <vscale x 1 x half> @llvm.rint.nxv1f16(<vscale x 1 x half>)
define <vscale x 2 x half> @rint_nxv2f16(<vscale x 2 x half> %x) {
-; CHECK-LABEL: rint_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: rint_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI7_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: rint_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 2 x half> @llvm.rint.nxv2f16(<vscale x 2 x half> %x)
ret <vscale x 2 x half> %a
}
declare <vscale x 2 x half> @llvm.rint.nxv2f16(<vscale x 2 x half>)
define <vscale x 4 x half> @rint_nxv4f16(<vscale x 4 x half> %x) {
-; CHECK-LABEL: rint_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: rint_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI8_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: rint_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v10
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> %x)
ret <vscale x 4 x half> %a
}
declare <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half>)
define <vscale x 8 x half> @rint_nxv8f16(<vscale x 8 x half> %x) {
-; CHECK-LABEL: rint_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfabs.v v10, v8
-; CHECK-NEXT: vmflt.vf v0, v10, fa5
-; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: rint_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI9_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfabs.v v10, v8
+; ZVFH-NEXT: vmflt.vf v0, v10, fa5
+; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: rint_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v12
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 8 x half> @llvm.rint.nxv8f16(<vscale x 8 x half> %x)
ret <vscale x 8 x half> %a
}
declare <vscale x 8 x half> @llvm.rint.nxv8f16(<vscale x 8 x half>)
define <vscale x 16 x half> @rint_nxv16f16(<vscale x 16 x half> %x) {
-; CHECK-LABEL: rint_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v0, v12, fa5
-; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: rint_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI10_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfabs.v v12, v8
+; ZVFH-NEXT: vmflt.vf v0, v12, fa5
+; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: rint_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 16 x half> @llvm.rint.nxv16f16(<vscale x 16 x half> %x)
ret <vscale x 16 x half> %a
}
declare <vscale x 16 x half> @llvm.rint.nxv16f16(<vscale x 16 x half>)
define <vscale x 32 x half> @rint_nxv32f16(<vscale x 32 x half> %x) {
-; CHECK-LABEL: rint_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v0, v16, fa5
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: rint_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI11_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfabs.v v16, v8
+; ZVFH-NEXT: vmflt.vf v0, v16, fa5
+; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: rint_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v24
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 32 x half> @llvm.rint.nxv32f16(<vscale x 32 x half> %x)
ret <vscale x 32 x half> %a
}
@@ -205,8 +465,8 @@ declare <vscale x 16 x float> @llvm.rint.nxv16f32(<vscale x 16 x float>)
define <vscale x 1 x double> @rint_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: rint_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI17_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -223,8 +483,8 @@ declare <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double>)
define <vscale x 2 x double> @rint_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: rint_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI18_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -241,8 +501,8 @@ declare <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double>)
define <vscale x 4 x double> @rint_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: rint_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -259,8 +519,8 @@ declare <vscale x 4 x double> @llvm.rint.nxv4f64(<vscale x 4 x double>)
define <vscale x 8 x double> @rint_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: rint_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI20_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll
index bb6724eeb320..193773b0c89c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll
@@ -1,126 +1,410 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
; This file tests the code generation for `llvm.round.*` on scalable vector type.
-define <vscale x 1 x half> @round_nxv1f16(<vscale x 1 x half> %x) {
-; CHECK-LABEL: round_nxv1f16:
+define <vscale x 1 x bfloat> @round_nxv1bf16(<vscale x 1 x bfloat> %x) {
+; CHECK-LABEL: round_nxv1bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 4
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
- %a = call <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half> %x)
- ret <vscale x 1 x half> %a
+ %a = call <vscale x 1 x bfloat> @llvm.round.nxv1bf16(<vscale x 1 x bfloat> %x)
+ ret <vscale x 1 x bfloat> %a
}
-declare <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half>)
-define <vscale x 2 x half> @round_nxv2f16(<vscale x 2 x half> %x) {
-; CHECK-LABEL: round_nxv2f16:
+define <vscale x 2 x bfloat> @round_nxv2bf16(<vscale x 2 x bfloat> %x) {
+; CHECK-LABEL: round_nxv2bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 4
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
- %a = call <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half> %x)
- ret <vscale x 2 x half> %a
+ %a = call <vscale x 2 x bfloat> @llvm.round.nxv2bf16(<vscale x 2 x bfloat> %x)
+ ret <vscale x 2 x bfloat> %a
}
-declare <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half>)
-define <vscale x 4 x half> @round_nxv4f16(<vscale x 4 x half> %x) {
-; CHECK-LABEL: round_nxv4f16:
+define <vscale x 4 x bfloat> @round_nxv4bf16(<vscale x 4 x bfloat> %x) {
+; CHECK-LABEL: round_nxv4bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 4
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
- %a = call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> %x)
- ret <vscale x 4 x half> %a
+ %a = call <vscale x 4 x bfloat> @llvm.round.nxv4bf16(<vscale x 4 x bfloat> %x)
+ ret <vscale x 4 x bfloat> %a
}
-declare <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half>)
-define <vscale x 8 x half> @round_nxv8f16(<vscale x 8 x half> %x) {
-; CHECK-LABEL: round_nxv8f16:
+define <vscale x 8 x bfloat> @round_nxv8bf16(<vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: round_nxv8bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfabs.v v10, v8
-; CHECK-NEXT: vmflt.vf v0, v10, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 4
-; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
- %a = call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> %x)
- ret <vscale x 8 x half> %a
+ %a = call <vscale x 8 x bfloat> @llvm.round.nxv8bf16(<vscale x 8 x bfloat> %x)
+ ret <vscale x 8 x bfloat> %a
}
-declare <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half>)
-define <vscale x 16 x half> @round_nxv16f16(<vscale x 16 x half> %x) {
-; CHECK-LABEL: round_nxv16f16:
+define <vscale x 16 x bfloat> @round_nxv16bf16(<vscale x 16 x bfloat> %x) {
+; CHECK-LABEL: round_nxv16bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v0, v12, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 4
-; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: ret
- %a = call <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half> %x)
- ret <vscale x 16 x half> %a
+ %a = call <vscale x 16 x bfloat> @llvm.round.nxv16bf16(<vscale x 16 x bfloat> %x)
+ ret <vscale x 16 x bfloat> %a
}
-declare <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half>)
-define <vscale x 32 x half> @round_nxv32f16(<vscale x 32 x half> %x) {
-; CHECK-LABEL: round_nxv32f16:
+define <vscale x 32 x bfloat> @round_nxv32bf16(<vscale x 32 x bfloat> %x) {
+; CHECK-LABEL: round_nxv32bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v0, v16, fa5
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
; CHECK-NEXT: fsrmi a0, 4
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT: ret
+ %a = call <vscale x 32 x bfloat> @llvm.round.nxv32bf16(<vscale x 32 x bfloat> %x)
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x half> @round_nxv1f16(<vscale x 1 x half> %x) {
+; ZVFH-LABEL: round_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI6_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 4
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: round_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 4
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half> %x)
+ ret <vscale x 1 x half> %a
+}
+declare <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half>)
+
+define <vscale x 2 x half> @round_nxv2f16(<vscale x 2 x half> %x) {
+; ZVFH-LABEL: round_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI7_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 4
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: round_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 4
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half> %x)
+ ret <vscale x 2 x half> %a
+}
+declare <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half>)
+
+define <vscale x 4 x half> @round_nxv4f16(<vscale x 4 x half> %x) {
+; ZVFH-LABEL: round_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI8_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 4
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: round_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v10
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 4
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> %x)
+ ret <vscale x 4 x half> %a
+}
+declare <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half>)
+
+define <vscale x 8 x half> @round_nxv8f16(<vscale x 8 x half> %x) {
+; ZVFH-LABEL: round_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI9_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfabs.v v10, v8
+; ZVFH-NEXT: vmflt.vf v0, v10, fa5
+; ZVFH-NEXT: fsrmi a0, 4
+; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: round_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v12
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 4
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %a
+}
+declare <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half>)
+
+define <vscale x 16 x half> @round_nxv16f16(<vscale x 16 x half> %x) {
+; ZVFH-LABEL: round_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI10_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfabs.v v12, v8
+; ZVFH-NEXT: vmflt.vf v0, v12, fa5
+; ZVFH-NEXT: fsrmi a0, 4
+; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: round_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 4
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half> %x)
+ ret <vscale x 16 x half> %a
+}
+declare <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half>)
+
+define <vscale x 32 x half> @round_nxv32f16(<vscale x 32 x half> %x) {
+; ZVFH-LABEL: round_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI11_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfabs.v v16, v8
+; ZVFH-NEXT: vmflt.vf v0, v16, fa5
+; ZVFH-NEXT: fsrmi a0, 4
+; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: round_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 4
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 4
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 32 x half> @llvm.round.nxv32f16(<vscale x 32 x half> %x)
ret <vscale x 32 x half> %a
}
@@ -229,8 +513,8 @@ declare <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float>)
define <vscale x 1 x double> @round_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: round_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI17_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -249,8 +533,8 @@ declare <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double>)
define <vscale x 2 x double> @round_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: round_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI18_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -269,8 +553,8 @@ declare <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double>)
define <vscale x 4 x double> @round_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: round_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -289,8 +573,8 @@ declare <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double>)
define <vscale x 8 x double> @round_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: round_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI20_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll
index 6f5207a25518..052ee2d3a43c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll
@@ -1,126 +1,409 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
; This file tests the code generation for `llvm.roundeven.*` on scalable vector type.
-
-define <vscale x 1 x half> @roundeven_nxv1f16(<vscale x 1 x half> %x) {
-; CHECK-LABEL: roundeven_nxv1f16:
+define <vscale x 1 x bfloat> @roundeven_nxv1bf16(<vscale x 1 x bfloat> %x) {
+; CHECK-LABEL: roundeven_nxv1bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 0
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
- %a = call <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half> %x)
- ret <vscale x 1 x half> %a
+ %a = call <vscale x 1 x bfloat> @llvm.roundeven.nxv1bf16(<vscale x 1 x bfloat> %x)
+ ret <vscale x 1 x bfloat> %a
}
-declare <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half>)
-define <vscale x 2 x half> @roundeven_nxv2f16(<vscale x 2 x half> %x) {
-; CHECK-LABEL: roundeven_nxv2f16:
+define <vscale x 2 x bfloat> @roundeven_nxv2bf16(<vscale x 2 x bfloat> %x) {
+; CHECK-LABEL: roundeven_nxv2bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 0
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
- %a = call <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half> %x)
- ret <vscale x 2 x half> %a
+ %a = call <vscale x 2 x bfloat> @llvm.roundeven.nxv2bf16(<vscale x 2 x bfloat> %x)
+ ret <vscale x 2 x bfloat> %a
}
-declare <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half>)
-define <vscale x 4 x half> @roundeven_nxv4f16(<vscale x 4 x half> %x) {
-; CHECK-LABEL: roundeven_nxv4f16:
+define <vscale x 4 x bfloat> @roundeven_nxv4bf16(<vscale x 4 x bfloat> %x) {
+; CHECK-LABEL: roundeven_nxv4bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 0
-; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
- %a = call <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half> %x)
- ret <vscale x 4 x half> %a
+ %a = call <vscale x 4 x bfloat> @llvm.roundeven.nxv4bf16(<vscale x 4 x bfloat> %x)
+ ret <vscale x 4 x bfloat> %a
}
-declare <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half>)
-define <vscale x 8 x half> @roundeven_nxv8f16(<vscale x 8 x half> %x) {
-; CHECK-LABEL: roundeven_nxv8f16:
+define <vscale x 8 x bfloat> @roundeven_nxv8bf16(<vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: roundeven_nxv8bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfabs.v v10, v8
-; CHECK-NEXT: vmflt.vf v0, v10, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 0
-; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
- %a = call <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half> %x)
- ret <vscale x 8 x half> %a
+ %a = call <vscale x 8 x bfloat> @llvm.roundeven.nxv8bf16(<vscale x 8 x bfloat> %x)
+ ret <vscale x 8 x bfloat> %a
}
-declare <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half>)
-define <vscale x 16 x half> @roundeven_nxv16f16(<vscale x 16 x half> %x) {
-; CHECK-LABEL: roundeven_nxv16f16:
+define <vscale x 16 x bfloat> @roundeven_nxv16bf16(<vscale x 16 x bfloat> %x) {
+; CHECK-LABEL: roundeven_nxv16bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v0, v12, fa5
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 0
-; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: ret
- %a = call <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half> %x)
- ret <vscale x 16 x half> %a
+ %a = call <vscale x 16 x bfloat> @llvm.roundeven.nxv16bf16(<vscale x 16 x bfloat> %x)
+ ret <vscale x 16 x bfloat> %a
}
-declare <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half>)
-define <vscale x 32 x half> @roundeven_nxv32f16(<vscale x 32 x half> %x) {
-; CHECK-LABEL: roundeven_nxv32f16:
+define <vscale x 32 x bfloat> @roundeven_nxv32bf16(<vscale x 32 x bfloat> %x) {
+; CHECK-LABEL: roundeven_nxv32bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v0, v16, fa5
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
; CHECK-NEXT: fsrmi a0, 0
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT: ret
+ %a = call <vscale x 32 x bfloat> @llvm.roundeven.nxv32bf16(<vscale x 32 x bfloat> %x)
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x half> @roundeven_nxv1f16(<vscale x 1 x half> %x) {
+; ZVFH-LABEL: roundeven_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI6_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 0
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: roundeven_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half> %x)
+ ret <vscale x 1 x half> %a
+}
+declare <vscale x 1 x half> @llvm.roundeven.nxv1f16(<vscale x 1 x half>)
+
+define <vscale x 2 x half> @roundeven_nxv2f16(<vscale x 2 x half> %x) {
+; ZVFH-LABEL: roundeven_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI7_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 0
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: roundeven_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half> %x)
+ ret <vscale x 2 x half> %a
+}
+declare <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half>)
+
+define <vscale x 4 x half> @roundeven_nxv4f16(<vscale x 4 x half> %x) {
+; ZVFH-LABEL: roundeven_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI8_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: fsrmi a0, 0
+; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: roundeven_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v10
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half> %x)
+ ret <vscale x 4 x half> %a
+}
+declare <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half>)
+
+define <vscale x 8 x half> @roundeven_nxv8f16(<vscale x 8 x half> %x) {
+; ZVFH-LABEL: roundeven_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI9_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfabs.v v10, v8
+; ZVFH-NEXT: vmflt.vf v0, v10, fa5
+; ZVFH-NEXT: fsrmi a0, 0
+; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: roundeven_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v12
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %a
+}
+declare <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half>)
+
+define <vscale x 16 x half> @roundeven_nxv16f16(<vscale x 16 x half> %x) {
+; ZVFH-LABEL: roundeven_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI10_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfabs.v v12, v8
+; ZVFH-NEXT: vmflt.vf v0, v12, fa5
+; ZVFH-NEXT: fsrmi a0, 0
+; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: roundeven_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+ %a = call <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half> %x)
+ ret <vscale x 16 x half> %a
+}
+declare <vscale x 16 x half> @llvm.roundeven.nxv16f16(<vscale x 16 x half>)
+
+define <vscale x 32 x half> @roundeven_nxv32f16(<vscale x 32 x half> %x) {
+; ZVFH-LABEL: roundeven_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI11_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfabs.v v16, v8
+; ZVFH-NEXT: vmflt.vf v0, v16, fa5
+; ZVFH-NEXT: fsrmi a0, 0
+; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; ZVFH-NEXT: fsrm a0
+; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: roundeven_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: fsrmi a0, 0
+; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: fsrm a0
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 32 x half> @llvm.roundeven.nxv32f16(<vscale x 32 x half> %x)
ret <vscale x 32 x half> %a
}
@@ -229,8 +512,8 @@ declare <vscale x 16 x float> @llvm.roundeven.nxv16f32(<vscale x 16 x float>)
define <vscale x 1 x double> @roundeven_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: roundeven_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI17_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -249,8 +532,8 @@ declare <vscale x 1 x double> @llvm.roundeven.nxv1f64(<vscale x 1 x double>)
define <vscale x 2 x double> @roundeven_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: roundeven_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI18_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -269,8 +552,8 @@ declare <vscale x 2 x double> @llvm.roundeven.nxv2f64(<vscale x 2 x double>)
define <vscale x 4 x double> @roundeven_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: roundeven_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -289,8 +572,8 @@ declare <vscale x 4 x double> @llvm.roundeven.nxv4f64(<vscale x 4 x double>)
define <vscale x 8 x double> @roundeven_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: roundeven_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI20_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll
index 8841232e7f76..b29b24a9ce7b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll
@@ -1,112 +1,372 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
-define <vscale x 1 x half> @trunc_nxv1f16(<vscale x 1 x half> %x) {
-; CHECK-LABEL: trunc_nxv1f16:
+define <vscale x 1 x bfloat> @trunc_nxv1bf16(<vscale x 1 x bfloat> %x) {
+; CHECK-LABEL: trunc_nxv1bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %a = call <vscale x 1 x bfloat> @llvm.trunc.nxv1bf16(<vscale x 1 x bfloat> %x)
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @trunc_nxv2bf16(<vscale x 2 x bfloat> %x) {
+; CHECK-LABEL: trunc_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
+ %a = call <vscale x 2 x bfloat> @llvm.trunc.nxv2bf16(<vscale x 2 x bfloat> %x)
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @trunc_nxv4bf16(<vscale x 4 x bfloat> %x) {
+; CHECK-LABEL: trunc_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %a = call <vscale x 4 x bfloat> @llvm.trunc.nxv4bf16(<vscale x 4 x bfloat> %x)
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @trunc_nxv8bf16(<vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: trunc_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v12, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %a = call <vscale x 8 x bfloat> @llvm.trunc.nxv8bf16(<vscale x 8 x bfloat> %x)
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @trunc_nxv16bf16(<vscale x 16 x bfloat> %x) {
+; CHECK-LABEL: trunc_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %a = call <vscale x 16 x bfloat> @llvm.trunc.nxv16bf16(<vscale x 16 x bfloat> %x)
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @trunc_nxv32bf16(<vscale x 32 x bfloat> %x) {
+; CHECK-LABEL: trunc_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v24, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: ret
+ %a = call <vscale x 32 x bfloat> @llvm.trunc.nxv32bf16(<vscale x 32 x bfloat> %x)
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x half> @trunc_nxv1f16(<vscale x 1 x half> %x) {
+; ZVFH-LABEL: trunc_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI6_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: trunc_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half> %x)
ret <vscale x 1 x half> %a
}
declare <vscale x 1 x half> @llvm.trunc.nxv1f16(<vscale x 1 x half>)
define <vscale x 2 x half> @trunc_nxv2f16(<vscale x 2 x half> %x) {
-; CHECK-LABEL: trunc_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: trunc_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI7_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: trunc_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half> %x)
ret <vscale x 2 x half> %a
}
declare <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half>)
define <vscale x 4 x half> @trunc_nxv4f16(<vscale x 4 x half> %x) {
-; CHECK-LABEL: trunc_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfabs.v v9, v8
-; CHECK-NEXT: vmflt.vf v0, v9, fa5
-; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: trunc_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI8_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v9, v8
+; ZVFH-NEXT: vmflt.vf v0, v9, fa5
+; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: trunc_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v10
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %x)
ret <vscale x 4 x half> %a
}
declare <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half>)
define <vscale x 8 x half> @trunc_nxv8f16(<vscale x 8 x half> %x) {
-; CHECK-LABEL: trunc_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfabs.v v10, v8
-; CHECK-NEXT: vmflt.vf v0, v10, fa5
-; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: trunc_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI9_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfabs.v v10, v8
+; ZVFH-NEXT: vmflt.vf v0, v10, fa5
+; ZVFH-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: trunc_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v12
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v12, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 8 x half> @llvm.trunc.nxv8f16(<vscale x 8 x half> %x)
ret <vscale x 8 x half> %a
}
declare <vscale x 8 x half> @llvm.trunc.nxv8f16(<vscale x 8 x half>)
define <vscale x 16 x half> @trunc_nxv16f16(<vscale x 16 x half> %x) {
-; CHECK-LABEL: trunc_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfabs.v v12, v8
-; CHECK-NEXT: vmflt.vf v0, v12, fa5
-; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: trunc_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI10_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfabs.v v12, v8
+; ZVFH-NEXT: vmflt.vf v0, v12, fa5
+; ZVFH-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: trunc_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v16, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 16 x half> @llvm.trunc.nxv16f16(<vscale x 16 x half> %x)
ret <vscale x 16 x half> %a
}
declare <vscale x 16 x half> @llvm.trunc.nxv16f16(<vscale x 16 x half>)
define <vscale x 32 x half> @trunc_nxv32f16(<vscale x 32 x half> %x) {
-; CHECK-LABEL: trunc_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8
-; CHECK-NEXT: vmflt.vf v0, v16, fa5
-; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: ret
+; ZVFH-LABEL: trunc_nxv32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, %hi(.LCPI11_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0)
+; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vfabs.v v16, v8
+; ZVFH-NEXT: vmflt.vf v0, v16, fa5
+; ZVFH-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
+; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
+; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: trunc_nxv32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v24, v16
+; ZVFHMIN-NEXT: lui a0, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v24, v16, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v24
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v16, v24, v0.t
+; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: ret
%a = call <vscale x 32 x half> @llvm.trunc.nxv32f16(<vscale x 32 x half> %x)
ret <vscale x 32 x half> %a
}
@@ -205,8 +465,8 @@ declare <vscale x 16 x float> @llvm.trunc.nxv16f32(<vscale x 16 x float>)
define <vscale x 1 x double> @trunc_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: trunc_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI17_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -223,8 +483,8 @@ declare <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double>)
define <vscale x 2 x double> @trunc_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: trunc_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI18_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -241,8 +501,8 @@ declare <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double>)
define <vscale x 4 x double> @trunc_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: trunc_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI19_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -259,8 +519,8 @@ declare <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double>)
define <vscale x 8 x double> @trunc_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: trunc_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI20_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
index 2a915529e61d..9d0cb22eb5f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
@@ -24,8 +24,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0
; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9
-; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1
-; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v10, 0
; RV32-BITS-UNKNOWN-NEXT: ret
;
; RV32-BITS-256-LABEL: reverse_nxv2i1:
@@ -39,8 +38,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
; RV32-BITS-256-NEXT: vid.v v9
; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0
; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9
-; RV32-BITS-256-NEXT: vand.vi v8, v10, 1
-; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-256-NEXT: vmsne.vi v0, v10, 0
; RV32-BITS-256-NEXT: ret
;
; RV32-BITS-512-LABEL: reverse_nxv2i1:
@@ -54,8 +52,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
; RV32-BITS-512-NEXT: vid.v v9
; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0
; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9
-; RV32-BITS-512-NEXT: vand.vi v8, v10, 1
-; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-512-NEXT: vmsne.vi v0, v10, 0
; RV32-BITS-512-NEXT: ret
;
; RV64-BITS-UNKNOWN-LABEL: reverse_nxv2i1:
@@ -71,8 +68,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9
-; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1
-; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v10, 0
; RV64-BITS-UNKNOWN-NEXT: ret
;
; RV64-BITS-256-LABEL: reverse_nxv2i1:
@@ -86,8 +82,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9
-; RV64-BITS-256-NEXT: vand.vi v8, v10, 1
-; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-256-NEXT: vmsne.vi v0, v10, 0
; RV64-BITS-256-NEXT: ret
;
; RV64-BITS-512-LABEL: reverse_nxv2i1:
@@ -101,8 +96,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9
-; RV64-BITS-512-NEXT: vand.vi v8, v10, 1
-; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-512-NEXT: vmsne.vi v0, v10, 0
; RV64-BITS-512-NEXT: ret
%res = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %a)
ret <vscale x 2 x i1> %res
@@ -122,8 +116,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0
; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9
-; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1
-; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v10, 0
; RV32-BITS-UNKNOWN-NEXT: ret
;
; RV32-BITS-256-LABEL: reverse_nxv4i1:
@@ -137,8 +130,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
; RV32-BITS-256-NEXT: vid.v v9
; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0
; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9
-; RV32-BITS-256-NEXT: vand.vi v8, v10, 1
-; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-256-NEXT: vmsne.vi v0, v10, 0
; RV32-BITS-256-NEXT: ret
;
; RV32-BITS-512-LABEL: reverse_nxv4i1:
@@ -152,8 +144,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
; RV32-BITS-512-NEXT: vid.v v9
; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0
; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9
-; RV32-BITS-512-NEXT: vand.vi v8, v10, 1
-; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-512-NEXT: vmsne.vi v0, v10, 0
; RV32-BITS-512-NEXT: ret
;
; RV64-BITS-UNKNOWN-LABEL: reverse_nxv4i1:
@@ -169,8 +160,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v10, v8, v9
-; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v10, 1
-; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v10, 0
; RV64-BITS-UNKNOWN-NEXT: ret
;
; RV64-BITS-256-LABEL: reverse_nxv4i1:
@@ -184,8 +174,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9
-; RV64-BITS-256-NEXT: vand.vi v8, v10, 1
-; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-256-NEXT: vmsne.vi v0, v10, 0
; RV64-BITS-256-NEXT: ret
;
; RV64-BITS-512-LABEL: reverse_nxv4i1:
@@ -199,8 +188,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9
-; RV64-BITS-512-NEXT: vand.vi v8, v10, 1
-; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-512-NEXT: vmsne.vi v0, v10, 0
; RV64-BITS-512-NEXT: ret
%res = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %a)
ret <vscale x 4 x i1> %res
@@ -219,8 +207,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0
; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10
-; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v9, 1
-; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v9, 0
; RV32-BITS-UNKNOWN-NEXT: ret
;
; RV32-BITS-256-LABEL: reverse_nxv8i1:
@@ -233,8 +220,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV32-BITS-256-NEXT: vid.v v9
; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0
; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9
-; RV32-BITS-256-NEXT: vand.vi v8, v10, 1
-; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-256-NEXT: vmsne.vi v0, v10, 0
; RV32-BITS-256-NEXT: ret
;
; RV32-BITS-512-LABEL: reverse_nxv8i1:
@@ -247,8 +233,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV32-BITS-512-NEXT: vid.v v9
; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0
; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9
-; RV32-BITS-512-NEXT: vand.vi v8, v10, 1
-; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-512-NEXT: vmsne.vi v0, v10, 0
; RV32-BITS-512-NEXT: ret
;
; RV64-BITS-UNKNOWN-LABEL: reverse_nxv8i1:
@@ -263,8 +248,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10
-; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v9, 1
-; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v9, 0
; RV64-BITS-UNKNOWN-NEXT: ret
;
; RV64-BITS-256-LABEL: reverse_nxv8i1:
@@ -277,8 +261,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9
-; RV64-BITS-256-NEXT: vand.vi v8, v10, 1
-; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-256-NEXT: vmsne.vi v0, v10, 0
; RV64-BITS-256-NEXT: ret
;
; RV64-BITS-512-LABEL: reverse_nxv8i1:
@@ -291,8 +274,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9
-; RV64-BITS-512-NEXT: vand.vi v8, v10, 1
-; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-512-NEXT: vmsne.vi v0, v10, 0
; RV64-BITS-512-NEXT: ret
%res = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> %a)
ret <vscale x 8 x i1> %res
@@ -313,8 +295,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v13, v10, v8
; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v11, v8
; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v12, 1
-; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v12, 0
; RV32-BITS-UNKNOWN-NEXT: ret
;
; RV32-BITS-256-LABEL: reverse_nxv16i1:
@@ -331,8 +312,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV32-BITS-256-NEXT: vrgather.vv v13, v10, v8
; RV32-BITS-256-NEXT: vrgather.vv v12, v11, v8
; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV32-BITS-256-NEXT: vand.vi v8, v12, 1
-; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-256-NEXT: vmsne.vi v0, v12, 0
; RV32-BITS-256-NEXT: ret
;
; RV32-BITS-512-LABEL: reverse_nxv16i1:
@@ -349,8 +329,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV32-BITS-512-NEXT: vrgather.vv v13, v10, v8
; RV32-BITS-512-NEXT: vrgather.vv v12, v11, v8
; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV32-BITS-512-NEXT: vand.vi v8, v12, 1
-; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV32-BITS-512-NEXT: vmsne.vi v0, v12, 0
; RV32-BITS-512-NEXT: ret
;
; RV64-BITS-UNKNOWN-LABEL: reverse_nxv16i1:
@@ -367,8 +346,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v13, v10, v8
; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v12, v11, v8
; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v12, 1
-; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v12, 0
; RV64-BITS-UNKNOWN-NEXT: ret
;
; RV64-BITS-256-LABEL: reverse_nxv16i1:
@@ -385,8 +363,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV64-BITS-256-NEXT: vrgather.vv v13, v10, v8
; RV64-BITS-256-NEXT: vrgather.vv v12, v11, v8
; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV64-BITS-256-NEXT: vand.vi v8, v12, 1
-; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-256-NEXT: vmsne.vi v0, v12, 0
; RV64-BITS-256-NEXT: ret
;
; RV64-BITS-512-LABEL: reverse_nxv16i1:
@@ -403,8 +380,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV64-BITS-512-NEXT: vrgather.vv v13, v10, v8
; RV64-BITS-512-NEXT: vrgather.vv v12, v11, v8
; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; RV64-BITS-512-NEXT: vand.vi v8, v12, 1
-; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
+; RV64-BITS-512-NEXT: vmsne.vi v0, v12, 0
; RV64-BITS-512-NEXT: ret
%res = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> %a)
ret <vscale x 16 x i1> %res
@@ -427,7 +403,6 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v18, v12
; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v19, v12
; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
; RV32-BITS-UNKNOWN-NEXT: ret
;
@@ -447,7 +422,6 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV32-BITS-256-NEXT: vrgather.vv v9, v18, v12
; RV32-BITS-256-NEXT: vrgather.vv v8, v19, v12
; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; RV32-BITS-256-NEXT: vand.vi v8, v8, 1
; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
; RV32-BITS-256-NEXT: ret
;
@@ -467,7 +441,6 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV32-BITS-512-NEXT: vrgather.vv v9, v18, v12
; RV32-BITS-512-NEXT: vrgather.vv v8, v19, v12
; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; RV32-BITS-512-NEXT: vand.vi v8, v8, 1
; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
; RV32-BITS-512-NEXT: ret
;
@@ -487,7 +460,6 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v18, v12
; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v19, v12
; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
; RV64-BITS-UNKNOWN-NEXT: ret
;
@@ -507,7 +479,6 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV64-BITS-256-NEXT: vrgather.vv v9, v18, v12
; RV64-BITS-256-NEXT: vrgather.vv v8, v19, v12
; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; RV64-BITS-256-NEXT: vand.vi v8, v8, 1
; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
; RV64-BITS-256-NEXT: ret
;
@@ -527,7 +498,6 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV64-BITS-512-NEXT: vrgather.vv v9, v18, v12
; RV64-BITS-512-NEXT: vrgather.vv v8, v19, v12
; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; RV64-BITS-512-NEXT: vand.vi v8, v8, 1
; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
; RV64-BITS-512-NEXT: ret
%res = call <vscale x 32 x i1> @llvm.vector.reverse.nxv32i1(<vscale x 32 x i1> %a)
@@ -555,7 +525,6 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v30, v16
; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v31, v16
; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
; RV32-BITS-UNKNOWN-NEXT: ret
;
@@ -579,7 +548,6 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV32-BITS-256-NEXT: vrgather.vv v9, v22, v24
; RV32-BITS-256-NEXT: vrgather.vv v8, v23, v24
; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; RV32-BITS-256-NEXT: vand.vi v8, v8, 1
; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0
; RV32-BITS-256-NEXT: ret
;
@@ -603,7 +571,6 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV32-BITS-512-NEXT: vrgather.vv v9, v22, v24
; RV32-BITS-512-NEXT: vrgather.vv v8, v23, v24
; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; RV32-BITS-512-NEXT: vand.vi v8, v8, 1
; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0
; RV32-BITS-512-NEXT: ret
;
@@ -627,7 +594,6 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v30, v16
; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v8, v31, v16
; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1
; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0
; RV64-BITS-UNKNOWN-NEXT: ret
;
@@ -651,7 +617,6 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV64-BITS-256-NEXT: vrgather.vv v9, v22, v24
; RV64-BITS-256-NEXT: vrgather.vv v8, v23, v24
; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; RV64-BITS-256-NEXT: vand.vi v8, v8, 1
; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0
; RV64-BITS-256-NEXT: ret
;
@@ -675,7 +640,6 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV64-BITS-512-NEXT: vrgather.vv v9, v22, v24
; RV64-BITS-512-NEXT: vrgather.vv v8, v23, v24
; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; RV64-BITS-512-NEXT: vand.vi v8, v8, 1
; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0
; RV64-BITS-512-NEXT: ret
%res = call <vscale x 64 x i1> @llvm.vector.reverse.nxv64i1(<vscale x 64 x i1> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
index a3ea462b6a73..5aa773b01e69 100644
--- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
@@ -1,20 +1,420 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vp_nearbyint_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_nearbyint_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vp_nearbyint_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_nearbyint_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vp_nearbyint_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_nearbyint_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vp_nearbyint_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_nearbyint_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vp_nearbyint_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_nearbyint_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v12, v10, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vp_nearbyint_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_nearbyint_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vp_nearbyint_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_nearbyint_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v16, v12, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vp_nearbyint_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_nearbyint_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vp_nearbyint_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_nearbyint_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vp_nearbyint_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_nearbyint_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.nearbyint.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vp_nearbyint_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_nearbyint_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v17, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t
+; CHECK-NEXT: frflags a2
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: fsflags a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v16, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.nearbyint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vp_nearbyint_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_nearbyint_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v16
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v16, v16, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t
+; CHECK-NEXT: frflags a2
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: fsflags a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: frflags a0
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: fsflags a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.nearbyint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.nearbyint.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vp_nearbyint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv1f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI12_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
@@ -55,8 +455,8 @@ define <vscale x 1 x half> @vp_nearbyint_nxv1f16(<vscale x 1 x half> %va, <vscal
define <vscale x 1 x half> @vp_nearbyint_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv1f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI13_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -95,8 +495,8 @@ declare <vscale x 2 x half> @llvm.vp.nearbyint.nxv2f16(<vscale x 2 x half>, <vsc
define <vscale x 2 x half> @vp_nearbyint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv2f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI14_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
@@ -137,8 +537,8 @@ define <vscale x 2 x half> @vp_nearbyint_nxv2f16(<vscale x 2 x half> %va, <vscal
define <vscale x 2 x half> @vp_nearbyint_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv2f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI15_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -177,8 +577,8 @@ declare <vscale x 4 x half> @llvm.vp.nearbyint.nxv4f16(<vscale x 4 x half>, <vsc
define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv4f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI16_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
@@ -221,8 +621,8 @@ define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscal
define <vscale x 4 x half> @vp_nearbyint_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv4f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI17_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -261,8 +661,8 @@ declare <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half>, <vsc
define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv8f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
@@ -307,8 +707,8 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscal
define <vscale x 8 x half> @vp_nearbyint_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv8f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI19_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v10, v8
; ZVFH-NEXT: vmflt.vf v0, v10, fa5
@@ -347,8 +747,8 @@ declare <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half>, <
define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv16f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI8_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
@@ -393,8 +793,8 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vs
define <vscale x 16 x half> @vp_nearbyint_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv16f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI9_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI21_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8
; ZVFH-NEXT: vmflt.vf v0, v12, fa5
@@ -433,8 +833,8 @@ declare <vscale x 32 x half> @llvm.vp.nearbyint.nxv32f16(<vscale x 32 x half>, <
define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv32f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI10_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
@@ -489,10 +889,10 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: .LBB22_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
@@ -523,8 +923,8 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs
define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv32f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI11_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI23_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8
; ZVFH-NEXT: vmflt.vf v0, v16, fa5
@@ -576,10 +976,10 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half>
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: .LBB23_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -824,8 +1224,8 @@ declare <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double>,
define <vscale x 1 x double> @vp_nearbyint_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI34_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
@@ -845,8 +1245,8 @@ define <vscale x 1 x double> @vp_nearbyint_nxv1f64(<vscale x 1 x double> %va, <v
define <vscale x 1 x double> @vp_nearbyint_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv1f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI35_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -866,8 +1266,8 @@ declare <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double>,
define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -889,8 +1289,8 @@ define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <v
define <vscale x 2 x double> @vp_nearbyint_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv2f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI37_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -910,8 +1310,8 @@ declare <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double>,
define <vscale x 4 x double> @vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -933,8 +1333,8 @@ define <vscale x 4 x double> @vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <v
define <vscale x 4 x double> @vp_nearbyint_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv4f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI39_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -954,8 +1354,8 @@ declare <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double>,
define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv7f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -977,8 +1377,8 @@ define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <v
define <vscale x 7 x double> @vp_nearbyint_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv7f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI41_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -998,8 +1398,8 @@ declare <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double>,
define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1021,8 +1421,8 @@ define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <v
define <vscale x 8 x double> @vp_nearbyint_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv8f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI43_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -1049,8 +1449,8 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va,
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v0, a2
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI44_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1067,10 +1467,10 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va,
; CHECK-NEXT: fsflags a2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1094,8 +1494,8 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64_unmasked(<vscale x 16 x dou
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI45_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1108,10 +1508,10 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64_unmasked(<vscale x 16 x dou
; CHECK-NEXT: fsflags a2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: bltu a0, a1, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
index 88bd92c6ec16..a454f9dd97ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
@@ -1,20 +1,397 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vp_rint_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_rint_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vp_rint_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_rint_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vp_rint_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_rint_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vp_rint_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_rint_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vp_rint_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_rint_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v12, v10, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vp_rint_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_rint_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vp_rint_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_rint_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v16, v12, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vp_rint_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_rint_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vp_rint_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_rint_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vp_rint_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_rint_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vp_rint_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_rint_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v16, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v17, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vmv1r.v v8, v16
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v16, v24, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vp_rint_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_rint_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v16
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v16, v16, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.rint.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vp_rint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv1f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI12_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
@@ -51,8 +428,8 @@ define <vscale x 1 x half> @vp_rint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1
define <vscale x 1 x half> @vp_rint_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv1f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI13_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -87,8 +464,8 @@ declare <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half>, <vscale x
define <vscale x 2 x half> @vp_rint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv2f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI14_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
@@ -125,8 +502,8 @@ define <vscale x 2 x half> @vp_rint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2
define <vscale x 2 x half> @vp_rint_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv2f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI15_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -161,8 +538,8 @@ declare <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half>, <vscale x
define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv4f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI16_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
@@ -201,8 +578,8 @@ define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4
define <vscale x 4 x half> @vp_rint_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv4f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI17_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -237,8 +614,8 @@ declare <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half>, <vscale x
define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv8f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
@@ -279,8 +656,8 @@ define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8
define <vscale x 8 x half> @vp_rint_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv8f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI19_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v10, v8
; ZVFH-NEXT: vmflt.vf v0, v10, fa5
@@ -315,8 +692,8 @@ declare <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half>, <vscal
define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv16f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI8_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
@@ -357,8 +734,8 @@ define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale
define <vscale x 16 x half> @vp_rint_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv16f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI9_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI21_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8
; ZVFH-NEXT: vmflt.vf v0, v12, fa5
@@ -393,8 +770,8 @@ declare <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half>, <vscal
define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv32f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI10_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
@@ -445,10 +822,10 @@ define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: .LBB22_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
@@ -482,8 +859,8 @@ define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale
define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv32f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI11_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI23_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8
; ZVFH-NEXT: vmflt.vf v0, v16, fa5
@@ -531,10 +908,10 @@ define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: .LBB23_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -757,8 +1134,8 @@ declare <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double>, <vsca
define <vscale x 1 x double> @vp_rint_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI34_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
@@ -776,8 +1153,8 @@ define <vscale x 1 x double> @vp_rint_nxv1f64(<vscale x 1 x double> %va, <vscale
define <vscale x 1 x double> @vp_rint_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv1f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI35_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -795,8 +1172,8 @@ declare <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double>, <vsca
define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -816,8 +1193,8 @@ define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale
define <vscale x 2 x double> @vp_rint_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv2f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI37_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -835,8 +1212,8 @@ declare <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double>, <vsca
define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -856,8 +1233,8 @@ define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale
define <vscale x 4 x double> @vp_rint_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv4f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI39_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -875,8 +1252,8 @@ declare <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double>, <vsca
define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv7f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -896,8 +1273,8 @@ define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale
define <vscale x 7 x double> @vp_rint_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv7f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI41_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -915,8 +1292,8 @@ declare <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double>, <vsca
define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -936,8 +1313,8 @@ define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale
define <vscale x 8 x double> @vp_rint_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI43_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -968,8 +1345,8 @@ define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vsc
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v0, a2
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI44_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -987,10 +1364,10 @@ define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vsc
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1016,8 +1393,8 @@ define <vscale x 16 x double> @vp_rint_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI45_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1028,10 +1405,10 @@ define <vscale x 16 x double> @vp_rint_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: bltu a0, a1, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
index 1ddadcc49373..a4936483e8a1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
@@ -1,20 +1,428 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vp_round_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_round_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vp_round_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_round_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vp_round_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_round_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vp_round_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_round_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vp_round_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_round_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v12, v10, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vp_round_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_round_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vp_round_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_round_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v16, v12, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vp_round_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_round_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vp_round_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_round_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vp_round_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_round_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.round.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vp_round_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_round_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vmv1r.v v16, v0
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v17, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a2, 4
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: vmv1r.v v8, v16
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v16, v24, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.round.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vp_round_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_round_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v16
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v16, v16, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a2, 4
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: fsrmi a0, 4
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.round.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vp_round_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv1f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI12_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
@@ -55,8 +463,8 @@ define <vscale x 1 x half> @vp_round_nxv1f16(<vscale x 1 x half> %va, <vscale x
define <vscale x 1 x half> @vp_round_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv1f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI13_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -95,8 +503,8 @@ declare <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half>, <vscale
define <vscale x 2 x half> @vp_round_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv2f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI14_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
@@ -137,8 +545,8 @@ define <vscale x 2 x half> @vp_round_nxv2f16(<vscale x 2 x half> %va, <vscale x
define <vscale x 2 x half> @vp_round_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv2f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI15_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -177,8 +585,8 @@ declare <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half>, <vscale
define <vscale x 4 x half> @vp_round_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv4f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI16_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
@@ -221,8 +629,8 @@ define <vscale x 4 x half> @vp_round_nxv4f16(<vscale x 4 x half> %va, <vscale x
define <vscale x 4 x half> @vp_round_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv4f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI17_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -261,8 +669,8 @@ declare <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half>, <vscale
define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv8f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
@@ -307,8 +715,8 @@ define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x
define <vscale x 8 x half> @vp_round_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv8f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI19_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v10, v8
; ZVFH-NEXT: vmflt.vf v0, v10, fa5
@@ -347,8 +755,8 @@ declare <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half>, <vsca
define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv16f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI8_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
@@ -393,8 +801,8 @@ define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale
define <vscale x 16 x half> @vp_round_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv16f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI9_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI21_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8
; ZVFH-NEXT: vmflt.vf v0, v12, fa5
@@ -433,8 +841,8 @@ declare <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half>, <vsca
define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv32f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI10_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
@@ -489,10 +897,10 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: .LBB22_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
@@ -531,8 +939,8 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale
define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv32f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI11_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI23_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8
; ZVFH-NEXT: vmflt.vf v0, v16, fa5
@@ -584,10 +992,10 @@ define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: .LBB23_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -832,8 +1240,8 @@ declare <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double>, <vsc
define <vscale x 1 x double> @vp_round_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI34_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
@@ -853,8 +1261,8 @@ define <vscale x 1 x double> @vp_round_nxv1f64(<vscale x 1 x double> %va, <vscal
define <vscale x 1 x double> @vp_round_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv1f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI35_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -874,8 +1282,8 @@ declare <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double>, <vsc
define <vscale x 2 x double> @vp_round_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -897,8 +1305,8 @@ define <vscale x 2 x double> @vp_round_nxv2f64(<vscale x 2 x double> %va, <vscal
define <vscale x 2 x double> @vp_round_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv2f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI37_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -918,8 +1326,8 @@ declare <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double>, <vsc
define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -941,8 +1349,8 @@ define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscal
define <vscale x 4 x double> @vp_round_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv4f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI39_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -962,8 +1370,8 @@ declare <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double>, <vsc
define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv7f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -985,8 +1393,8 @@ define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscal
define <vscale x 7 x double> @vp_round_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv7f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI41_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -1006,8 +1414,8 @@ declare <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double>, <vsc
define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1029,8 +1437,8 @@ define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscal
define <vscale x 8 x double> @vp_round_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv8f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI43_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -1063,8 +1471,8 @@ define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v0, a2
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI44_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1085,10 +1493,10 @@ define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1116,8 +1524,8 @@ define <vscale x 16 x double> @vp_round_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI45_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1130,10 +1538,10 @@ define <vscale x 16 x double> @vp_round_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: bltu a0, a1, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
index 8c5a7bb2dea6..9857009002eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
@@ -1,20 +1,428 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.vp.roundeven.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vp_roundeven_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundeven_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.roundeven.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vp_roundeven_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundeven_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.roundeven.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.roundeven.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vp_roundeven_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundeven_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.roundeven.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vp_roundeven_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundeven_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.roundeven.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.roundeven.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vp_roundeven_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundeven_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v12, v10, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.roundeven.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vp_roundeven_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundeven_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.roundeven.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.roundeven.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vp_roundeven_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundeven_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v16, v12, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.roundeven.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vp_roundeven_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundeven_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.roundeven.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.roundeven.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vp_roundeven_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundeven_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.roundeven.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vp_roundeven_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundeven_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.roundeven.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.roundeven.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vp_roundeven_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundeven_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vmv1r.v v16, v0
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v17, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a2, 0
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: vmv1r.v v8, v16
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v16, v24, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.roundeven.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vp_roundeven_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundeven_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v16
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v16, v16, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a2, 0
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.roundeven.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vp_roundeven_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv1f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI12_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
@@ -55,8 +463,8 @@ define <vscale x 1 x half> @vp_roundeven_nxv1f16(<vscale x 1 x half> %va, <vscal
define <vscale x 1 x half> @vp_roundeven_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv1f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI13_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -95,8 +503,8 @@ declare <vscale x 2 x half> @llvm.vp.roundeven.nxv2f16(<vscale x 2 x half>, <vsc
define <vscale x 2 x half> @vp_roundeven_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv2f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI14_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
@@ -137,8 +545,8 @@ define <vscale x 2 x half> @vp_roundeven_nxv2f16(<vscale x 2 x half> %va, <vscal
define <vscale x 2 x half> @vp_roundeven_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv2f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI15_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -177,8 +585,8 @@ declare <vscale x 4 x half> @llvm.vp.roundeven.nxv4f16(<vscale x 4 x half>, <vsc
define <vscale x 4 x half> @vp_roundeven_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv4f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI16_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
@@ -221,8 +629,8 @@ define <vscale x 4 x half> @vp_roundeven_nxv4f16(<vscale x 4 x half> %va, <vscal
define <vscale x 4 x half> @vp_roundeven_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv4f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI17_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -261,8 +669,8 @@ declare <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half>, <vsc
define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv8f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
@@ -307,8 +715,8 @@ define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscal
define <vscale x 8 x half> @vp_roundeven_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv8f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI19_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v10, v8
; ZVFH-NEXT: vmflt.vf v0, v10, fa5
@@ -347,8 +755,8 @@ declare <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half>, <
define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv16f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI8_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
@@ -393,8 +801,8 @@ define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vs
define <vscale x 16 x half> @vp_roundeven_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv16f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI9_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI21_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8
; ZVFH-NEXT: vmflt.vf v0, v12, fa5
@@ -433,8 +841,8 @@ declare <vscale x 32 x half> @llvm.vp.roundeven.nxv32f16(<vscale x 32 x half>, <
define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv32f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI10_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
@@ -489,10 +897,10 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: .LBB22_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
@@ -531,8 +939,8 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs
define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv32f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI11_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI23_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8
; ZVFH-NEXT: vmflt.vf v0, v16, fa5
@@ -584,10 +992,10 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half>
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: .LBB23_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -832,8 +1240,8 @@ declare <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double>,
define <vscale x 1 x double> @vp_roundeven_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI34_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
@@ -853,8 +1261,8 @@ define <vscale x 1 x double> @vp_roundeven_nxv1f64(<vscale x 1 x double> %va, <v
define <vscale x 1 x double> @vp_roundeven_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv1f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI35_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -874,8 +1282,8 @@ declare <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double>,
define <vscale x 2 x double> @vp_roundeven_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -897,8 +1305,8 @@ define <vscale x 2 x double> @vp_roundeven_nxv2f64(<vscale x 2 x double> %va, <v
define <vscale x 2 x double> @vp_roundeven_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv2f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI37_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -918,8 +1326,8 @@ declare <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double>,
define <vscale x 4 x double> @vp_roundeven_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -941,8 +1349,8 @@ define <vscale x 4 x double> @vp_roundeven_nxv4f64(<vscale x 4 x double> %va, <v
define <vscale x 4 x double> @vp_roundeven_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv4f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI39_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -962,8 +1370,8 @@ declare <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double>,
define <vscale x 7 x double> @vp_roundeven_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv7f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -985,8 +1393,8 @@ define <vscale x 7 x double> @vp_roundeven_nxv7f64(<vscale x 7 x double> %va, <v
define <vscale x 7 x double> @vp_roundeven_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv7f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI41_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -1006,8 +1414,8 @@ declare <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double>,
define <vscale x 8 x double> @vp_roundeven_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1029,8 +1437,8 @@ define <vscale x 8 x double> @vp_roundeven_nxv8f64(<vscale x 8 x double> %va, <v
define <vscale x 8 x double> @vp_roundeven_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv8f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI43_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -1063,8 +1471,8 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va,
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v0, a2
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI44_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1085,10 +1493,10 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va,
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1116,8 +1524,8 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64_unmasked(<vscale x 16 x dou
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI45_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1130,10 +1538,10 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64_unmasked(<vscale x 16 x dou
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: bltu a0, a1, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
index 1227e73a0243..11830c924867 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
@@ -1,20 +1,428 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.vp.roundtozero.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vp_roundtozero_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundtozero_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.roundtozero.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vp_roundtozero_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundtozero_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.roundtozero.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.roundtozero.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vp_roundtozero_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundtozero_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.roundtozero.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vp_roundtozero_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundtozero_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v9
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.roundtozero.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.roundtozero.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vp_roundtozero_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundtozero_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v12, v10, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.roundtozero.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vp_roundtozero_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundtozero_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v10
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.roundtozero.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.roundtozero.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vp_roundtozero_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundtozero_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v16, v12, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.roundtozero.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vp_roundtozero_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundtozero_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v12
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
+; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.roundtozero.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.roundtozero.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vp_roundtozero_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundtozero_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.roundtozero.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vp_roundtozero_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundtozero_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v16
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: vmflt.vf v0, v8, fa5
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.roundtozero.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.roundtozero.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vp_roundtozero_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundtozero_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vmv1r.v v16, v0
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v17, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a2, 1
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: vmv1r.v v8, v16
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v16, v24, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.roundtozero.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vp_roundtozero_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vp_roundtozero_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v16
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v16, v16, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
+; CHECK-NEXT: lui a2, 307200
+; CHECK-NEXT: fmv.w.x fa5, a2
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmflt.vf v16, v8, fa5, v0.t
+; CHECK-NEXT: fsrmi a2, 1
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
+; CHECK-NEXT: fsrm a2
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v24, v16
+; CHECK-NEXT: vmflt.vf v0, v24, fa5
+; CHECK-NEXT: fsrmi a0, 1
+; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
+; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.roundtozero.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vp_roundtozero_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv1f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI0_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI12_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
@@ -55,8 +463,8 @@ define <vscale x 1 x half> @vp_roundtozero_nxv1f16(<vscale x 1 x half> %va, <vsc
define <vscale x 1 x half> @vp_roundtozero_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv1f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI1_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI13_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -95,8 +503,8 @@ declare <vscale x 2 x half> @llvm.vp.roundtozero.nxv2f16(<vscale x 2 x half>, <v
define <vscale x 2 x half> @vp_roundtozero_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv2f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI2_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI14_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
@@ -137,8 +545,8 @@ define <vscale x 2 x half> @vp_roundtozero_nxv2f16(<vscale x 2 x half> %va, <vsc
define <vscale x 2 x half> @vp_roundtozero_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv2f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI3_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI15_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -177,8 +585,8 @@ declare <vscale x 4 x half> @llvm.vp.roundtozero.nxv4f16(<vscale x 4 x half>, <v
define <vscale x 4 x half> @vp_roundtozero_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv4f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI4_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI16_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
@@ -221,8 +629,8 @@ define <vscale x 4 x half> @vp_roundtozero_nxv4f16(<vscale x 4 x half> %va, <vsc
define <vscale x 4 x half> @vp_roundtozero_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv4f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI5_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI17_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
@@ -261,8 +669,8 @@ declare <vscale x 8 x half> @llvm.vp.roundtozero.nxv8f16(<vscale x 8 x half>, <v
define <vscale x 8 x half> @vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv8f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
@@ -307,8 +715,8 @@ define <vscale x 8 x half> @vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vsc
define <vscale x 8 x half> @vp_roundtozero_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv8f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI7_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI19_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v10, v8
; ZVFH-NEXT: vmflt.vf v0, v10, fa5
@@ -347,8 +755,8 @@ declare <vscale x 16 x half> @llvm.vp.roundtozero.nxv16f16(<vscale x 16 x half>,
define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv16f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI8_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
@@ -393,8 +801,8 @@ define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, <
define <vscale x 16 x half> @vp_roundtozero_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv16f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI9_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI21_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8
; ZVFH-NEXT: vmflt.vf v0, v12, fa5
@@ -433,8 +841,8 @@ declare <vscale x 32 x half> @llvm.vp.roundtozero.nxv32f16(<vscale x 32 x half>,
define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv32f16:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI10_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
@@ -489,10 +897,10 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, <
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: .LBB22_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
@@ -531,8 +939,8 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, <
define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv32f16_unmasked:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a1, %hi(.LCPI11_0)
-; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a1)
+; ZVFH-NEXT: lui a1, %hi(.LCPI23_0)
+; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1)
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8
; ZVFH-NEXT: vmflt.vf v0, v16, fa5
@@ -584,10 +992,10 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x hal
; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: .LBB23_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -832,8 +1240,8 @@ declare <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double>
define <vscale x 1 x double> @vp_roundtozero_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv1f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI34_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
@@ -853,8 +1261,8 @@ define <vscale x 1 x double> @vp_roundtozero_nxv1f64(<vscale x 1 x double> %va,
define <vscale x 1 x double> @vp_roundtozero_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv1f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI35_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, fa5
@@ -874,8 +1282,8 @@ declare <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double>
define <vscale x 2 x double> @vp_roundtozero_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
@@ -897,8 +1305,8 @@ define <vscale x 2 x double> @vp_roundtozero_nxv2f64(<vscale x 2 x double> %va,
define <vscale x 2 x double> @vp_roundtozero_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv2f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI37_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, fa5
@@ -918,8 +1326,8 @@ declare <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double>
define <vscale x 4 x double> @vp_roundtozero_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
@@ -941,8 +1349,8 @@ define <vscale x 4 x double> @vp_roundtozero_nxv4f64(<vscale x 4 x double> %va,
define <vscale x 4 x double> @vp_roundtozero_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv4f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI27_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI39_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, fa5
@@ -962,8 +1370,8 @@ declare <vscale x 7 x double> @llvm.vp.roundtozero.nxv7f64(<vscale x 7 x double>
define <vscale x 7 x double> @vp_roundtozero_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv7f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -985,8 +1393,8 @@ define <vscale x 7 x double> @vp_roundtozero_nxv7f64(<vscale x 7 x double> %va,
define <vscale x 7 x double> @vp_roundtozero_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv7f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI29_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI29_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI41_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -1006,8 +1414,8 @@ declare <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double>
define <vscale x 8 x double> @vp_roundtozero_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv8f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1029,8 +1437,8 @@ define <vscale x 8 x double> @vp_roundtozero_nxv8f64(<vscale x 8 x double> %va,
define <vscale x 8 x double> @vp_roundtozero_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv8f64_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI31_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI31_0)(a1)
+; CHECK-NEXT: lui a1, %hi(.LCPI43_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, fa5
@@ -1063,8 +1471,8 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %v
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v0, a2
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI44_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1085,10 +1493,10 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %v
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -1116,8 +1524,8 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64_unmasked(<vscale x 16 x d
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
-; CHECK-NEXT: fld fa5, %lo(.LCPI33_0)(a3)
+; CHECK-NEXT: lui a3, %hi(.LCPI45_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
@@ -1130,10 +1538,10 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64_unmasked(<vscale x 16 x d
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: bltu a0, a1, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index e9b6126323de..5ba4efa8458c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1,12 +1,1664 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+declare <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, metadata, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i1> @fcmp_oeq_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v9, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"oeq", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_oeq_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v10, v8, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"oeq", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_oeq_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"oeq", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ogt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ogt_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ogt", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ogt_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ogt_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ogt", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ogt_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ogt_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ogt", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_oge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oge_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"oge", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_oge_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oge_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"oge", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_oge_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oge_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"oge", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_olt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_olt_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v9, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"olt", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_olt_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_olt_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"olt", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_olt_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_olt_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"olt", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ole_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ole_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v9, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ole", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ole_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ole_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ole", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ole_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ole_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ole", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_one_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_one_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v9, v10, v0.t
+; CHECK-NEXT: vmflt.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vmor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"one", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_one_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_one_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v9, v10, v8, v0.t
+; CHECK-NEXT: vmflt.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"one", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_one_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_one_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmflt.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"one", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ord_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ord_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v9, v10, v10, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v10, v10, v0.t
+; CHECK-NEXT: vmand.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ord", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ord_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ord_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v10, v10, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v9, v10, v10, v0.t
+; CHECK-NEXT: vmand.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ord", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ord_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ord_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v10, v10, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v9, v10, v10, v0.t
+; CHECK-NEXT: vmand.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ord", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ueq_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ueq_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v9, v10, v0.t
+; CHECK-NEXT: vmflt.vv v9, v10, v9, v0.t
+; CHECK-NEXT: vmnor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ueq", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ueq_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ueq_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v9, v10, v8, v0.t
+; CHECK-NEXT: vmflt.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vmnor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ueq", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ueq_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ueq_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmflt.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vmnor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ueq", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ugt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ugt_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v9, v10, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ugt", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ugt_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ugt_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ugt", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ugt_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ugt_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ugt", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_uge_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_uge_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v9, v10, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"uge", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_uge_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_uge_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"uge", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_uge_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_uge_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"uge", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ult_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ult_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ult", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ult_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ult_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ult", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ult_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ult_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ult", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ule_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ule_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ule", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ule_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ule_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"ule", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_ule_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ule_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v10, v8, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"ule", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_une_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_une_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v9, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"une", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_une_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_une_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v10, v8, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"une", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_une_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_une_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"une", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_uno_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_uno_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfne.vv v9, v10, v10, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"uno", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_uno_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_uno_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfne.vv v9, v10, v10, v0.t
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"uno", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 1 x i1> @fcmp_uno_vf_swap_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_uno_vf_swap_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vmfne.vv v9, v10, v10, v0.t
+; CHECK-NEXT: vmor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, metadata !"uno", <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+declare <vscale x 3 x i1> @llvm.vp.fcmp.nxv3bf16(<vscale x 3 x bfloat>, <vscale x 3 x bfloat>, metadata, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3bf16(<vscale x 3 x bfloat> %va, <vscale x 3 x bfloat> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vv_nxv3bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3bf16(<vscale x 3 x bfloat> %va, <vscale x 3 x bfloat> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i1> %v
+}
+
+declare <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, metadata, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ogt_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ogt_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ogt_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_oge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oge_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_oge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oge_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oge_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_olt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_olt_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_olt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_olt_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_olt_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ole_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ole_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ole_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ole_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ole_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_one_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_one_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmflt.vv v9, v12, v16, v0.t
+; CHECK-NEXT: vmor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_one_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_one_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmflt.vv v9, v16, v12, v0.t
+; CHECK-NEXT: vmor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_one_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmflt.vv v9, v12, v16, v0.t
+; CHECK-NEXT: vmor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"one", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ord_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ord_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v10, v12, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t
+; CHECK-NEXT: vmand.mm v0, v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ord_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ord_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v9, v12, v12, v0.t
+; CHECK-NEXT: vmand.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ord_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v9, v12, v12, v0.t
+; CHECK-NEXT: vmand.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ueq_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmflt.vv v9, v12, v16, v0.t
+; CHECK-NEXT: vmnor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ueq_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmflt.vv v9, v16, v12, v0.t
+; CHECK-NEXT: vmnor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ueq_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmflt.vv v9, v12, v16, v0.t
+; CHECK-NEXT: vmnor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ugt_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ugt_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ugt_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_uge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_uge_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_uge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_uge_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_uge_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ult_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ult_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ult_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ult_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ult_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ule_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ule_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ule_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ule_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_ule_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_une_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_une_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_une_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_une_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_une_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"une", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_uno_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_uno_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v10, v12, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v12, v12, v0.t
+; CHECK-NEXT: vmor.mm v0, v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_uno_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_uno_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v12, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v9, v12, v12, v0.t
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_uno_vf_swap_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v12, v12, v0.t
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v9, v12, v12, v0.t
+; CHECK-NEXT: vmor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+declare <vscale x 64 x i1> @llvm.vp.fcmp.nxv64bf16(<vscale x 64 x bfloat>, <vscale x 64 x bfloat>, metadata, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vscale x 64 x bfloat> %vb, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vv_nxv64bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, a1, a3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb
+; CHECK-NEXT: vmv8r.v v24, v16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a1, a3, 3
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: vl8re16.v v16, (a1)
+; CHECK-NEXT: slli a5, a3, 2
+; CHECK-NEXT: sub a1, a2, a5
+; CHECK-NEXT: sltu a4, a2, a1
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a6, a4, a1
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: sub a1, a6, a4
+; CHECK-NEXT: sltu a7, a6, a1
+; CHECK-NEXT: addi a7, a7, -1
+; CHECK-NEXT: and a7, a7, a1
+; CHECK-NEXT: srli a1, a3, 1
+; CHECK-NEXT: csrr t0, vlenb
+; CHECK-NEXT: add t0, sp, t0
+; CHECK-NEXT: addi t0, t0, 16
+; CHECK-NEXT: vs1r.v v0, (t0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli t0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v0, a1
+; CHECK-NEXT: srli a3, a3, 2
+; CHECK-NEXT: addi t0, sp, 16
+; CHECK-NEXT: vs1r.v v8, (t0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli t0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a3
+; CHECK-NEXT: vl8re16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv t0, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add t0, t0, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, t0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
+; CHECK-NEXT: vmv4r.v v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv t0, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, t0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: vsetvli zero, a7, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v26, v16, v8, v0.t
+; CHECK-NEXT: bltu a6, a4, .LBB85_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a6, a4
+; CHECK-NEXT: .LBB85_2:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a7, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a7
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a6, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v6, v16, v8, v0.t
+; CHECK-NEXT: add a0, a3, a3
+; CHECK-NEXT: bltu a2, a5, .LBB85_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: .LBB85_4:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v6, v26, a3
+; CHECK-NEXT: sub a5, a2, a4
+; CHECK-NEXT: sltu a6, a2, a5
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a5, a6, a5
+; CHECK-NEXT: csrr a6, vlenb
+; CHECK-NEXT: add a6, sp, a6
+; CHECK-NEXT: addi a6, a6, 16
+; CHECK-NEXT: vl1r.v v8, (a6) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv1r.v v7, v8
+; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a3
+; CHECK-NEXT: csrr a6, vlenb
+; CHECK-NEXT: slli a6, a6, 1
+; CHECK-NEXT: mv a7, a6
+; CHECK-NEXT: slli a6, a6, 3
+; CHECK-NEXT: add a6, a6, a7
+; CHECK-NEXT: add a6, sp, a6
+; CHECK-NEXT: addi a6, a6, 16
+; CHECK-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli a6, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
+; CHECK-NEXT: csrr a6, vlenb
+; CHECK-NEXT: slli a6, a6, 1
+; CHECK-NEXT: mv a7, a6
+; CHECK-NEXT: slli a6, a6, 2
+; CHECK-NEXT: add a6, a6, a7
+; CHECK-NEXT: add a6, sp, a6
+; CHECK-NEXT: addi a6, a6, 16
+; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a6, vlenb
+; CHECK-NEXT: slli a6, a6, 1
+; CHECK-NEXT: mv a7, a6
+; CHECK-NEXT: slli a6, a6, 2
+; CHECK-NEXT: add a7, a7, a6
+; CHECK-NEXT: slli a6, a6, 1
+; CHECK-NEXT: add a6, a6, a7
+; CHECK-NEXT: add a6, sp, a6
+; CHECK-NEXT: addi a6, a6, 16
+; CHECK-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
+; CHECK-NEXT: csrr a6, vlenb
+; CHECK-NEXT: slli a6, a6, 1
+; CHECK-NEXT: mv a7, a6
+; CHECK-NEXT: slli a6, a6, 2
+; CHECK-NEXT: add a6, a6, a7
+; CHECK-NEXT: add a6, sp, a6
+; CHECK-NEXT: addi a6, a6, 16
+; CHECK-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a5, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v5, v16, v8, v0.t
+; CHECK-NEXT: bltu a2, a4, .LBB85_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: mv a2, a4
+; CHECK-NEXT: .LBB85_6:
+; CHECK-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 1
+; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: slli a4, a4, 2
+; CHECK-NEXT: add a5, a5, a4
+; CHECK-NEXT: slli a4, a4, 1
+; CHECK-NEXT: add a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v16, v24, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v5, a3
+; CHECK-NEXT: add a0, a1, a1
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v6, a1
+; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 64 x i1> @llvm.vp.fcmp.nxv64bf16(<vscale x 64 x bfloat> %va, <vscale x 64 x bfloat> %vb, metadata !"oeq", <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i1> %v
+}
declare <vscale x 1 x i1> @llvm.vp.fcmp.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, <vscale x 1 x i1>, i32)
@@ -2108,10 +3760,10 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFH-NEXT: and a4, a5, a4
; ZVFH-NEXT: vsetvli zero, a4, e16, m8, ta, ma
; ZVFH-NEXT: vmfeq.vv v7, v16, v8, v0.t
-; ZVFH-NEXT: bltu a2, a3, .LBB85_2
+; ZVFH-NEXT: bltu a2, a3, .LBB171_2
; ZVFH-NEXT: # %bb.1:
; ZVFH-NEXT: mv a2, a3
-; ZVFH-NEXT: .LBB85_2:
+; ZVFH-NEXT: .LBB171_2:
; ZVFH-NEXT: vmv1r.v v0, v24
; ZVFH-NEXT: csrr a0, vlenb
; ZVFH-NEXT: slli a0, a0, 3
@@ -2137,14 +3789,18 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a3, 34
-; ZVFHMIN-NEXT: mul a1, a1, a3
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: mv a3, a1
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, a1, a3
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb
; ZVFHMIN-NEXT: vmv8r.v v24, v16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a3, 18
-; ZVFHMIN-NEXT: mul a1, a1, a3
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: mv a3, a1
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: add a1, a1, a3
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -2176,8 +3832,12 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3
; ZVFHMIN-NEXT: vl8re16.v v8, (a0)
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li t0, 26
-; ZVFHMIN-NEXT: mul a0, a0, t0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv t0, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add t0, t0, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, t0
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -2190,18 +3850,20 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: vmv4r.v v16, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li t0, 10
-; ZVFHMIN-NEXT: mul a0, a0, t0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv t0, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, t0
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT: vsetvli zero, a7, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v26, v16, v8, v0.t
-; ZVFHMIN-NEXT: bltu a6, a4, .LBB85_2
+; ZVFHMIN-NEXT: bltu a6, a4, .LBB171_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a6, a4
-; ZVFHMIN-NEXT: .LBB85_2:
+; ZVFHMIN-NEXT: .LBB171_2:
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 1
; ZVFHMIN-NEXT: add a0, sp, a0
@@ -2210,8 +3872,10 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a7, 10
-; ZVFHMIN-NEXT: mul a0, a0, a7
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a7, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a7
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
@@ -2221,10 +3885,10 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vsetvli zero, a6, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v6, v16, v8, v0.t
; ZVFHMIN-NEXT: add a0, a3, a3
-; ZVFHMIN-NEXT: bltu a2, a5, .LBB85_4
+; ZVFHMIN-NEXT: bltu a2, a5, .LBB171_4
; ZVFHMIN-NEXT: # %bb.3:
; ZVFHMIN-NEXT: mv a2, a5
-; ZVFHMIN-NEXT: .LBB85_4:
+; ZVFHMIN-NEXT: .LBB171_4:
; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslideup.vx v6, v26, a3
; ZVFHMIN-NEXT: sub a5, a2, a4
@@ -2239,43 +3903,57 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3
; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: li a7, 18
-; ZVFHMIN-NEXT: mul a6, a6, a7
+; ZVFHMIN-NEXT: slli a6, a6, 1
+; ZVFHMIN-NEXT: mv a7, a6
+; ZVFHMIN-NEXT: slli a6, a6, 3
+; ZVFHMIN-NEXT: add a6, a6, a7
; ZVFHMIN-NEXT: add a6, sp, a6
; ZVFHMIN-NEXT: addi a6, a6, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a6, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: li a7, 10
-; ZVFHMIN-NEXT: mul a6, a6, a7
+; ZVFHMIN-NEXT: slli a6, a6, 1
+; ZVFHMIN-NEXT: mv a7, a6
+; ZVFHMIN-NEXT: slli a6, a6, 2
+; ZVFHMIN-NEXT: add a6, a6, a7
; ZVFHMIN-NEXT: add a6, sp, a6
; ZVFHMIN-NEXT: addi a6, a6, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: li a7, 26
-; ZVFHMIN-NEXT: mul a6, a6, a7
+; ZVFHMIN-NEXT: slli a6, a6, 1
+; ZVFHMIN-NEXT: mv a7, a6
+; ZVFHMIN-NEXT: slli a6, a6, 2
+; ZVFHMIN-NEXT: add a7, a7, a6
+; ZVFHMIN-NEXT: slli a6, a6, 1
+; ZVFHMIN-NEXT: add a6, a6, a7
; ZVFHMIN-NEXT: add a6, sp, a6
; ZVFHMIN-NEXT: addi a6, a6, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: li a7, 10
-; ZVFHMIN-NEXT: mul a6, a6, a7
+; ZVFHMIN-NEXT: slli a6, a6, 1
+; ZVFHMIN-NEXT: mv a7, a6
+; ZVFHMIN-NEXT: slli a6, a6, 2
+; ZVFHMIN-NEXT: add a6, a6, a7
; ZVFHMIN-NEXT: add a6, sp, a6
; ZVFHMIN-NEXT: addi a6, a6, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a5, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v5, v16, v8, v0.t
-; ZVFHMIN-NEXT: bltu a2, a4, .LBB85_6
+; ZVFHMIN-NEXT: bltu a2, a4, .LBB171_6
; ZVFHMIN-NEXT: # %bb.5:
; ZVFHMIN-NEXT: mv a2, a4
-; ZVFHMIN-NEXT: .LBB85_6:
+; ZVFHMIN-NEXT: .LBB171_6:
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 26
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 2
+; ZVFHMIN-NEXT: add a5, a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
@@ -2290,8 +3968,10 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vslideup.vx v8, v6, a1
; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 34
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -3377,163 +5057,6 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
declare <vscale x 32 x i1> @llvm.vp.fcmp.nxv32f64(<vscale x 32 x double>, <vscale x 32 x double>, metadata, <vscale x 32 x i1>, i32)
define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vscale x 32 x double> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: fcmp_oeq_vv_nxv32f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 48
-; CHECK-NEXT: mul a1, a1, a3
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
-; CHECK-NEXT: vmv1r.v v24, v0
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 5
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 40
-; CHECK-NEXT: mul a1, a1, a3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: slli t0, a4, 3
-; CHECK-NEXT: slli a1, a4, 5
-; CHECK-NEXT: sub t1, a1, t0
-; CHECK-NEXT: srli a1, a4, 2
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v7, v0, a1
-; CHECK-NEXT: srli a3, a4, 3
-; CHECK-NEXT: add a5, a2, t0
-; CHECK-NEXT: vl8re64.v v8, (a5)
-; CHECK-NEXT: slli t3, a4, 4
-; CHECK-NEXT: slli a5, a4, 1
-; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a3
-; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: bltu a6, a5, .LBB171_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a7, a5
-; CHECK-NEXT: .LBB171_2:
-; CHECK-NEXT: add t2, a0, t0
-; CHECK-NEXT: add t1, a2, t1
-; CHECK-NEXT: add t0, a2, t3
-; CHECK-NEXT: vl8re64.v v16, (a2)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 4
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: sub a2, a7, a4
-; CHECK-NEXT: sltu t3, a7, a2
-; CHECK-NEXT: addi t3, t3, -1
-; CHECK-NEXT: and a2, t3, a2
-; CHECK-NEXT: csrr t3, vlenb
-; CHECK-NEXT: slli t3, t3, 5
-; CHECK-NEXT: add t3, sp, t3
-; CHECK-NEXT: addi t3, t3, 16
-; CHECK-NEXT: vl8r.v v16, (t3) # Unknown-size Folded Reload
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v6, v16, v8, v0.t
-; CHECK-NEXT: bltu a7, a4, .LBB171_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a7, a4
-; CHECK-NEXT: .LBB171_4:
-; CHECK-NEXT: vl8re64.v v8, (t2)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 5
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: vl8re64.v v8, (t1)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li t1, 24
-; CHECK-NEXT: mul a2, a2, t1
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v18, v7, a3
-; CHECK-NEXT: vl8re64.v v8, (t0)
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: vl8re64.v v8, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a2, 40
-; CHECK-NEXT: mul a0, a0, a2
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v17, v24, v8, v0.t
-; CHECK-NEXT: add a2, a3, a3
-; CHECK-NEXT: sub a0, a6, a5
-; CHECK-NEXT: sltu a5, a6, a0
-; CHECK-NEXT: addi a5, a5, -1
-; CHECK-NEXT: and a0, a5, a0
-; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v17, v6, a3
-; CHECK-NEXT: mv a2, a0
-; CHECK-NEXT: bltu a0, a4, .LBB171_6
-; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: mv a2, a4
-; CHECK-NEXT: .LBB171_6:
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: slli a5, a5, 3
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a5, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t
-; CHECK-NEXT: sub a2, a0, a4
-; CHECK-NEXT: sltu a0, a0, a2
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: and a0, a0, a2
-; CHECK-NEXT: vmv1r.v v0, v18
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 5
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li a4, 24
-; CHECK-NEXT: mul a2, a2, a4
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v18, v24, v8, v0.t
-; CHECK-NEXT: add a0, a1, a3
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v17, v16, a1
-; CHECK-NEXT: slli a0, a3, 1
-; CHECK-NEXT: add a0, a0, a3
-; CHECK-NEXT: add a3, a0, a3
-; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v17, v18, a0
-; CHECK-NEXT: vmv1r.v v0, v17
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 48
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: ret
%v = call <vscale x 32 x i1> @llvm.vp.fcmp.nxv32f64(<vscale x 32 x double> %va, <vscale x 32 x double> %vb, metadata !"oeq", <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x i1> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
index c2c977bec605..23d73481aed2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
@@ -1,16 +1,1162 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64
; FIXME: The scalar/vector operations ('fv' tests) should swap operands and
; condition codes accordingly in order to generate a 'vf' instruction.
+define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_oeq_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp oeq <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_oeq_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp oeq <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_oeq_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_oeq_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp oeq <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_oeq_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp oeq <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_oeq_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp oeq <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_ogt_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp ogt <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ogt_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ogt <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ogt_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ogt <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_ogt_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp ogt <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_ogt_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ogt <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_oge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_oge_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp oge <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_oge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_oge_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp oge <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_oge_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_oge_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp oge <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_oge_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_oge_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp oge <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_oge_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_oge_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp oge <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_olt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_olt_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp olt <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_olt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_olt_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp olt <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_olt_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_olt_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp olt <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_olt_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_olt_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp olt <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_olt_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_olt_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp olt <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ole_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_ole_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp ole <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ole_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ole_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ole <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ole_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ole_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ole <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ole_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_ole_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp ole <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ole_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_ole_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ole <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_one_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_one_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12
+; CHECK-NEXT: vmflt.vv v9, v12, v16
+; CHECK-NEXT: vmor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %vc = fcmp one <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_one_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_one_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v12, v16
+; CHECK-NEXT: vmflt.vv v9, v16, v12
+; CHECK-NEXT: vmor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp one <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_one_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_one_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12
+; CHECK-NEXT: vmflt.vv v9, v12, v16
+; CHECK-NEXT: vmor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp one <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_one_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_one_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp one <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_one_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_one_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp one <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ord_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_ord_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v10, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v12
+; CHECK-NEXT: vmand.mm v0, v8, v10
+; CHECK-NEXT: ret
+ %vc = fcmp ord <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ord_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ord_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v9, v12, v12
+; CHECK-NEXT: vmand.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ord <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ord_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ord_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v9, v12, v12
+; CHECK-NEXT: vmand.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ord <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ord_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_ord_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v10, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v12
+; CHECK-NEXT: vmand.mm v0, v8, v10
+; CHECK-NEXT: ret
+ %vc = fcmp ord <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ord_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_ord_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v8, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v9, v12, v12
+; CHECK-NEXT: vmand.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ord <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_ueq_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12
+; CHECK-NEXT: vmflt.vv v9, v12, v16
+; CHECK-NEXT: vmnor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %vc = fcmp ueq <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ueq_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v12, v16
+; CHECK-NEXT: vmflt.vv v9, v16, v12
+; CHECK-NEXT: vmnor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ueq <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ueq_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12
+; CHECK-NEXT: vmflt.vv v9, v12, v16
+; CHECK-NEXT: vmnor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ueq <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_ueq_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp ueq <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_ueq_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ueq <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_ugt_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v16, v12
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %vc = fcmp ugt <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ugt_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v12, v16
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ugt <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ugt_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v16, v12
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ugt <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_ugt_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp ugt <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_ugt_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ugt <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_uge_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_uge_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %vc = fcmp uge <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_uge_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_uge_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v12, v16
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp uge <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_uge_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_uge_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp uge <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_uge_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_uge_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp uge <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_uge_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_uge_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp uge <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ult_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_ult_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v16, v12
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %vc = fcmp ult <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ult_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ult_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v16, v12
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ult <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ult_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ult_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v8, v12, v16
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ult <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ult_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_ult_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp ult <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ult_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_ult_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ult <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ule_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_ule_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %vc = fcmp ule <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ule_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ule_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v16, v12
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ule <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ule_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_ule_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmflt.vv v8, v12, v16
+; CHECK-NEXT: vmnot.m v0, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ule <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ule_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_ule_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp ule <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_ule_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_ule_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp ule <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_une_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_une_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp une <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_une_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_une_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp une <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_une_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_une_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp une <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_une_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_une_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v16, v12
+; CHECK-NEXT: ret
+ %vc = fcmp une <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_une_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_une_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v0, v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp une <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_uno_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: fcmp_uno_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v10, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v12, v12
+; CHECK-NEXT: vmor.mm v0, v8, v10
+; CHECK-NEXT: ret
+ %vc = fcmp uno <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_uno_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_uno_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v9, v12, v12
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp uno <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_uno_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: fcmp_uno_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v9, v12, v12
+; CHECK-NEXT: vmor.mm v0, v9, v8
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp uno <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_uno_vv_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) #0 {
+; CHECK-LABEL: fcmp_uno_vv_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v10, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v12, v12
+; CHECK-NEXT: vmor.mm v0, v8, v10
+; CHECK-NEXT: ret
+ %vc = fcmp uno <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x i1> %vc
+}
+
+define <vscale x 8 x i1> @fcmp_uno_vf_nxv8bf16_nonans(<vscale x 8 x bfloat> %va, bfloat %b) #0 {
+; CHECK-LABEL: fcmp_uno_vf_nxv8bf16_nonans:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v8, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmfne.vv v9, v12, v12
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fcmp uno <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x i1> %vc
+}
+
define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb) {
; ZVFH-LABEL: fcmp_oeq_vv_nxv8f16:
; ZVFH: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
index af80e627b43f..53be153f8ff2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
@@ -1,12 +1,239 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfadd_vv_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfadd_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfadd_vv_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfadd_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfadd_vv_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfadd_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfadd_vv_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfadd_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v12, v12, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfadd_vv_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v24, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfadd_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfadd_vv_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v24, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 32 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfadd_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v24, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 32 x bfloat> %vc
+}
declare <vscale x 1 x half> @llvm.experimental.constrained.fadd.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata)
define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) strictfp {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll
index 8f21e326e687..c3c0958f7096 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll
@@ -1,12 +1,252 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %vc = fadd <vscale x 1 x bfloat> %va, %vb
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %vc = fadd <vscale x 1 x bfloat> %va, %splat
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %vc = fadd <vscale x 2 x bfloat> %va, %vb
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %vc = fadd <vscale x 2 x bfloat> %va, %splat
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %vc = fadd <vscale x 4 x bfloat> %va, %vb
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %vc = fadd <vscale x 4 x bfloat> %va, %splat
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %vc = fadd <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v12, v12, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fadd <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfadd_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fadd <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v24, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %vc = fadd <vscale x 16 x bfloat> %va, %vb
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %vc = fadd <vscale x 16 x bfloat> %va, %splat
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) {
+; CHECK-LABEL: vfadd_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v24, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %vc = fadd <vscale x 32 x bfloat> %va, %vb
+ ret <vscale x 32 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfadd_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v24, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v20
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %vc = fadd <vscale x 32 x bfloat> %va, %splat
+ ret <vscale x 32 x bfloat> %vc
+}
define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) {
; ZVFH-LABEL: vfadd_vv_nxv1f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
index 395f1a7c382b..b3de904d2062 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
@@ -1,13 +1,660 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+declare <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vv_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v9, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v10, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_commute(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv1bf16_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v10, v8
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_unmasked_commute(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv1bf16_unmasked_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v8, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vv_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v9, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v10, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v9, v10, v8
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v12, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vv_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v12, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v10, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v10, v10, v12
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v12, v16, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vv_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v12, v16, v12
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v12, v12, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v12, v12, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vv_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v24, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v16, v24
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB22_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vv_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v24, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB23_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v24, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; CHECK-NEXT: vmv8r.v v24, v8
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a4, a2, 3
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB24_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB24_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v24, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB25_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfadd.vv v16, v16, v24
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -564,10 +1211,10 @@ define <vscale x 32 x half> @vfadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB48_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB22_2:
+; ZVFHMIN-NEXT: .LBB48_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -621,10 +1268,10 @@ define <vscale x 32 x half> @vfadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB49_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB23_2:
+; ZVFHMIN-NEXT: .LBB49_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -699,10 +1346,10 @@ define <vscale x 32 x half> @vfadd_vf_nxv32f16(<vscale x 32 x half> %va, half %b
; ZVFHMIN-NEXT: vfadd.vv v16, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB24_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB50_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB24_2:
+; ZVFHMIN-NEXT: .LBB50_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -780,10 +1427,10 @@ define <vscale x 32 x half> @vfadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB25_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB51_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB25_2:
+; ZVFHMIN-NEXT: .LBB51_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-sdnode.ll
index f2af8ac3b02d..c97278480f1a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfclass-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfclass-sdnode.ll
@@ -1,18 +1,51 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
-define <vscale x 2 x i1> @isnan_nxv2f16(<vscale x 2 x half> %x) {
-; CHECK-LABEL: isnan_nxv2f16:
+define <vscale x 2 x i1> @isnan_nxv2bf16(<vscale x 2 x bfloat> %x) {
+; CHECK-LABEL: isnan_nxv2bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfclass.v v8, v8
-; CHECK-NEXT: li a0, 768
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: addi a0, a0, -128
+; CHECK-NEXT: vmsgt.vx v0, v8, a0
; CHECK-NEXT: ret
+ %1 = call <vscale x 2 x i1> @llvm.is.fpclass.nxv2bf16(<vscale x 2 x bfloat> %x, i32 3) ; nan
+ ret <vscale x 2 x i1> %1
+}
+
+define <vscale x 2 x i1> @isnan_nxv2f16(<vscale x 2 x half> %x) {
+; ZVFH-LABEL: isnan_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfclass.v v8, v8
+; ZVFH-NEXT: li a0, 768
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vmsne.vi v0, v8, 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: isnan_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: li a0, 31
+; ZVFHMIN-NEXT: slli a0, a0, 10
+; ZVFHMIN-NEXT: vmsgt.vx v0, v8, a0
+; ZVFHMIN-NEXT: ret
%1 = call <vscale x 2 x i1> @llvm.is.fpclass.nxv2f16(<vscale x 2 x half> %x, i32 3) ; nan
ret <vscale x 2 x i1> %1
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll
index 69095a0b21bb..aa59732e1e1e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll
@@ -1,12 +1,258 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfdiv_vv_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfdiv_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfdiv_vv_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfdiv_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfdiv_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfdiv_vv_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfdiv.vv v10, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfdiv_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfdiv.vv v10, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfdiv_vv_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfdiv.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfdiv_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfdiv.vv v12, v12, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfdiv_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfdiv_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfdiv.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fdiv.nxv8bf16(<vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfdiv_vv_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v24, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfdiv_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfdiv_vv_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v24, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 32 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfdiv_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v24, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 32 x bfloat> %vc
+}
declare <vscale x 1 x half> @llvm.experimental.constrained.fdiv.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata)
define <vscale x 1 x half> @vfdiv_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) strictfp {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
index 9f5434dd3472..f7db2be35d72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
@@ -1,13 +1,249 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) {
+; CHECK-LABEL: vfdiv_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %vc = fdiv <vscale x 1 x bfloat> %va, %vb
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfdiv_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %vc = fdiv <vscale x 1 x bfloat> %va, %splat
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) {
+; CHECK-LABEL: vfdiv_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %vc = fdiv <vscale x 2 x bfloat> %va, %vb
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfdiv_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %vc = fdiv <vscale x 2 x bfloat> %va, %splat
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfdiv_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) {
+; CHECK-LABEL: vfdiv_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfdiv.vv v10, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %vc = fdiv <vscale x 4 x bfloat> %va, %vb
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfdiv_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfdiv.vv v10, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %vc = fdiv <vscale x 4 x bfloat> %va, %splat
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: vfdiv_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfdiv.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %vc = fdiv <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfdiv_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfdiv.vv v12, v12, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fdiv <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfdiv_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfdiv_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfdiv.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fdiv <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) {
+; CHECK-LABEL: vfdiv_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v24, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %vc = fdiv <vscale x 16 x bfloat> %va, %vb
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfdiv_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %vc = fdiv <vscale x 16 x bfloat> %va, %splat
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) {
+; CHECK-LABEL: vfdiv_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v24, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %vc = fdiv <vscale x 32 x bfloat> %va, %vb
+ ret <vscale x 32 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfdiv_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v24, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v20
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %vc = fdiv <vscale x 32 x bfloat> %va, %splat
+ ret <vscale x 32 x bfloat> %vc
+}
+
define <vscale x 1 x half> @vfdiv_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) {
; ZVFH-LABEL: vfdiv_vv_nxv1f16:
; ZVFH: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
index 52e2a9535ef6..aa39fe5b5ec8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
@@ -1,13 +1,622 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+declare <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfdiv_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vv_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v9, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v10, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfdiv_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vf_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v10, v8
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fdiv.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfdiv_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vv_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v9, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v10, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfdiv_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vf_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfdiv.vv v9, v10, v8
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fdiv.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vfdiv_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfdiv.vv v10, v12, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfdiv_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vv_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfdiv.vv v10, v12, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfdiv.vv v10, v10, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfdiv_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vf_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfdiv.vv v10, v10, v12
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fdiv.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfdiv.vv v12, v16, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfdiv_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vv_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfdiv.vv v12, v16, v12
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfdiv.vv v12, v12, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfdiv_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vf_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfdiv.vv v12, v12, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fdiv.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfdiv_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vv_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v24, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfdiv_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vf_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v16, v24
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fdiv.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.fdiv.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB20_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB20_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vv_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v24, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB21_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB21_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v24, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; CHECK-NEXT: vmv8r.v v24, v8
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a4, a2, 3
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB22_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfdiv_vf_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v24, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB23_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfdiv.vv v16, v16, v24
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fdiv.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vfdiv_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -514,10 +1123,10 @@ define <vscale x 32 x half> @vfdiv_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB20_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB20_2:
+; ZVFHMIN-NEXT: .LBB44_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -571,10 +1180,10 @@ define <vscale x 32 x half> @vfdiv_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB21_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB21_2:
+; ZVFHMIN-NEXT: .LBB45_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -649,10 +1258,10 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16(<vscale x 32 x half> %va, half %b
; ZVFHMIN-NEXT: vfdiv.vv v16, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB46_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB22_2:
+; ZVFHMIN-NEXT: .LBB46_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -730,10 +1339,10 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB47_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB23_2:
+; ZVFHMIN-NEXT: .LBB47_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
index b6bb0371121b..baecb7bb7d24 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
@@ -1,15 +1,1429 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+declare <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vfma_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v10, v11, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfma_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vv_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v10, v11
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfma_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x bfloat> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v9, v11, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfma_vf_nxv1bf16_commute(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x bfloat> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv1bf16_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmadd.vv v9, v8, v11, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vc, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfma_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x bfloat> %vc, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v9, v11
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfma_vf_nxv1bf16_unmasked_commute(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x bfloat> %vc, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv1bf16_unmasked_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v9, v11
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fma.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vc, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfma_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v10, v11, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfma_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vv_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v10, v11
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfma_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x bfloat> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v9, v11, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %vc, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfma_vf_nxv2bf16_commute(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x bfloat> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv2bf16_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmadd.vv v9, v8, v11, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfma_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x bfloat> %vc, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v9, v11
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %vc, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfma_vf_nxv2bf16_unmasked_commute(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x bfloat> %vc, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv2bf16_unmasked_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v9, v11
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fma.nxv2bf16(<vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vfma_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmadd.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfma_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vv_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmadd.vv v14, v10, v12
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x bfloat> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v14, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %vc, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16_commute(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x bfloat> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv4bf16_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmadd.vv v14, v8, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x bfloat> %vc, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v14, v12
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %vc, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfma_vf_nxv4bf16_unmasked_commute(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x bfloat> %vc, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv4bf16_unmasked_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v14, v12
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fma.nxv4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vfma_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfmadd.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfma_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vv_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfmadd.vv v20, v12, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x bfloat> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfmadd.vv v24, v20, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16_commute(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x bfloat> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv8bf16_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfmadd.vv v20, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vc, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x bfloat> %vc, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfmadd.vv v24, v20, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfma_vf_nxv8bf16_unmasked_commute(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x bfloat> %vc, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv8bf16_unmasked_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfmadd.vv v24, v20, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vc, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vfma_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfma_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vv_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v0, v16, v24
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfma_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v4, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v4
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfma_vf_nxv16bf16_commute(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv16bf16_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v4, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v4
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfma_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x bfloat> %vc, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a1
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v0, v24
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfma_vf_nxv16bf16_unmasked_commute(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x bfloat> %vc, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv16bf16_unmasked_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a1
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v0, v24
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fma.nxv16bf16(<vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vc, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vfma_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a3, a3, a2
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vl8re16.v v0, (a0)
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a0, a2, 1
+; CHECK-NEXT: sub a3, a1, a0
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs1r.v v24, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v24, v24, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a4, a4, a2
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a4, a2, 5
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a4, a2, 4
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v4
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl1r.v v0, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a1, a0, .LBB30_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: .LBB30_2:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a2, a2, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a2, a0, 5
+; CHECK-NEXT: add a0, a2, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a2, a0, 4
+; CHECK-NEXT: add a0, a2, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a2, a0, 5
+; CHECK-NEXT: add a0, a2, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a2, a0, 5
+; CHECK-NEXT: add a0, a2, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: vmv.v.v v16, v8
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfma_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vv_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 5
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT: vmv8r.v v24, v16
+; CHECK-NEXT: vl8re16.v v16, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a0, a2, 1
+; CHECK-NEXT: sub a3, a1, a0
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v7
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a1, a0, .LBB31_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: .LBB31_2:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vfmacc.vv v0, v16, v24
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x bfloat> %c, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x bfloat> %vc, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v24, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 5
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a4, a4, a2
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a4, a2, 4
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a4, a2, 5
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v20, v8
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 4
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 5
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: vmv.v.v v16, v8
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %vc, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_commute(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x bfloat> %vc, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv32bf16_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v24, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 5
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a4, a4, a2
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a4, a2, 4
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a4, a2, 5
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 4
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 5
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a2, a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
+; CHECK-NEXT: vmv.v.v v16, v8
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a1, a1, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vc, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x bfloat> %vc, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v24, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vmset.m v7
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v20, v8
+; CHECK-NEXT: bltu a0, a1, .LBB34_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB34_2:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v0, v24, v8
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v16, v0
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %vc, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfma_vf_nxv32bf16_unmasked_commute(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x bfloat> %vc, i32 zeroext %evl) {
+; CHECK-LABEL: vfma_vf_nxv32bf16_unmasked_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v24, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vmset.m v7
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB35_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB35_2:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v0, v24, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fma.nxv32bf16(<vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vc, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
declare <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vfma_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x half> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -833,8 +2247,12 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a3, 42
-; ZVFHMIN-NEXT: mul a2, a2, a3
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 2
+; ZVFHMIN-NEXT: add a3, a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 2
+; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb
; ZVFHMIN-NEXT: vmv1r.v v24, v0
@@ -856,8 +2274,11 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 25
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: add a4, a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
@@ -897,13 +2318,16 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a1, a0, .LBB30_2
+; ZVFHMIN-NEXT: bltu a1, a0, .LBB66_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a0
-; ZVFHMIN-NEXT: .LBB30_2:
+; ZVFHMIN-NEXT: .LBB66_2:
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 25
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a2, a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -957,8 +2381,12 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 42
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a1, a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -985,8 +2413,10 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16_unmasked(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vmv8r.v v24, v16
; ZVFHMIN-NEXT: vl8re16.v v16, (a0)
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -1017,8 +2447,10 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16_unmasked(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
@@ -1029,10 +2461,10 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16_unmasked(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a1, a0, .LBB31_2
+; ZVFHMIN-NEXT: bltu a1, a0, .LBB67_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a0
-; ZVFHMIN-NEXT: .LBB31_2:
+; ZVFHMIN-NEXT: .LBB67_2:
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
@@ -1048,8 +2480,10 @@ define <vscale x 32 x half> @vfma_vv_nxv32f16_unmasked(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -1081,8 +2515,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b,
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 42
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 2
+; ZVFHMIN-NEXT: add a2, a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 2
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
@@ -1109,8 +2547,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b,
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 25
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: add a4, a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
@@ -1146,13 +2587,16 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b,
; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB32_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB68_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB32_2:
+; ZVFHMIN-NEXT: .LBB68_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 25
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: add a2, a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -1170,8 +2614,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b,
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 25
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: add a2, a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -1193,8 +2640,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b,
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 25
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: add a2, a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -1206,8 +2656,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16(<vscale x 32 x half> %va, half %b,
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 42
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a1, a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -1229,8 +2683,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 42
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 2
+; ZVFHMIN-NEXT: add a2, a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 2
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
@@ -1257,8 +2715,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 25
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: add a4, a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
@@ -1294,13 +2755,16 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB33_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB69_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB33_2:
+; ZVFHMIN-NEXT: .LBB69_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 25
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: add a2, a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -1325,8 +2789,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 25
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: add a2, a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -1341,8 +2808,11 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 25
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: add a2, a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
@@ -1354,8 +2824,12 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_commute(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 42
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a1, a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -1384,8 +2858,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -1415,8 +2891,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
@@ -1427,10 +2905,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB34_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB70_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB34_2:
+; ZVFHMIN-NEXT: .LBB70_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -1446,8 +2924,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked(<vscale x 32 x half> %va,
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -1489,8 +2969,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -1520,8 +3002,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
@@ -1532,10 +3016,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha
; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB35_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB71_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB35_2:
+; ZVFHMIN-NEXT: .LBB71_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -1551,8 +3035,10 @@ define <vscale x 32 x half> @vfma_vf_nxv32f16_unmasked_commute(<vscale x 32 x ha
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -2250,14 +3736,18 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 40
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -2294,8 +3784,10 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc
; CHECK-NEXT: vslidedown.vx v0, v0, a3
; CHECK-NEXT: and a0, a7, a6
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, a2, a3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
@@ -2307,15 +3799,17 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a2, 24
-; CHECK-NEXT: mul a0, a0, a2
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a4, a1, .LBB92_2
+; CHECK-NEXT: bltu a4, a1, .LBB128_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a4, a1
-; CHECK-NEXT: .LBB92_2:
+; CHECK-NEXT: .LBB128_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 5
@@ -2333,14 +3827,18 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc
; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 40
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -2354,8 +3852,10 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: csrr a1, vlenb
@@ -2389,10 +3889,10 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v16, v8, v24
-; CHECK-NEXT: bltu a4, a1, .LBB93_2
+; CHECK-NEXT: bltu a4, a1, .LBB129_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a4, a1
-; CHECK-NEXT: .LBB93_2:
+; CHECK-NEXT: .LBB129_2:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
@@ -2404,8 +3904,10 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK-NEXT: vfmadd.vv v0, v24, v8
; CHECK-NEXT: vmv.v.v v8, v0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -7161,8 +8663,12 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a3, 42
-; ZVFHMIN-NEXT: mul a2, a2, a3
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 2
+; ZVFHMIN-NEXT: add a3, a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 2
+; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
@@ -7183,8 +8689,11 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a2, a0, 1
; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: li a4, 25
-; ZVFHMIN-NEXT: mul a3, a3, a4
+; ZVFHMIN-NEXT: mv a4, a3
+; ZVFHMIN-NEXT: slli a3, a3, 3
+; ZVFHMIN-NEXT: add a4, a4, a3
+; ZVFHMIN-NEXT: slli a3, a3, 1
+; ZVFHMIN-NEXT: add a3, a3, a4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
@@ -7197,10 +8706,10 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: mv a3, a1
-; ZVFHMIN-NEXT: bltu a1, a2, .LBB244_2
+; ZVFHMIN-NEXT: bltu a1, a2, .LBB280_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB244_2:
+; ZVFHMIN-NEXT: .LBB280_2:
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
@@ -7230,8 +8739,11 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: addi a3, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: li a4, 25
-; ZVFHMIN-NEXT: mul a3, a3, a4
+; ZVFHMIN-NEXT: mv a4, a3
+; ZVFHMIN-NEXT: slli a3, a3, 3
+; ZVFHMIN-NEXT: add a4, a4, a3
+; ZVFHMIN-NEXT: slli a3, a3, 1
+; ZVFHMIN-NEXT: add a3, a3, a4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
@@ -7258,8 +8770,11 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 25
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a2, a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -7271,8 +8786,11 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 25
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a2, a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
@@ -7284,8 +8802,12 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 42
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a1, a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -7307,8 +8829,10 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a3, 40
-; ZVFHMIN-NEXT: mul a2, a2, a3
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 2
+; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
@@ -7316,8 +8840,10 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; ZVFHMIN-NEXT: vxor.vx v0, v24, a0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
@@ -7369,8 +8895,10 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a3, 24
-; ZVFHMIN-NEXT: mul a2, a2, a3
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
@@ -7380,10 +8908,10 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a1, a0, .LBB245_2
+; ZVFHMIN-NEXT: bltu a1, a0, .LBB281_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a0
-; ZVFHMIN-NEXT: .LBB245_2:
+; ZVFHMIN-NEXT: .LBB281_2:
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add a0, sp, a0
@@ -7404,8 +8932,10 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v24
; ZVFHMIN-NEXT: vmv8r.v v8, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 40
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -7433,8 +8963,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half %
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -7456,15 +8988,17 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half %
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: mv a3, a0
-; ZVFHMIN-NEXT: bltu a0, a2, .LBB246_2
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB282_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB246_2:
+; ZVFHMIN-NEXT: .LBB282_2:
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vmv4r.v v4, v12
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
@@ -7505,8 +9039,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half %
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -7559,8 +9095,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -7582,15 +9120,17 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: mv a3, a0
-; ZVFHMIN-NEXT: bltu a0, a2, .LBB247_2
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB283_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB247_2:
+; ZVFHMIN-NEXT: .LBB283_2:
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vmv4r.v v4, v12
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
@@ -7626,8 +9166,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -7685,8 +9227,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -7720,8 +9264,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
@@ -7743,10 +9289,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB248_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB284_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB248_2:
+; ZVFHMIN-NEXT: .LBB284_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -7754,8 +9300,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
@@ -7803,8 +9351,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v8, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -7838,8 +9388,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
@@ -7861,10 +9413,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB249_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB285_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB249_2:
+; ZVFHMIN-NEXT: .LBB285_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -7872,8 +9424,10 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
@@ -7919,8 +9473,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -7945,13 +9501,15 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a1, a2, .LBB250_2
+; ZVFHMIN-NEXT: bltu a1, a2, .LBB286_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB250_2:
+; ZVFHMIN-NEXT: .LBB286_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
@@ -7986,8 +9544,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -8029,8 +9589,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a3, 40
-; ZVFHMIN-NEXT: mul a2, a2, a3
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 2
+; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
@@ -8061,15 +9623,17 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a1, a2, .LBB251_2
+; ZVFHMIN-NEXT: bltu a1, a2, .LBB287_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB251_2:
+; ZVFHMIN-NEXT: .LBB287_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 5
; ZVFHMIN-NEXT: add a4, sp, a4
@@ -8077,8 +9641,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
@@ -8094,8 +9660,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a3, a3, a4
+; ZVFHMIN-NEXT: slli a3, a3, 3
+; ZVFHMIN-NEXT: mv a4, a3
+; ZVFHMIN-NEXT: slli a3, a3, 1
+; ZVFHMIN-NEXT: add a3, a3, a4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
@@ -8125,8 +9693,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -8143,8 +9713,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 40
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -8171,8 +9743,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a3, 24
-; ZVFHMIN-NEXT: mul a2, a2, a3
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
@@ -8209,8 +9783,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
@@ -8238,13 +9814,15 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a1, a0, .LBB252_2
+; ZVFHMIN-NEXT: bltu a1, a0, .LBB288_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a0
-; ZVFHMIN-NEXT: .LBB252_2:
+; ZVFHMIN-NEXT: .LBB288_2:
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -8287,8 +9865,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked_commuted(<vscale x 32
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a3, 24
-; ZVFHMIN-NEXT: mul a2, a2, a3
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
@@ -8325,8 +9905,10 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked_commuted(<vscale x 32
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
@@ -8354,13 +9936,15 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked_commuted(<vscale x 32
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: bltu a1, a0, .LBB253_2
+; ZVFHMIN-NEXT: bltu a1, a0, .LBB289_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a0
-; ZVFHMIN-NEXT: .LBB253_2:
+; ZVFHMIN-NEXT: .LBB289_2:
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
@@ -8398,8 +9982,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 40
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 2
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
@@ -8422,8 +10008,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
@@ -8434,10 +10022,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a0, a2, .LBB254_2
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB290_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB254_2:
+; ZVFHMIN-NEXT: .LBB290_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 5
; ZVFHMIN-NEXT: add a4, sp, a4
@@ -8445,8 +10033,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
@@ -8467,8 +10057,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a3, a3, a4
+; ZVFHMIN-NEXT: slli a3, a3, 3
+; ZVFHMIN-NEXT: mv a4, a3
+; ZVFHMIN-NEXT: slli a3, a3, 1
+; ZVFHMIN-NEXT: add a3, a3, a4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
@@ -8493,8 +10085,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -8514,8 +10108,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 40
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -8546,8 +10142,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -8572,13 +10170,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv4r.v v4, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: bltu a0, a2, .LBB255_2
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB291_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB255_2:
+; ZVFHMIN-NEXT: .LBB291_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
@@ -8613,8 +10213,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -8674,8 +10276,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -8711,8 +10315,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
@@ -8740,13 +10346,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB256_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB292_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB256_2:
+; ZVFHMIN-NEXT: .LBB292_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
@@ -8793,8 +10401,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked_commute(<vscale x 32 x
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -8830,8 +10440,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked_commute(<vscale x 32 x
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload
@@ -8860,13 +10472,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked_commute(<vscale x 32 x
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB257_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB293_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB257_2:
+; ZVFHMIN-NEXT: .LBB293_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
@@ -8910,8 +10524,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -8939,13 +10555,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv4r.v v4, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: bltu a0, a2, .LBB258_2
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB294_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB258_2:
+; ZVFHMIN-NEXT: .LBB294_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
@@ -8984,8 +10602,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -9032,8 +10652,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -9063,13 +10685,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a0, a2, .LBB259_2
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB295_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB259_2:
+; ZVFHMIN-NEXT: .LBB295_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
@@ -9109,8 +10733,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -9160,8 +10786,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -9200,8 +10828,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload
@@ -9230,13 +10860,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB260_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB296_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB260_2:
+; ZVFHMIN-NEXT: .LBB296_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
@@ -9280,8 +10912,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -9320,8 +10954,10 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
@@ -9349,13 +10985,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB261_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB297_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB261_2:
+; ZVFHMIN-NEXT: .LBB297_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
@@ -9402,8 +11040,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -9428,13 +11068,15 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a1, a2, .LBB262_2
+; ZVFHMIN-NEXT: bltu a1, a2, .LBB298_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB262_2:
+; ZVFHMIN-NEXT: .LBB298_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
@@ -9469,8 +11111,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -9512,8 +11156,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a3, 40
-; ZVFHMIN-NEXT: mul a2, a2, a3
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 2
+; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
@@ -9544,15 +11190,17 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a1, a2, .LBB263_2
+; ZVFHMIN-NEXT: bltu a1, a2, .LBB299_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB263_2:
+; ZVFHMIN-NEXT: .LBB299_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 5
; ZVFHMIN-NEXT: add a4, sp, a4
@@ -9560,8 +11208,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
@@ -9577,8 +11227,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a3, a3, a4
+; ZVFHMIN-NEXT: slli a3, a3, 3
+; ZVFHMIN-NEXT: mv a4, a3
+; ZVFHMIN-NEXT: slli a3, a3, 1
+; ZVFHMIN-NEXT: add a3, a3, a4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
@@ -9608,8 +11260,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -9626,8 +11280,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 40
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -9654,8 +11310,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a3, 24
-; ZVFHMIN-NEXT: mul a2, a2, a3
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
@@ -9692,8 +11350,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
@@ -9721,13 +11381,15 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a1, a0, .LBB264_2
+; ZVFHMIN-NEXT: bltu a1, a0, .LBB300_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a0
-; ZVFHMIN-NEXT: .LBB264_2:
+; ZVFHMIN-NEXT: .LBB300_2:
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -9770,8 +11432,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked_commuted(<vscale x 32
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a3, 24
-; ZVFHMIN-NEXT: mul a2, a2, a3
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
@@ -9808,8 +11472,10 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked_commuted(<vscale x 32
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
@@ -9837,13 +11503,15 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked_commuted(<vscale x 32
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: bltu a1, a0, .LBB265_2
+; ZVFHMIN-NEXT: bltu a1, a0, .LBB301_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a0
-; ZVFHMIN-NEXT: .LBB265_2:
+; ZVFHMIN-NEXT: .LBB301_2:
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
@@ -9881,8 +11549,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 40
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 2
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
@@ -9907,14 +11577,16 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: mv a3, a0
-; ZVFHMIN-NEXT: bltu a0, a2, .LBB266_2
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB302_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB266_2:
+; ZVFHMIN-NEXT: .LBB302_2:
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
@@ -9948,8 +11620,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -9984,8 +11658,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 40
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -10020,8 +11696,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -10034,10 +11712,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: mv a3, a0
-; ZVFHMIN-NEXT: bltu a0, a2, .LBB267_2
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB303_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB267_2:
+; ZVFHMIN-NEXT: .LBB303_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 4
; ZVFHMIN-NEXT: add a4, sp, a4
@@ -10050,8 +11728,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
@@ -10087,8 +11767,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -10136,8 +11818,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -10172,8 +11856,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
@@ -10195,10 +11881,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB268_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB304_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB268_2:
+; ZVFHMIN-NEXT: .LBB304_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -10206,8 +11892,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
@@ -10253,8 +11941,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -10289,8 +11979,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
@@ -10307,10 +11999,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB269_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB305_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB269_2:
+; ZVFHMIN-NEXT: .LBB305_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -10323,8 +12015,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_unmasked_commute(<vscale x 32 x
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -10366,8 +12060,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -10393,13 +12089,15 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: mv a3, a0
-; ZVFHMIN-NEXT: bltu a0, a2, .LBB270_2
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB306_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB270_2:
+; ZVFHMIN-NEXT: .LBB306_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 24
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
@@ -10434,8 +12132,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -10483,13 +12183,18 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 34
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 25
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: add a2, a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -10519,13 +12224,16 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: mv a3, a0
-; ZVFHMIN-NEXT: bltu a0, a2, .LBB271_2
+; ZVFHMIN-NEXT: bltu a0, a2, .LBB307_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: .LBB271_2:
+; ZVFHMIN-NEXT: .LBB307_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: li a5, 25
-; ZVFHMIN-NEXT: mul a4, a4, a5
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: add a5, a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
@@ -10576,8 +12284,11 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 25
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: add a2, a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -10600,8 +12311,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 34
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -10629,8 +12342,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: vmv8r.v v24, v16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -10668,8 +12383,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
@@ -10686,10 +12403,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB272_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB308_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB272_2:
+; ZVFHMIN-NEXT: .LBB308_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -10702,8 +12419,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked(<vscale x 32
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -10745,8 +12464,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -10784,8 +12505,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: li a4, 24
-; ZVFHMIN-NEXT: mul a2, a2, a4
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
@@ -10807,10 +12530,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB273_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB309_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB273_2:
+; ZVFHMIN-NEXT: .LBB309_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -10818,8 +12541,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: li a2, 24
-; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll
index 52e438013fdb..dea411348ce5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll
@@ -1,16 +1,402 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 \
+; RUN: -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 \
+; RUN: -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
; This tests a mix of vfmacc and vfmadd by using different operand orders to
; trigger commuting in TwoAddressInstructionPass.
+define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) strictfp {
+; CHECK-LABEL: vfmadd_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmadd.vv v9, v10, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %vd = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 1 x bfloat> %vd
+}
+
+define <vscale x 1 x bfloat> @vfmadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, bfloat %c) strictfp {
+; CHECK-LABEL: vfmadd_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v9, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %c, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %vd = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fma.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 1 x bfloat> %vd
+}
+
+
+define <vscale x 2 x bfloat> @vfmadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %vc) strictfp {
+; CHECK-LABEL: vfmadd_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmadd.vv v10, v9, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %vd = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fma.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 2 x bfloat> %vd
+}
+
+define <vscale x 2 x bfloat> @vfmadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, bfloat %c) strictfp {
+; CHECK-LABEL: vfmadd_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmadd.vv v9, v8, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %c, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %vd = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fma.nxv2bf16(<vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %splat, <vscale x 2 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 2 x bfloat> %vd
+}
+
+
+define <vscale x 4 x bfloat> @vfmadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %vc) strictfp {
+; CHECK-LABEL: vfmadd_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmadd.vv v14, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14
+; CHECK-NEXT: ret
+ %vd = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fma.nxv4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 4 x bfloat> %vd
+}
+
+define <vscale x 4 x bfloat> @vfmadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, bfloat %c) strictfp {
+; CHECK-LABEL: vfmadd_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v14, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %c, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %vd = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fma.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 4 x bfloat> %vd
+}
+
+
+define <vscale x 8 x bfloat> @vfmadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc) strictfp {
+; CHECK-LABEL: vfmadd_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v20, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %vd = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 8 x bfloat> %vd
+}
+
+define <vscale x 8 x bfloat> @vfmadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, bfloat %c) strictfp {
+; CHECK-LABEL: vfmadd_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmadd.vv v24, v20, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %c, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vd = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fma.nxv8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 8 x bfloat> %vd
+}
+
+
+define <vscale x 16 x bfloat> @vfmadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc) strictfp {
+; CHECK-LABEL: vfmadd_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v24, v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %vd = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fma.nxv16bf16(<vscale x 16 x bfloat> %vc, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 16 x bfloat> %vd
+}
+
+define <vscale x 16 x bfloat> @vfmadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, bfloat %c) strictfp {
+; CHECK-LABEL: vfmadd_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %c, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %vd = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fma.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 16 x bfloat> %vd
+}
+
+
+define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %vc) strictfp {
+; CHECK-LABEL: vfmadd_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT: vl8re16.v v0, (a0)
+; CHECK-NEXT: vmv8r.v v24, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v8, v0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v0, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v8, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %vd = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fma.nxv32bf16(<vscale x 32 x bfloat> %vc, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 32 x bfloat> %vd
+}
+
+define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, bfloat %c) strictfp {
+; CHECK-LABEL: vfmadd_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: vmv8r.v v24, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v0, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v8, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %c, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %vd = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fma.nxv32bf16(<vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %splat, <vscale x 32 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 32 x bfloat> %vd
+}
+
declare <vscale x 1 x half> @llvm.experimental.constrained.fma.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata)
define <vscale x 1 x half> @vfmadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc) strictfp {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
index a80a943c2e1d..2df2212c43db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
@@ -1,16 +1,573 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; This tests a mix of vfmacc and vfmadd by using different operand orders to
; trigger commuting in TwoAddressInstructionPass.
+define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) {
+; CHECK-LABEL: vfmadd_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v10, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc)
+ ret <vscale x 1 x bfloat> %vd
+}
+
+define <vscale x 1 x bfloat> @vfmadd_vv_nxv1bf16_commuted(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc) {
+; CHECK-LABEL: vfmadd_vv_nxv1bf16_commuted:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmadd.vv v9, v8, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %vc, <vscale x 1 x bfloat> %va)
+ ret <vscale x 1 x bfloat> %vd
+}
+
+define <vscale x 1 x bfloat> @vfmadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, bfloat %c) {
+; CHECK-LABEL: vfmadd_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v9, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %c, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %vd = call <vscale x 1 x bfloat> @llvm.fma.v1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, <vscale x 1 x bfloat> %vb)
+ ret <vscale x 1 x bfloat> %vd
+}
+
+declare <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x bfloat>)
+
+define <vscale x 2 x bfloat> @vfmadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %vc) {
+; CHECK-LABEL: vfmadd_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmadd.vv v12, v9, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %vd = call <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vc, <vscale x 2 x bfloat> %vb)
+ ret <vscale x 2 x bfloat> %vd
+}
+
+define <vscale x 2 x bfloat> @vfmadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, bfloat %c) {
+; CHECK-LABEL: vfmadd_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmadd.vv v9, v8, v11
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %c, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %vd = call <vscale x 2 x bfloat> @llvm.fma.v2bf16(<vscale x 2 x bfloat> %vb, <vscale x 2 x bfloat> %splat, <vscale x 2 x bfloat> %va)
+ ret <vscale x 2 x bfloat> %vd
+}
+
+declare <vscale x 4 x bfloat> @llvm.fma.v4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x bfloat>)
+
+define <vscale x 4 x bfloat> @vfmadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %vc) {
+; CHECK-LABEL: vfmadd_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmadd.vv v14, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14
+; CHECK-NEXT: ret
+ %vd = call <vscale x 4 x bfloat> @llvm.fma.v4bf16(<vscale x 4 x bfloat> %vb, <vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vc)
+ ret <vscale x 4 x bfloat> %vd
+}
+
+define <vscale x 4 x bfloat> @vfmadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, bfloat %c) {
+; CHECK-LABEL: vfmadd_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v14, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %c, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %vd = call <vscale x 4 x bfloat> @llvm.fma.v4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, <vscale x 4 x bfloat> %vb)
+ ret <vscale x 4 x bfloat> %vd
+}
+
+declare <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+
+define <vscale x 8 x bfloat> @vfmadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc) {
+; CHECK-LABEL: vfmadd_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmadd.vv v24, v20, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: ret
+ %vd = call <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %vc, <vscale x 8 x bfloat> %va)
+ ret <vscale x 8 x bfloat> %vd
+}
+
+define <vscale x 8 x bfloat> @vfmadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, bfloat %c) {
+; CHECK-LABEL: vfmadd_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmadd.vv v24, v20, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %c, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vd = call <vscale x 8 x bfloat> @llvm.fma.v8bf16(<vscale x 8 x bfloat> %vb, <vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va)
+ ret <vscale x 8 x bfloat> %vd
+}
+
+declare <vscale x 16 x bfloat> @llvm.fma.v16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x bfloat>)
+
+define <vscale x 16 x bfloat> @vfmadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x bfloat> %vc) {
+; CHECK-LABEL: vfmadd_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %vd = call <vscale x 16 x bfloat> @llvm.fma.v16bf16(<vscale x 16 x bfloat> %vc, <vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
+ ret <vscale x 16 x bfloat> %vd
+}
+
+define <vscale x 16 x bfloat> @vfmadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, bfloat %c) {
+; CHECK-LABEL: vfmadd_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmadd.vv v16, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %c, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %vd = call <vscale x 16 x bfloat> @llvm.fma.v16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, <vscale x 16 x bfloat> %vb)
+ ret <vscale x 16 x bfloat> %vd
+}
+
+declare <vscale x 32 x bfloat> @llvm.fma.v32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>)
+
+define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %vc) {
+; ZVFH-LABEL: vfmadd_vv_nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: .cfi_def_cfa_offset 16
+; ZVFH-NEXT: csrr a1, vlenb
+; ZVFH-NEXT: slli a1, a1, 5
+; ZVFH-NEXT: sub sp, sp, a1
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFH-NEXT: vl8re16.v v0, (a0)
+; ZVFH-NEXT: vmv8r.v v24, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 4
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT: vmv8r.v v16, v8
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: mv a1, a0
+; ZVFH-NEXT: slli a0, a0, 1
+; ZVFH-NEXT: add a0, a0, a1
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v24
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT: vmv8r.v v8, v0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: mv a1, a0
+; ZVFH-NEXT: slli a0, a0, 1
+; ZVFH-NEXT: add a0, a0, a1
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmadd.vv v0, v16, v24
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 4
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: mv a1, a0
+; ZVFH-NEXT: slli a0, a0, 1
+; ZVFH-NEXT: add a0, a0, a1
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: mv a1, a0
+; ZVFH-NEXT: slli a0, a0, 1
+; ZVFH-NEXT: add a0, a0, a1
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmadd.vv v16, v8, v24
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 5
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vv_nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 5
+; ZVFHMIN-NEXT: sub sp, sp, a1
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT: vl8re16.v v0, (a0)
+; ZVFHMIN-NEXT: vmv8r.v v24, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv8r.v v16, v8
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v24
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv8r.v v8, v0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 5
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
+ %vd = call <vscale x 32 x bfloat> @llvm.fma.v32bf16(<vscale x 32 x bfloat> %vc, <vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %va)
+ ret <vscale x 32 x bfloat> %vd
+}
+
+define <vscale x 32 x bfloat> @vfmadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, bfloat %c) {
+; ZVFH-LABEL: vfmadd_vf_nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: .cfi_def_cfa_offset 16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: mv a1, a0
+; ZVFH-NEXT: slli a0, a0, 1
+; ZVFH-NEXT: add a0, a0, a1
+; ZVFH-NEXT: sub sp, sp, a0
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; ZVFH-NEXT: vmv8r.v v24, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vmv.v.x v16, a0
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 4
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v24
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 4
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v24
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmadd.vv v0, v16, v24
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v20
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 4
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmadd.vv v24, v8, v16
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v24
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: mv a1, a0
+; ZVFH-NEXT: slli a0, a0, 1
+; ZVFH-NEXT: add a0, a0, a1
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmadd_vf_nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; ZVFHMIN-NEXT: vmv8r.v v24, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v24
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v24
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v20
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v24
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %c, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %vd = call <vscale x 32 x bfloat> @llvm.fma.v32bf16(<vscale x 32 x bfloat> %vb, <vscale x 32 x bfloat> %splat, <vscale x 32 x bfloat> %va)
+ ret <vscale x 32 x bfloat> %vd
+}
+
declare <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>)
define <vscale x 1 x half> @vfmadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
index caf37b7a0a12..b5604add6d25 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
@@ -1,12 +1,243 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
+; CHECK-LABEL: vfmax_nxv1bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmax.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.maxnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: vfmax_nxv1bf16_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmax.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.maxnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %splat)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>)
+
+define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
+; CHECK-LABEL: vfmax_nxv2bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmax.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: vfmax_nxv2bf16_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmax.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %splat)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>)
+
+define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
+; CHECK-LABEL: vfmax_nxv4bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmax.vv v10, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: vfmax_nxv4bf16_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmax.vv v10, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %splat)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+
+define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: vfmax_nxv8bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmax.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: vfmax_nxv8bf16_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmax.vv v12, v12, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %splat)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>)
+
+define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
+; CHECK-LABEL: vfmax_nxv16bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v16, v24, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: vfmax_nxv16bf16_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %splat)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.maxnum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>)
+
+define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
+; CHECK-LABEL: vfmax_nxv32bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v24, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.maxnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: vfmax_nxv32bf16_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v24, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v20
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v16, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.maxnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %splat)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll
index 7ab999ea4fa7..6e38881b4d60 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll
@@ -1,13 +1,278 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+declare <vscale x 1 x bfloat> @llvm.vp.maxnum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmax.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.maxnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmax.vv v9, v9, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.maxnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.maxnum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmax.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.maxnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmax.vv v9, v9, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.maxnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.maxnum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmax.vv v10, v12, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.maxnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmax.vv v10, v12, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.maxnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.maxnum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfmax.vv v12, v16, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.maxnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfmax.vv v12, v16, v12
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.maxnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.maxnum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.maxnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v16, v24, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.maxnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.maxnum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.maxnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmax_vv_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v24, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v16, v24, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.maxnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.maxnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -264,10 +529,10 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: .LBB22_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -321,10 +586,10 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: .LBB23_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
index b47e14f4f26b..9212ddab5b1e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
@@ -1,12 +1,243 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
+; CHECK-LABEL: vfmin_nxv1bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmin.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.minnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: vfmin_nxv1bf16_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmin.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.minnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %splat)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>)
+
+define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
+; CHECK-LABEL: vfmin_nxv2bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmin.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: vfmin_nxv2bf16_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmin.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %splat)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>)
+
+define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
+; CHECK-LABEL: vfmin_nxv4bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmin.vv v10, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: vfmin_nxv4bf16_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmin.vv v10, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %splat)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+
+define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: vfmin_nxv8bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmin.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: vfmin_nxv8bf16_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmin.vv v12, v12, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %splat)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>)
+
+define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
+; CHECK-LABEL: vfmin_nxv16bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v16, v24, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: vfmin_nxv16bf16_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %splat)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>)
+
+define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
+; CHECK-LABEL: vfmin_nxv32bf16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v24, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: vfmin_nxv32bf16_vf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v24, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v20
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v16, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %splat)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll
index e928df85b5bb..f1d6b2100ae9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll
@@ -1,13 +1,278 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+declare <vscale x 1 x bfloat> @llvm.vp.minnum.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmin.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.minnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmin.vv v9, v9, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.minnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.minnum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmin.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.minnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmin.vv v9, v9, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.minnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.minnum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmin.vv v10, v12, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.minnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmin.vv v10, v12, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.minnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.minnum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfmin.vv v12, v16, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.minnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfmin.vv v12, v16, v12
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.minnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.minnum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.minnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v16, v24, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.minnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.minnum.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.minnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vfmin_vv_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v24, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v16, v24, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.minnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.minnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vfmin_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -264,10 +529,10 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: .LBB22_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -321,10 +586,10 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: .LBB23_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll
index e82fdf065574..999b06ba5a57 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll
@@ -1,12 +1,239 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+define <vscale x 1 x bfloat> @vfmul_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfmul_vv_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmul.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fmul.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 1 x bfloat> @vfmul_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfmul_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmul.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fmul.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfmul_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfmul_vv_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmul.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fmul.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfmul_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfmul_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmul.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fmul.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfmul_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfmul_vv_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmul.vv v10, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fmul.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfmul_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfmul_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmul.vv v10, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fmul.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfmul_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfmul_vv_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmul.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fmul.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfmul_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfmul_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmul.vv v12, v12, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fmul.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfmul_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfmul_vv_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v16, v24, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fmul.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfmul_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfmul_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fmul.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfmul_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfmul_vv_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v24, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fmul.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 32 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfmul_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfmul_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v24, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fmul.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 32 x bfloat> %vc
+}
declare <vscale x 1 x half> @llvm.experimental.constrained.fmul.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata)
define <vscale x 1 x half> @vfmul_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) strictfp {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
index 70d664aa50ec..2ab04a45c818 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
@@ -1,12 +1,252 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+define <vscale x 1 x bfloat> @vfmul_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) {
+; CHECK-LABEL: vfmul_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmul.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %vc = fmul <vscale x 1 x bfloat> %va, %vb
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 1 x bfloat> @vfmul_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfmul.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %vc = fmul <vscale x 1 x bfloat> %va, %splat
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfmul_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) {
+; CHECK-LABEL: vfmul_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmul.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %vc = fmul <vscale x 2 x bfloat> %va, %vb
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfmul_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfmul.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %vc = fmul <vscale x 2 x bfloat> %va, %splat
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfmul_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) {
+; CHECK-LABEL: vfmul_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmul.vv v10, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %vc = fmul <vscale x 4 x bfloat> %va, %vb
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfmul_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmul.vv v10, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %vc = fmul <vscale x 4 x bfloat> %va, %splat
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfmul_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: vfmul_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmul.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %vc = fmul <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfmul_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmul.vv v12, v12, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fmul <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfmul_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfmul.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fmul <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfmul_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) {
+; CHECK-LABEL: vfmul_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v16, v24, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %vc = fmul <vscale x 16 x bfloat> %va, %vb
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfmul_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %vc = fmul <vscale x 16 x bfloat> %va, %splat
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfmul_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) {
+; CHECK-LABEL: vfmul_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v24, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %vc = fmul <vscale x 32 x bfloat> %va, %vb
+ ret <vscale x 32 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfmul_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfmul_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v24, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v20
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfmul.vv v16, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %vc = fmul <vscale x 32 x bfloat> %va, %splat
+ ret <vscale x 32 x bfloat> %vc
+}
define <vscale x 1 x half> @vfmul_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) {
; ZVFH-LABEL: vfmul_vv_nxv1f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll
index 806b817fd6c4..9da1e0a576d5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll
@@ -1,12 +1,110 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+define <vscale x 1 x bfloat> @vfsqrt_nxv1bf16(<vscale x 1 x bfloat> %v) strictfp {
+; CHECK-LABEL: vfsqrt_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfsqrt.v v9, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %r = call <vscale x 1 x bfloat> @llvm.experimental.constrained.sqrt.nxv1bf16(<vscale x 1 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 1 x bfloat> %r
+}
+
+
+define <vscale x 2 x bfloat> @vfsqrt_nxv2bf16(<vscale x 2 x bfloat> %v) strictfp {
+; CHECK-LABEL: vfsqrt_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfsqrt.v v9, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %r = call <vscale x 2 x bfloat> @llvm.experimental.constrained.sqrt.nxv2bf16(<vscale x 2 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 2 x bfloat> %r
+}
+
+
+define <vscale x 4 x bfloat> @vfsqrt_nxv4bf16(<vscale x 4 x bfloat> %v) strictfp {
+; CHECK-LABEL: vfsqrt_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfsqrt.v v10, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %r = call <vscale x 4 x bfloat> @llvm.experimental.constrained.sqrt.nxv4bf16(<vscale x 4 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 4 x bfloat> %r
+}
+
+
+define <vscale x 8 x bfloat> @vfsqrt_nxv8bf16(<vscale x 8 x bfloat> %v) strictfp {
+; CHECK-LABEL: vfsqrt_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfsqrt.v v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x bfloat> @llvm.experimental.constrained.sqrt.nxv8bf16(<vscale x 8 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 8 x bfloat> %r
+}
+
+
+define <vscale x 16 x bfloat> @vfsqrt_nxv16bf16(<vscale x 16 x bfloat> %v) strictfp {
+; CHECK-LABEL: vfsqrt_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %r = call <vscale x 16 x bfloat> @llvm.experimental.constrained.sqrt.nxv16bf16(<vscale x 16 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 16 x bfloat> %r
+}
+
+
+define <vscale x 32 x bfloat> @vfsqrt_nxv32bf16(<vscale x 32 x bfloat> %v) strictfp {
+; CHECK-LABEL: vfsqrt_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %r = call <vscale x 32 x bfloat> @llvm.experimental.constrained.sqrt.nxv32bf16(<vscale x 32 x bfloat> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
+ ret <vscale x 32 x bfloat> %r
+}
declare <vscale x 1 x half> @llvm.experimental.constrained.sqrt.nxv1f16(<vscale x 1 x half>, metadata, metadata)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll
index 329a078cd166..de31a02cd154 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll
@@ -1,12 +1,105 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+define <vscale x 1 x bfloat> @vfsqrt_nxv1bf16(<vscale x 1 x bfloat> %v) {
+; CHECK-LABEL: vfsqrt_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfsqrt.v v9, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %r = call <vscale x 1 x bfloat> @llvm.sqrt.nxv1bf16(<vscale x 1 x bfloat> %v)
+ ret <vscale x 1 x bfloat> %r
+}
+
+define <vscale x 2 x bfloat> @vfsqrt_nxv2bf16(<vscale x 2 x bfloat> %v) {
+; CHECK-LABEL: vfsqrt_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfsqrt.v v9, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %r = call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> %v)
+ ret <vscale x 2 x bfloat> %r
+}
+
+define <vscale x 4 x bfloat> @vfsqrt_nxv4bf16(<vscale x 4 x bfloat> %v) {
+; CHECK-LABEL: vfsqrt_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfsqrt.v v10, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %r = call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> %v)
+ ret <vscale x 4 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @vfsqrt_nxv8bf16(<vscale x 8 x bfloat> %v) {
+; CHECK-LABEL: vfsqrt_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfsqrt.v v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> %v)
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 16 x bfloat> @vfsqrt_nxv16bf16(<vscale x 16 x bfloat> %v) {
+; CHECK-LABEL: vfsqrt_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %r = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> %v)
+ ret <vscale x 16 x bfloat> %r
+}
+
+define <vscale x 32 x bfloat> @vfsqrt_nxv32bf16(<vscale x 32 x bfloat> %v) {
+; CHECK-LABEL: vfsqrt_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %r = call <vscale x 32 x bfloat> @llvm.sqrt.nxv32bf16(<vscale x 32 x bfloat> %v)
+ ret <vscale x 32 x bfloat> %r
+}
declare <vscale x 1 x half> @llvm.sqrt.nxv1f16(<vscale x 1 x half>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll
index bd229e0220a4..574c2e052630 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll
@@ -1,13 +1,236 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+declare <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vfsqrt_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsqrt_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfsqrt.v v9, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfsqrt_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vfsqrt_vv_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfsqrt.v v9, v9
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.sqrt.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfsqrt_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsqrt_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfsqrt.v v9, v9, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfsqrt_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vfsqrt_vv_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfsqrt.v v9, v9
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.sqrt.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vfsqrt_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsqrt_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfsqrt.v v10, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfsqrt_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vfsqrt_vv_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfsqrt.v v10, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.sqrt.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vfsqrt_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsqrt_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfsqrt.v v12, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfsqrt_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vfsqrt_vv_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfsqrt.v v12, v12
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.sqrt.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vfsqrt_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsqrt_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfsqrt_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vfsqrt_vv_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.sqrt.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vfsqrt_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsqrt_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v24, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
+; CHECK-NEXT: bltu a0, a1, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+define <vscale x 32 x bfloat> @vfsqrt_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vfsqrt_vv_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v16
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v16, a2
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v16, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.sqrt.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.sqrt.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vfsqrt_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -245,10 +468,10 @@ define <vscale x 32 x half> @vfsqrt_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: vfsqrt.v v24, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB10_2:
+; ZVFHMIN-NEXT: .LBB22_2:
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: vmv1r.v v0, v16
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
@@ -286,10 +509,10 @@ define <vscale x 32 x half> @vfsqrt_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vfsqrt.v v16, v16, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB11_2:
+; ZVFHMIN-NEXT: .LBB23_2:
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfsqrt.v v16, v16
@@ -537,10 +760,10 @@ define <vscale x 16 x double> @vfsqrt_vv_nxv16f64(<vscale x 16 x double> %va, <v
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfsqrt.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfsqrt.v v8, v8, v0.t
@@ -559,10 +782,10 @@ define <vscale x 16 x double> @vfsqrt_vv_nxv16f64_unmasked(<vscale x 16 x double
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfsqrt.v v16, v16
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: bltu a0, a1, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfsqrt.v v8, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll
index 5729dc4875ae..e40427a305f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll
@@ -1,12 +1,258 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfsub_vv_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfsub_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %vc = call <vscale x 1 x bfloat> @llvm.experimental.constrained.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfsub_vv_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfsub_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %vc = call <vscale x 2 x bfloat> @llvm.experimental.constrained.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfsub_vv_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v10, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfsub_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v10, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %vc = call <vscale x 4 x bfloat> @llvm.experimental.constrained.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfsub_vv_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfsub.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfsub_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfsub.vv v12, v12, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfsub_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfsub_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfsub.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = call <vscale x 8 x bfloat> @llvm.experimental.constrained.fsub.nxv8bf16(<vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfsub_vv_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v24, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfsub_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %vc = call <vscale x 16 x bfloat> @llvm.experimental.constrained.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) strictfp {
+; CHECK-LABEL: vfsub_vv_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v24, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+entry:
+ %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 32 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) strictfp {
+; CHECK-LABEL: vfsub_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v24, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %vc = call <vscale x 32 x bfloat> @llvm.experimental.constrained.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+ ret <vscale x 32 x bfloat> %vc
+}
declare <vscale x 1 x half> @llvm.experimental.constrained.fsub.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata)
define <vscale x 1 x half> @vfsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) strictfp {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll
index bd73398fd04b..e56cfd9ee4eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll
@@ -1,12 +1,252 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) {
+; CHECK-LABEL: vfsub_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %vc = fsub <vscale x 1 x bfloat> %va, %vb
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfsub_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %vc = fsub <vscale x 1 x bfloat> %va, %splat
+ ret <vscale x 1 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) {
+; CHECK-LABEL: vfsub_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v9, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %vc = fsub <vscale x 2 x bfloat> %va, %vb
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfsub_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %vc = fsub <vscale x 2 x bfloat> %va, %splat
+ ret <vscale x 2 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) {
+; CHECK-LABEL: vfsub_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v10, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %vc = fsub <vscale x 4 x bfloat> %va, %vb
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfsub_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v10, v10, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %vc = fsub <vscale x 4 x bfloat> %va, %splat
+ ret <vscale x 4 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
+; CHECK-LABEL: vfsub_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfsub.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %vc = fsub <vscale x 8 x bfloat> %va, %vb
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfsub_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfsub.vv v12, v12, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fsub <vscale x 8 x bfloat> %va, %splat
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 8 x bfloat> @vfsub_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfsub_fv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfsub.vv v12, v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %vc = fsub <vscale x 8 x bfloat> %splat, %va
+ ret <vscale x 8 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) {
+; CHECK-LABEL: vfsub_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v24, v16
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %vc = fsub <vscale x 16 x bfloat> %va, %vb
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfsub_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %vc = fsub <vscale x 16 x bfloat> %va, %splat
+ ret <vscale x 16 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) {
+; CHECK-LABEL: vfsub_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v24, v0, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v16, v24
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %vc = fsub <vscale x 32 x bfloat> %va, %vb
+ ret <vscale x 32 x bfloat> %vc
+}
+
+define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) {
+; CHECK-LABEL: vfsub_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v24, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v20
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v24, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %vc = fsub <vscale x 32 x bfloat> %va, %splat
+ ret <vscale x 32 x bfloat> %vc
+}
define <vscale x 1 x half> @vfsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) {
; ZVFH-LABEL: vfsub_vv_nxv1f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
index fda6d0c48d4a..449130e59876 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
@@ -1,13 +1,622 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+declare <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vv_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfsub_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vv_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v9, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vf_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v10, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfsub_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vf_nxv1bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v10, v8
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fsub.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vv_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v9, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfsub_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vv_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v9, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vf_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v10, v8, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfsub_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vf_nxv2bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfsub.vv v9, v10, v8
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fsub.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vv_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v10, v12, v10, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfsub_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vv_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v10, v12, v10
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vf_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v10, v10, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfsub_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vf_nxv4bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v10, v10, v12
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fsub.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vv_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfsub.vv v12, v16, v12, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfsub_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vv_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfsub.vv v12, v16, v12
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vf_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfsub.vv v12, v12, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfsub_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vf_nxv8bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfsub.vv v12, v12, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fsub.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vv_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfsub_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vv_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v24, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vf_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfsub_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vf_nxv16bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v12, a1
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v16, v24
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fsub.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vv_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB20_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB20_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vv_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v24, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB21_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB21_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v24, v16
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vf_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; CHECK-NEXT: vmv8r.v v24, v8
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a4, a2, 3
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v8, v16, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB22_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v16, v24, v0.t
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfsub_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfsub_vf_nxv32bf16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a1, a2, 1
+; CHECK-NEXT: sub a3, a0, a1
+; CHECK-NEXT: sltu a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: srli a2, a2, 2
+; CHECK-NEXT: vmset.m v24
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v24, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: bltu a0, a1, .LBB23_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfsub.vv v16, v16, v24
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
+ %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fsub.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vfsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -514,10 +1123,10 @@ define <vscale x 32 x half> @vfsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB20_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB44_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB20_2:
+; ZVFHMIN-NEXT: .LBB44_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -571,10 +1180,10 @@ define <vscale x 32 x half> @vfsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB21_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB45_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB21_2:
+; ZVFHMIN-NEXT: .LBB45_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -649,10 +1258,10 @@ define <vscale x 32 x half> @vfsub_vf_nxv32f16(<vscale x 32 x half> %va, half %b
; ZVFHMIN-NEXT: vfsub.vv v16, v8, v16, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB46_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB22_2:
+; ZVFHMIN-NEXT: .LBB46_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
@@ -730,10 +1339,10 @@ define <vscale x 32 x half> @vfsub_vf_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
+; ZVFHMIN-NEXT: bltu a0, a1, .LBB47_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
-; ZVFHMIN-NEXT: .LBB23_2:
+; ZVFHMIN-NEXT: .LBB47_2:
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
index 7d78fa5a8f3e..0f8e74942d58 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
@@ -1,12 +1,288 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+
+declare bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define bfloat @vpreduce_fadd_nxv1bf16(bfloat %s, <vscale x 1 x bfloat> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fadd_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfredusum.vs v8, v9, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %r = call reassoc bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat %s, <vscale x 1 x bfloat> %v, <vscale x 1 x i1> %m, i32 %evl)
+ ret bfloat %r
+}
+
+define bfloat @vpreduce_ord_fadd_nxv1bf16(bfloat %s, <vscale x 1 x bfloat> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_ord_fadd_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfredosum.vs v8, v9, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %r = call bfloat @llvm.vp.reduce.fadd.nxv1bf16(bfloat %s, <vscale x 1 x bfloat> %v, <vscale x 1 x i1> %m, i32 %evl)
+ ret bfloat %r
+}
+
+declare bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define bfloat @vpreduce_fadd_nxv2bf16(bfloat %s, <vscale x 2 x bfloat> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fadd_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfredusum.vs v8, v9, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %r = call reassoc bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat %s, <vscale x 2 x bfloat> %v, <vscale x 2 x i1> %m, i32 %evl)
+ ret bfloat %r
+}
+
+define bfloat @vpreduce_ord_fadd_nxv2bf16(bfloat %s, <vscale x 2 x bfloat> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_ord_fadd_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfredosum.vs v8, v9, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %r = call bfloat @llvm.vp.reduce.fadd.nxv2bf16(bfloat %s, <vscale x 2 x bfloat> %v, <vscale x 2 x i1> %m, i32 %evl)
+ ret bfloat %r
+}
+
+declare bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define bfloat @vpreduce_fadd_nxv4bf16(bfloat %s, <vscale x 4 x bfloat> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fadd_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredusum.vs v8, v10, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %r = call reassoc bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat %s, <vscale x 4 x bfloat> %v, <vscale x 4 x i1> %m, i32 %evl)
+ ret bfloat %r
+}
+
+define bfloat @vpreduce_ord_fadd_nxv4bf16(bfloat %s, <vscale x 4 x bfloat> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_ord_fadd_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredosum.vs v8, v10, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %r = call bfloat @llvm.vp.reduce.fadd.nxv4bf16(bfloat %s, <vscale x 4 x bfloat> %v, <vscale x 4 x i1> %m, i32 %evl)
+ ret bfloat %r
+}
+
+declare bfloat @llvm.vp.reduce.fadd.nxv64bf16(bfloat, <vscale x 64 x bfloat>, <vscale x 64 x i1>, i32)
+
+define bfloat @vpreduce_fadd_nxv64bf16(bfloat %s, <vscale x 64 x bfloat> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fadd_nxv64bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: srli a1, a3, 1
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v7, v0, a1
+; CHECK-NEXT: slli a5, a3, 2
+; CHECK-NEXT: sub a1, a0, a5
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: sub a2, a1, a4
+; CHECK-NEXT: sltu a6, a1, a2
+; CHECK-NEXT: bltu a1, a4, .LBB6_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a4
+; CHECK-NEXT: .LBB6_2:
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: bltu a0, a5, .LBB6_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a0, a5
+; CHECK-NEXT: .LBB6_4:
+; CHECK-NEXT: and a2, a6, a2
+; CHECK-NEXT: sub a5, a0, a4
+; CHECK-NEXT: sltu a6, a0, a5
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a5, a6, a5
+; CHECK-NEXT: srli a3, a3, 2
+; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v6, v0, a3
+; CHECK-NEXT: bltu a0, a4, .LBB6_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: mv a0, a4
+; CHECK-NEXT: .LBB6_6:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfredusum.vs v8, v24, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa5, fa5
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
+; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a5, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v6
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfredusum.vs v8, v24, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa5, fa5
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
+; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfredusum.vs v8, v24, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa5, fa5
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
+; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a3
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfredusum.vs v8, v24, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %r = call reassoc bfloat @llvm.vp.reduce.fadd.nxv64bf16(bfloat %s, <vscale x 64 x bfloat> %v, <vscale x 64 x i1> %m, i32 %evl)
+ ret bfloat %r
+}
+
+define bfloat @vpreduce_ord_fadd_nxv64bf16(bfloat %s, <vscale x 64 x bfloat> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_ord_fadd_nxv64bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: srli a1, a3, 1
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v7, v0, a1
+; CHECK-NEXT: slli a5, a3, 2
+; CHECK-NEXT: sub a1, a0, a5
+; CHECK-NEXT: sltu a2, a0, a1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: and a1, a2, a1
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: sub a2, a1, a4
+; CHECK-NEXT: sltu a6, a1, a2
+; CHECK-NEXT: bltu a1, a4, .LBB7_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a4
+; CHECK-NEXT: .LBB7_2:
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: bltu a0, a5, .LBB7_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a0, a5
+; CHECK-NEXT: .LBB7_4:
+; CHECK-NEXT: and a2, a6, a2
+; CHECK-NEXT: sub a5, a0, a4
+; CHECK-NEXT: sltu a6, a0, a5
+; CHECK-NEXT: addi a6, a6, -1
+; CHECK-NEXT: and a5, a6, a5
+; CHECK-NEXT: srli a3, a3, 2
+; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v6, v0, a3
+; CHECK-NEXT: bltu a0, a4, .LBB7_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: mv a0, a4
+; CHECK-NEXT: .LBB7_6:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfredosum.vs v8, v24, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa5, fa5
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
+; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a5, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
+; CHECK-NEXT: vmv1r.v v0, v6
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfredosum.vs v8, v24, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa5, fa5
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
+; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfredosum.vs v8, v24, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa5, fa5
+; CHECK-NEXT: fcvt.s.bf16 fa5, fa5
+; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa5
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v7, a3
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfredosum.vs v8, v24, v8, v0.t
+; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: fcvt.bf16.s fa0, fa5
+; CHECK-NEXT: ret
+ %r = call bfloat @llvm.vp.reduce.fadd.nxv64bf16(bfloat %s, <vscale x 64 x bfloat> %v, <vscale x 64 x i1> %m, i32 %evl)
+ ret bfloat %r
+}
declare half @llvm.vp.reduce.fadd.nxv1f16(half, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
@@ -184,10 +460,10 @@ define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x
; ZVFH-NEXT: sltu a3, a0, a1
; ZVFH-NEXT: addi a3, a3, -1
; ZVFH-NEXT: and a1, a3, a1
-; ZVFH-NEXT: bltu a0, a2, .LBB6_2
+; ZVFH-NEXT: bltu a0, a2, .LBB14_2
; ZVFH-NEXT: # %bb.1:
; ZVFH-NEXT: mv a0, a2
-; ZVFH-NEXT: .LBB6_2:
+; ZVFH-NEXT: .LBB14_2:
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: vfmv.s.f v25, fa0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
@@ -212,15 +488,15 @@ define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x
; ZVFHMIN-NEXT: slli a4, a3, 1
; ZVFHMIN-NEXT: sub a2, a1, a4
; ZVFHMIN-NEXT: sltu a6, a1, a2
-; ZVFHMIN-NEXT: bltu a1, a4, .LBB6_2
+; ZVFHMIN-NEXT: bltu a1, a4, .LBB14_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a4
-; ZVFHMIN-NEXT: .LBB6_2:
+; ZVFHMIN-NEXT: .LBB14_2:
; ZVFHMIN-NEXT: addi a6, a6, -1
-; ZVFHMIN-NEXT: bltu a0, a5, .LBB6_4
+; ZVFHMIN-NEXT: bltu a0, a5, .LBB14_4
; ZVFHMIN-NEXT: # %bb.3:
; ZVFHMIN-NEXT: mv a0, a5
-; ZVFHMIN-NEXT: .LBB6_4:
+; ZVFHMIN-NEXT: .LBB14_4:
; ZVFHMIN-NEXT: and a2, a6, a2
; ZVFHMIN-NEXT: sub a5, a0, a4
; ZVFHMIN-NEXT: sltu a6, a0, a5
@@ -229,10 +505,10 @@ define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x
; ZVFHMIN-NEXT: srli a3, a3, 2
; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3
-; ZVFHMIN-NEXT: bltu a0, a4, .LBB6_6
+; ZVFHMIN-NEXT: bltu a0, a4, .LBB14_6
; ZVFHMIN-NEXT: # %bb.5:
; ZVFHMIN-NEXT: mv a0, a4
-; ZVFHMIN-NEXT: .LBB6_6:
+; ZVFHMIN-NEXT: .LBB14_6:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
@@ -290,10 +566,10 @@ define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscal
; ZVFH-NEXT: sltu a3, a0, a1
; ZVFH-NEXT: addi a3, a3, -1
; ZVFH-NEXT: and a1, a3, a1
-; ZVFH-NEXT: bltu a0, a2, .LBB7_2
+; ZVFH-NEXT: bltu a0, a2, .LBB15_2
; ZVFH-NEXT: # %bb.1:
; ZVFH-NEXT: mv a0, a2
-; ZVFH-NEXT: .LBB7_2:
+; ZVFH-NEXT: .LBB15_2:
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: vfmv.s.f v25, fa0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
@@ -318,15 +594,15 @@ define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscal
; ZVFHMIN-NEXT: slli a4, a3, 1
; ZVFHMIN-NEXT: sub a2, a1, a4
; ZVFHMIN-NEXT: sltu a6, a1, a2
-; ZVFHMIN-NEXT: bltu a1, a4, .LBB7_2
+; ZVFHMIN-NEXT: bltu a1, a4, .LBB15_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a4
-; ZVFHMIN-NEXT: .LBB7_2:
+; ZVFHMIN-NEXT: .LBB15_2:
; ZVFHMIN-NEXT: addi a6, a6, -1
-; ZVFHMIN-NEXT: bltu a0, a5, .LBB7_4
+; ZVFHMIN-NEXT: bltu a0, a5, .LBB15_4
; ZVFHMIN-NEXT: # %bb.3:
; ZVFHMIN-NEXT: mv a0, a5
-; ZVFHMIN-NEXT: .LBB7_4:
+; ZVFHMIN-NEXT: .LBB15_4:
; ZVFHMIN-NEXT: and a2, a6, a2
; ZVFHMIN-NEXT: sub a5, a0, a4
; ZVFHMIN-NEXT: sltu a6, a0, a5
@@ -335,10 +611,10 @@ define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscal
; ZVFHMIN-NEXT: srli a3, a3, 2
; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3
-; ZVFHMIN-NEXT: bltu a0, a4, .LBB7_6
+; ZVFHMIN-NEXT: bltu a0, a4, .LBB15_6
; ZVFHMIN-NEXT: # %bb.5:
; ZVFHMIN-NEXT: mv a0, a4
-; ZVFHMIN-NEXT: .LBB7_6:
+; ZVFHMIN-NEXT: .LBB15_6:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
@@ -592,12 +868,12 @@ define float @vreduce_fminimum_nxv4f32(float %start, <vscale x 4 x float> %val,
; CHECK-NEXT: feq.s a1, fa0, fa0
; CHECK-NEXT: xori a1, a1, 1
; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: beqz a0, .LBB22_2
+; CHECK-NEXT: beqz a0, .LBB30_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lui a0, 523264
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: .LBB30_2:
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
%s = call float @llvm.vp.reduce.fminimum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
@@ -616,12 +892,12 @@ define float @vreduce_fmaximum_nxv4f32(float %start, <vscale x 4 x float> %val,
; CHECK-NEXT: feq.s a1, fa0, fa0
; CHECK-NEXT: xori a1, a1, 1
; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: beqz a0, .LBB23_2
+; CHECK-NEXT: beqz a0, .LBB31_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lui a0, 523264
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: .LBB31_2:
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
%s = call float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
@@ -666,12 +942,12 @@ define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m
; CHECK-NEXT: feq.s a1, fa0, fa0
; CHECK-NEXT: xori a1, a1, 1
; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: beqz a0, .LBB26_2
+; CHECK-NEXT: beqz a0, .LBB34_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lui a0, 523264
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: .LBB34_2:
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
%s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
@@ -690,12 +966,12 @@ define float @vreduce_fmaximum_v4f32(float %start, <4 x float> %val, <4 x i1> %m
; CHECK-NEXT: feq.s a1, fa0, fa0
; CHECK-NEXT: xori a1, a1, 1
; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: beqz a0, .LBB27_2
+; CHECK-NEXT: beqz a0, .LBB35_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lui a0, 523264
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: .LBB35_2:
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
%s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll
new file mode 100644
index 000000000000..57ec0bda2e18
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll
@@ -0,0 +1,21 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: OpMemoryModel Logical GLSL450
+
+define noundef i32 @countbits_i32(i32 noundef %a) {
+entry:
+; CHECK: %[[#]] = OpBitCount %[[#]] %[[#]]
+ %elt.bitreverse = call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %elt.bitreverse
+}
+
+define noundef i16 @countbits_i16(i16 noundef %a) {
+entry:
+; CHECK: %[[#]] = OpBitCount %[[#]] %[[#]]
+ %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+ ret i16 %elt.ctpop
+}
+
+declare i16 @llvm.ctpop.i16(i16)
+declare i32 @llvm.ctpop.i32(i32)
diff --git a/llvm/test/CodeGen/WebAssembly/exception.ll b/llvm/test/CodeGen/WebAssembly/exception.ll
index 7259761d6313..1ad4c84f1c02 100644
--- a/llvm/test/CodeGen/WebAssembly/exception.ll
+++ b/llvm/test/CodeGen/WebAssembly/exception.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -asm-verbose=false -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-enable-exnref -verify-machineinstrs | FileCheck --implicit-check-not=ehgcr -allow-deprecated-dag-overlap %s
; RUN: llc < %s -asm-verbose=false -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-enable-exnref -verify-machineinstrs -O0
; RUN: llc < %s -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-enable-exnref
+; RUN: llc < %s -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-enable-exnref -filetype=obj
target triple = "wasm32-unknown-unknown"
diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
new file mode 100644
index 000000000000..a7ca23792e6f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC
+
+define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi32:
+; AVX102: # %bb.0:
+; AVX102-NEXT: vmovd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xc0]
+; AVX102-NEXT: retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi32:
+; NOAVX512MOVZXC: # %bb.0:
+; NOAVX512MOVZXC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
+; NOAVX512MOVZXC-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
+; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
+ %res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ ret <4 x i32> %res
+}
+
+define <8 x i16> @test_mm_move_epi16(<8 x i16> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi16:
+; AVX102: # %bb.0:
+; AVX102-NEXT: vmovw %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xc0]
+; AVX102-NEXT: retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi16:
+; NOAVX512MOVZXC: # %bb.0:
+; NOAVX512MOVZXC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; NOAVX512MOVZXC-NEXT: vpblendw $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0e,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
+ %res = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <8 x i16> %res
+}
diff --git a/llvm/test/CodeGen/X86/comi-flags.ll b/llvm/test/CodeGen/X86/comi-flags.ll
index 8b7a089f0ce8..6f520aa57dcd 100644
--- a/llvm/test/CodeGen/X86/comi-flags.ll
+++ b/llvm/test/CodeGen/X86/comi-flags.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,NO-AVX10_2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX,AVX10_2
;
; SSE
@@ -17,15 +18,22 @@ define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i3
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_comieq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_comieq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_comieq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -126,13 +134,20 @@ define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_comineq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_comineq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_comineq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -151,15 +166,22 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_ucomieq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vucomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_ucomieq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vucomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_ucomieq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -260,13 +282,20 @@ define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2,
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_ucomineq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vucomiss %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_ucomineq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vucomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_ucomineq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -289,15 +318,22 @@ define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2,
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_comieq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vcomisd %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_comieq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vcomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_comieq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -398,13 +434,20 @@ define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_comineq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vcomisd %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_comineq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vcomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_comineq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -423,15 +466,22 @@ define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_ucomieq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_ucomieq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vucomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_ucomieq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -532,13 +582,20 @@ define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_ucomineq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_ucomineq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vucomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_ucomineq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -557,15 +614,22 @@ define void @PR38960_eq(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: # %bb.1: # %if.end
; SSE-NEXT: retq
;
-; AVX-LABEL: PR38960_eq:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %al
-; AVX-NEXT: sete %cl
-; AVX-NEXT: testb %al, %cl
-; AVX-NEXT: jne foo@PLT # TAILCALL
-; AVX-NEXT: # %bb.1: # %if.end
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: PR38960_eq:
+; NO-AVX10_2: # %bb.0: # %entry
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %al
+; NO-AVX10_2-NEXT: sete %cl
+; NO-AVX10_2-NEXT: testb %al, %cl
+; NO-AVX10_2-NEXT: jne foo@PLT # TAILCALL
+; NO-AVX10_2-NEXT: # %bb.1: # %if.end
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: PR38960_eq:
+; AVX10_2: # %bb.0: # %entry
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: je foo@PLT # TAILCALL
+; AVX10_2-NEXT: # %bb.1: # %if.end
+; AVX10_2-NEXT: retq
entry:
%call = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %A, <4 x float> %B) #3
%cmp = icmp eq i32 %call, 0
@@ -590,15 +654,22 @@ define void @PR38960_neq(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: # %bb.1: # %if.end
; SSE-NEXT: retq
;
-; AVX-LABEL: PR38960_neq:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: setp %al
-; AVX-NEXT: setne %cl
-; AVX-NEXT: orb %al, %cl
-; AVX-NEXT: jne foo@PLT # TAILCALL
-; AVX-NEXT: # %bb.1: # %if.end
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: PR38960_neq:
+; NO-AVX10_2: # %bb.0: # %entry
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setp %al
+; NO-AVX10_2-NEXT: setne %cl
+; NO-AVX10_2-NEXT: orb %al, %cl
+; NO-AVX10_2-NEXT: jne foo@PLT # TAILCALL
+; NO-AVX10_2-NEXT: # %bb.1: # %if.end
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: PR38960_neq:
+; AVX10_2: # %bb.0: # %entry
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: jne foo@PLT # TAILCALL
+; AVX10_2-NEXT: # %bb.1: # %if.end
+; AVX10_2-NEXT: retq
entry:
%call = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %A, <4 x float> %B) #3
%cmp = icmp eq i32 %call, 0
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll
index ed316990e486..f616eafc2427 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll
@@ -240,21 +240,17 @@ define void @load_i32_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512-FCP-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512-FCP-NEXT: vmovdqa (%rdi), %ymm6
; AVX512-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm0
-; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm7 = [13,4,6,7,13,4,6,7]
-; AVX512-FCP-NEXT: # ymm7 = mem[0,1,0,1]
+; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm7 = [13,4,6,7]
; AVX512-FCP-NEXT: vpermi2d %ymm6, %ymm1, %ymm7
-; AVX512-FCP-NEXT: vextracti128 $1, %ymm7, %xmm7
-; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm8 = [6,13,6,7,6,13,6,7]
-; AVX512-FCP-NEXT: # ymm8 = mem[0,1,0,1]
+; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm8 = [6,13,6,7]
; AVX512-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm8
-; AVX512-FCP-NEXT: vextracti128 $1, %ymm8, %xmm1
; AVX512-FCP-NEXT: vmovq %xmm2, (%rsi)
; AVX512-FCP-NEXT: vmovq %xmm3, (%rdx)
; AVX512-FCP-NEXT: vmovq %xmm4, (%rcx)
; AVX512-FCP-NEXT: vmovq %xmm5, (%r8)
; AVX512-FCP-NEXT: vmovq %xmm0, (%r9)
; AVX512-FCP-NEXT: vmovq %xmm7, (%r10)
-; AVX512-FCP-NEXT: vmovq %xmm1, (%rax)
+; AVX512-FCP-NEXT: vmovq %xmm8, (%rax)
; AVX512-FCP-NEXT: vzeroupper
; AVX512-FCP-NEXT: retq
;
@@ -309,21 +305,17 @@ define void @load_i32_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512DQ-FCP-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512DQ-FCP-NEXT: vmovdqa (%rdi), %ymm6
; AVX512DQ-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm0
-; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm7 = [13,4,6,7,13,4,6,7]
-; AVX512DQ-FCP-NEXT: # ymm7 = mem[0,1,0,1]
+; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm7 = [13,4,6,7]
; AVX512DQ-FCP-NEXT: vpermi2d %ymm6, %ymm1, %ymm7
-; AVX512DQ-FCP-NEXT: vextracti128 $1, %ymm7, %xmm7
-; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm8 = [6,13,6,7,6,13,6,7]
-; AVX512DQ-FCP-NEXT: # ymm8 = mem[0,1,0,1]
+; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm8 = [6,13,6,7]
; AVX512DQ-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm8
-; AVX512DQ-FCP-NEXT: vextracti128 $1, %ymm8, %xmm1
; AVX512DQ-FCP-NEXT: vmovq %xmm2, (%rsi)
; AVX512DQ-FCP-NEXT: vmovq %xmm3, (%rdx)
; AVX512DQ-FCP-NEXT: vmovq %xmm4, (%rcx)
; AVX512DQ-FCP-NEXT: vmovq %xmm5, (%r8)
; AVX512DQ-FCP-NEXT: vmovq %xmm0, (%r9)
; AVX512DQ-FCP-NEXT: vmovq %xmm7, (%r10)
-; AVX512DQ-FCP-NEXT: vmovq %xmm1, (%rax)
+; AVX512DQ-FCP-NEXT: vmovq %xmm8, (%rax)
; AVX512DQ-FCP-NEXT: vzeroupper
; AVX512DQ-FCP-NEXT: retq
;
@@ -378,21 +370,17 @@ define void @load_i32_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512BW-FCP-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %ymm6
; AVX512BW-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm0
-; AVX512BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm7 = [13,4,6,7,13,4,6,7]
-; AVX512BW-FCP-NEXT: # ymm7 = mem[0,1,0,1]
+; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm7 = [13,4,6,7]
; AVX512BW-FCP-NEXT: vpermi2d %ymm6, %ymm1, %ymm7
-; AVX512BW-FCP-NEXT: vextracti128 $1, %ymm7, %xmm7
-; AVX512BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm8 = [6,13,6,7,6,13,6,7]
-; AVX512BW-FCP-NEXT: # ymm8 = mem[0,1,0,1]
+; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm8 = [6,13,6,7]
; AVX512BW-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm8
-; AVX512BW-FCP-NEXT: vextracti128 $1, %ymm8, %xmm1
; AVX512BW-FCP-NEXT: vmovq %xmm2, (%rsi)
; AVX512BW-FCP-NEXT: vmovq %xmm3, (%rdx)
; AVX512BW-FCP-NEXT: vmovq %xmm4, (%rcx)
; AVX512BW-FCP-NEXT: vmovq %xmm5, (%r8)
; AVX512BW-FCP-NEXT: vmovq %xmm0, (%r9)
; AVX512BW-FCP-NEXT: vmovq %xmm7, (%r10)
-; AVX512BW-FCP-NEXT: vmovq %xmm1, (%rax)
+; AVX512BW-FCP-NEXT: vmovq %xmm8, (%rax)
; AVX512BW-FCP-NEXT: vzeroupper
; AVX512BW-FCP-NEXT: retq
;
@@ -447,21 +435,17 @@ define void @load_i32_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512DQ-BW-FCP-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %ymm6
; AVX512DQ-BW-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm0
-; AVX512DQ-BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm7 = [13,4,6,7,13,4,6,7]
-; AVX512DQ-BW-FCP-NEXT: # ymm7 = mem[0,1,0,1]
+; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm7 = [13,4,6,7]
; AVX512DQ-BW-FCP-NEXT: vpermi2d %ymm6, %ymm1, %ymm7
-; AVX512DQ-BW-FCP-NEXT: vextracti128 $1, %ymm7, %xmm7
-; AVX512DQ-BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm8 = [6,13,6,7,6,13,6,7]
-; AVX512DQ-BW-FCP-NEXT: # ymm8 = mem[0,1,0,1]
+; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm8 = [6,13,6,7]
; AVX512DQ-BW-FCP-NEXT: vpermi2d %ymm1, %ymm6, %ymm8
-; AVX512DQ-BW-FCP-NEXT: vextracti128 $1, %ymm8, %xmm1
; AVX512DQ-BW-FCP-NEXT: vmovq %xmm2, (%rsi)
; AVX512DQ-BW-FCP-NEXT: vmovq %xmm3, (%rdx)
; AVX512DQ-BW-FCP-NEXT: vmovq %xmm4, (%rcx)
; AVX512DQ-BW-FCP-NEXT: vmovq %xmm5, (%r8)
; AVX512DQ-BW-FCP-NEXT: vmovq %xmm0, (%r9)
; AVX512DQ-BW-FCP-NEXT: vmovq %xmm7, (%r10)
-; AVX512DQ-BW-FCP-NEXT: vmovq %xmm1, (%rax)
+; AVX512DQ-BW-FCP-NEXT: vmovq %xmm8, (%rax)
; AVX512DQ-BW-FCP-NEXT: vzeroupper
; AVX512DQ-BW-FCP-NEXT: retq
%wide.vec = load <14 x i32>, ptr %in.vec, align 64
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll
index 2fd173c72917..872a8d00cc23 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll
@@ -226,10 +226,8 @@ define void @load_i32_stride8_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512-FCP-NEXT: vmovdqa (%rdi), %ymm4
; AVX512-FCP-NEXT: vpunpckldq {{.*#+}} ymm5 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5]
; AVX512-FCP-NEXT: vextracti128 $1, %ymm5, %xmm5
-; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [5,13,5,5,5,13,5,5]
-; AVX512-FCP-NEXT: # ymm6 = mem[0,1,0,1]
+; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} xmm6 = [5,13,5,5]
; AVX512-FCP-NEXT: vpermi2d %ymm1, %ymm4, %ymm6
-; AVX512-FCP-NEXT: vextracti128 $1, %ymm6, %xmm6
; AVX512-FCP-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7]
; AVX512-FCP-NEXT: vextracti128 $1, %ymm1, %xmm4
; AVX512-FCP-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
@@ -293,10 +291,8 @@ define void @load_i32_stride8_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512DQ-FCP-NEXT: vmovdqa (%rdi), %ymm4
; AVX512DQ-FCP-NEXT: vpunpckldq {{.*#+}} ymm5 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5]
; AVX512DQ-FCP-NEXT: vextracti128 $1, %ymm5, %xmm5
-; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [5,13,5,5,5,13,5,5]
-; AVX512DQ-FCP-NEXT: # ymm6 = mem[0,1,0,1]
+; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} xmm6 = [5,13,5,5]
; AVX512DQ-FCP-NEXT: vpermi2d %ymm1, %ymm4, %ymm6
-; AVX512DQ-FCP-NEXT: vextracti128 $1, %ymm6, %xmm6
; AVX512DQ-FCP-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7]
; AVX512DQ-FCP-NEXT: vextracti128 $1, %ymm1, %xmm4
; AVX512DQ-FCP-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
@@ -360,10 +356,8 @@ define void @load_i32_stride8_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %ymm4
; AVX512BW-FCP-NEXT: vpunpckldq {{.*#+}} ymm5 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5]
; AVX512BW-FCP-NEXT: vextracti128 $1, %ymm5, %xmm5
-; AVX512BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [5,13,5,5,5,13,5,5]
-; AVX512BW-FCP-NEXT: # ymm6 = mem[0,1,0,1]
+; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm6 = [5,13,5,5]
; AVX512BW-FCP-NEXT: vpermi2d %ymm1, %ymm4, %ymm6
-; AVX512BW-FCP-NEXT: vextracti128 $1, %ymm6, %xmm6
; AVX512BW-FCP-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7]
; AVX512BW-FCP-NEXT: vextracti128 $1, %ymm1, %xmm4
; AVX512BW-FCP-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
@@ -427,10 +421,8 @@ define void @load_i32_stride8_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %ymm4
; AVX512DQ-BW-FCP-NEXT: vpunpckldq {{.*#+}} ymm5 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[4],ymm1[4],ymm4[5],ymm1[5]
; AVX512DQ-BW-FCP-NEXT: vextracti128 $1, %ymm5, %xmm5
-; AVX512DQ-BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [5,13,5,5,5,13,5,5]
-; AVX512DQ-BW-FCP-NEXT: # ymm6 = mem[0,1,0,1]
+; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} xmm6 = [5,13,5,5]
; AVX512DQ-BW-FCP-NEXT: vpermi2d %ymm1, %ymm4, %ymm6
-; AVX512DQ-BW-FCP-NEXT: vextracti128 $1, %ymm6, %xmm6
; AVX512DQ-BW-FCP-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7]
; AVX512DQ-BW-FCP-NEXT: vextracti128 $1, %ymm1, %xmm4
; AVX512DQ-BW-FCP-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
diff --git a/llvm/test/Demangle/ms-placeholder-return-type.test b/llvm/test/Demangle/ms-placeholder-return-type.test
new file mode 100644
index 000000000000..18038e636c8d
--- /dev/null
+++ b/llvm/test/Demangle/ms-placeholder-return-type.test
@@ -0,0 +1,18 @@
+; RUN: llvm-undname < %s | FileCheck %s
+
+; CHECK-NOT: Invalid mangled name
+
+?TestNonTemplateAuto@@YA@XZ
+; CHECK: __cdecl TestNonTemplateAuto(void)
+
+??$AutoT@X@@YA?A_PXZ
+; CHECK: auto __cdecl AutoT<void>(void)
+
+??$AutoT@X@@YA?B_PXZ
+; CHECK: auto const __cdecl AutoT<void>(void)
+
+??$AutoT@X@@YA?A_TXZ
+; CHECK: decltype(auto) __cdecl AutoT<void>(void)
+
+??$AutoT@X@@YA?B_TXZ
+; CHECK: decltype(auto) const __cdecl AutoT<void>(void)
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-32.txt
new file mode 100644
index 000000000000..e7adacbbf88c
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-32.txt
@@ -0,0 +1,195 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vcomxsd %xmm3, %xmm2
+# INTEL: vcomxsd xmm2, xmm3
+0x62,0xf1,0xfe,0x08,0x2f,0xd3
+
+# ATT: vcomxsd {sae}, %xmm3, %xmm2
+# INTEL: vcomxsd xmm2, xmm3, {sae}
+0x62,0xf1,0xfe,0x18,0x2f,0xd3
+
+# ATT: vcomxsd 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcomxsd xmm2, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf1,0xfe,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcomxsd 291(%edi,%eax,4), %xmm2
+# INTEL: vcomxsd xmm2, qword ptr [edi + 4*eax + 291]
+0x62,0xf1,0xfe,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcomxsd (%eax), %xmm2
+# INTEL: vcomxsd xmm2, qword ptr [eax]
+0x62,0xf1,0xfe,0x08,0x2f,0x10
+
+# ATT: vcomxsd -256(,%ebp,2), %xmm2
+# INTEL: vcomxsd xmm2, qword ptr [2*ebp - 256]
+0x62,0xf1,0xfe,0x08,0x2f,0x14,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vcomxsd 1016(%ecx), %xmm2
+# INTEL: vcomxsd xmm2, qword ptr [ecx + 1016]
+0x62,0xf1,0xfe,0x08,0x2f,0x51,0x7f
+
+# ATT: vcomxsd -1024(%edx), %xmm2
+# INTEL: vcomxsd xmm2, qword ptr [edx - 1024]
+0x62,0xf1,0xfe,0x08,0x2f,0x52,0x80
+
+# ATT: vcomxsh %xmm3, %xmm2
+# INTEL: vcomxsh xmm2, xmm3
+0x62,0xf5,0x7f,0x08,0x2f,0xd3
+
+# ATT: vcomxsh {sae}, %xmm3, %xmm2
+# INTEL: vcomxsh xmm2, xmm3, {sae}
+0x62,0xf5,0x7f,0x18,0x2f,0xd3
+
+# ATT: vcomxsh 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcomxsh xmm2, word ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcomxsh 291(%edi,%eax,4), %xmm2
+# INTEL: vcomxsh xmm2, word ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcomxsh (%eax), %xmm2
+# INTEL: vcomxsh xmm2, word ptr [eax]
+0x62,0xf5,0x7f,0x08,0x2f,0x10
+
+# ATT: vcomxsh -64(,%ebp,2), %xmm2
+# INTEL: vcomxsh xmm2, word ptr [2*ebp - 64]
+0x62,0xf5,0x7f,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT: vcomxsh 254(%ecx), %xmm2
+# INTEL: vcomxsh xmm2, word ptr [ecx + 254]
+0x62,0xf5,0x7f,0x08,0x2f,0x51,0x7f
+
+# ATT: vcomxsh -256(%edx), %xmm2
+# INTEL: vcomxsh xmm2, word ptr [edx - 256]
+0x62,0xf5,0x7f,0x08,0x2f,0x52,0x80
+
+# ATT: vcomxss %xmm3, %xmm2
+# INTEL: vcomxss xmm2, xmm3
+0x62,0xf1,0x7f,0x08,0x2f,0xd3
+
+# ATT: vcomxss {sae}, %xmm3, %xmm2
+# INTEL: vcomxss xmm2, xmm3, {sae}
+0x62,0xf1,0x7f,0x18,0x2f,0xd3
+
+# ATT: vcomxss 268435456(%esp,%esi,8), %xmm2
+# INTEL: vcomxss xmm2, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf1,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vcomxss 291(%edi,%eax,4), %xmm2
+# INTEL: vcomxss xmm2, dword ptr [edi + 4*eax + 291]
+0x62,0xf1,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vcomxss (%eax), %xmm2
+# INTEL: vcomxss xmm2, dword ptr [eax]
+0x62,0xf1,0x7f,0x08,0x2f,0x10
+
+# ATT: vcomxss -128(,%ebp,2), %xmm2
+# INTEL: vcomxss xmm2, dword ptr [2*ebp - 128]
+0x62,0xf1,0x7f,0x08,0x2f,0x14,0x6d,0x80,0xff,0xff,0xff
+
+# ATT: vcomxss 508(%ecx), %xmm2
+# INTEL: vcomxss xmm2, dword ptr [ecx + 508]
+0x62,0xf1,0x7f,0x08,0x2f,0x51,0x7f
+
+# ATT: vcomxss -512(%edx), %xmm2
+# INTEL: vcomxss xmm2, dword ptr [edx - 512]
+0x62,0xf1,0x7f,0x08,0x2f,0x52,0x80
+
+# ATT: vucomxsd %xmm3, %xmm2
+# INTEL: vucomxsd xmm2, xmm3
+0x62,0xf1,0xfe,0x08,0x2e,0xd3
+
+# ATT: vucomxsd {sae}, %xmm3, %xmm2
+# INTEL: vucomxsd xmm2, xmm3, {sae}
+0x62,0xf1,0xfe,0x18,0x2e,0xd3
+
+# ATT: vucomxsd 268435456(%esp,%esi,8), %xmm2
+# INTEL: vucomxsd xmm2, qword ptr [esp + 8*esi + 268435456]
+0x62,0xf1,0xfe,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vucomxsd 291(%edi,%eax,4), %xmm2
+# INTEL: vucomxsd xmm2, qword ptr [edi + 4*eax + 291]
+0x62,0xf1,0xfe,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vucomxsd (%eax), %xmm2
+# INTEL: vucomxsd xmm2, qword ptr [eax]
+0x62,0xf1,0xfe,0x08,0x2e,0x10
+
+# ATT: vucomxsd -256(,%ebp,2), %xmm2
+# INTEL: vucomxsd xmm2, qword ptr [2*ebp - 256]
+0x62,0xf1,0xfe,0x08,0x2e,0x14,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vucomxsd 1016(%ecx), %xmm2
+# INTEL: vucomxsd xmm2, qword ptr [ecx + 1016]
+0x62,0xf1,0xfe,0x08,0x2e,0x51,0x7f
+
+# ATT: vucomxsd -1024(%edx), %xmm2
+# INTEL: vucomxsd xmm2, qword ptr [edx - 1024]
+0x62,0xf1,0xfe,0x08,0x2e,0x52,0x80
+
+# ATT: vucomxsh %xmm3, %xmm2
+# INTEL: vucomxsh xmm2, xmm3
+0x62,0xf5,0x7f,0x08,0x2e,0xd3
+
+# ATT: vucomxsh {sae}, %xmm3, %xmm2
+# INTEL: vucomxsh xmm2, xmm3, {sae}
+0x62,0xf5,0x7f,0x18,0x2e,0xd3
+
+# ATT: vucomxsh 268435456(%esp,%esi,8), %xmm2
+# INTEL: vucomxsh xmm2, word ptr [esp + 8*esi + 268435456]
+0x62,0xf5,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vucomxsh 291(%edi,%eax,4), %xmm2
+# INTEL: vucomxsh xmm2, word ptr [edi + 4*eax + 291]
+0x62,0xf5,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vucomxsh (%eax), %xmm2
+# INTEL: vucomxsh xmm2, word ptr [eax]
+0x62,0xf5,0x7f,0x08,0x2e,0x10
+
+# ATT: vucomxsh -64(,%ebp,2), %xmm2
+# INTEL: vucomxsh xmm2, word ptr [2*ebp - 64]
+0x62,0xf5,0x7f,0x08,0x2e,0x14,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT: vucomxsh 254(%ecx), %xmm2
+# INTEL: vucomxsh xmm2, word ptr [ecx + 254]
+0x62,0xf5,0x7f,0x08,0x2e,0x51,0x7f
+
+# ATT: vucomxsh -256(%edx), %xmm2
+# INTEL: vucomxsh xmm2, word ptr [edx - 256]
+0x62,0xf5,0x7f,0x08,0x2e,0x52,0x80
+
+# ATT: vucomxss %xmm3, %xmm2
+# INTEL: vucomxss xmm2, xmm3
+0x62,0xf1,0x7f,0x08,0x2e,0xd3
+
+# ATT: vucomxss {sae}, %xmm3, %xmm2
+# INTEL: vucomxss xmm2, xmm3, {sae}
+0x62,0xf1,0x7f,0x18,0x2e,0xd3
+
+# ATT: vucomxss 268435456(%esp,%esi,8), %xmm2
+# INTEL: vucomxss xmm2, dword ptr [esp + 8*esi + 268435456]
+0x62,0xf1,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vucomxss 291(%edi,%eax,4), %xmm2
+# INTEL: vucomxss xmm2, dword ptr [edi + 4*eax + 291]
+0x62,0xf1,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vucomxss (%eax), %xmm2
+# INTEL: vucomxss xmm2, dword ptr [eax]
+0x62,0xf1,0x7f,0x08,0x2e,0x10
+
+# ATT: vucomxss -128(,%ebp,2), %xmm2
+# INTEL: vucomxss xmm2, dword ptr [2*ebp - 128]
+0x62,0xf1,0x7f,0x08,0x2e,0x14,0x6d,0x80,0xff,0xff,0xff
+
+# ATT: vucomxss 508(%ecx), %xmm2
+# INTEL: vucomxss xmm2, dword ptr [ecx + 508]
+0x62,0xf1,0x7f,0x08,0x2e,0x51,0x7f
+
+# ATT: vucomxss -512(%edx), %xmm2
+# INTEL: vucomxss xmm2, dword ptr [edx - 512]
+0x62,0xf1,0x7f,0x08,0x2e,0x52,0x80
+
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-64.txt
new file mode 100644
index 000000000000..ea580fe8d508
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-com-ef-64.txt
@@ -0,0 +1,195 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vcomxsd %xmm23, %xmm22
+# INTEL: vcomxsd xmm22, xmm23
+0x62,0xa1,0xfe,0x08,0x2f,0xf7
+
+# ATT: vcomxsd {sae}, %xmm23, %xmm22
+# INTEL: vcomxsd xmm22, xmm23, {sae}
+0x62,0xa1,0xfe,0x18,0x2f,0xf7
+
+# ATT: vcomxsd 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa1,0xfe,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcomxsd 291(%r8,%rax,4), %xmm22
+# INTEL: vcomxsd xmm22, qword ptr [r8 + 4*rax + 291]
+0x62,0xc1,0xfe,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcomxsd (%rip), %xmm22
+# INTEL: vcomxsd xmm22, qword ptr [rip]
+0x62,0xe1,0xfe,0x08,0x2f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcomxsd -256(,%rbp,2), %xmm22
+# INTEL: vcomxsd xmm22, qword ptr [2*rbp - 256]
+0x62,0xe1,0xfe,0x08,0x2f,0x34,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vcomxsd 1016(%rcx), %xmm22
+# INTEL: vcomxsd xmm22, qword ptr [rcx + 1016]
+0x62,0xe1,0xfe,0x08,0x2f,0x71,0x7f
+
+# ATT: vcomxsd -1024(%rdx), %xmm22
+# INTEL: vcomxsd xmm22, qword ptr [rdx - 1024]
+0x62,0xe1,0xfe,0x08,0x2f,0x72,0x80
+
+# ATT: vcomxsh %xmm23, %xmm22
+# INTEL: vcomxsh xmm22, xmm23
+0x62,0xa5,0x7f,0x08,0x2f,0xf7
+
+# ATT: vcomxsh {sae}, %xmm23, %xmm22
+# INTEL: vcomxsh xmm22, xmm23, {sae}
+0x62,0xa5,0x7f,0x18,0x2f,0xf7
+
+# ATT: vcomxsh 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcomxsh xmm22, word ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcomxsh 291(%r8,%rax,4), %xmm22
+# INTEL: vcomxsh xmm22, word ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcomxsh (%rip), %xmm22
+# INTEL: vcomxsh xmm22, word ptr [rip]
+0x62,0xe5,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcomxsh -64(,%rbp,2), %xmm22
+# INTEL: vcomxsh xmm22, word ptr [2*rbp - 64]
+0x62,0xe5,0x7f,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT: vcomxsh 254(%rcx), %xmm22
+# INTEL: vcomxsh xmm22, word ptr [rcx + 254]
+0x62,0xe5,0x7f,0x08,0x2f,0x71,0x7f
+
+# ATT: vcomxsh -256(%rdx), %xmm22
+# INTEL: vcomxsh xmm22, word ptr [rdx - 256]
+0x62,0xe5,0x7f,0x08,0x2f,0x72,0x80
+
+# ATT: vcomxss %xmm23, %xmm22
+# INTEL: vcomxss xmm22, xmm23
+0x62,0xa1,0x7f,0x08,0x2f,0xf7
+
+# ATT: vcomxss {sae}, %xmm23, %xmm22
+# INTEL: vcomxss xmm22, xmm23, {sae}
+0x62,0xa1,0x7f,0x18,0x2f,0xf7
+
+# ATT: vcomxss 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vcomxss xmm22, dword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa1,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vcomxss 291(%r8,%rax,4), %xmm22
+# INTEL: vcomxss xmm22, dword ptr [r8 + 4*rax + 291]
+0x62,0xc1,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vcomxss (%rip), %xmm22
+# INTEL: vcomxss xmm22, dword ptr [rip]
+0x62,0xe1,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vcomxss -128(,%rbp,2), %xmm22
+# INTEL: vcomxss xmm22, dword ptr [2*rbp - 128]
+0x62,0xe1,0x7f,0x08,0x2f,0x34,0x6d,0x80,0xff,0xff,0xff
+
+# ATT: vcomxss 508(%rcx), %xmm22
+# INTEL: vcomxss xmm22, dword ptr [rcx + 508]
+0x62,0xe1,0x7f,0x08,0x2f,0x71,0x7f
+
+# ATT: vcomxss -512(%rdx), %xmm22
+# INTEL: vcomxss xmm22, dword ptr [rdx - 512]
+0x62,0xe1,0x7f,0x08,0x2f,0x72,0x80
+
+# ATT: vucomxsd %xmm23, %xmm22
+# INTEL: vucomxsd xmm22, xmm23
+0x62,0xa1,0xfe,0x08,0x2e,0xf7
+
+# ATT: vucomxsd {sae}, %xmm23, %xmm22
+# INTEL: vucomxsd xmm22, xmm23, {sae}
+0x62,0xa1,0xfe,0x18,0x2e,0xf7
+
+# ATT: vucomxsd 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vucomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa1,0xfe,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vucomxsd 291(%r8,%rax,4), %xmm22
+# INTEL: vucomxsd xmm22, qword ptr [r8 + 4*rax + 291]
+0x62,0xc1,0xfe,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vucomxsd (%rip), %xmm22
+# INTEL: vucomxsd xmm22, qword ptr [rip]
+0x62,0xe1,0xfe,0x08,0x2e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vucomxsd -256(,%rbp,2), %xmm22
+# INTEL: vucomxsd xmm22, qword ptr [2*rbp - 256]
+0x62,0xe1,0xfe,0x08,0x2e,0x34,0x6d,0x00,0xff,0xff,0xff
+
+# ATT: vucomxsd 1016(%rcx), %xmm22
+# INTEL: vucomxsd xmm22, qword ptr [rcx + 1016]
+0x62,0xe1,0xfe,0x08,0x2e,0x71,0x7f
+
+# ATT: vucomxsd -1024(%rdx), %xmm22
+# INTEL: vucomxsd xmm22, qword ptr [rdx - 1024]
+0x62,0xe1,0xfe,0x08,0x2e,0x72,0x80
+
+# ATT: vucomxsh %xmm23, %xmm22
+# INTEL: vucomxsh xmm22, xmm23
+0x62,0xa5,0x7f,0x08,0x2e,0xf7
+
+# ATT: vucomxsh {sae}, %xmm23, %xmm22
+# INTEL: vucomxsh xmm22, xmm23, {sae}
+0x62,0xa5,0x7f,0x18,0x2e,0xf7
+
+# ATT: vucomxsh 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vucomxsh xmm22, word ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vucomxsh 291(%r8,%rax,4), %xmm22
+# INTEL: vucomxsh xmm22, word ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vucomxsh (%rip), %xmm22
+# INTEL: vucomxsh xmm22, word ptr [rip]
+0x62,0xe5,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vucomxsh -64(,%rbp,2), %xmm22
+# INTEL: vucomxsh xmm22, word ptr [2*rbp - 64]
+0x62,0xe5,0x7f,0x08,0x2e,0x34,0x6d,0xc0,0xff,0xff,0xff
+
+# ATT: vucomxsh 254(%rcx), %xmm22
+# INTEL: vucomxsh xmm22, word ptr [rcx + 254]
+0x62,0xe5,0x7f,0x08,0x2e,0x71,0x7f
+
+# ATT: vucomxsh -256(%rdx), %xmm22
+# INTEL: vucomxsh xmm22, word ptr [rdx - 256]
+0x62,0xe5,0x7f,0x08,0x2e,0x72,0x80
+
+# ATT: vucomxss %xmm23, %xmm22
+# INTEL: vucomxss xmm22, xmm23
+0x62,0xa1,0x7f,0x08,0x2e,0xf7
+
+# ATT: vucomxss {sae}, %xmm23, %xmm22
+# INTEL: vucomxss xmm22, xmm23, {sae}
+0x62,0xa1,0x7f,0x18,0x2e,0xf7
+
+# ATT: vucomxss 268435456(%rbp,%r14,8), %xmm22
+# INTEL: vucomxss xmm22, dword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa1,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vucomxss 291(%r8,%rax,4), %xmm22
+# INTEL: vucomxss xmm22, dword ptr [r8 + 4*rax + 291]
+0x62,0xc1,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vucomxss (%rip), %xmm22
+# INTEL: vucomxss xmm22, dword ptr [rip]
+0x62,0xe1,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vucomxss -128(,%rbp,2), %xmm22
+# INTEL: vucomxss xmm22, dword ptr [2*rbp - 128]
+0x62,0xe1,0x7f,0x08,0x2e,0x34,0x6d,0x80,0xff,0xff,0xff
+
+# ATT: vucomxss 508(%rcx), %xmm22
+# INTEL: vucomxss xmm22, dword ptr [rcx + 508]
+0x62,0xe1,0x7f,0x08,0x2e,0x71,0x7f
+
+# ATT: vucomxss -512(%rdx), %xmm22
+# INTEL: vucomxss xmm22, dword ptr [rdx - 512]
+0x62,0xe1,0x7f,0x08,0x2e,0x72,0x80
+
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
new file mode 100644
index 000000000000..e86c2340a486
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vmovd (%ecx), %xmm5
+# INTEL: vmovd xmm5, dword ptr [ecx]
+0x62 0xf1 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovd %xmm5, (%ecx)
+# INTEL: vmovd dword ptr [ecx], xmm5
+0x62 0xf1 0x7d 0x08 0xd6 0x29
+
+# ATT: vmovd %xmm2, %xmm1
+# INTEL: vmovd xmm1, xmm2
+0x62 0xf1 0x7e 0x08 0x7e 0xca
+
+# ATT: vmovd %xmm2, %xmm1
+# INTEL: vmovd xmm1, xmm2
+0x62 0xf1 0x7d 0x08 0xd6 0xca
+
+# ATT: vmovw %xmm5, (%ecx)
+# INTEL: vmovw dword ptr [ecx], xmm5
+0x62 0xf5 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovw (%ecx), %xmm5
+# INTEL: vmovw xmm5, word ptr [ecx]
+0x62 0xf5 0x7e 0x08 0x6e 0x29
+
+# ATT: vmovw %xmm2, %xmm1
+# INTEL: vmovw xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x6e 0xca
+
+# ATT: vmovw %xmm2, %xmm1
+# INTEL: vmovw xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x7e 0xca
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
new file mode 100644
index 000000000000..36ddd75a77ad
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vmovd (%rcx), %xmm29
+# INTEL: vmovd xmm29, dword ptr [rcx]
+0x62 0x61 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovd %xmm29, (%rcx)
+# INTEL: vmovd dword ptr [rcx], xmm29
+0x62 0x61 0x7d 0x08 0xd6 0x29
+
+# ATT: vmovd %xmm22, %xmm21
+# INTEL: vmovd xmm21, xmm22
+0x62 0xa1 0x7e 0x08 0x7e 0xee
+
+# ATT: vmovd %xmm22, %xmm21
+# INTEL: vmovd xmm21, xmm22
+0x62 0xa1 0x7d 0x08 0xd6 0xee
+
+# ATT: vmovw %xmm29, (%rcx)
+# INTEL: vmovw dword ptr [rcx], xmm29
+0x62 0x65 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovw (%rcx), %xmm29
+# INTEL: vmovw xmm29, word ptr [rcx]
+0x62 0x65 0x7e 0x08 0x6e 0x29
+
+# ATT: vmovw %xmm22, %xmm21
+# INTEL: vmovw xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x6e 0xee
+
+# ATT: vmovw %xmm22, %xmm21
+# INTEL: vmovw xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x7e 0xee
diff --git a/llvm/test/MC/WebAssembly/annotations.s b/llvm/test/MC/WebAssembly/annotations.s
index b1f97daccccd..3e727591afa9 100644
--- a/llvm/test/MC/WebAssembly/annotations.s
+++ b/llvm/test/MC/WebAssembly/annotations.s
@@ -33,6 +33,22 @@ test_annotation:
rethrow 0
end_try
end_try
+
+ block exnref
+ block
+ block () -> (i32, exnref)
+ block i32
+ try_table (catch __cpp_exception 0) (catch_ref __c_longjmp 1) (catch_all 2) (catch_all_ref 3)
+ end_try_table
+ return
+ end_block
+ return
+ end_block
+ return
+ end_block
+ return
+ end_block
+ drop
end_function
@@ -61,5 +77,24 @@ test_annotation:
# CHECK-NEXT: rethrow 0 # to caller
# CHECK-NEXT: end_try # label3:
# CHECK-NEXT: end_try # label0:
+
+# CHECK: block exnref
+# CHECK-NEXT: block
+# CHECK-NEXT: block () -> (i32, exnref)
+# CHECK-NEXT: block i32
+# CHECK-NEXT: try_table (catch __cpp_exception 0) (catch_ref __c_longjmp 1) (catch_all 2) (catch_all_ref 3) # 0: down to label10
+# CHECK-NEXT: # 1: down to label9
+# CHECK-NEXT: # 2: down to label8
+# CHECK-NEXT: # 3: down to label7
+# CHECK-NEXT: end_try_table # label11:
+# CHECK-NEXT: return
+# CHECK-NEXT: end_block # label10:
+# CHECK-NEXT: return
+# CHECK-NEXT: end_block # label9:
+# CHECK-NEXT: return
+# CHECK-NEXT: end_block # label8:
+# CHECK-NEXT: return
+# CHECK-NEXT: end_block # label7:
+# CHECK-NEXT: drop
# CHECK-NEXT: end_function
diff --git a/llvm/test/MC/WebAssembly/eh-assembly.s b/llvm/test/MC/WebAssembly/eh-assembly.s
index a769bc447d0b..b4d6b324d96e 100644
--- a/llvm/test/MC/WebAssembly/eh-assembly.s
+++ b/llvm/test/MC/WebAssembly/eh-assembly.s
@@ -1,4 +1,6 @@
# RUN: llvm-mc -triple=wasm32-unknown-unknown -mattr=+exception-handling --no-type-check < %s | FileCheck %s
+# Check that it converts to .o without errors, but don't check any output:
+# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -mattr=+exception-handling --no-type-check -o %t.o < %s
.tagtype __cpp_exception i32
.tagtype __c_longjmp i32
diff --git a/llvm/test/MC/X86/avx10.2-com-ef-32-att.s b/llvm/test/MC/X86/avx10.2-com-ef-32-att.s
new file mode 100644
index 000000000000..8883bb3d6775
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-com-ef-32-att.s
@@ -0,0 +1,194 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
+// CHECK: vcomxsd %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0xd3]
+ vcomxsd %xmm3, %xmm2
+
+// CHECK: vcomxsd {sae}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x18,0x2f,0xd3]
+ vcomxsd {sae}, %xmm3, %xmm2
+
+// CHECK: vcomxsd 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcomxsd 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcomxsd 291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcomxsd 291(%edi,%eax,4), %xmm2
+
+// CHECK: vcomxsd (%eax), %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x10]
+ vcomxsd (%eax), %xmm2
+
+// CHECK: vcomxsd -256(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x14,0x6d,0x00,0xff,0xff,0xff]
+ vcomxsd -256(,%ebp,2), %xmm2
+
+// CHECK: vcomxsd 1016(%ecx), %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x51,0x7f]
+ vcomxsd 1016(%ecx), %xmm2
+
+// CHECK: vcomxsd -1024(%edx), %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x52,0x80]
+ vcomxsd -1024(%edx), %xmm2
+
+// CHECK: vcomxsh %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0xd3]
+ vcomxsh %xmm3, %xmm2
+
+// CHECK: vcomxsh {sae}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x2f,0xd3]
+ vcomxsh {sae}, %xmm3, %xmm2
+
+// CHECK: vcomxsh 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcomxsh 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcomxsh 291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcomxsh 291(%edi,%eax,4), %xmm2
+
+// CHECK: vcomxsh (%eax), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x10]
+ vcomxsh (%eax), %xmm2
+
+// CHECK: vcomxsh -64(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff]
+ vcomxsh -64(,%ebp,2), %xmm2
+
+// CHECK: vcomxsh 254(%ecx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x51,0x7f]
+ vcomxsh 254(%ecx), %xmm2
+
+// CHECK: vcomxsh -256(%edx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x52,0x80]
+ vcomxsh -256(%edx), %xmm2
+
+// CHECK: vcomxss %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0xd3]
+ vcomxss %xmm3, %xmm2
+
+// CHECK: vcomxss {sae}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2f,0xd3]
+ vcomxss {sae}, %xmm3, %xmm2
+
+// CHECK: vcomxss 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcomxss 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vcomxss 291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcomxss 291(%edi,%eax,4), %xmm2
+
+// CHECK: vcomxss (%eax), %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x10]
+ vcomxss (%eax), %xmm2
+
+// CHECK: vcomxss -128(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x14,0x6d,0x80,0xff,0xff,0xff]
+ vcomxss -128(,%ebp,2), %xmm2
+
+// CHECK: vcomxss 508(%ecx), %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x51,0x7f]
+ vcomxss 508(%ecx), %xmm2
+
+// CHECK: vcomxss -512(%edx), %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x52,0x80]
+ vcomxss -512(%edx), %xmm2
+
+// CHECK: vucomxsd %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0xd3]
+ vucomxsd %xmm3, %xmm2
+
+// CHECK: vucomxsd {sae}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x18,0x2e,0xd3]
+ vucomxsd {sae}, %xmm3, %xmm2
+
+// CHECK: vucomxsd 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vucomxsd 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vucomxsd 291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vucomxsd 291(%edi,%eax,4), %xmm2
+
+// CHECK: vucomxsd (%eax), %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x10]
+ vucomxsd (%eax), %xmm2
+
+// CHECK: vucomxsd -256(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x14,0x6d,0x00,0xff,0xff,0xff]
+ vucomxsd -256(,%ebp,2), %xmm2
+
+// CHECK: vucomxsd 1016(%ecx), %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x51,0x7f]
+ vucomxsd 1016(%ecx), %xmm2
+
+// CHECK: vucomxsd -1024(%edx), %xmm2
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x52,0x80]
+ vucomxsd -1024(%edx), %xmm2
+
+// CHECK: vucomxsh %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0xd3]
+ vucomxsh %xmm3, %xmm2
+
+// CHECK: vucomxsh {sae}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x2e,0xd3]
+ vucomxsh {sae}, %xmm3, %xmm2
+
+// CHECK: vucomxsh 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vucomxsh 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vucomxsh 291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vucomxsh 291(%edi,%eax,4), %xmm2
+
+// CHECK: vucomxsh (%eax), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x10]
+ vucomxsh (%eax), %xmm2
+
+// CHECK: vucomxsh -64(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x14,0x6d,0xc0,0xff,0xff,0xff]
+ vucomxsh -64(,%ebp,2), %xmm2
+
+// CHECK: vucomxsh 254(%ecx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x51,0x7f]
+ vucomxsh 254(%ecx), %xmm2
+
+// CHECK: vucomxsh -256(%edx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x52,0x80]
+ vucomxsh -256(%edx), %xmm2
+
+// CHECK: vucomxss %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0xd3]
+ vucomxss %xmm3, %xmm2
+
+// CHECK: vucomxss {sae}, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2e,0xd3]
+ vucomxss {sae}, %xmm3, %xmm2
+
+// CHECK: vucomxss 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vucomxss 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vucomxss 291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vucomxss 291(%edi,%eax,4), %xmm2
+
+// CHECK: vucomxss (%eax), %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x10]
+ vucomxss (%eax), %xmm2
+
+// CHECK: vucomxss -128(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x14,0x6d,0x80,0xff,0xff,0xff]
+ vucomxss -128(,%ebp,2), %xmm2
+
+// CHECK: vucomxss 508(%ecx), %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x51,0x7f]
+ vucomxss 508(%ecx), %xmm2
+
+// CHECK: vucomxss -512(%edx), %xmm2
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x52,0x80]
+ vucomxss -512(%edx), %xmm2
+
diff --git a/llvm/test/MC/X86/avx10.2-com-ef-32-intel.s b/llvm/test/MC/X86/avx10.2-com-ef-32-intel.s
new file mode 100644
index 000000000000..9ff0484db133
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-com-ef-32-intel.s
@@ -0,0 +1,194 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vcomxsd xmm2, xmm3
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0xd3]
+ vcomxsd xmm2, xmm3
+
+// CHECK: vcomxsd xmm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xfe,0x18,0x2f,0xd3]
+ vcomxsd xmm2, xmm3, {sae}
+
+// CHECK: vcomxsd xmm2, qword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcomxsd xmm2, qword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcomxsd xmm2, qword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcomxsd xmm2, qword ptr [edi + 4*eax + 291]
+
+// CHECK: vcomxsd xmm2, qword ptr [eax]
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x10]
+ vcomxsd xmm2, qword ptr [eax]
+
+// CHECK: vcomxsd xmm2, qword ptr [2*ebp - 256]
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x14,0x6d,0x00,0xff,0xff,0xff]
+ vcomxsd xmm2, qword ptr [2*ebp - 256]
+
+// CHECK: vcomxsd xmm2, qword ptr [ecx + 1016]
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x51,0x7f]
+ vcomxsd xmm2, qword ptr [ecx + 1016]
+
+// CHECK: vcomxsd xmm2, qword ptr [edx - 1024]
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2f,0x52,0x80]
+ vcomxsd xmm2, qword ptr [edx - 1024]
+
+// CHECK: vcomxsh xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0xd3]
+ vcomxsh xmm2, xmm3
+
+// CHECK: vcomxsh xmm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x2f,0xd3]
+ vcomxsh xmm2, xmm3, {sae}
+
+// CHECK: vcomxsh xmm2, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcomxsh xmm2, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcomxsh xmm2, word ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcomxsh xmm2, word ptr [edi + 4*eax + 291]
+
+// CHECK: vcomxsh xmm2, word ptr [eax]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x10]
+ vcomxsh xmm2, word ptr [eax]
+
+// CHECK: vcomxsh xmm2, word ptr [2*ebp - 64]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x14,0x6d,0xc0,0xff,0xff,0xff]
+ vcomxsh xmm2, word ptr [2*ebp - 64]
+
+// CHECK: vcomxsh xmm2, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x51,0x7f]
+ vcomxsh xmm2, word ptr [ecx + 254]
+
+// CHECK: vcomxsh xmm2, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2f,0x52,0x80]
+ vcomxsh xmm2, word ptr [edx - 256]
+
+// CHECK: vcomxss xmm2, xmm3
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0xd3]
+ vcomxss xmm2, xmm3
+
+// CHECK: vcomxss xmm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2f,0xd3]
+ vcomxss xmm2, xmm3, {sae}
+
+// CHECK: vcomxss xmm2, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vcomxss xmm2, dword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcomxss xmm2, dword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x94,0x87,0x23,0x01,0x00,0x00]
+ vcomxss xmm2, dword ptr [edi + 4*eax + 291]
+
+// CHECK: vcomxss xmm2, dword ptr [eax]
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x10]
+ vcomxss xmm2, dword ptr [eax]
+
+// CHECK: vcomxss xmm2, dword ptr [2*ebp - 128]
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x14,0x6d,0x80,0xff,0xff,0xff]
+ vcomxss xmm2, dword ptr [2*ebp - 128]
+
+// CHECK: vcomxss xmm2, dword ptr [ecx + 508]
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x51,0x7f]
+ vcomxss xmm2, dword ptr [ecx + 508]
+
+// CHECK: vcomxss xmm2, dword ptr [edx - 512]
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2f,0x52,0x80]
+ vcomxss xmm2, dword ptr [edx - 512]
+
+// CHECK: vucomxsd xmm2, xmm3
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0xd3]
+ vucomxsd xmm2, xmm3
+
+// CHECK: vucomxsd xmm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0xfe,0x18,0x2e,0xd3]
+ vucomxsd xmm2, xmm3, {sae}
+
+// CHECK: vucomxsd xmm2, qword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vucomxsd xmm2, qword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vucomxsd xmm2, qword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vucomxsd xmm2, qword ptr [edi + 4*eax + 291]
+
+// CHECK: vucomxsd xmm2, qword ptr [eax]
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x10]
+ vucomxsd xmm2, qword ptr [eax]
+
+// CHECK: vucomxsd xmm2, qword ptr [2*ebp - 256]
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x14,0x6d,0x00,0xff,0xff,0xff]
+ vucomxsd xmm2, qword ptr [2*ebp - 256]
+
+// CHECK: vucomxsd xmm2, qword ptr [ecx + 1016]
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x51,0x7f]
+ vucomxsd xmm2, qword ptr [ecx + 1016]
+
+// CHECK: vucomxsd xmm2, qword ptr [edx - 1024]
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x2e,0x52,0x80]
+ vucomxsd xmm2, qword ptr [edx - 1024]
+
+// CHECK: vucomxsh xmm2, xmm3
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0xd3]
+ vucomxsh xmm2, xmm3
+
+// CHECK: vucomxsh xmm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf5,0x7f,0x18,0x2e,0xd3]
+ vucomxsh xmm2, xmm3, {sae}
+
+// CHECK: vucomxsh xmm2, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vucomxsh xmm2, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vucomxsh xmm2, word ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vucomxsh xmm2, word ptr [edi + 4*eax + 291]
+
+// CHECK: vucomxsh xmm2, word ptr [eax]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x10]
+ vucomxsh xmm2, word ptr [eax]
+
+// CHECK: vucomxsh xmm2, word ptr [2*ebp - 64]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x14,0x6d,0xc0,0xff,0xff,0xff]
+ vucomxsh xmm2, word ptr [2*ebp - 64]
+
+// CHECK: vucomxsh xmm2, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x51,0x7f]
+ vucomxsh xmm2, word ptr [ecx + 254]
+
+// CHECK: vucomxsh xmm2, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7f,0x08,0x2e,0x52,0x80]
+ vucomxsh xmm2, word ptr [edx - 256]
+
+// CHECK: vucomxss xmm2, xmm3
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0xd3]
+ vucomxss xmm2, xmm3
+
+// CHECK: vucomxss xmm2, xmm3, {sae}
+// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2e,0xd3]
+ vucomxss xmm2, xmm3, {sae}
+
+// CHECK: vucomxss xmm2, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vucomxss xmm2, dword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vucomxss xmm2, dword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vucomxss xmm2, dword ptr [edi + 4*eax + 291]
+
+// CHECK: vucomxss xmm2, dword ptr [eax]
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x10]
+ vucomxss xmm2, dword ptr [eax]
+
+// CHECK: vucomxss xmm2, dword ptr [2*ebp - 128]
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x14,0x6d,0x80,0xff,0xff,0xff]
+ vucomxss xmm2, dword ptr [2*ebp - 128]
+
+// CHECK: vucomxss xmm2, dword ptr [ecx + 508]
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x51,0x7f]
+ vucomxss xmm2, dword ptr [ecx + 508]
+
+// CHECK: vucomxss xmm2, dword ptr [edx - 512]
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x2e,0x52,0x80]
+ vucomxss xmm2, dword ptr [edx - 512]
+
diff --git a/llvm/test/MC/X86/avx10.2-com-ef-64-att.s b/llvm/test/MC/X86/avx10.2-com-ef-64-att.s
new file mode 100644
index 000000000000..2f3690537334
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-com-ef-64-att.s
@@ -0,0 +1,194 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK: vcomxsd %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2f,0xf7]
+ vcomxsd %xmm23, %xmm22
+
+// CHECK: vcomxsd {sae}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xfe,0x18,0x2f,0xf7]
+ vcomxsd {sae}, %xmm23, %xmm22
+
+// CHECK: vcomxsd 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcomxsd 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcomxsd 291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc1,0xfe,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcomxsd 291(%r8,%rax,4), %xmm22
+
+// CHECK: vcomxsd (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x35,0x00,0x00,0x00,0x00]
+ vcomxsd (%rip), %xmm22
+
+// CHECK: vcomxsd -256(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x34,0x6d,0x00,0xff,0xff,0xff]
+ vcomxsd -256(,%rbp,2), %xmm22
+
+// CHECK: vcomxsd 1016(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x71,0x7f]
+ vcomxsd 1016(%rcx), %xmm22
+
+// CHECK: vcomxsd -1024(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x72,0x80]
+ vcomxsd -1024(%rdx), %xmm22
+
+// CHECK: vcomxsh %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2f,0xf7]
+ vcomxsh %xmm23, %xmm22
+
+// CHECK: vcomxsh {sae}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7f,0x18,0x2f,0xf7]
+ vcomxsh {sae}, %xmm23, %xmm22
+
+// CHECK: vcomxsh 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcomxsh 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcomxsh 291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc5,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcomxsh 291(%r8,%rax,4), %xmm22
+
+// CHECK: vcomxsh (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00]
+ vcomxsh (%rip), %xmm22
+
+// CHECK: vcomxsh -64(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff]
+ vcomxsh -64(,%rbp,2), %xmm22
+
+// CHECK: vcomxsh 254(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x71,0x7f]
+ vcomxsh 254(%rcx), %xmm22
+
+// CHECK: vcomxsh -256(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x72,0x80]
+ vcomxsh -256(%rdx), %xmm22
+
+// CHECK: vcomxss %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2f,0xf7]
+ vcomxss %xmm23, %xmm22
+
+// CHECK: vcomxss {sae}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7f,0x18,0x2f,0xf7]
+ vcomxss {sae}, %xmm23, %xmm22
+
+// CHECK: vcomxss 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcomxss 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vcomxss 291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc1,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcomxss 291(%r8,%rax,4), %xmm22
+
+// CHECK: vcomxss (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00]
+ vcomxss (%rip), %xmm22
+
+// CHECK: vcomxss -128(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x34,0x6d,0x80,0xff,0xff,0xff]
+ vcomxss -128(,%rbp,2), %xmm22
+
+// CHECK: vcomxss 508(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x71,0x7f]
+ vcomxss 508(%rcx), %xmm22
+
+// CHECK: vcomxss -512(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x72,0x80]
+ vcomxss -512(%rdx), %xmm22
+
+// CHECK: vucomxsd %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2e,0xf7]
+ vucomxsd %xmm23, %xmm22
+
+// CHECK: vucomxsd {sae}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0xfe,0x18,0x2e,0xf7]
+ vucomxsd {sae}, %xmm23, %xmm22
+
+// CHECK: vucomxsd 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vucomxsd 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vucomxsd 291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc1,0xfe,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vucomxsd 291(%r8,%rax,4), %xmm22
+
+// CHECK: vucomxsd (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x35,0x00,0x00,0x00,0x00]
+ vucomxsd (%rip), %xmm22
+
+// CHECK: vucomxsd -256(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x34,0x6d,0x00,0xff,0xff,0xff]
+ vucomxsd -256(,%rbp,2), %xmm22
+
+// CHECK: vucomxsd 1016(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x71,0x7f]
+ vucomxsd 1016(%rcx), %xmm22
+
+// CHECK: vucomxsd -1024(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x72,0x80]
+ vucomxsd -1024(%rdx), %xmm22
+
+// CHECK: vucomxsh %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2e,0xf7]
+ vucomxsh %xmm23, %xmm22
+
+// CHECK: vucomxsh {sae}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7f,0x18,0x2e,0xf7]
+ vucomxsh {sae}, %xmm23, %xmm22
+
+// CHECK: vucomxsh 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vucomxsh 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vucomxsh 291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc5,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vucomxsh 291(%r8,%rax,4), %xmm22
+
+// CHECK: vucomxsh (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00]
+ vucomxsh (%rip), %xmm22
+
+// CHECK: vucomxsh -64(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x34,0x6d,0xc0,0xff,0xff,0xff]
+ vucomxsh -64(,%rbp,2), %xmm22
+
+// CHECK: vucomxsh 254(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x71,0x7f]
+ vucomxsh 254(%rcx), %xmm22
+
+// CHECK: vucomxsh -256(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x72,0x80]
+ vucomxsh -256(%rdx), %xmm22
+
+// CHECK: vucomxss %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2e,0xf7]
+ vucomxss %xmm23, %xmm22
+
+// CHECK: vucomxss {sae}, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7f,0x18,0x2e,0xf7]
+ vucomxss {sae}, %xmm23, %xmm22
+
+// CHECK: vucomxss 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vucomxss 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vucomxss 291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc1,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vucomxss 291(%r8,%rax,4), %xmm22
+
+// CHECK: vucomxss (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00]
+ vucomxss (%rip), %xmm22
+
+// CHECK: vucomxss -128(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x34,0x6d,0x80,0xff,0xff,0xff]
+ vucomxss -128(,%rbp,2), %xmm22
+
+// CHECK: vucomxss 508(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x71,0x7f]
+ vucomxss 508(%rcx), %xmm22
+
+// CHECK: vucomxss -512(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x72,0x80]
+ vucomxss -512(%rdx), %xmm22
+
diff --git a/llvm/test/MC/X86/avx10.2-com-ef-64-intel.s b/llvm/test/MC/X86/avx10.2-com-ef-64-intel.s
new file mode 100644
index 000000000000..41aaf99270b8
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-com-ef-64-intel.s
@@ -0,0 +1,194 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vcomxsd xmm22, xmm23
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2f,0xf7]
+ vcomxsd xmm22, xmm23
+
+// CHECK: vcomxsd xmm22, xmm23, {sae}
+// CHECK: encoding: [0x62,0xa1,0xfe,0x18,0x2f,0xf7]
+ vcomxsd xmm22, xmm23, {sae}
+
+// CHECK: vcomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcomxsd xmm22, qword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc1,0xfe,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcomxsd xmm22, qword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcomxsd xmm22, qword ptr [rip]
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x35,0x00,0x00,0x00,0x00]
+ vcomxsd xmm22, qword ptr [rip]
+
+// CHECK: vcomxsd xmm22, qword ptr [2*rbp - 256]
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x34,0x6d,0x00,0xff,0xff,0xff]
+ vcomxsd xmm22, qword ptr [2*rbp - 256]
+
+// CHECK: vcomxsd xmm22, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x71,0x7f]
+ vcomxsd xmm22, qword ptr [rcx + 1016]
+
+// CHECK: vcomxsd xmm22, qword ptr [rdx - 1024]
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2f,0x72,0x80]
+ vcomxsd xmm22, qword ptr [rdx - 1024]
+
+// CHECK: vcomxsh xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2f,0xf7]
+ vcomxsh xmm22, xmm23
+
+// CHECK: vcomxsh xmm22, xmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x7f,0x18,0x2f,0xf7]
+ vcomxsh xmm22, xmm23, {sae}
+
+// CHECK: vcomxsh xmm22, word ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcomxsh xmm22, word ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcomxsh xmm22, word ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcomxsh xmm22, word ptr [r8 + 4*rax + 291]
+
+// CHECK: vcomxsh xmm22, word ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00]
+ vcomxsh xmm22, word ptr [rip]
+
+// CHECK: vcomxsh xmm22, word ptr [2*rbp - 64]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x34,0x6d,0xc0,0xff,0xff,0xff]
+ vcomxsh xmm22, word ptr [2*rbp - 64]
+
+// CHECK: vcomxsh xmm22, word ptr [rcx + 254]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x71,0x7f]
+ vcomxsh xmm22, word ptr [rcx + 254]
+
+// CHECK: vcomxsh xmm22, word ptr [rdx - 256]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2f,0x72,0x80]
+ vcomxsh xmm22, word ptr [rdx - 256]
+
+// CHECK: vcomxss xmm22, xmm23
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2f,0xf7]
+ vcomxss xmm22, xmm23
+
+// CHECK: vcomxss xmm22, xmm23, {sae}
+// CHECK: encoding: [0x62,0xa1,0x7f,0x18,0x2f,0xf7]
+ vcomxss xmm22, xmm23, {sae}
+
+// CHECK: vcomxss xmm22, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcomxss xmm22, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcomxss xmm22, dword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc1,0x7f,0x08,0x2f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vcomxss xmm22, dword ptr [r8 + 4*rax + 291]
+
+// CHECK: vcomxss xmm22, dword ptr [rip]
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x35,0x00,0x00,0x00,0x00]
+ vcomxss xmm22, dword ptr [rip]
+
+// CHECK: vcomxss xmm22, dword ptr [2*rbp - 128]
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x34,0x6d,0x80,0xff,0xff,0xff]
+ vcomxss xmm22, dword ptr [2*rbp - 128]
+
+// CHECK: vcomxss xmm22, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x71,0x7f]
+ vcomxss xmm22, dword ptr [rcx + 508]
+
+// CHECK: vcomxss xmm22, dword ptr [rdx - 512]
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2f,0x72,0x80]
+ vcomxss xmm22, dword ptr [rdx - 512]
+
+// CHECK: vucomxsd xmm22, xmm23
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2e,0xf7]
+ vucomxsd xmm22, xmm23
+
+// CHECK: vucomxsd xmm22, xmm23, {sae}
+// CHECK: encoding: [0x62,0xa1,0xfe,0x18,0x2e,0xf7]
+ vucomxsd xmm22, xmm23, {sae}
+
+// CHECK: vucomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vucomxsd xmm22, qword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vucomxsd xmm22, qword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc1,0xfe,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vucomxsd xmm22, qword ptr [r8 + 4*rax + 291]
+
+// CHECK: vucomxsd xmm22, qword ptr [rip]
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x35,0x00,0x00,0x00,0x00]
+ vucomxsd xmm22, qword ptr [rip]
+
+// CHECK: vucomxsd xmm22, qword ptr [2*rbp - 256]
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x34,0x6d,0x00,0xff,0xff,0xff]
+ vucomxsd xmm22, qword ptr [2*rbp - 256]
+
+// CHECK: vucomxsd xmm22, qword ptr [rcx + 1016]
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x71,0x7f]
+ vucomxsd xmm22, qword ptr [rcx + 1016]
+
+// CHECK: vucomxsd xmm22, qword ptr [rdx - 1024]
+// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x2e,0x72,0x80]
+ vucomxsd xmm22, qword ptr [rdx - 1024]
+
+// CHECK: vucomxsh xmm22, xmm23
+// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2e,0xf7]
+ vucomxsh xmm22, xmm23
+
+// CHECK: vucomxsh xmm22, xmm23, {sae}
+// CHECK: encoding: [0x62,0xa5,0x7f,0x18,0x2e,0xf7]
+ vucomxsh xmm22, xmm23, {sae}
+
+// CHECK: vucomxsh xmm22, word ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vucomxsh xmm22, word ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vucomxsh xmm22, word ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vucomxsh xmm22, word ptr [r8 + 4*rax + 291]
+
+// CHECK: vucomxsh xmm22, word ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00]
+ vucomxsh xmm22, word ptr [rip]
+
+// CHECK: vucomxsh xmm22, word ptr [2*rbp - 64]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x34,0x6d,0xc0,0xff,0xff,0xff]
+ vucomxsh xmm22, word ptr [2*rbp - 64]
+
+// CHECK: vucomxsh xmm22, word ptr [rcx + 254]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x71,0x7f]
+ vucomxsh xmm22, word ptr [rcx + 254]
+
+// CHECK: vucomxsh xmm22, word ptr [rdx - 256]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x08,0x2e,0x72,0x80]
+ vucomxsh xmm22, word ptr [rdx - 256]
+
+// CHECK: vucomxss xmm22, xmm23
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2e,0xf7]
+ vucomxss xmm22, xmm23
+
+// CHECK: vucomxss xmm22, xmm23, {sae}
+// CHECK: encoding: [0x62,0xa1,0x7f,0x18,0x2e,0xf7]
+ vucomxss xmm22, xmm23, {sae}
+
+// CHECK: vucomxss xmm22, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x2e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vucomxss xmm22, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vucomxss xmm22, dword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc1,0x7f,0x08,0x2e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vucomxss xmm22, dword ptr [r8 + 4*rax + 291]
+
+// CHECK: vucomxss xmm22, dword ptr [rip]
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x35,0x00,0x00,0x00,0x00]
+ vucomxss xmm22, dword ptr [rip]
+
+// CHECK: vucomxss xmm22, dword ptr [2*rbp - 128]
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x34,0x6d,0x80,0xff,0xff,0xff]
+ vucomxss xmm22, dword ptr [2*rbp - 128]
+
+// CHECK: vucomxss xmm22, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x71,0x7f]
+ vucomxss xmm22, dword ptr [rcx + 508]
+
+// CHECK: vucomxss xmm22, dword ptr [rdx - 512]
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x2e,0x72,0x80]
+ vucomxss xmm22, dword ptr [rdx - 512]
+
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s
new file mode 100644
index 000000000000..2bc498720849
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s
@@ -0,0 +1,82 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmovd 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vmovd 291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmovd 291(%edi,%eax,4), %xmm2
+
+// CHECK: vmovd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x10]
+ vmovd (%eax), %xmm2
+
+// CHECK: vmovd -128(,%ebp,2), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x14,0x6d,0x80,0xff,0xff,0xff]
+ vmovd -128(,%ebp,2), %xmm2
+
+// CHECK: vmovd %xmm3, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ vmovd %xmm3, 268435456(%esp,%esi,8)
+
+// CHECK: vmovd %xmm3, 291(%edi,%eax,4)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00]
+ vmovd %xmm3, 291(%edi,%eax,4)
+
+// CHECK: vmovd %xmm3, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x18]
+ vmovd %xmm3, (%eax)
+
+// CHECK: vmovd %xmm3, -128(,%ebp,2)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x1c,0x6d,0x80,0xff,0xff,0xff]
+ vmovd %xmm3, -128(,%ebp,2)
+
+// CHECK: vmovw 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmovw 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vmovw 291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmovw 291(%edi,%eax,4), %xmm2
+
+// CHECK: vmovw (%eax), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x10]
+ vmovw (%eax), %xmm2
+
+// CHECK: vmovw -64(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x14,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw -64(,%ebp,2), %xmm2
+
+// CHECK: vmovw 254(%ecx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x51,0x7f]
+ vmovw 254(%ecx), %xmm2
+
+// CHECK: vmovw -256(%edx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x52,0x80]
+ vmovw -256(%edx), %xmm2
+
+// CHECK: vmovw %xmm3, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ vmovw %xmm3, 268435456(%esp,%esi,8)
+
+// CHECK: vmovw %xmm3, 291(%edi,%eax,4)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00]
+ vmovw %xmm3, 291(%edi,%eax,4)
+
+// CHECK: vmovw %xmm3, (%eax)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x18]
+ vmovw %xmm3, (%eax)
+
+// CHECK: vmovw %xmm3, -64(,%ebp,2)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x1c,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw %xmm3, -64(,%ebp,2)
+
+// CHECK: vmovw %xmm3, 254(%ecx)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x59,0x7f]
+ vmovw %xmm3, 254(%ecx)
+
+// CHECK: vmovw %xmm3, -256(%edx)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x5a,0x80]
+ vmovw %xmm3, -256(%edx)
+
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
new file mode 100644
index 000000000000..aa84548e5f75
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
@@ -0,0 +1,81 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd xmm2, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmovd xmm2, dword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmovd xmm2, dword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmovd xmm2, dword ptr [edi + 4*eax + 291]
+
+// CHECK: vmovd xmm2, dword ptr [eax]
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x10]
+ vmovd xmm2, dword ptr [eax]
+
+// CHECK: vmovd xmm2, dword ptr [2*ebp - 128]
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x14,0x6d,0x80,0xff,0xff,0xff]
+ vmovd xmm2, dword ptr [2*ebp - 128]
+
+// CHECK: vmovd dword ptr [esp + 8*esi + 268435456], xmm3
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ vmovd dword ptr [esp + 8*esi + 268435456], xmm3
+
+// CHECK: vmovd dword ptr [edi + 4*eax + 291], xmm3
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00]
+ vmovd dword ptr [edi + 4*eax + 291], xmm3
+
+// CHECK: vmovd dword ptr [eax], xmm3
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x18]
+ vmovd dword ptr [eax], xmm3
+
+// CHECK: vmovd dword ptr [2*ebp - 128], xmm3
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x1c,0x6d,0x80,0xff,0xff,0xff]
+ vmovd dword ptr [2*ebp - 128], xmm3
+
+// CHECK: vmovw xmm2, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmovw xmm2, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmovw xmm2, word ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmovw xmm2, word ptr [edi + 4*eax + 291]
+
+// CHECK: vmovw xmm2, word ptr [eax]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x10]
+ vmovw xmm2, word ptr [eax]
+
+// CHECK: vmovw xmm2, word ptr [2*ebp - 64]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x14,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw xmm2, word ptr [2*ebp - 64]
+
+// CHECK: vmovw xmm2, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x51,0x7f]
+ vmovw xmm2, word ptr [ecx + 254]
+
+// CHECK: vmovw xmm2, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x52,0x80]
+ vmovw xmm2, word ptr [edx - 256]
+
+// CHECK: vmovw word ptr [esp + 8*esi + 268435456], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ vmovw word ptr [esp + 8*esi + 268435456], xmm3
+
+// CHECK: vmovw word ptr [edi + 4*eax + 291], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00]
+ vmovw word ptr [edi + 4*eax + 291], xmm3
+
+// CHECK: vmovw word ptr [eax], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x18]
+ vmovw word ptr [eax], xmm3
+
+// CHECK: vmovw word ptr [2*ebp - 64], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x1c,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw word ptr [2*ebp - 64], xmm3
+
+// CHECK: vmovw word ptr [ecx + 254], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x59,0x7f]
+ vmovw word ptr [ecx + 254], xmm3
+
+// CHECK: vmovw word ptr [edx - 256], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x5a,0x80]
+ vmovw word ptr [edx - 256], xmm3
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s
new file mode 100644
index 000000000000..a672b2d84224
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s
@@ -0,0 +1,97 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovd 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vmovd 291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovd 291(%r8,%rax,4), %xmm22
+
+// CHECK: vmovd (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00]
+ vmovd (%rip), %xmm22
+
+// CHECK: vmovd -128(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x34,0x6d,0x80,0xff,0xff,0xff]
+ vmovd -128(,%rbp,2), %xmm22
+
+// CHECK: vmovd 508(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x71,0x7f]
+ vmovd 508(%rcx), %xmm22
+
+// CHECK: vmovd -512(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x72,0x80]
+ vmovd -512(%rdx), %xmm22
+
+// CHECK: vmovd %xmm23, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10]
+ vmovd %xmm23, 268435456(%rbp,%r14,8)
+
+// CHECK: vmovd %xmm23, 291(%r8,%rax,4)
+// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00]
+ vmovd %xmm23, 291(%r8,%rax,4)
+
+// CHECK: vmovd %xmm23, (%rip)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00]
+ vmovd %xmm23, (%rip)
+
+// CHECK: vmovd %xmm23, -128(,%rbp,2)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3c,0x6d,0x80,0xff,0xff,0xff]
+ vmovd %xmm23, -128(,%rbp,2)
+
+// CHECK: vmovd %xmm23, 508(%rcx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x79,0x7f]
+ vmovd %xmm23, 508(%rcx)
+
+// CHECK: vmovd %xmm23, -512(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x7a,0x80]
+ vmovd %xmm23, -512(%rdx)
+
+// CHECK: vmovw 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovw 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vmovw 291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovw 291(%r8,%rax,4), %xmm22
+
+// CHECK: vmovw (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00]
+ vmovw (%rip), %xmm22
+
+// CHECK: vmovw -64(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x34,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw -64(,%rbp,2), %xmm22
+
+// CHECK: vmovw 254(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x71,0x7f]
+ vmovw 254(%rcx), %xmm22
+
+// CHECK: vmovw -256(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x72,0x80]
+ vmovw -256(%rdx), %xmm22
+
+// CHECK: vmovw %xmm23, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10]
+ vmovw %xmm23, 268435456(%rbp,%r14,8)
+
+// CHECK: vmovw %xmm23, 291(%r8,%rax,4)
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00]
+ vmovw %xmm23, 291(%r8,%rax,4)
+
+// CHECK: vmovw %xmm23, (%rip)
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00]
+ vmovw %xmm23, (%rip)
+
+// CHECK: vmovw %xmm23, -64(,%rbp,2)
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3c,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw %xmm23, -64(,%rbp,2)
+
+// CHECK: vmovw %xmm23, 254(%rcx)
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x79,0x7f]
+ vmovw %xmm23, 254(%rcx)
+
+// CHECK: vmovw %xmm23, -256(%rdx)
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x7a,0x80]
+ vmovw %xmm23, -256(%rdx)
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
new file mode 100644
index 000000000000..4fd7b67dfa5d
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
@@ -0,0 +1,97 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd xmm22, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovd xmm22, dword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovd xmm22, dword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovd xmm22, dword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovd xmm22, dword ptr [rip]
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00]
+ vmovd xmm22, dword ptr [rip]
+
+// CHECK: vmovd xmm22, dword ptr [2*rbp - 128]
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x34,0x6d,0x80,0xff,0xff,0xff]
+ vmovd xmm22, dword ptr [2*rbp - 128]
+
+// CHECK: vmovd xmm22, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x71,0x7f]
+ vmovd xmm22, dword ptr [rcx + 508]
+
+// CHECK: vmovd xmm22, dword ptr [rdx - 512]
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x72,0x80]
+ vmovd xmm22, dword ptr [rdx - 512]
+
+// CHECK: vmovd dword ptr [rbp + 8*r14 + 268435456], xmm23
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10]
+ vmovd dword ptr [rbp + 8*r14 + 268435456], xmm23
+
+// CHECK: vmovd dword ptr [r8 + 4*rax + 291], xmm23
+// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00]
+ vmovd dword ptr [r8 + 4*rax + 291], xmm23
+
+// CHECK: vmovd dword ptr [rip], xmm23
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00]
+ vmovd dword ptr [rip], xmm23
+
+// CHECK: vmovd dword ptr [2*rbp - 128], xmm23
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3c,0x6d,0x80,0xff,0xff,0xff]
+ vmovd dword ptr [2*rbp - 128], xmm23
+
+// CHECK: vmovd dword ptr [rcx + 508], xmm23
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x79,0x7f]
+ vmovd dword ptr [rcx + 508], xmm23
+
+// CHECK: vmovd dword ptr [rdx - 512], xmm23
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x7a,0x80]
+ vmovd dword ptr [rdx - 512], xmm23
+
+// CHECK: vmovw xmm22, word ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovw xmm22, word ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovw xmm22, word ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovw xmm22, word ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovw xmm22, word ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00]
+ vmovw xmm22, word ptr [rip]
+
+// CHECK: vmovw xmm22, word ptr [2*rbp - 64]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x34,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw xmm22, word ptr [2*rbp - 64]
+
+// CHECK: vmovw xmm22, word ptr [rcx + 254]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x71,0x7f]
+ vmovw xmm22, word ptr [rcx + 254]
+
+// CHECK: vmovw xmm22, word ptr [rdx - 256]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x72,0x80]
+ vmovw xmm22, word ptr [rdx - 256]
+
+// CHECK: vmovw word ptr [rbp + 8*r14 + 268435456], xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10]
+ vmovw word ptr [rbp + 8*r14 + 268435456], xmm23
+
+// CHECK: vmovw word ptr [r8 + 4*rax + 291], xmm23
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00]
+ vmovw word ptr [r8 + 4*rax + 291], xmm23
+
+// CHECK: vmovw word ptr [rip], xmm23
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00]
+ vmovw word ptr [rip], xmm23
+
+// CHECK: vmovw word ptr [2*rbp - 64], xmm23
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3c,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw word ptr [2*rbp - 64], xmm23
+
+// CHECK: vmovw word ptr [rcx + 254], xmm23
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x79,0x7f]
+ vmovw word ptr [rcx + 254], xmm23
+
+// CHECK: vmovw word ptr [rdx - 256], xmm23
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x7a,0x80]
+ vmovw word ptr [rdx - 256], xmm23
diff --git a/llvm/test/MachineVerifier/test_g_insert_subvector.mir b/llvm/test/MachineVerifier/test_g_insert_subvector.mir
index 9fce3c3e842d..62ddd28919b2 100644
--- a/llvm/test/MachineVerifier/test_g_insert_subvector.mir
+++ b/llvm/test/MachineVerifier/test_g_insert_subvector.mir
@@ -41,4 +41,7 @@ body: |
; CHECK: Index must be a multiple of the second source vector's minimum vector length
%13:_(<vscale x 4 x s32>) = G_INSERT_SUBVECTOR %12, %1, 3
+
+ ; CHECK: Index must be a multiple of the second source vector's minimum vector length
+ %13:_(<vscale x 4 x s32>) = G_INSERT_SUBVECTOR %12, %1, 1
...
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index e85708ac1cc4..85d9b02ac0cb 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1178,6 +1178,9 @@ static const X86FoldTableEntry Table1[] = {
{X86::VCOMISSrr_Int, X86::VCOMISSrm_Int, TB_NO_REVERSE},
{X86::VCOMSBF16Zrr, X86::VCOMSBF16Zrm, 0},
{X86::VCOMSBF16Zrr_Int, X86::VCOMSBF16Zrm_Int, TB_NO_REVERSE},
+ {X86::VCOMXSDZrr_Int, X86::VCOMXSDZrm_Int, TB_NO_REVERSE},
+ {X86::VCOMXSHZrr_Int, X86::VCOMXSHZrm_Int, TB_NO_REVERSE},
+ {X86::VCOMXSSZrr_Int, X86::VCOMXSSZrm_Int, TB_NO_REVERSE},
{X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, 0},
{X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rm, TB_NO_REVERSE},
{X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rm, 0},
@@ -1614,8 +1617,10 @@ static const X86FoldTableEntry Table1[] = {
{X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0},
{X86::VMOVUPSrr, X86::VMOVUPSrm, 0},
{X86::VMOVW2SHrr, X86::VMOVWrm, TB_NO_REVERSE},
+ {X86::VMOVZPDILo2PDIZrr, X86::VMOVZPDILo2PDIZrm, TB_NO_REVERSE},
{X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE},
{X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE},
+ {X86::VMOVZPWILo2PWIZrr, X86::VMOVZPWILo2PWIZrm, TB_NO_REVERSE},
{X86::VPABSBYrr, X86::VPABSBYrm, 0},
{X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0},
{X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0},
@@ -1954,6 +1959,9 @@ static const X86FoldTableEntry Table1[] = {
{X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE},
{X86::VUCOMISSrr, X86::VUCOMISSrm, 0},
{X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE},
+ {X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE},
+ {X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE},
+ {X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE},
{X86::XOR16ri8_ND, X86::XOR16mi8_ND, 0},
{X86::XOR16ri8_NF_ND, X86::XOR16mi8_NF_ND, 0},
{X86::XOR16ri_ND, X86::XOR16mi_ND, 0},
diff --git a/llvm/test/ThinLTO/X86/ctxprof.ll b/llvm/test/ThinLTO/X86/ctxprof.ll
index 1e30b90ec23d..4baea3b25890 100644
--- a/llvm/test/ThinLTO/X86/ctxprof.ll
+++ b/llvm/test/ThinLTO/X86/ctxprof.ll
@@ -47,18 +47,21 @@
; NOPROF-1-NOT: m2_f1()
; NOPROF-2-NOT: m1_f1()
;
-; The run with workload definitions - same other options.
+; The run with workload definitions - same other options. We do need to re-generate the .bc
+; files, to include instrumentation.
+; RUN: opt -module-summary -passes=assign-guid,ctx-instr-gen %t/m1.ll -o %t/m1-instr.bc
+; RUN: opt -module-summary -passes=assign-guid,ctx-instr-gen %t/m2.ll -o %t/m2-instr.bc
;
; RUN: echo '[ \
; RUN: {"Guid": 6019442868614718803, "Counters": [1], "Callsites": [[{"Guid": 15593096274670919754, "Counters": [1]}]]}, \
; RUN: {"Guid": 15593096274670919754, "Counters": [1], "Callsites": [[{"Guid": 6019442868614718803, "Counters": [1]}]]} \
; RUN: ]' > %t_exp/ctxprof.json
; RUN: llvm-ctxprof-util fromJSON --input %t_exp/ctxprof.json --output %t_exp/ctxprof.bitstream
-; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc \
+; RUN: llvm-lto2 run %t/m1-instr.bc %t/m2-instr.bc \
; RUN: -o %t_exp/result.o -save-temps \
; RUN: -use-ctx-profile=%t_exp/ctxprof.bitstream \
-; RUN: -r %t/m1.bc,m1_f1,plx \
-; RUN: -r %t/m2.bc,m2_f1,plx
+; RUN: -r %t/m1-instr.bc,m1_f1,plx \
+; RUN: -r %t/m2-instr.bc,m2_f1,plx
; RUN: llvm-dis %t_exp/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=FIRST
; RUN: llvm-dis %t_exp/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=SECOND
;
diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
index bc0d7a509e1f..40c512c81f0c 100644
--- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
+++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
@@ -1106,6 +1106,15 @@ declare void @__cxa_throw(ptr, ptr, ptr)
; CHECK: declare void @_ZSt9terminatev() [[NOFREE_COLD_NORETURN:#[0-9]+]]
declare void @_ZSt9terminatev()
+; CHECK: declare void @sincos(double, ptr nocapture writeonly, ptr nocapture writeonly) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
+declare void @sincos(double, ptr, ptr)
+
+; CHECK: declare void @sincosf(float, ptr nocapture writeonly, ptr nocapture writeonly) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
+declare void @sincosf(float, ptr, ptr)
+
+; CHECK: declare void @sincosl(x86_fp80, ptr nocapture writeonly, ptr nocapture writeonly) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
+declare void @sincosl(x86_fp80, ptr, ptr)
+
; memset_pattern{4,8,16} aren't available everywhere.
; CHECK-DARWIN: declare void @memset_pattern4(ptr nocapture writeonly, ptr nocapture readonly, i64) [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]]
declare void @memset_pattern4(ptr, ptr, i64)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index f3a3b8c1dc5d..fabf8ab51764 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1161,6 +1161,85 @@ define <2 x half> @constant_rtz_pkrtz() {
ret <2 x half> %cvt
}
+define <2 x half> @fpext_const_cvt_pkrtz(half %x) {
+; CHECK-LABEL: @fpext_const_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half poison, half 0xH4200>, half [[X:%.*]], i64 0
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %x to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float 3.0)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @const_fpext_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @const_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @const_fpext_multi_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @const_fpext_multi_cvt_pkrtz(
+; CHECK-NEXT: [[CVT1:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1
+; CHECK-NEXT: [[CVT2:%.*]] = insertelement <2 x half> <half 0xH4200, half poison>, half [[Y]], i64 1
+; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x half> [[CVT1]], [[CVT2]]
+; CHECK-NEXT: ret <2 x half> [[ADD]]
+;
+ %ext = fpext half %y to float
+ %cvt1 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext)
+ %cvt2 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 3.0, float %ext)
+ %add = fadd <2 x half> %cvt1, %cvt2
+ ret <2 x half> %add
+}
+
+define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) {
+; CHECK-LABEL: @fpext_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> [[TMP1]], half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %extx = fpext half %x to float
+ %exty = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @fpext_fpext_bf16_cvt_pkrtz(bfloat %x, bfloat %y) {
+; CHECK-LABEL: @fpext_fpext_bf16_cvt_pkrtz(
+; CHECK-NEXT: [[EXTX:%.*]] = fpext bfloat [[X:%.*]] to float
+; CHECK-NEXT: [[EXTY:%.*]] = fpext bfloat [[Y:%.*]] to float
+; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[EXTX]], float [[EXTY]])
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %extx = fpext bfloat %x to float
+ %exty = fpext bfloat %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @poison_fpext_cvt_pkrtz(half %y) {
+; CHECK-LABEL: @poison_fpext_cvt_pkrtz(
+; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> poison, half [[Y:%.*]], i64 1
+; CHECK-NEXT: ret <2 x half> [[CVT]]
+;
+ %ext = fpext half %y to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %ext)
+ ret <2 x half> %cvt
+}
+
+define <2 x half> @fpext_poison_cvt_pkrtz(half %x) {
+; CHECK-LABEL: @fpext_poison_cvt_pkrtz(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0
+; CHECK-NEXT: ret <2 x half> [[TMP1]]
+;
+ %ext = fpext half %x to float
+ %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float poison)
+ ret <2 x half> %cvt
+}
+
; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pknorm.i16
; --------------------------------------------------------------------
diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll
index 4ab24ce7b925..79e6370b7242 100644
--- a/llvm/test/Transforms/InstCombine/bitcast.ll
+++ b/llvm/test/Transforms/InstCombine/bitcast.ll
@@ -879,3 +879,26 @@ define half @copysign_idiom_constant_wrong_type2(bfloat %x, i16 %mag) {
%y = bitcast i16 %res to half
ret half %y
}
+
+define i16 @bitcast_undef_to_vector() {
+; CHECK-LABEL: @bitcast_undef_to_vector(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[END:%.*]]
+; CHECK: unreachable:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: ret i16 undef
+;
+entry:
+ br label %end
+
+unreachable: ; No predecessors!
+ %0 = extractvalue { i32, i32 } zeroinitializer, 1
+ br label %end
+
+end: ; preds = %unreachable, %entry
+ %1 = phi i32 [ %0, %unreachable ], [ undef, %entry ]
+ %2 = bitcast i32 %1 to <2 x i16>
+ %3 = extractelement <2 x i16> %2, i64 0
+ ret i16 %3
+}
diff --git a/llvm/test/Transforms/InstCombine/fmod.ll b/llvm/test/Transforms/InstCombine/fmod.ll
index c021d27e95fa..10cff189b8df 100644
--- a/llvm/test/Transforms/InstCombine/fmod.ll
+++ b/llvm/test/Transforms/InstCombine/fmod.ll
@@ -9,7 +9,7 @@ define float @test_inf_const(float %f) {
; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq float [[ABS]], 0x7FF0000000000000
; CHECK-NEXT: br i1 [[ISINF]], label [[RETURN:%.*]], label [[IF_END:%.*]]
; CHECK: if.end:
-; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float [[F]], float 2.000000e+00)
+; CHECK-NEXT: [[CALL:%.*]] = frem nnan float [[F]], 2.000000e+00
; CHECK-NEXT: ret float [[CALL]]
; CHECK: return:
; CHECK-NEXT: ret float 0.000000e+00
@@ -34,7 +34,7 @@ define float @test_const_zero(float %f) {
; CHECK-NEXT: [[ISZERO:%.*]] = fcmp oeq float [[F]], 0.000000e+00
; CHECK-NEXT: br i1 [[ISZERO]], label [[RETURN:%.*]], label [[IF_END:%.*]]
; CHECK: if.end:
-; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float 2.000000e+00, float [[F]])
+; CHECK-NEXT: [[CALL:%.*]] = frem nnan float 2.000000e+00, [[F]]
; CHECK-NEXT: ret float [[CALL]]
; CHECK: return:
; CHECK-NEXT: ret float 0.000000e+00
@@ -67,11 +67,11 @@ define float @test_noinf_nozero(float nofpclass(inf) %f, float nofpclass(zero) %
; CHECK-LABEL: define float @test_noinf_nozero(
; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]])
+; CHECK-NEXT: [[CALL:%.*]] = frem nnan float [[F]], [[G]]
; CHECK-NEXT: ret float [[CALL]]
;
entry:
- %call = tail call nnan float @fmodf(float %f, float %g)
+ %call = tail call float @fmodf(float %f, float %g)
ret float %call
}
@@ -79,7 +79,7 @@ define double @test_double(double nofpclass(inf) %f, double nofpclass(zero) %g)
; CHECK-LABEL: define double @test_double(
; CHECK-SAME: double nofpclass(inf) [[F:%.*]], double nofpclass(zero) [[G:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = tail call double @fmod(double [[F]], double [[G]])
+; CHECK-NEXT: [[CALL:%.*]] = frem nnan double [[F]], [[G]]
; CHECK-NEXT: ret double [[CALL]]
;
entry:
@@ -91,7 +91,7 @@ define fp128 @test_fp128(fp128 nofpclass(inf) %f, fp128 nofpclass(zero) %g) {
; CHECK-LABEL: define fp128 @test_fp128(
; CHECK-SAME: fp128 nofpclass(inf) [[F:%.*]], fp128 nofpclass(zero) [[G:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = tail call fp128 @fmodl(fp128 [[F]], fp128 [[G]])
+; CHECK-NEXT: [[CALL:%.*]] = frem nnan fp128 [[F]], [[G]]
; CHECK-NEXT: ret fp128 [[CALL]]
;
entry:
@@ -103,11 +103,11 @@ define float @test_noinf_nozero_dazpreservesign(float nofpclass(inf) %f, float n
; CHECK-LABEL: define float @test_noinf_nozero_dazpreservesign(
; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]])
+; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float [[F]], float [[G]])
; CHECK-NEXT: ret float [[CALL]]
;
entry:
- %call = tail call nnan float @fmodf(float %f, float %g)
+ %call = tail call float @fmodf(float %f, float %g)
ret float %call
}
@@ -115,7 +115,19 @@ define float @test_noinf_nozero_dazdynamic(float nofpclass(inf) %f, float nofpcl
; CHECK-LABEL: define float @test_noinf_nozero_dazdynamic(
; CHECK-SAME: float nofpclass(inf) [[F:%.*]], float nofpclass(zero) [[G:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = tail call nnan float @fmodf(float [[F]], float [[G]])
+; CHECK-NEXT: [[CALL:%.*]] = tail call float @fmodf(float [[F]], float [[G]])
+; CHECK-NEXT: ret float [[CALL]]
+;
+entry:
+ %call = tail call float @fmodf(float %f, float %g)
+ ret float %call
+}
+
+define float @test_nnan(float %f, float %g) {
+; CHECK-LABEL: define float @test_nnan(
+; CHECK-SAME: float [[F:%.*]], float [[G:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = frem nnan float [[F]], [[G]]
; CHECK-NEXT: ret float [[CALL]]
;
entry:
diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll
index 57c3abc7b984..9f50265004f0 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll
@@ -90,3 +90,109 @@ define <2 x i1> @icmp_ugt_16x2(<2 x i32>) {
%d = icmp ugt <2 x i32> %c, <i32 1048575, i32 1048575>
ret <2 x i1> %d
}
+
+define i1 @fold_icmp_shl_nuw_c1(i32 %x) {
+; CHECK-LABEL: @fold_icmp_shl_nuw_c1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 61440
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %lshr = lshr i32 %x, 12
+ %and = and i32 %lshr, 15
+ %shl = shl nuw i32 2, %and
+ %cmp = icmp ult i32 %shl, 4
+ ret i1 %cmp
+}
+
+define i1 @fold_icmp_shl_nuw_c2(i32 %x) {
+; CHECK-LABEL: @fold_icmp_shl_nuw_c2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nuw i32 16, %x
+ %cmp = icmp ult i32 %shl, 64
+ ret i1 %cmp
+}
+
+define i1 @fold_icmp_shl_nuw_c2_non_pow2(i32 %x) {
+; CHECK-LABEL: @fold_icmp_shl_nuw_c2_non_pow2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nuw i32 48, %x
+ %cmp = icmp ult i32 %shl, 192
+ ret i1 %cmp
+}
+
+define i1 @fold_icmp_shl_nuw_c2_div_non_pow2(i32 %x) {
+; CHECK-LABEL: @fold_icmp_shl_nuw_c2_div_non_pow2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nuw i32 2, %x
+ %cmp = icmp ult i32 %shl, 60
+ ret i1 %cmp
+}
+
+define i1 @fold_icmp_shl_nuw_c3(i32 %x) {
+; CHECK-LABEL: @fold_icmp_shl_nuw_c3(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nuw i32 48, %x
+ %cmp = icmp uge i32 %shl, 144
+ ret i1 %cmp
+}
+
+define i1 @fold_icmp_shl_nuw_c2_indivisible(i32 %x) {
+; CHECK-LABEL: @fold_icmp_shl_nuw_c2_indivisible(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl nuw i32 16, %x
+ %cmp = icmp ult i32 %shl, 63
+ ret i1 %cmp
+}
+
+; Negative tests
+
+define i1 @fold_icmp_shl_c2_without_nuw(i32 %x) {
+; CHECK-LABEL: @fold_icmp_shl_c2_without_nuw(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 16, [[X:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[SHL]], 64
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 16, %x
+ %cmp = icmp ult i32 %shl, 64
+ ret i1 %cmp
+}
+
+; Make sure this trivial case is folded by InstSimplify.
+define i1 @fold_icmp_shl_nuw_c2_precondition1(i32 %x) {
+; CHECK-LABEL: @fold_icmp_shl_nuw_c2_precondition1(
+; CHECK-NEXT: ret i1 true
+;
+ %shl = shl nuw i32 0, %x
+ %cmp = icmp ult i32 %shl, 63
+ ret i1 %cmp
+}
+
+; Make sure this trivial case is folded by InstSimplify.
+define i1 @fold_icmp_shl_nuw_c2_precondition2(i32 %x) {
+; CHECK-LABEL: @fold_icmp_shl_nuw_c2_precondition2(
+; CHECK-NEXT: ret i1 false
+;
+ %shl = shl nuw i32 127, %x
+ %cmp = icmp ult i32 %shl, 63
+ ret i1 %cmp
+}
+
+; Make sure we don't crash on this case.
+define i1 @fold_icmp_shl_nuw_c2_precondition3(i32 %x) {
+; CHECK-LABEL: @fold_icmp_shl_nuw_c2_precondition3(
+; CHECK-NEXT: ret i1 false
+;
+ %shl = shl nuw i32 1, %x
+ %cmp = icmp ult i32 %shl, 1
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll
index 3b1fa3a97d9c..b33ad9a7d339 100644
--- a/llvm/test/Transforms/InstCombine/phi.ll
+++ b/llvm/test/Transforms/InstCombine/phi.ll
@@ -2742,3 +2742,54 @@ loop.latch:
call void @use(i32 %and)
br label %loop
}
+
+define void @test_dead_phi_web(i64 %index, i1 %cond) {
+; CHECK-LABEL: @test_dead_phi_web(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[BB0:%.*]]
+; CHECK: BB0:
+; CHECK-NEXT: switch i64 [[INDEX:%.*]], label [[BB4:%.*]] [
+; CHECK-NEXT: i64 0, label [[BB1:%.*]]
+; CHECK-NEXT: i64 1, label [[BB2:%.*]]
+; CHECK-NEXT: i64 2, label [[BB3:%.*]]
+; CHECK-NEXT: ]
+; CHECK: BB1:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB2]], label [[BB4]]
+; CHECK: BB2:
+; CHECK-NEXT: br i1 [[COND]], label [[BB3]], label [[BB4]]
+; CHECK: BB3:
+; CHECK-NEXT: br label [[BB4]]
+; CHECK: BB4:
+; CHECK-NEXT: br i1 [[COND]], label [[BB0]], label [[BB5:%.*]]
+; CHECK: BB5:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %BB0
+
+BB0: ; preds = %BB4, %entry
+ %a = phi float [ 0.0, %entry ], [ %x, %BB4 ]
+ switch i64 %index, label %BB4 [
+ i64 0, label %BB1
+ i64 1, label %BB2
+ i64 2, label %BB3
+ ]
+
+BB1: ; preds = %BB0
+ br i1 %cond, label %BB2, label %BB4
+
+BB2: ; preds = %BB1, %BB0
+ %b = phi float [ 2.0, %BB0 ], [ %a, %BB1 ]
+ br i1 %cond, label %BB3, label %BB4
+
+BB3: ; preds = %BB2, %BB0
+ %c = phi float [ 3.0, %BB0 ], [ %b, %BB2 ]
+ br label %BB4
+
+BB4: ; preds = %BB3, %BB2, %BB1, %BB0
+ %x = phi float [ %a, %BB0 ], [ %a, %BB1 ], [ %b, %BB2 ], [ %c, %BB3 ]
+ br i1 %cond, label %BB0, label %BB5
+
+BB5: ; preds = %BB4
+ ret void
+}
diff --git a/llvm/test/Transforms/LICM/hoist-deref-load.ll b/llvm/test/Transforms/LICM/hoist-deref-load.ll
index 149976ab1874..c498e85ddd6c 100644
--- a/llvm/test/Transforms/LICM/hoist-deref-load.ll
+++ b/llvm/test/Transforms/LICM/hoist-deref-load.ll
@@ -420,7 +420,7 @@ for.end: ; preds = %for.inc, %entry
define void @test7(ptr noalias %a, ptr %b, ptr %cptr, i32 %n) #0 {
; CHECK-LABEL: @test7(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable !0, !align !0
+; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable [[META0:![0-9]+]], !align [[META0]]
; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
@@ -492,7 +492,7 @@ for.end: ; preds = %for.inc, %entry
define void @test8(ptr noalias %a, ptr %b, ptr %cptr, i32 %n) #0 {
; CHECK-LABEL: @test8(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable_or_null !0, !align !0
+; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable_or_null [[META0]], !align [[META0]]
; CHECK-NEXT: [[NOT_NULL:%.*]] = icmp ne ptr [[C]], null
; CHECK-NEXT: br i1 [[NOT_NULL]], label [[NOT_NULL:%.*]], label [[FOR_END:%.*]]
; CHECK: not.null:
@@ -562,7 +562,7 @@ for.end: ; preds = %for.inc, %entry, %n
define void @test9(ptr noalias %a, ptr %b, ptr %cptr, i32 %n) #0 {
; CHECK-LABEL: @test9(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable_or_null !0
+; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable_or_null [[META0]]
; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
@@ -693,7 +693,7 @@ define void @test11(ptr noalias %a, ptr %b, ptr dereferenceable(8) %cptr, i32 %n
; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
-; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable !0
+; CHECK-NEXT: [[C:%.*]] = load ptr, ptr [[CPTR:%.*]], align 8, !dereferenceable [[META0]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
@@ -1164,5 +1164,78 @@ for.end: ; preds = %for.inc, %entry
ret void
}
+declare void @use(i64)
+
+define void @licm_deref_no_hoist(i1 %c1, i1 %c2, ptr align 8 dereferenceable(8) %p1) {
+; CHECK-LABEL: @licm_deref_no_hoist(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P1:%.*]], align 8, !align [[META1:![0-9]+]]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: br i1 [[C1:%.*]], label [[IF:%.*]], label [[LOOP_LATCH:%.*]]
+; CHECK: if:
+; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P2]], align 8
+; CHECK-NEXT: call void @use(i64 [[V]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: br label [[LOOP_LATCH]]
+; CHECK: loop.latch:
+; CHECK-NEXT: br i1 [[C2:%.*]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ br i1 %c1, label %if, label %loop.latch
+
+if:
+ %p2 = load ptr, ptr %p1, align 8, !dereferenceable !1, !align !1
+ %v = load i64, ptr %p2, align 8
+ call void @use(i64 %v) memory(none)
+ br label %loop.latch
+
+loop.latch:
+ br i1 %c2, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @licm_deref_hoist(i1 %c1, i1 %c2, ptr align 8 dereferenceable(8) %p1) {
+; CHECK-LABEL: @licm_deref_hoist(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P1:%.*]], align 8, !dereferenceable [[META1]], !align [[META1]]
+; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P2]], align 8
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: br i1 [[C1:%.*]], label [[IF:%.*]], label [[LOOP_LATCH:%.*]]
+; CHECK: if:
+; CHECK-NEXT: call void @use(i64 [[V]]) #[[ATTR1]]
+; CHECK-NEXT: br label [[LOOP_LATCH]]
+; CHECK: loop.latch:
+; CHECK-NEXT: br i1 [[C2:%.*]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %p2 = load ptr, ptr %p1, align 8, !dereferenceable !1, !align !1
+ br label %loop
+
+loop:
+ br i1 %c1, label %if, label %loop.latch
+
+if:
+ %v = load i64, ptr %p2, align 8
+ call void @use(i64 %v) memory(none)
+ br label %loop.latch
+
+loop.latch:
+ br i1 %c2, label %loop, label %exit
+
+exit:
+ ret void
+}
+
attributes #0 = { nounwind uwtable nofree nosync }
!0 = !{i64 4}
+!1 = !{i64 8}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index a1f6ba487e84..6ec9eb849dd5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -55,8 +55,8 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[START_2:%.*]], i64 [[N_VEC]]
-; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
@@ -70,23 +70,23 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP12]]
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP13]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 2 x i64> [[TMP13]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP14]]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, <vscale x 2 x ptr> [[TMP14]], i64 1
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: store <vscale x 2 x ptr> [[TMP15]], ptr [[TMP16]], align 8
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x ptr> [[TMP14]], i32 0
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP18]], align 1
-; CHECK-NEXT: [[TMP19:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: store <vscale x 2 x i8> [[TMP19]], ptr [[TMP18]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]]
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, <vscale x 2 x ptr> [[VECTOR_GEP]], i64 1
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: store <vscale x 2 x ptr> [[TMP16]], ptr [[TMP17]], align 8
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i32 0
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP18]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP19]], align 1
+; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: store <vscale x 2 x i8> [[TMP20]], ptr [[TMP19]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]]
-; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -148,30 +148,30 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]]
-; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP6]], 0
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP10]]
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x ptr> [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP14]], align 1
-; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: store <vscale x 2 x i8> [[TMP15]], ptr [[TMP14]], align 1
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP17]]
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
+; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 2 x i64> [[TMP13]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i8> insertelement (<vscale x 2 x i8> poison, i8 1, i64 0), <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: store <vscale x 2 x i8> [[TMP17]], ptr [[TMP16]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP6]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index 123b3cf3df14..bfb5cf8d6662 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -243,14 +243,14 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 3
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl <vscale x 2 x i64> [[TMP9]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i64> [[TMP9]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP10]]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 2 x ptr> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i64 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP11]], align 8
; CHECK-NEXT: [[TMP12]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: store <vscale x 2 x ptr> [[TMP10]], ptr [[NEXT_GEP]], align 8
+; CHECK-NEXT: store <vscale x 2 x ptr> [[VECTOR_GEP]], ptr [[NEXT_GEP]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -318,10 +318,10 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl <vscale x 2 x i64> [[TMP4]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x ptr> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 2 x ptr> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[TMP4]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x ptr> [[VECTOR_GEP]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i64 0
; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> zeroinitializer, ptr [[TMP7]], i32 2, <vscale x 2 x i1> [[TMP6]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 837e39d12359..99b8cb7ae94b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -181,15 +181,15 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP14]]
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 4 x i64> [[TMP15]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 8, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[VECTOR_GEP]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x ptr> [[TMP16]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = mul <vscale x 4 x i64> [[TMP15]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 8, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP16]]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x ptr> [[VECTOR_GEP]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 0
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP18]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
; CHECK-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i32> [[TMP19]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[TMP16]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -757,8 +757,8 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[TMP20:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP19]]
; STRIDED-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT9]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul <vscale x 4 x i64> [[TMP20]], [[DOTSPLAT10]]
-; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[VECTOR_GEP]]
+; STRIDED-NEXT: [[TMP21:%.*]] = mul <vscale x 4 x i64> [[TMP20]], [[DOTSPLAT10]]
+; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP21]]
; STRIDED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; STRIDED-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 4
; STRIDED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 1
@@ -768,11 +768,11 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT13]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP27:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; STRIDED-NEXT: [[TMP28:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT14]], [[TMP27]]
-; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = mul <vscale x 4 x i64> [[TMP28]], [[DOTSPLAT10]]
-; STRIDED-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], <vscale x 4 x i64> [[VECTOR_GEP17]]
-; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP21]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison), !alias.scope [[META15:![0-9]+]]
+; STRIDED-NEXT: [[TMP29:%.*]] = mul <vscale x 4 x i64> [[TMP28]], [[DOTSPLAT10]]
+; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], <vscale x 4 x i64> [[TMP29]]
+; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison), !alias.scope [[META15:![0-9]+]]
; STRIDED-NEXT: [[TMP30:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP30]], <vscale x 4 x ptr> [[TMP29]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]]
+; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP30]], <vscale x 4 x ptr> [[VECTOR_GEP17]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]]
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP17]]
; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP25]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
index 04b3ba52cbef..6dfe5b608199 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
@@ -31,7 +31,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-VP ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add nsw ir<[[LD2]]>, ir<[[LD1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 35037968160c..41d9c4d84202 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -142,22 +142,22 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i64 1
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP4]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT: store <4 x i8> [[TMP7]], ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x ptr> [[VECTOR_GEP]], i64 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: store <4 x i8> [[TMP6]], ptr [[TMP5]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -237,13 +237,13 @@ define void @non_constant_vector_expansion(i32 %0, ptr %call) {
; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[TMP1]], 4
; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0
; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]]
-; STRIDED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[VECTOR_GEP]]
+; STRIDED-NEXT: [[TMP4:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]]
+; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[TMP4]]
; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
; STRIDED-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 0
; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP5]]
; STRIDED-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 0
-; STRIDED-NEXT: store <4 x ptr> [[TMP4]], ptr [[TMP7]], align 4
+; STRIDED-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP7]], align 4
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]]
; STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
index e58c99dc4bc5..43eeefb77449 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
@@ -102,14 +102,14 @@ define void @integer_induction_wraps_scev_predicate_known(i32 %x, ptr %call, ptr
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]]
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[TMP3]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 30, [[DOTCAST]]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[CALL]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[TMP5]], i32 0
-; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP6]], align 4
+; CHECK-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967264
diff --git a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
index c4602e72ecbc..aae1ab032f36 100644
--- a/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
+++ b/llvm/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
@@ -918,6 +918,280 @@ bb5: ; preds = %bb3, %bb
ret i32 %i7
}
+declare void @side.effect()
+declare i8 @get.i8()
+
+define i8 @udiv_by_zero(i8 %x, i8 %i, i8 %v) {
+; CHECK-LABEL: @udiv_by_zero(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT: i8 9, label [[SW_BB2:%.*]]
+; CHECK-NEXT: i8 2, label [[RETURN:%.*]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb2:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.default:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[Y:%.*]] = phi i8 [ 9, [[SW_BB2]] ], [ [[V:%.*]], [[SW_DEFAULT]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[R:%.*]] = udiv i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+entry:
+ switch i8 %i, label %sw.default [
+ i8 0, label %sw.bb0
+ i8 2, label %sw.bb1
+ i8 9, label %sw.bb2
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+sw.bb2:
+ br label %return
+sw.default:
+ br label %return
+
+return:
+ %y = phi i8 [ 0, %sw.bb0 ], [ 2, %sw.bb1 ], [ 9, %sw.bb2 ], [ %v, %sw.default ]
+ %r = udiv i8 %x, %y
+ ret i8 %r
+}
+
+define i8 @urem_by_zero(i8 %x, i8 %i, i8 %v) {
+; CHECK-LABEL: @urem_by_zero(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT: i8 0, label [[RETURN:%.*]]
+; CHECK-NEXT: i8 2, label [[SW_BB1:%.*]]
+; CHECK-NEXT: i8 9, label [[SW_BB2:%.*]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb1:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.bb2:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.default:
+; CHECK-NEXT: unreachable
+; CHECK: return:
+; CHECK-NEXT: [[Y:%.*]] = phi i8 [ 2, [[SW_BB1]] ], [ 9, [[SW_BB2]] ], [ [[V:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[R:%.*]] = urem i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+entry:
+ switch i8 %i, label %sw.default [
+ i8 0, label %sw.bb0
+ i8 2, label %sw.bb1
+ i8 9, label %sw.bb2
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+sw.bb2:
+ br label %return
+sw.default:
+ br label %return
+
+return:
+ %y = phi i8 [ %v, %sw.bb0 ], [ 2, %sw.bb1 ], [ 9, %sw.bb2 ], [ 0, %sw.default ]
+ %r = urem i8 %x, %y
+ ret i8 %r
+}
+
+define i8 @udiv_of_zero_okay(i8 %x, i8 %i, i8 %v) {
+; CHECK-LABEL: @udiv_of_zero_okay(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT: i8 0, label [[RETURN:%.*]]
+; CHECK-NEXT: i8 2, label [[SW_BB1:%.*]]
+; CHECK-NEXT: i8 9, label [[SW_BB2:%.*]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb1:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.bb2:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.default:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: return:
+; CHECK-NEXT: [[Y:%.*]] = phi i8 [ 2, [[SW_BB1]] ], [ 9, [[SW_BB2]] ], [ [[V:%.*]], [[SW_DEFAULT]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[R:%.*]] = udiv i8 [[Y]], [[X:%.*]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+entry:
+ switch i8 %i, label %sw.default [
+ i8 0, label %sw.bb0
+ i8 2, label %sw.bb1
+ i8 9, label %sw.bb2
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+sw.bb2:
+ br label %return
+sw.default:
+ br label %return
+
+return:
+ %y = phi i8 [ 0, %sw.bb0 ], [ 2, %sw.bb1 ], [ 9, %sw.bb2 ], [ %v, %sw.default ]
+ %r = udiv i8 %y, %x
+ ret i8 %r
+}
+
+define i8 @srem_by_zero(i8 %x, i8 %i) {
+; CHECK-LABEL: @srem_by_zero(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I:%.*]], 9
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: call void @side.effect()
+; CHECK-NEXT: unreachable
+; CHECK: if.else:
+; CHECK-NEXT: [[V:%.*]] = call i8 @get.i8()
+; CHECK-NEXT: [[R:%.*]] = srem i8 [[X:%.*]], [[V]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+entry:
+ %cmp = icmp ult i8 %i, 9
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ call void @side.effect()
+ br label %if.end
+
+if.else:
+ %v = call i8 @get.i8()
+ br label %if.end
+
+if.end:
+ %y = phi i8 [ 0, %if.then ], [ %v, %if.else ]
+ %r = srem i8 %x, %y
+ ret i8 %r
+}
+
+define i8 @srem_no_overflow_okay(i8 %i) {
+; CHECK-LABEL: @srem_no_overflow_okay(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[I:%.*]], 9
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: call void @side.effect()
+; CHECK-NEXT: br label [[IF_END:%.*]]
+; CHECK: if.else:
+; CHECK-NEXT: [[V:%.*]] = call i8 @get.i8()
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: [[Y:%.*]] = phi i8 [ -1, [[IF_THEN]] ], [ [[V]], [[IF_ELSE]] ]
+; CHECK-NEXT: [[R:%.*]] = srem i8 [[Y]], -128
+; CHECK-NEXT: ret i8 [[R]]
+;
+entry:
+ %cmp = icmp ult i8 %i, 9
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ call void @side.effect()
+ br label %if.end
+
+if.else:
+ %v = call i8 @get.i8()
+ br label %if.end
+
+if.end:
+ %y = phi i8 [ -1, %if.then ], [ %v, %if.else ]
+ %r = srem i8 %y, 128
+ ret i8 %r
+}
+
+define i8 @sdiv_overflow_ub(i8 %i) {
+; CHECK-LABEL: @sdiv_overflow_ub(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT: i8 0, label [[RETURN:%.*]]
+; CHECK-NEXT: i8 2, label [[SW_BB1:%.*]]
+; CHECK-NEXT: i8 9, label [[SW_BB2:%.*]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb1:
+; CHECK-NEXT: [[V:%.*]] = call i8 @get.i8()
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.bb2:
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.default:
+; CHECK-NEXT: unreachable
+; CHECK: return:
+; CHECK-NEXT: [[Y:%.*]] = phi i8 [ [[V]], [[SW_BB1]] ], [ -1, [[SW_BB2]] ], [ 4, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[R:%.*]] = sdiv i8 -128, [[Y]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+entry:
+ switch i8 %i, label %sw.default [
+ i8 0, label %sw.bb0
+ i8 2, label %sw.bb1
+ i8 9, label %sw.bb2
+ ]
+
+sw.bb0:
+ br label %return
+sw.bb1:
+ %v = call i8 @get.i8()
+ br label %return
+sw.bb2:
+ br label %return
+sw.default:
+ unreachable
+
+return:
+ %y = phi i8 [ 4, %sw.bb0 ], [ %v, %sw.bb1 ], [ -1, %sw.bb2 ]
+ %r = sdiv i8 128, %y
+ ret i8 %r
+}
+
+define i8 @sdiv_overflow_ub_2x(i8 %i) {
+; CHECK-LABEL: @sdiv_overflow_ub_2x(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i8 [[I:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT: i8 9, label [[RETURN:%.*]]
+; CHECK-NEXT: i8 2, label [[SW_BB1:%.*]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb1:
+; CHECK-NEXT: [[V:%.*]] = call i8 @get.i8()
+; CHECK-NEXT: br label [[RETURN]]
+; CHECK: sw.default:
+; CHECK-NEXT: unreachable
+; CHECK: return:
+; CHECK-NEXT: [[Y:%.*]] = phi i8 [ [[V]], [[SW_BB1]] ], [ -1, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[R:%.*]] = sdiv i8 -128, [[Y]]
+; CHECK-NEXT: ret i8 [[R]]
+;
+entry:
+ switch i8 %i, label %sw.default [
+ i8 0, label %sw.bb0
+ i8 2, label %sw.bb1
+ i8 9, label %sw.bb2
+ ]
+
+sw.bb0:
+ br label %return
+sw.bb1:
+ %v = call i8 @get.i8()
+ br label %return
+sw.bb2:
+ br label %return
+sw.default:
+ unreachable
+
+return:
+ %y = phi i8 [ 0, %sw.bb0 ], [ %v, %sw.bb1 ], [ -1, %sw.bb2 ]
+ %r = sdiv i8 128, %y
+ ret i8 %r
+}
+
attributes #0 = { null_pointer_is_valid }
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
diff --git a/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll b/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll
new file mode 100644
index 000000000000..9e3f333018e6
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll
@@ -0,0 +1,198 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=simplifycfg < %s | FileCheck %s
+
+define i64 @align_deref_align(i1 %c, ptr %p) {
+; CHECK-LABEL: define i64 @align_deref_align(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 8), "align"(ptr [[P]], i64 8) ]
+; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[EXIT:.*]]
+; CHECK: [[IF]]:
+; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[V]], %[[IF]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: ret i64 [[RES]]
+;
+entry:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 8), "align"(ptr %p, i64 8) ]
+ br i1 %c, label %if, label %exit
+
+if:
+ %v = load i64, ptr %p, align 8
+ br label %exit
+
+exit:
+ %res = phi i64 [ %v, %if ], [ 0, %entry ]
+ ret i64 %res
+}
+
+define i64 @assume_deref_align2(i1 %c1, i32 %x, ptr %p) {
+; CHECK-LABEL: define i64 @assume_deref_align2(
+; CHECK-SAME: i1 [[C1:%.*]], i32 [[X:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 8), "align"(ptr [[P]], i64 8) ]
+; CHECK-NEXT: br i1 [[C1]], label %[[IF1:.*]], label %[[EXIT:.*]]
+; CHECK: [[IF1]]:
+; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[X]], 10
+; CHECK-NEXT: br i1 [[C2]], label %[[IF2:.*]], label %[[EXIT]]
+; CHECK: [[IF2]]:
+; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[V]], %[[IF2]] ], [ 1, %[[IF1]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: ret i64 [[RES]]
+;
+entry:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 8), "align"(ptr %p, i64 8) ]
+ br i1 %c1, label %if1, label %exit
+
+if1:
+ %c2 = icmp ugt i32 %x, 10
+ br i1 %c2, label %if2, label %exit
+
+if2:
+ %v = load i64, ptr %p, align 8
+ br label %exit
+
+exit:
+ %res = phi i64 [ %v, %if2 ], [ 1, %if1 ], [ 0, %entry ]
+ ret i64 %res
+}
+
+define i64 @assume_deref_align_not_dominating(i1 %c, ptr %p) {
+; CHECK-LABEL: define i64 @assume_deref_align_not_dominating(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[EXIT:.*]]
+; CHECK: [[IF]]:
+; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[V]], %[[IF]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 8), "align"(ptr [[P]], i64 8) ]
+; CHECK-NEXT: ret i64 [[RES]]
+;
+entry:
+ br i1 %c, label %if, label %exit
+
+if:
+ %v = load i64, ptr %p, align 8
+ br label %exit
+
+exit:
+ %res = phi i64 [ %v, %if ], [ 0, %entry ]
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 8), "align"(ptr %p, i64 8) ]
+ ret i64 %res
+}
+
+; FIXME: This is a miscompile.
+define i64 @deref_no_hoist(i1 %c, ptr align 8 dereferenceable(8) %p1) {
+; CHECK-LABEL: define i64 @deref_no_hoist(
+; CHECK-SAME: i1 [[C:%.*]], ptr align 8 dereferenceable(8) [[P1:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P1]], align 8, !align [[META0:![0-9]+]]
+; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P2]], align 8
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 [[V]], i64 0
+; CHECK-NEXT: ret i64 [[RES]]
+;
+entry:
+ br i1 %c, label %if, label %exit
+
+if:
+ %p2 = load ptr, ptr %p1, align 8, !dereferenceable !0, !align !0
+ %v = load i64, ptr %p2, align 8
+ br label %exit
+
+exit:
+ %res = phi i64 [ %v, %if ], [ 0, %entry ]
+ ret i64 %res
+}
+
+define i64 @deref_hoist(i1 %c, ptr align 8 dereferenceable(8) %p1) {
+; CHECK-LABEL: define i64 @deref_hoist(
+; CHECK-SAME: i1 [[C:%.*]], ptr align 8 dereferenceable(8) [[P1:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P1]], align 8, !dereferenceable [[META0]], !align [[META0]]
+; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P2]], align 8
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 [[V]], i64 0
+; CHECK-NEXT: ret i64 [[RES]]
+;
+entry:
+ %p2 = load ptr, ptr %p1, align 8, !dereferenceable !0, !align !0
+ br i1 %c, label %if, label %exit
+
+if:
+ %v = load i64, ptr %p2, align 8
+ br label %exit
+
+exit:
+ %res = phi i64 [ %v, %if ], [ 0, %entry ]
+ ret i64 %res
+}
+
+define i64 @deref_no_hoist2(i1 %c1, i32 %x, ptr align 8 dereferenceable(8) %p1) {
+; CHECK-LABEL: define i64 @deref_no_hoist2(
+; CHECK-SAME: i1 [[C1:%.*]], i32 [[X:%.*]], ptr align 8 dereferenceable(8) [[P1:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[C1]], label %[[IF1:.*]], label %[[EXIT:.*]]
+; CHECK: [[IF1]]:
+; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[X]], 10
+; CHECK-NEXT: br i1 [[C2]], label %[[IF2:.*]], label %[[EXIT]]
+; CHECK: [[IF2]]:
+; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P1]], align 8, !dereferenceable [[META0]], !align [[META0]]
+; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P2]], align 8
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[V]], %[[IF2]] ], [ 1, %[[IF1]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: ret i64 [[RES]]
+;
+entry:
+ br i1 %c1, label %if1, label %exit
+
+if1:
+ %c2 = icmp ugt i32 %x, 10
+ br i1 %c2, label %if2, label %exit
+
+if2:
+ %p2 = load ptr, ptr %p1, align 8, !dereferenceable !0, !align !0
+ %v = load i64, ptr %p2, align 8
+ br label %exit
+
+exit:
+ %res = phi i64 [ %v, %if2 ], [ 1, %if1 ], [ 0, %entry ]
+ ret i64 %res
+}
+
+define i64 @deref_hoist2(i1 %c1, i32 %x, ptr align 8 dereferenceable(8) %p1) {
+; CHECK-LABEL: define i64 @deref_hoist2(
+; CHECK-SAME: i1 [[C1:%.*]], i32 [[X:%.*]], ptr align 8 dereferenceable(8) [[P1:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P2:%.*]] = load ptr, ptr [[P1]], align 8, !dereferenceable [[META0]], !align [[META0]]
+; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[X]], 10
+; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P2]], align 8
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C2]], i64 [[V]], i64 1
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[C1]], i64 [[SPEC_SELECT]], i64 0
+; CHECK-NEXT: ret i64 [[RES]]
+;
+entry:
+ %p2 = load ptr, ptr %p1, align 8, !dereferenceable !0, !align !0
+ br i1 %c1, label %if1, label %exit
+
+if1:
+ %c2 = icmp ugt i32 %x, 10
+ br i1 %c2, label %if2, label %exit
+
+if2:
+ %v = load i64, ptr %p2, align 8
+ br label %exit
+
+exit:
+ %res = phi i64 [ %v, %if2 ], [ 1, %if1 ], [ 0, %entry ]
+ ret i64 %res
+}
+
+!0 = !{i64 8}
+;.
+; CHECK: [[META0]] = !{i64 8}
+;.
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s
index 0c6ccc1face9..5ffaf9138d48 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s
@@ -3298,28 +3298,28 @@ add x0, x27, 1
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1000
-# CHECK-NEXT: Total Cycles: 3004
+# CHECK-NEXT: Total Cycles: 508
# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 10
-# CHECK-NEXT: uOps Per Cycle: 0.67
-# CHECK-NEXT: IPC: 0.33
+# CHECK-NEXT: uOps Per Cycle: 3.94
+# CHECK-NEXT: IPC: 1.97
# CHECK-NEXT: Block RThroughput: 3.8
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789 0123
-# CHECK-NEXT: Index 0123456789 0123456789
+# CHECK-NEXT: 012
+# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeeeER . . . . . . ldr b1, [x27], #254
-# CHECK-NEXT: [0,1] D======eER. . . . . . add x0, x27, #1
-# CHECK-NEXT: [0,2] D======eeeeeeER. . . . . ldr h1, [x27], #254
-# CHECK-NEXT: [0,3] D============eER . . . . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D===========eeeeeeER . . . ldr s1, [x27], #254
-# CHECK-NEXT: [0,5] .D=================eER . . . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D=================eeeeeeER . . ldr d1, [x27], #254
-# CHECK-NEXT: [0,7] .D=======================eER . . add x0, x27, #1
-# CHECK-NEXT: [0,8] . D======================eeeeeeER. ldr q1, [x27], #254
-# CHECK-NEXT: [0,9] . D============================eER add x0, x27, #1
+# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27], #254
+# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
+# CHECK-NEXT: [0,2] D=eeeeeeER. . ldr h1, [x27], #254
+# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eeeeeeER . ldr s1, [x27], #254
+# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1
+# CHECK-NEXT: [0,6] .D==eeeeeeER. ldr d1, [x27], #254
+# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==eeeeeeER ldr q1, [x27], #254
+# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -3329,16 +3329,16 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254
-# CHECK-NEXT: 1. 1 7.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 7.0 0.0 0.0 ldr h1, [x27], #254
-# CHECK-NEXT: 3. 1 13.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 12.0 0.0 0.0 ldr s1, [x27], #254
-# CHECK-NEXT: 5. 1 18.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 18.0 0.0 0.0 ldr d1, [x27], #254
-# CHECK-NEXT: 7. 1 24.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 23.0 0.0 0.0 ldr q1, [x27], #254
-# CHECK-NEXT: 9. 1 29.0 0.0 0.0 add x0, x27, #1
-# CHECK-NEXT: 1 15.2 0.1 0.0 <total>
+# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ldr h1, [x27], #254
+# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldr s1, [x27], #254
+# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ldr d1, [x27], #254
+# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ldr q1, [x27], #254
+# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.7 0.1 2.0 <total>
# CHECK: [47] Code Region - G48
diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
index 47aeb0ad8fde..26efb2bc97cd 100644
--- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
+++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
@@ -34,7 +34,7 @@
#
# CHECK: << Total TLI yes SDK no: 18
# CHECK: >> Total TLI no SDK yes: 0
-# CHECK: == Total TLI yes SDK yes: 250
+# CHECK: == Total TLI yes SDK yes: 253
#
# WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*)
# WRONG_DETAIL: >> TLI no SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int)
@@ -48,14 +48,14 @@
# WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numl'
# WRONG_SUMMARY: << Total TLI yes SDK no: 19{{$}}
# WRONG_SUMMARY: >> Total TLI no SDK yes: 1{{$}}
-# WRONG_SUMMARY: == Total TLI yes SDK yes: 249
+# WRONG_SUMMARY: == Total TLI yes SDK yes: 252
#
## The -COUNT suffix doesn't care if there are too many matches, so check
## the exact count first; the two directives should add up to that.
## Yes, this means additions to TLI will fail this test, but the argument
## to -COUNT can't be an expression.
-# AVAIL: TLI knows 501 symbols, 268 available
-# AVAIL-COUNT-268: {{^}} available
+# AVAIL: TLI knows 504 symbols, 271 available
+# AVAIL-COUNT-271: {{^}} available
# AVAIL-NOT: {{^}} available
# UNAVAIL-COUNT-233: not available
# UNAVAIL-NOT: not available
@@ -862,6 +862,18 @@ DynamicSymbols:
Type: STT_FUNC
Section: .text
Binding: STB_GLOBAL
+ - Name: sincos
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ - Name: sincosf
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
+ - Name: sincosl
+ Type: STT_FUNC
+ Section: .text
+ Binding: STB_GLOBAL
- Name: sinh
Type: STT_FUNC
Section: .text
diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
index c081c44ed35d..ac8ccc03399e 100644
--- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
+++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
@@ -339,6 +339,9 @@ TEST_F(TargetLibraryInfoTest, ValidProto) {
"declare float @sinhf(float)\n"
"declare x86_fp80 @sinhl(x86_fp80)\n"
"declare x86_fp80 @sinl(x86_fp80)\n"
+ "declare void @sincos(double, ptr, ptr)\n"
+ "declare void @sincosf(float, ptr, ptr)\n"
+ "declare void @sincosl(x86_fp80, ptr, ptr)\n"
"declare i32 @snprintf(i8*, i64, i8*, ...)\n"
"declare i32 @sprintf(i8*, i8*, ...)\n"
"declare double @sqrt(double)\n"
diff --git a/llvm/unittests/SandboxIR/PassTest.cpp b/llvm/unittests/SandboxIR/PassTest.cpp
index ed226d576558..2eaf369caf08 100644
--- a/llvm/unittests/SandboxIR/PassTest.cpp
+++ b/llvm/unittests/SandboxIR/PassTest.cpp
@@ -179,7 +179,7 @@ TEST_F(PassTest, ParsePassPipeline) {
Registry.registerPass(std::make_unique<TestPass1>());
Registry.registerPass(std::make_unique<TestPass2>());
- auto &FPM =
+ [[maybe_unused]] auto &FPM =
Registry.parseAndCreatePassPipeline("test-pass1,test-pass2,test-pass1");
#ifndef NDEBUG
std::string Buff;
diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
index 5ded063ef6f7..8807716a5273 100644
--- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp
+++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
@@ -1051,6 +1051,72 @@ define void @foo() {
EXPECT_EQ(GV0->getCodeModel(), LLVMGV0->getCodeModel());
}
+TEST_F(SandboxIRTest, GlobalAlias) {
+ parseIR(C, R"IR(
+@alias0 = dso_local alias void(), ptr @foo
+@alias1 = dso_local alias void(), ptr @foo
+declare void @bar();
+define void @foo() {
+ call void @alias0()
+ call void @alias1()
+ call void @bar()
+ ret void
+}
+)IR");
+ Function &LLVMF = *M->getFunction("foo");
+ auto *LLVMBB = &*LLVMF.begin();
+ auto LLVMIt = LLVMBB->begin();
+ auto *LLVMCall0 = cast<llvm::CallInst>(&*LLVMIt++);
+ auto *LLVMAlias0 = cast<llvm::GlobalAlias>(LLVMCall0->getCalledOperand());
+ sandboxir::Context Ctx(C);
+
+ auto &F = *Ctx.createFunction(&LLVMF);
+ auto *BB = &*F.begin();
+ auto It = BB->begin();
+ auto *Call0 = cast<sandboxir::CallInst>(&*It++);
+ auto *Call1 = cast<sandboxir::CallInst>(&*It++);
+ auto *CallBar = cast<sandboxir::CallInst>(&*It++);
+ auto *CalleeBar = cast<sandboxir::Constant>(CallBar->getCalledOperand());
+ // Check classof(), creation.
+ auto *Alias0 = cast<sandboxir::GlobalAlias>(Call0->getCalledOperand());
+ auto *Alias1 = cast<sandboxir::GlobalAlias>(Call1->getCalledOperand());
+ // Check getIterator().
+ {
+ auto It0 = Alias0->getIterator();
+ auto It1 = Alias1->getIterator();
+ EXPECT_EQ(&*It0, Alias0);
+ EXPECT_EQ(&*It1, Alias1);
+ EXPECT_EQ(std::next(It0), It1);
+ EXPECT_EQ(std::prev(It1), It0);
+ EXPECT_EQ(&*std::next(It0), Alias1);
+ EXPECT_EQ(&*std::prev(It1), Alias0);
+ }
+ // Check getReverseIterator().
+ {
+ auto RevIt0 = Alias0->getReverseIterator();
+ auto RevIt1 = Alias1->getReverseIterator();
+ EXPECT_EQ(&*RevIt0, Alias0);
+ EXPECT_EQ(&*RevIt1, Alias1);
+ EXPECT_EQ(std::prev(RevIt0), RevIt1);
+ EXPECT_EQ(std::next(RevIt1), RevIt0);
+ EXPECT_EQ(&*std::prev(RevIt0), Alias1);
+ EXPECT_EQ(&*std::next(RevIt1), Alias0);
+ }
+ // Check getAliasee().
+ EXPECT_EQ(Alias0->getAliasee(), Ctx.getValue(LLVMAlias0->getAliasee()));
+ // Check setAliasee().
+ auto *OrigAliasee = Alias0->getAliasee();
+ auto *NewAliasee = CalleeBar;
+ EXPECT_NE(NewAliasee, OrigAliasee);
+ Alias0->setAliasee(NewAliasee);
+ EXPECT_EQ(Alias0->getAliasee(), NewAliasee);
+ Alias0->setAliasee(OrigAliasee);
+ EXPECT_EQ(Alias0->getAliasee(), OrigAliasee);
+ // Check getAliaseeObject().
+ EXPECT_EQ(Alias0->getAliaseeObject(),
+ Ctx.getValue(LLVMAlias0->getAliaseeObject()));
+}
+
TEST_F(SandboxIRTest, BlockAddress) {
parseIR(C, R"IR(
define void @foo(ptr %ptr) {
@@ -1509,12 +1575,16 @@ bb1:
for (sandboxir::Instruction &I : BB0) {
EXPECT_EQ(&I, Ctx.getValue(LLVMI));
LLVMI = LLVMI->getNextNode();
+ // Check getNodeParent().
+ EXPECT_EQ(I.getIterator().getNodeParent(), &BB0);
}
LLVMI = &*LLVMBB1->begin();
for (sandboxir::Instruction &I : BB1) {
EXPECT_EQ(&I, Ctx.getValue(LLVMI));
LLVMI = LLVMI->getNextNode();
}
+ // Check NodeParent() for BB::end().
+ EXPECT_EQ(BB0.end().getNodeParent(), &BB0);
// Check BB.getTerminator()
EXPECT_EQ(BB0.getTerminator(), Ctx.getValue(LLVMBB0->getTerminator()));
diff --git a/llvm/unittests/SandboxIR/TrackerTest.cpp b/llvm/unittests/SandboxIR/TrackerTest.cpp
index f46e16e626ba..da5416395ec4 100644
--- a/llvm/unittests/SandboxIR/TrackerTest.cpp
+++ b/llvm/unittests/SandboxIR/TrackerTest.cpp
@@ -1638,6 +1638,37 @@ define void @foo() {
EXPECT_EQ(GV0->isExternallyInitialized(), OrigIsExtInit);
}
+TEST_F(TrackerTest, GlobalAliasSetters) {
+ parseIR(C, R"IR(
+@alias = dso_local alias void(), ptr @foo
+declare void @bar();
+define void @foo() {
+ call void @alias()
+ call void @bar()
+ ret void
+}
+)IR");
+ Function &LLVMF = *M->getFunction("foo");
+ sandboxir::Context Ctx(C);
+
+ auto &F = *Ctx.createFunction(&LLVMF);
+ auto *BB = &*F.begin();
+ auto It = BB->begin();
+ auto *Call0 = cast<sandboxir::CallInst>(&*It++);
+ auto *Call1 = cast<sandboxir::CallInst>(&*It++);
+ auto *Callee1 = cast<sandboxir::Constant>(Call1->getCalledOperand());
+ auto *Alias = cast<sandboxir::GlobalAlias>(Call0->getCalledOperand());
+ // Check setAliasee().
+ auto *OrigAliasee = Alias->getAliasee();
+ auto *NewAliasee = Callee1;
+ EXPECT_NE(NewAliasee, OrigAliasee);
+ Ctx.save();
+ Alias->setAliasee(NewAliasee);
+ EXPECT_EQ(Alias->getAliasee(), NewAliasee);
+ Ctx.revert();
+ EXPECT_EQ(Alias->getAliasee(), OrigAliasee);
+}
+
TEST_F(TrackerTest, SetVolatile) {
parseIR(C, R"IR(
define void @foo(ptr %arg0, i8 %val) {
diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp
index 1c6dfb6260cc..99aa350adad7 100644
--- a/llvm/unittests/Support/raw_ostream_test.cpp
+++ b/llvm/unittests/Support/raw_ostream_test.cpp
@@ -177,6 +177,19 @@ TEST(raw_ostreamTest, Justify) {
EXPECT_EQ("none", printToString(center_justify("none", 1), 1));
}
+TEST(raw_ostreamTest, Indent) {
+ indent Indent(4);
+ auto Spaces = [](int N) { return std::string(N, ' '); };
+ EXPECT_EQ(Spaces(4), printToString(Indent));
+ EXPECT_EQ("", printToString(indent(0)));
+ EXPECT_EQ(Spaces(5), printToString(Indent + 1));
+ EXPECT_EQ(Spaces(3), printToString(Indent - 1));
+ Indent += 1;
+ EXPECT_EQ(Spaces(5), printToString(Indent));
+ Indent -= 1;
+ EXPECT_EQ(Spaces(4), printToString(Indent));
+}
+
TEST(raw_ostreamTest, FormatHex) {
EXPECT_EQ("0x1234", printToString(format_hex(0x1234, 6), 6));
EXPECT_EQ("0x001234", printToString(format_hex(0x1234, 8), 8));
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index f8dd1d3a60a0..5e2edcef09bf 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -83,8 +83,20 @@ TEST(getLinuxHostCPUName, AArch64) {
"CPU part : 0xd40"),
"neoverse-v1");
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
+ "CPU part : 0xd4f"),
+ "neoverse-v2");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
+ "CPU part : 0xd84"),
+ "neoverse-v3");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
"CPU part : 0xd0c"),
"neoverse-n1");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
+ "CPU part : 0xd49"),
+ "neoverse-n2");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
+ "CPU part : 0xd8e"),
+ "neoverse-n3");
// Verify that both CPU implementer and CPU part are checked:
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x40\n"
"CPU part : 0xd03"),
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
index 488c9c2344b5..10a730290608 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
@@ -9,4 +9,6 @@ set(LLVM_LINK_COMPONENTS
add_llvm_unittest(SandboxVectorizerTests
DependencyGraphTest.cpp
+ LegalityTest.cpp
+ RegionTest.cpp
)
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
new file mode 100644
index 000000000000..a136be41ae36
--- /dev/null
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
@@ -0,0 +1,56 @@
+//===- LegalityTest.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/SandboxIR/SandboxIR.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+struct LegalityTest : public testing::Test {
+ LLVMContext C;
+ std::unique_ptr<Module> M;
+
+ void parseIR(LLVMContext &C, const char *IR) {
+ SMDiagnostic Err;
+ M = parseAssemblyString(IR, Err, C);
+ if (!M)
+ Err.print("LegalityTest", errs());
+ }
+};
+
+TEST_F(LegalityTest, Legality) {
+ parseIR(C, R"IR(
+define void @foo(ptr %ptr) {
+ %gep0 = getelementptr float, ptr %ptr, i32 0
+ %gep1 = getelementptr float, ptr %ptr, i32 1
+ %ld0 = load float, ptr %gep0
+ %ld1 = load float, ptr %gep0
+ store float %ld0, ptr %gep0
+ store float %ld1, ptr %gep1
+ ret void
+}
+)IR");
+ llvm::Function *LLVMF = &*M->getFunction("foo");
+ sandboxir::Context Ctx(C);
+ auto *F = Ctx.createFunction(LLVMF);
+ auto *BB = &*F->begin();
+ auto It = BB->begin();
+ [[maybe_unused]] auto *Gep0 = cast<sandboxir::GetElementPtrInst>(&*It++);
+ [[maybe_unused]] auto *Gep1 = cast<sandboxir::GetElementPtrInst>(&*It++);
+ [[maybe_unused]] auto *Ld0 = cast<sandboxir::LoadInst>(&*It++);
+ [[maybe_unused]] auto *Ld1 = cast<sandboxir::LoadInst>(&*It++);
+ auto *St0 = cast<sandboxir::StoreInst>(&*It++);
+ auto *St1 = cast<sandboxir::StoreInst>(&*It++);
+
+ sandboxir::LegalityAnalysis Legality;
+ auto Result = Legality.canVectorize({St0, St1});
+ EXPECT_TRUE(isa<sandboxir::Widen>(Result));
+}
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp
new file mode 100644
index 000000000000..2c7390c515f1
--- /dev/null
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/RegionTest.cpp
@@ -0,0 +1,81 @@
+//===- RegionTest.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Region.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/SandboxIR/SandboxIR.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gmock/gmock-matchers.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+struct RegionTest : public testing::Test {
+ LLVMContext C;
+ std::unique_ptr<Module> M;
+
+ void parseIR(LLVMContext &C, const char *IR) {
+ SMDiagnostic Err;
+ M = parseAssemblyString(IR, Err, C);
+ if (!M)
+ Err.print("RegionTest", errs());
+ }
+};
+
+TEST_F(RegionTest, Basic) {
+ parseIR(C, R"IR(
+define i8 @foo(i8 %v0, i8 %v1) {
+ %t0 = add i8 %v0, 1
+ %t1 = add i8 %t0, %v1
+ ret i8 %t1
+}
+)IR");
+ llvm::Function *LLVMF = &*M->getFunction("foo");
+ sandboxir::Context Ctx(C);
+ auto *F = Ctx.createFunction(LLVMF);
+ auto *BB = &*F->begin();
+ auto It = BB->begin();
+ auto *T0 = cast<sandboxir::Instruction>(&*It++);
+ auto *T1 = cast<sandboxir::Instruction>(&*It++);
+ auto *Ret = cast<sandboxir::Instruction>(&*It++);
+ sandboxir::Region Rgn(Ctx);
+
+ // Check getters
+ EXPECT_EQ(&Ctx, &Rgn.getContext());
+ EXPECT_EQ(0U, Rgn.getID());
+
+ // Check add / remove / empty.
+ EXPECT_TRUE(Rgn.empty());
+ Rgn.add(T0);
+ EXPECT_FALSE(Rgn.empty());
+ Rgn.remove(T0);
+ EXPECT_TRUE(Rgn.empty());
+
+ // Check iteration.
+ Rgn.add(T0);
+ Rgn.add(T1);
+ Rgn.add(Ret);
+ // Use an ordered matcher because we're supposed to preserve the insertion
+ // order for determinism.
+ EXPECT_THAT(Rgn.insts(), testing::ElementsAre(T0, T1, Ret));
+
+ // Check contains
+ EXPECT_TRUE(Rgn.contains(T0));
+ Rgn.remove(T0);
+ EXPECT_FALSE(Rgn.contains(T0));
+
+#ifndef NDEBUG
+ // Check equality comparison. Insert in reverse order into `Other` to check
+ // that comparison is order-independent.
+ sandboxir::Region Other(Ctx);
+ Other.add(Ret);
+ EXPECT_NE(Rgn, Other);
+ Other.add(T1);
+ EXPECT_EQ(Rgn, Other);
+#endif
+}
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
index 6dd6d860273c..4926afbfc6d8 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
@@ -101,13 +101,12 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) {
raw_string_ostream OS(FullDump);
Plan->printDOT(OS);
const char *ExpectedStr = R"(digraph VPlan {
-graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = vector-trip-count\nvp\<%1\> = original trip-count\n"]
+graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = vector-trip-count\nLive-in ir\<%N\> = original trip-count\n"]
node [shape=rect, fontname=Courier, fontsize=30]
edge [fontname=Courier, fontsize=30]
compound=true
N0 [label =
"ir-bb\<entry\>:\l" +
- " EMIT vp\<%1\> = EXPAND SCEV (-1 + %N)\l" +
"No successors\l"
]
N1 [label =
@@ -134,8 +133,8 @@ compound=true
N2 -> N4 [ label="" ltail=cluster_N3]
N4 [label =
"middle.block:\l" +
- " EMIT vp\<%2\> = icmp eq vp\<%1\>, vp\<%0\>\l" +
- " EMIT branch-on-cond vp\<%2\>\l" +
+ " EMIT vp\<%1\> = icmp eq ir\<%N\>, vp\<%0\>\l" +
+ " EMIT branch-on-cond vp\<%1\>\l" +
"Successor(s): ir-bb\<for.end\>, scalar.ph\l"
]
N4 -> N5 [ label="T"]
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index e7b511904891..06e091da9054 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -67,10 +67,11 @@ protected:
assert(!verifyFunction(F) && "input function must be valid");
doAnalysis(F);
- auto Plan = VPlan::createInitialVPlan(
- SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE, true, false,
- LI->getLoopFor(LoopHeader));
- VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan);
+ Loop *L = LI->getLoopFor(LoopHeader);
+ PredicatedScalarEvolution PSE(*SE, *L);
+ auto Plan = VPlan::createInitialVPlan(IntegerType::get(*Ctx, 64), PSE, true,
+ false, L);
+ VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
HCFGBuilder.buildHierarchicalCFG();
return Plan;
}
@@ -81,10 +82,11 @@ protected:
assert(!verifyFunction(F) && "input function must be valid");
doAnalysis(F);
- auto Plan = VPlan::createInitialVPlan(
- SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE, true, false,
- LI->getLoopFor(LoopHeader));
- VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan);
+ Loop *L = LI->getLoopFor(LoopHeader);
+ PredicatedScalarEvolution PSE(*SE, *L);
+ auto Plan = VPlan::createInitialVPlan(IntegerType::get(*Ctx, 64), PSE, true,
+ false, L);
+ VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
HCFGBuilder.buildPlainCFG();
return Plan;
}
diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 2a94b77af66c..0c03440903fc 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -634,10 +634,10 @@ struct MatchableInfo {
// Compare lexicographically by operand. The matcher validates that other
// orderings wouldn't be ambiguous using \see couldMatchAmbiguouslyWith().
- for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
- if (*AsmOperands[i].Class < *RHS.AsmOperands[i].Class)
+ for (const auto &[LHSOp, RHSOp] : zip_equal(AsmOperands, RHS.AsmOperands)) {
+ if (*LHSOp.Class < *RHSOp.Class)
return true;
- if (*RHS.AsmOperands[i].Class < *AsmOperands[i].Class)
+ if (*RHSOp.Class < *LHSOp.Class)
return false;
}
@@ -692,21 +692,21 @@ struct MatchableInfo {
// Tokens and operand kinds are unambiguous (assuming a correct target
// specific parser).
- for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i)
- if (AsmOperands[i].Class->Kind != RHS.AsmOperands[i].Class->Kind ||
- AsmOperands[i].Class->Kind == ClassInfo::Token)
- if (*AsmOperands[i].Class < *RHS.AsmOperands[i].Class ||
- *RHS.AsmOperands[i].Class < *AsmOperands[i].Class)
+ for (const auto &[LHSOp, RHSOp] : zip_equal(AsmOperands, RHS.AsmOperands)) {
+ if (LHSOp.Class->Kind != RHSOp.Class->Kind ||
+ LHSOp.Class->Kind == ClassInfo::Token)
+ if (*LHSOp.Class < *RHSOp.Class || *RHSOp.Class < *LHSOp.Class)
return false;
+ }
// Otherwise, this operand could commute if all operands are equivalent, or
// there is a pair of operands that compare less than and a pair that
// compare greater than.
bool HasLT = false, HasGT = false;
- for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
- if (*AsmOperands[i].Class < *RHS.AsmOperands[i].Class)
+ for (const auto &[LHSOp, RHSOp] : zip_equal(AsmOperands, RHS.AsmOperands)) {
+ if (*LHSOp.Class < *RHSOp.Class)
HasLT = true;
- if (*RHS.AsmOperands[i].Class < *AsmOperands[i].Class)
+ if (*RHSOp.Class < *LHSOp.Class)
HasGT = true;
}
@@ -810,7 +810,7 @@ public:
/// getSubtargetFeature - Lookup or create the subtarget feature info for the
/// given operand.
- const SubtargetFeatureInfo *getSubtargetFeature(Record *Def) const {
+ const SubtargetFeatureInfo *getSubtargetFeature(const Record *Def) const {
assert(Def->isSubClassOf("Predicate") && "Invalid predicate type!");
const auto &I = SubtargetFeatures.find(Def);
return I == SubtargetFeatures.end() ? nullptr : &I->second;
@@ -833,9 +833,8 @@ LLVM_DUMP_METHOD void MatchableInfo::dump() const {
errs() << " variant: " << AsmVariantID << "\n";
- for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
- const AsmOperand &Op = AsmOperands[i];
- errs() << " op[" << i << "] = " << Op.Class->ClassName << " - ";
+ for (const auto &[Idx, Op] : enumerate(AsmOperands)) {
+ errs() << " op[" << Idx << "] = " << Op.Class->ClassName << " - ";
errs() << '\"' << Op.Token << "\"\n";
}
}
@@ -1490,21 +1489,18 @@ void AsmMatcherInfo::buildOperandMatchInfo() {
// Keep track of all operands of this instructions which belong to the
// same class.
unsigned NumOptionalOps = 0;
- for (unsigned i = 0, e = MI->AsmOperands.size(); i != e; ++i) {
- const MatchableInfo::AsmOperand &Op = MI->AsmOperands[i];
+ for (const auto &[Idx, Op] : enumerate(MI->AsmOperands)) {
if (CallCustomParserForAllOperands || !Op.Class->ParserMethod.empty()) {
unsigned &OperandMask = OpClassMask[Op.Class];
OperandMask |= maskTrailingOnes<unsigned>(NumOptionalOps + 1)
- << (i - NumOptionalOps);
+ << (Idx - NumOptionalOps);
}
if (Op.Class->IsOptional)
++NumOptionalOps;
}
// Generate operand match info for each mnemonic/operand class pair.
- for (const auto &OCM : OpClassMask) {
- unsigned OpMask = OCM.second;
- ClassInfo *CI = OCM.first;
+ for (const auto [CI, OpMask] : OpClassMask) {
OperandMatchInfo.push_back(
OperandMatchEntry::create(MI.get(), CI, OpMask));
}
@@ -1613,11 +1609,11 @@ void AsmMatcherInfo::buildInfo() {
for (auto &II : Matchables) {
// Parse the tokens after the mnemonic.
// Note: buildInstructionOperandReference may insert new AsmOperands, so
- // don't precompute the loop bound.
- for (unsigned i = 0; i != II->AsmOperands.size(); ++i) {
- MatchableInfo::AsmOperand &Op = II->AsmOperands[i];
+ // don't precompute the loop bound, i.e., cannot use range based for loop
+ // here.
+ for (size_t Idx = 0; Idx < II->AsmOperands.size(); ++Idx) {
+ MatchableInfo::AsmOperand &Op = II->AsmOperands[Idx];
StringRef Token = Op.Token;
-
// Check for singleton registers.
if (const Record *RegRecord = Op.SingletonReg) {
Op.Class = RegisterClasses[RegRecord];
@@ -1645,7 +1641,7 @@ void AsmMatcherInfo::buildInfo() {
OperandName = Token.substr(1);
if (isa<const CodeGenInstruction *>(II->DefRec))
- buildInstructionOperandReference(II.get(), OperandName, i);
+ buildInstructionOperandReference(II.get(), OperandName, Idx);
else
buildAliasOperandReference(II.get(), OperandName, Op);
}
@@ -1779,21 +1775,21 @@ void AsmMatcherInfo::buildAliasOperandReference(MatchableInfo *II,
const CodeGenInstAlias &CGA = *cast<const CodeGenInstAlias *>(II->DefRec);
// Set up the operand class.
- for (unsigned i = 0, e = CGA.ResultOperands.size(); i != e; ++i)
- if (CGA.ResultOperands[i].isRecord() &&
- CGA.ResultOperands[i].getName() == OperandName) {
+ for (const auto &[ResultOp, SubOpIdx] :
+ zip_equal(CGA.ResultOperands, CGA.ResultInstOperandIndex)) {
+ if (ResultOp.isRecord() && ResultOp.getName() == OperandName) {
// It's safe to go with the first one we find, because CodeGenInstAlias
// validates that all operands with the same name have the same record.
- Op.SubOpIdx = CGA.ResultInstOperandIndex[i].second;
+ Op.SubOpIdx = SubOpIdx.second;
// Use the match class from the Alias definition, not the
// destination instruction, as we may have an immediate that's
// being munged by the match class.
- Op.Class =
- getOperandClass(CGA.ResultOperands[i].getRecord(), Op.SubOpIdx);
+ Op.Class = getOperandClass(ResultOp.getRecord(), Op.SubOpIdx);
Op.SrcOpName = OperandName;
Op.OrigSrcOpName = OperandName;
return;
}
+ }
PrintFatalError(II->TheDef->getLoc(),
"error: unable to find operand: '" + OperandName + "'");
@@ -1862,13 +1858,11 @@ void MatchableInfo::buildAliasResultOperands(bool AliasConstraintsAreChecked) {
// populate them.
unsigned AliasOpNo = 0;
unsigned LastOpNo = CGA.ResultInstOperandIndex.size();
- for (unsigned i = 0, e = ResultInst->Operands.size(); i != e; ++i) {
- const CGIOperandList::OperandInfo *OpInfo = &ResultInst->Operands[i];
-
+ for (const auto &[Idx, OpInfo] : enumerate(ResultInst->Operands)) {
// If this is a tied operand, just copy from the previously handled operand.
int TiedOp = -1;
- if (OpInfo->MINumOperands == 1)
- TiedOp = OpInfo->getTiedRegister();
+ if (OpInfo.MINumOperands == 1)
+ TiedOp = OpInfo.getTiedRegister();
if (TiedOp != -1) {
unsigned SrcOp1 = 0;
unsigned SrcOp2 = 0;
@@ -1898,7 +1892,7 @@ void MatchableInfo::buildAliasResultOperands(bool AliasConstraintsAreChecked) {
// to benefit from the tied-operands check and just match the operand
// as a normal, but not copy the original (TiedOp) to the result
// instruction. We do this by passing -1 as the tied operand to copy.
- if (ResultInst->Operands[i].Rec->getName() !=
+ if (OpInfo.Rec->getName() !=
ResultInst->Operands[TiedOp].Rec->getName()) {
SrcOp1 = ResOperands[TiedOp].AsmOperandNum;
int SubIdx = CGA.ResultInstOperandIndex[AliasOpNo].second;
@@ -1913,9 +1907,9 @@ void MatchableInfo::buildAliasResultOperands(bool AliasConstraintsAreChecked) {
}
// Handle all the suboperands for this operand.
- const std::string &OpName = OpInfo->Name;
+ const std::string &OpName = OpInfo.Name;
for (; AliasOpNo < LastOpNo &&
- CGA.ResultInstOperandIndex[AliasOpNo].first == i;
+ CGA.ResultInstOperandIndex[AliasOpNo].first == Idx;
++AliasOpNo) {
int SubIdx = CGA.ResultInstOperandIndex[AliasOpNo].second;
@@ -1935,7 +1929,7 @@ void MatchableInfo::buildAliasResultOperands(bool AliasConstraintsAreChecked) {
// record won't be updated and it will fail later on.
OperandRefs.try_emplace(Name, SrcOperand);
- unsigned NumOperands = (SubIdx == -1 ? OpInfo->MINumOperands : 1);
+ unsigned NumOperands = (SubIdx == -1 ? OpInfo.MINumOperands : 1);
ResOperands.push_back(
ResOperand::getRenderedOp(SrcOperand, NumOperands));
break;
@@ -2110,9 +2104,7 @@ emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Compute the convert enum and the case body.
MaxRowLength = std::max(MaxRowLength, II->ResOperands.size() * 2 + 1);
- for (unsigned i = 0, e = II->ResOperands.size(); i != e; ++i) {
- const MatchableInfo::ResOperand &OpInfo = II->ResOperands[i];
-
+ for (const auto &[Idx, OpInfo] : enumerate(II->ResOperands)) {
// Generate code to populate each result operand.
switch (OpInfo.Kind) {
case MatchableInfo::ResOperand::RenderAsmOperand: {
@@ -2194,7 +2186,7 @@ emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
uint8_t TiedOp = OpInfo.TiedOperands.ResOpnd;
uint8_t SrcOp1 = OpInfo.TiedOperands.SrcOpnd1Idx + HasMnemonicFirst;
uint8_t SrcOp2 = OpInfo.TiedOperands.SrcOpnd2Idx + HasMnemonicFirst;
- assert((i > TiedOp || TiedOp == (uint8_t)-1) &&
+ assert((Idx > TiedOp || TiedOp == (uint8_t)-1) &&
"Tied operand precedes its target!");
auto TiedTupleName = std::string("Tie") + utostr(TiedOp) + '_' +
utostr(SrcOp1) + '_' + utostr(SrcOp2);
@@ -2730,26 +2722,21 @@ static void emitGetSubtargetFeatureName(AsmMatcherInfo &Info, raw_ostream &OS) {
OS << "}\n\n";
}
-static std::string GetAliasRequiredFeatures(Record *R,
+static std::string GetAliasRequiredFeatures(const Record *R,
const AsmMatcherInfo &Info) {
- std::vector<Record *> ReqFeatures = R->getValueAsListOfDefs("Predicates");
std::string Result;
- if (ReqFeatures.empty())
- return Result;
-
- for (unsigned i = 0, e = ReqFeatures.size(); i != e; ++i) {
- const SubtargetFeatureInfo *F = Info.getSubtargetFeature(ReqFeatures[i]);
-
+ bool First = true;
+ for (const Record *RF : R->getValueAsListOfDefs("Predicates")) {
+ const SubtargetFeatureInfo *F = Info.getSubtargetFeature(RF);
if (!F)
PrintFatalError(R->getLoc(),
- "Predicate '" + ReqFeatures[i]->getName() +
+ "Predicate '" + RF->getName() +
"' is not marked as an AssemblerPredicate!");
-
- if (i)
+ if (!First)
Result += " && ";
-
Result += "Features.test(" + F->getEnumBitName() + ')';
+ First = false;
}
return Result;
@@ -2778,16 +2765,14 @@ emitMnemonicAliasVariant(raw_ostream &OS, const AsmMatcherInfo &Info,
// by the string remapper.
std::vector<StringMatcher::StringPair> Cases;
for (const auto &AliasEntry : AliasesFromMnemonic) {
- const std::vector<Record *> &ToVec = AliasEntry.second;
-
// Loop through each alias and emit code that handles each case. If there
// are two instructions without predicates, emit an error. If there is one,
// emit it last.
std::string MatchCode;
int AliasWithNoPredicate = -1;
- for (unsigned i = 0, e = ToVec.size(); i != e; ++i) {
- Record *R = ToVec[i];
+ ArrayRef<const Record *> ToVec = AliasEntry.second;
+ for (const auto &[Idx, R] : enumerate(ToVec)) {
std::string FeatureMask = GetAliasRequiredFeatures(R, Info);
// If this unconditionally matches, remember it for later and diagnose
@@ -2804,7 +2789,7 @@ emitMnemonicAliasVariant(raw_ostream &OS, const AsmMatcherInfo &Info,
PrintFatalError(R->getLoc(), "this is the other MnemonicAlias.");
}
- AliasWithNoPredicate = i;
+ AliasWithNoPredicate = Idx;
continue;
}
if (R->getValueAsString("ToMnemonic") == AliasEntry.first)
@@ -2819,7 +2804,7 @@ emitMnemonicAliasVariant(raw_ostream &OS, const AsmMatcherInfo &Info,
}
if (AliasWithNoPredicate != -1) {
- Record *R = ToVec[AliasWithNoPredicate];
+ const Record *R = ToVec[AliasWithNoPredicate];
if (!MatchCode.empty())
MatchCode += "else\n ";
MatchCode += "Mnemonic = \"";
@@ -2955,8 +2940,8 @@ emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
if (II.RequiredFeatures.empty())
OS << "_None";
else
- for (unsigned i = 0, e = II.RequiredFeatures.size(); i != e; ++i)
- OS << '_' << II.RequiredFeatures[i]->TheDef->getName();
+ for (const auto &F : II.RequiredFeatures)
+ OS << '_' << F->TheDef->getName();
OS << " },\n";
}
@@ -3467,24 +3452,20 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (MI->RequiredFeatures.empty())
continue;
FeatureBitsets.emplace_back();
- for (unsigned I = 0, E = MI->RequiredFeatures.size(); I != E; ++I)
- FeatureBitsets.back().push_back(MI->RequiredFeatures[I]->TheDef);
+ for (const auto *F : MI->RequiredFeatures)
+ FeatureBitsets.back().push_back(F->TheDef);
}
- llvm::sort(FeatureBitsets, [&](const std::vector<const Record *> &A,
- const std::vector<const Record *> &B) {
- if (A.size() < B.size())
- return true;
- if (A.size() > B.size())
- return false;
- for (auto Pair : zip(A, B)) {
- if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
- return true;
- if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
- return false;
- }
- return false;
- });
+ llvm::sort(FeatureBitsets,
+ [&](ArrayRef<const Record *> A, ArrayRef<const Record *> B) {
+ if (A.size() != B.size())
+ return A.size() < B.size();
+ for (const auto [ARec, BRec] : zip_equal(A, B)) {
+ if (ARec->getName() != BRec->getName())
+ return ARec->getName() < BRec->getName();
+ }
+ return false;
+ });
FeatureBitsets.erase(llvm::unique(FeatureBitsets), FeatureBitsets.end());
OS << "// Feature bitsets.\n"
<< "enum : " << getMinimalTypeForRange(FeatureBitsets.size()) << " {\n"
@@ -3577,8 +3558,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (MI->RequiredFeatures.empty())
OS << "_None";
else
- for (unsigned i = 0, e = MI->RequiredFeatures.size(); i != e; ++i)
- OS << '_' << MI->RequiredFeatures[i]->TheDef->getName();
+ for (const auto &F : MI->RequiredFeatures)
+ OS << '_' << F->TheDef->getName();
OS << ", { ";
ListSeparator LS;
diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp
index cbf3a380b442..3f1f937e5fd8 100644
--- a/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -55,13 +55,13 @@ using namespace llvm;
namespace {
class AsmWriterEmitter {
- RecordKeeper &Records;
+ const RecordKeeper &Records;
CodeGenTarget Target;
ArrayRef<const CodeGenInstruction *> NumberedInstructions;
std::vector<AsmWriterInst> Instructions;
public:
- AsmWriterEmitter(RecordKeeper &R);
+ AsmWriterEmitter(const RecordKeeper &R);
void run(raw_ostream &o);
@@ -326,7 +326,7 @@ void AsmWriterEmitter::EmitGetMnemonic(
raw_ostream &O,
std::vector<std::vector<std::string>> &TableDrivenOperandPrinters,
unsigned &BitsLeft, unsigned &AsmStrBits) {
- Record *AsmWriter = Target.getAsmWriter();
+ const Record *AsmWriter = Target.getAsmWriter();
StringRef ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
bool PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget");
@@ -486,7 +486,7 @@ void AsmWriterEmitter::EmitPrintInstruction(
std::vector<std::vector<std::string>> &TableDrivenOperandPrinters,
unsigned &BitsLeft, unsigned &AsmStrBits) {
const unsigned OpcodeInfoBits = 64;
- Record *AsmWriter = Target.getAsmWriter();
+ const Record *AsmWriter = Target.getAsmWriter();
StringRef ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
bool PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget");
@@ -596,8 +596,8 @@ emitRegisterNameString(raw_ostream &O, StringRef AltName,
AsmName = std::string(Reg.getName());
} else {
// Make sure the register has an alternate name for this index.
- std::vector<Record *> AltNameList =
- Reg.TheDef->getValueAsListOfDefs("RegAltNameIndices");
+ std::vector<const Record *> AltNameList =
+ Reg.TheDef->getValueAsListOfConstDefs("RegAltNameIndices");
unsigned Idx = 0, e;
for (e = AltNameList.size();
Idx < e && (AltNameList[Idx]->getName() != AltName); ++Idx)
@@ -633,7 +633,7 @@ emitRegisterNameString(raw_ostream &O, StringRef AltName,
}
void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
- Record *AsmWriter = Target.getAsmWriter();
+ const Record *AsmWriter = Target.getAsmWriter();
StringRef ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
const auto &Registers = Target.getRegBank().getRegisters();
ArrayRef<const Record *> AltNameIndices = Target.getRegAltNameIndices();
@@ -829,7 +829,7 @@ struct AliasPriorityComparator {
} // end anonymous namespace
void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
- Record *AsmWriter = Target.getAsmWriter();
+ const Record *AsmWriter = Target.getAsmWriter();
O << "\n#ifdef PRINT_ALIAS_INSTR\n";
O << "#undef PRINT_ALIAS_INSTR\n\n";
@@ -843,14 +843,11 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
unsigned Variant = AsmWriter->getValueAsInt("Variant");
bool PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget");
- std::vector<Record *> AllInstAliases =
- Records.getAllDerivedDefinitions("InstAlias");
-
// Create a map from the qualified name to a list of potential matches.
typedef std::set<std::pair<CodeGenInstAlias, int>, AliasPriorityComparator>
AliasWithPriority;
std::map<std::string, AliasWithPriority> AliasMap;
- for (Record *R : AllInstAliases) {
+ for (const Record *R : Records.getAllDerivedDefinitions("InstAlias")) {
int Priority = R->getValueAsInt("EmitPriority");
if (Priority < 1)
continue; // Aliases with priority 0 are never emitted.
@@ -1011,17 +1008,17 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
MIOpNum += RO.getMINumOperands();
}
- std::vector<Record *> ReqFeatures;
+ std::vector<const Record *> ReqFeatures;
if (PassSubtarget) {
// We only consider ReqFeatures predicates if PassSubtarget
- std::vector<Record *> RF =
- CGA.TheDef->getValueAsListOfDefs("Predicates");
- copy_if(RF, std::back_inserter(ReqFeatures), [](Record *R) {
+ std::vector<const Record *> RF =
+ CGA.TheDef->getValueAsListOfConstDefs("Predicates");
+ copy_if(RF, std::back_inserter(ReqFeatures), [](const Record *R) {
return R->getValueAsBit("AssemblerMatcherPredicate");
});
}
- for (Record *const R : ReqFeatures) {
+ for (const Record *R : ReqFeatures) {
const DagInit *D = R->getValueAsDag("AssemblerCondDag");
auto *Op = dyn_cast<DefInit>(D->getOperator());
if (!Op)
@@ -1315,17 +1312,17 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
O << "#endif // PRINT_ALIAS_INSTR\n";
}
-AsmWriterEmitter::AsmWriterEmitter(RecordKeeper &R) : Records(R), Target(R) {
- Record *AsmWriter = Target.getAsmWriter();
+AsmWriterEmitter::AsmWriterEmitter(const RecordKeeper &R)
+ : Records(R), Target(R) {
+ const Record *AsmWriter = Target.getAsmWriter();
unsigned Variant = AsmWriter->getValueAsInt("Variant");
// Get the instruction numbering.
NumberedInstructions = Target.getInstructionsByEnumValue();
- for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
- const CodeGenInstruction *I = NumberedInstructions[i];
+ for (const auto &[Idx, I] : enumerate(NumberedInstructions)) {
if (!I->AsmString.empty() && I->TheDef->getName() != "PHI")
- Instructions.emplace_back(*I, i, Variant);
+ Instructions.emplace_back(*I, Idx, Variant);
}
}
diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp
index 6a3030bfc1b7..8876bb3ad31e 100644
--- a/llvm/utils/TableGen/CallingConvEmitter.cpp
+++ b/llvm/utils/TableGen/CallingConvEmitter.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
namespace {
class CallingConvEmitter {
- RecordKeeper &Records;
+ const RecordKeeper &Records;
unsigned Counter = 0u;
std::string CurrentAction;
bool SwiftAction = false;
@@ -32,13 +32,13 @@ class CallingConvEmitter {
std::map<std::string, std::set<std::string>> DelegateToMap;
public:
- explicit CallingConvEmitter(RecordKeeper &R) : Records(R) {}
+ explicit CallingConvEmitter(const RecordKeeper &R) : Records(R) {}
void run(raw_ostream &o);
private:
- void EmitCallingConv(Record *CC, raw_ostream &O);
- void EmitAction(Record *Action, unsigned Indent, raw_ostream &O);
+ void EmitCallingConv(const Record *CC, raw_ostream &O);
+ void EmitAction(const Record *Action, unsigned Indent, raw_ostream &O);
void EmitArgRegisterLists(raw_ostream &O);
};
} // End anonymous namespace
@@ -46,13 +46,14 @@ private:
void CallingConvEmitter::run(raw_ostream &O) {
emitSourceFileHeader("Calling Convention Implementation Fragment", O);
- std::vector<Record *> CCs = Records.getAllDerivedDefinitions("CallingConv");
+ ArrayRef<const Record *> CCs =
+ Records.getAllDerivedDefinitions("CallingConv");
// Emit prototypes for all of the non-custom CC's so that they can forward ref
// each other.
Records.startTimer("Emit prototypes");
O << "#ifndef GET_CC_REGISTER_LISTS\n\n";
- for (Record *CC : CCs) {
+ for (const Record *CC : CCs) {
if (!CC->getValueAsBit("Custom")) {
unsigned Pad = CC->getName().size();
if (CC->getValueAsBit("Entry")) {
@@ -71,7 +72,7 @@ void CallingConvEmitter::run(raw_ostream &O) {
// Emit each non-custom calling convention description in full.
Records.startTimer("Emit full descriptions");
- for (Record *CC : CCs) {
+ for (const Record *CC : CCs) {
if (!CC->getValueAsBit("Custom")) {
EmitCallingConv(CC, O);
}
@@ -82,8 +83,8 @@ void CallingConvEmitter::run(raw_ostream &O) {
O << "\n#endif // CC_REGISTER_LIST\n";
}
-void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) {
- ListInit *CCActions = CC->getValueAsListInit("Actions");
+void CallingConvEmitter::EmitCallingConv(const Record *CC, raw_ostream &O) {
+ const ListInit *CCActions = CC->getValueAsListInit("Actions");
Counter = 0;
CurrentAction = CC->getName().str();
@@ -106,7 +107,7 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) {
<< std::string(Pad, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n";
// Emit all of the actions, in order.
for (unsigned i = 0, e = CCActions->size(); i != e; ++i) {
- Record *Action = CCActions->getElementAsRecord(i);
+ const Record *Action = CCActions->getElementAsRecord(i);
SwiftAction =
llvm::any_of(Action->getSuperClasses(),
[](const std::pair<Record *, SMRange> &Class) {
@@ -122,7 +123,7 @@ void CallingConvEmitter::EmitCallingConv(Record *CC, raw_ostream &O) {
O << "}\n";
}
-void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent,
+void CallingConvEmitter::EmitAction(const Record *Action, unsigned Indent,
raw_ostream &O) {
std::string IndentStr = std::string(Indent, ' ');
@@ -150,14 +151,14 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent,
O << IndentStr << "}\n";
} else {
if (Action->isSubClassOf("CCDelegateTo")) {
- Record *CC = Action->getValueAsDef("CC");
+ const Record *CC = Action->getValueAsDef("CC");
O << IndentStr << "if (!" << CC->getName()
<< "(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))\n"
<< IndentStr << " return false;\n";
DelegateToMap[CurrentAction].insert(CC->getName().str());
} else if (Action->isSubClassOf("CCAssignToReg") ||
Action->isSubClassOf("CCAssignToRegAndStack")) {
- ListInit *RegList = Action->getValueAsListInit("RegList");
+ const ListInit *RegList = Action->getValueAsListInit("RegList");
if (RegList->size() == 1) {
std::string Name = getQualifiedName(RegList->getElementAsRecord(0));
O << IndentStr << "if (MCRegister Reg = State.AllocateReg(" << Name
@@ -210,8 +211,9 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent,
O << IndentStr << " return false;\n";
O << IndentStr << "}\n";
} else if (Action->isSubClassOf("CCAssignToRegWithShadow")) {
- ListInit *RegList = Action->getValueAsListInit("RegList");
- ListInit *ShadowRegList = Action->getValueAsListInit("ShadowRegList");
+ const ListInit *RegList = Action->getValueAsListInit("RegList");
+ const ListInit *ShadowRegList =
+ Action->getValueAsListInit("ShadowRegList");
if (!ShadowRegList->empty() && ShadowRegList->size() != RegList->size())
PrintFatalError(Action->getLoc(),
"Invalid length of list of shadowed registers");
@@ -278,7 +280,8 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent,
} else if (Action->isSubClassOf("CCAssignToStackWithShadow")) {
int Size = Action->getValueAsInt("Size");
int Align = Action->getValueAsInt("Align");
- ListInit *ShadowRegList = Action->getValueAsListInit("ShadowRegList");
+ const ListInit *ShadowRegList =
+ Action->getValueAsListInit("ShadowRegList");
unsigned ShadowRegListNumber = ++Counter;
@@ -297,7 +300,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent,
<< Counter << ", LocVT, LocInfo));\n";
O << IndentStr << "return false;\n";
} else if (Action->isSubClassOf("CCPromoteToType")) {
- Record *DestTy = Action->getValueAsDef("DestTy");
+ const Record *DestTy = Action->getValueAsDef("DestTy");
MVT::SimpleValueType DestVT = getValueType(DestTy);
O << IndentStr << "LocVT = " << getEnumName(DestVT) << ";\n";
if (MVT(DestVT).isFloatingPoint()) {
@@ -311,7 +314,7 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent,
<< IndentStr << " LocInfo = CCValAssign::AExt;\n";
}
} else if (Action->isSubClassOf("CCPromoteToUpperBitsInType")) {
- Record *DestTy = Action->getValueAsDef("DestTy");
+ const Record *DestTy = Action->getValueAsDef("DestTy");
MVT::SimpleValueType DestVT = getValueType(DestTy);
O << IndentStr << "LocVT = " << getEnumName(DestVT) << ";\n";
if (MVT(DestVT).isFloatingPoint()) {
@@ -327,17 +330,17 @@ void CallingConvEmitter::EmitAction(Record *Action, unsigned Indent,
<< IndentStr << " LocInfo = CCValAssign::AExtUpper;\n";
}
} else if (Action->isSubClassOf("CCBitConvertToType")) {
- Record *DestTy = Action->getValueAsDef("DestTy");
+ const Record *DestTy = Action->getValueAsDef("DestTy");
O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy))
<< ";\n";
O << IndentStr << "LocInfo = CCValAssign::BCvt;\n";
} else if (Action->isSubClassOf("CCTruncToType")) {
- Record *DestTy = Action->getValueAsDef("DestTy");
+ const Record *DestTy = Action->getValueAsDef("DestTy");
O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy))
<< ";\n";
O << IndentStr << "LocInfo = CCValAssign::Trunc;\n";
} else if (Action->isSubClassOf("CCPassIndirect")) {
- Record *DestTy = Action->getValueAsDef("DestTy");
+ const Record *DestTy = Action->getValueAsDef("DestTy");
O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy))
<< ";\n";
O << IndentStr << "LocInfo = CCValAssign::Indirect;\n";
diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp
index 69ca9a84953a..4d356774f98d 100644
--- a/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -47,28 +47,30 @@ using namespace llvm;
namespace {
class CodeEmitterGen {
- RecordKeeper &Records;
+ const RecordKeeper &Records;
public:
- CodeEmitterGen(RecordKeeper &R) : Records(R) {}
+ CodeEmitterGen(const RecordKeeper &R) : Records(R) {}
void run(raw_ostream &o);
private:
- int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
+ int getVariableBit(const std::string &VarName, const BitsInit *BI, int bit);
std::pair<std::string, std::string>
- getInstructionCases(Record *R, CodeGenTarget &Target);
- void addInstructionCasesForEncoding(Record *R, const Record *EncodingDef,
- CodeGenTarget &Target, std::string &Case,
+ getInstructionCases(const Record *R, const CodeGenTarget &Target);
+ void addInstructionCasesForEncoding(const Record *R,
+ const Record *EncodingDef,
+ const CodeGenTarget &Target,
+ std::string &Case,
std::string &BitOffsetCase);
- bool addCodeToMergeInOperand(Record *R, BitsInit *BI,
+ bool addCodeToMergeInOperand(const Record *R, const BitsInit *BI,
const std::string &VarName, std::string &Case,
std::string &BitOffsetCase,
- CodeGenTarget &Target);
+ const CodeGenTarget &Target);
void emitInstructionBaseValues(
raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
- CodeGenTarget &Target, unsigned HwMode = DefaultMode);
+ const CodeGenTarget &Target, unsigned HwMode = DefaultMode);
void
emitCaseMap(raw_ostream &o,
const std::map<std::string, std::vector<std::string>> &CaseMap);
@@ -78,13 +80,13 @@ private:
// If the VarBitInit at position 'bit' matches the specified variable then
// return the variable bit position. Otherwise return -1.
-int CodeEmitterGen::getVariableBit(const std::string &VarName, BitsInit *BI,
- int bit) {
- if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
- if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
+int CodeEmitterGen::getVariableBit(const std::string &VarName,
+ const BitsInit *BI, int bit) {
+ if (const VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
+ if (const VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
if (VI->getName() == VarName)
return VBI->getBitNum();
- } else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
+ } else if (const VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
if (VI->getName() == VarName)
return 0;
}
@@ -93,11 +95,12 @@ int CodeEmitterGen::getVariableBit(const std::string &VarName, BitsInit *BI,
}
// Returns true if it succeeds, false if an error.
-bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
+bool CodeEmitterGen::addCodeToMergeInOperand(const Record *R,
+ const BitsInit *BI,
const std::string &VarName,
std::string &Case,
std::string &BitOffsetCase,
- CodeGenTarget &Target) {
+ const CodeGenTarget &Target) {
CodeGenInstruction &CGI = Target.getInstruction(R);
// Determine if VarName actually contributes to the Inst encoding.
@@ -278,7 +281,8 @@ bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
}
std::pair<std::string, std::string>
-CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) {
+CodeEmitterGen::getInstructionCases(const Record *R,
+ const CodeGenTarget &Target) {
std::string Case, BitOffsetCase;
auto append = [&](const std::string &S) {
@@ -287,7 +291,7 @@ CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) {
};
if (const RecordVal *RV = R->getValue("EncodingInfos")) {
- if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+ if (const auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
const CodeGenHwModes &HWM = Target.getHwModes();
EncodingInfoByHwMode EBM(DI->getDef(), HWM);
@@ -342,7 +346,7 @@ CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) {
}
void CodeEmitterGen::addInstructionCasesForEncoding(
- Record *R, const Record *EncodingDef, CodeGenTarget &Target,
+ const Record *R, const Record *EncodingDef, const CodeGenTarget &Target,
std::string &Case, std::string &BitOffsetCase) {
BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
@@ -394,7 +398,7 @@ static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
void CodeEmitterGen::emitInstructionBaseValues(
raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
- CodeGenTarget &Target, unsigned HwMode) {
+ const CodeGenTarget &Target, unsigned HwMode) {
const CodeGenHwModes &HWM = Target.getHwModes();
if (HwMode == DefaultMode)
o << " static const uint64_t InstBits[] = {\n";
@@ -430,12 +434,12 @@ void CodeEmitterGen::emitInstructionBaseValues(
}
}
}
- BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
+ const BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
// Start by filling in fixed values.
APInt Value(BitWidth, 0);
for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
- if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue())
+ if (const auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue())
Value.setBit(i);
}
o << " ";
@@ -448,15 +452,13 @@ void CodeEmitterGen::emitInstructionBaseValues(
void CodeEmitterGen::emitCaseMap(
raw_ostream &o,
const std::map<std::string, std::vector<std::string>> &CaseMap) {
- std::map<std::string, std::vector<std::string>>::const_iterator IE, EE;
- for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
- const std::string &Case = IE->first;
- const std::vector<std::string> &InstList = IE->second;
-
- for (int i = 0, N = InstList.size(); i < N; i++) {
- if (i)
+ for (const auto &[Case, InstList] : CaseMap) {
+ bool First = true;
+ for (const auto &Inst : InstList) {
+ if (!First)
o << "\n";
- o << " case " << InstList[i] << ":";
+ o << " case " << Inst << ":";
+ First = false;
}
o << " {\n";
o << Case;
@@ -469,7 +471,6 @@ void CodeEmitterGen::run(raw_ostream &o) {
emitSourceFileHeader("Machine Code Emitter", o);
CodeGenTarget Target(Records);
- std::vector<Record *> Insts = Records.getAllDerivedDefinitions("Instruction");
// For little-endian instruction bit encodings, reverse the bit order
Target.reverseBitsForLittleEndianEncoding();
@@ -491,17 +492,17 @@ void CodeEmitterGen::run(raw_ostream &o) {
continue;
if (const RecordVal *RV = R->getValue("EncodingInfos")) {
- if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
+ if (const DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
EncodingInfoByHwMode EBM(DI->getDef(), HWM);
- for (auto &KV : EBM) {
- BitsInit *BI = KV.second->getValueAsBitsInit("Inst");
+ for (const auto &[Key, Value] : EBM) {
+ const BitsInit *BI = Value->getValueAsBitsInit("Inst");
BitWidth = std::max(BitWidth, BI->getNumBits());
- HwModes.insert(KV.first);
+ HwModes.insert(Key);
}
continue;
}
}
- BitsInit *BI = R->getValueAsBitsInit("Inst");
+ const BitsInit *BI = R->getValueAsBitsInit("Inst");
BitWidth = std::max(BitWidth, BI->getNumBits());
}
UseAPInt = BitWidth > 64;
@@ -540,7 +541,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
std::map<std::string, std::vector<std::string>> BitOffsetCaseMap;
// Construct all cases statement for each opcode
- for (Record *R : Insts) {
+ for (const Record *R : Records.getAllDerivedDefinitions("Instruction")) {
if (R->getValueAsString("Namespace") == "TargetOpcode" ||
R->getValueAsBit("isPseudo"))
continue;
diff --git a/llvm/utils/TableGen/CodeGenMapTable.cpp b/llvm/utils/TableGen/CodeGenMapTable.cpp
index 46aad7f7f8bd..b599ee149bcd 100644
--- a/llvm/utils/TableGen/CodeGenMapTable.cpp
+++ b/llvm/utils/TableGen/CodeGenMapTable.cpp
@@ -77,12 +77,15 @@
#include "Common/CodeGenInstruction.h"
#include "Common/CodeGenTarget.h"
+#include "TableGenBackends.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-using namespace llvm;
-typedef std::map<std::string, std::vector<Record *>> InstrRelMapTy;
-typedef std::map<std::vector<Init *>, std::vector<Record *>> RowInstrMapTy;
+using namespace llvm;
+typedef std::map<std::string, std::vector<const Record *>> InstrRelMapTy;
+typedef std::map<std::vector<const Init *>, std::vector<const Record *>>
+ RowInstrMapTy;
namespace {
@@ -92,13 +95,13 @@ class InstrMap {
private:
std::string Name;
std::string FilterClass;
- ListInit *RowFields;
- ListInit *ColFields;
- ListInit *KeyCol;
- std::vector<ListInit *> ValueCols;
+ const ListInit *RowFields;
+ const ListInit *ColFields;
+ const ListInit *KeyCol;
+ std::vector<const ListInit *> ValueCols;
public:
- InstrMap(Record *MapRec) {
+ InstrMap(const Record *MapRec) {
Name = std::string(MapRec->getName());
// FilterClass - It's used to reduce the search space only to the
@@ -133,8 +136,8 @@ public:
MapRec->getName() + "' has empty " +
"`ValueCols' field!");
- for (Init *I : ColValList->getValues()) {
- auto *ColI = cast<ListInit>(I);
+ for (const Init *I : ColValList->getValues()) {
+ const auto *ColI = cast<ListInit>(I);
// Make sure that all the sub-lists in 'ValueCols' have same number of
// elements as the fields in 'ColFields'.
@@ -148,18 +151,12 @@ public:
}
const std::string &getName() const { return Name; }
-
const std::string &getFilterClass() const { return FilterClass; }
-
- ListInit *getRowFields() const { return RowFields; }
-
- ListInit *getColFields() const { return ColFields; }
-
- ListInit *getKeyCol() const { return KeyCol; }
-
- const std::vector<ListInit *> &getValueCols() const { return ValueCols; }
+ const ListInit *getRowFields() const { return RowFields; }
+ const ListInit *getColFields() const { return ColFields; }
+ const ListInit *getKeyCol() const { return KeyCol; }
+ ArrayRef<const ListInit *> getValueCols() const { return ValueCols; }
};
-} // end anonymous namespace
//===----------------------------------------------------------------------===//
// class MapTableEmitter : It builds the instruction relation maps using
@@ -167,7 +164,6 @@ public:
// relationship maps as tables into XXXGenInstrInfo.inc file along with the
// functions to query them.
-namespace {
class MapTableEmitter {
private:
// std::string TargetName;
@@ -177,18 +173,19 @@ private:
// InstrDefs - list of instructions filtered using FilterClass defined
// in InstrMapDesc.
- std::vector<Record *> InstrDefs;
+ ArrayRef<const Record *> InstrDefs;
// RowInstrMap - maps RowFields values to the instructions. It's keyed by the
// values of the row fields and contains vector of records as values.
RowInstrMapTy RowInstrMap;
// KeyInstrVec - list of key instructions.
- std::vector<Record *> KeyInstrVec;
- DenseMap<const Record *, std::vector<Record *>> MapTable;
+ std::vector<const Record *> KeyInstrVec;
+ DenseMap<const Record *, std::vector<const Record *>> MapTable;
public:
- MapTableEmitter(CodeGenTarget &Target, RecordKeeper &Records, Record *IMRec)
+ MapTableEmitter(const CodeGenTarget &Target, const RecordKeeper &Records,
+ const Record *IMRec)
: Target(Target), InstrMapDesc(IMRec) {
const std::string &FilterClass = InstrMapDesc.getFilterClass();
InstrDefs = Records.getAllDerivedDefinitions(FilterClass);
@@ -198,11 +195,12 @@ public:
// Returns true if an instruction is a key instruction, i.e., its ColFields
// have same values as KeyCol.
- bool isKeyColInstr(Record *CurInstr);
+ bool isKeyColInstr(const Record *CurInstr);
// Find column instruction corresponding to a key instruction based on the
// constraints for that column.
- Record *getInstrForColumn(Record *KeyInstr, ListInit *CurValueCol);
+ const Record *getInstrForColumn(const Record *KeyInstr,
+ const ListInit *CurValueCol);
// Find column instructions for each key instruction based
// on ValueCols and store them into MapTable.
@@ -226,17 +224,17 @@ public:
//===----------------------------------------------------------------------===//
void MapTableEmitter::buildRowInstrMap() {
- for (Record *CurInstr : InstrDefs) {
- std::vector<Init *> KeyValue;
- ListInit *RowFields = InstrMapDesc.getRowFields();
- for (Init *RowField : RowFields->getValues()) {
- RecordVal *RecVal = CurInstr->getValue(RowField);
+ for (const Record *CurInstr : InstrDefs) {
+ std::vector<const Init *> KeyValue;
+ const ListInit *RowFields = InstrMapDesc.getRowFields();
+ for (const Init *RowField : RowFields->getValues()) {
+ const RecordVal *RecVal = CurInstr->getValue(RowField);
if (RecVal == nullptr)
PrintFatalError(CurInstr->getLoc(),
"No value " + RowField->getAsString() + " found in \"" +
CurInstr->getName() +
"\" instruction description.");
- Init *CurInstrVal = RecVal->getValue();
+ const Init *CurInstrVal = RecVal->getValue();
KeyValue.push_back(CurInstrVal);
}
@@ -254,18 +252,19 @@ void MapTableEmitter::buildRowInstrMap() {
// Return true if an instruction is a KeyCol instruction.
//===----------------------------------------------------------------------===//
-bool MapTableEmitter::isKeyColInstr(Record *CurInstr) {
- ListInit *ColFields = InstrMapDesc.getColFields();
- ListInit *KeyCol = InstrMapDesc.getKeyCol();
+bool MapTableEmitter::isKeyColInstr(const Record *CurInstr) {
+ const ListInit *ColFields = InstrMapDesc.getColFields();
+ const ListInit *KeyCol = InstrMapDesc.getKeyCol();
// Check if the instruction is a KeyCol instruction.
bool MatchFound = true;
for (unsigned j = 0, endCF = ColFields->size(); (j < endCF) && MatchFound;
j++) {
- RecordVal *ColFieldName = CurInstr->getValue(ColFields->getElement(j));
+ const RecordVal *ColFieldName =
+ CurInstr->getValue(ColFields->getElement(j));
std::string CurInstrVal = ColFieldName->getValue()->getAsUnquotedString();
std::string KeyColValue = KeyCol->getElement(j)->getAsUnquotedString();
- MatchFound = (CurInstrVal == KeyColValue);
+ MatchFound = CurInstrVal == KeyColValue;
}
return MatchFound;
}
@@ -278,15 +277,15 @@ bool MapTableEmitter::isKeyColInstr(Record *CurInstr) {
void MapTableEmitter::buildMapTable() {
// Find column instructions for a given key based on the ColField
// constraints.
- const std::vector<ListInit *> &ValueCols = InstrMapDesc.getValueCols();
+ ArrayRef<const ListInit *> ValueCols = InstrMapDesc.getValueCols();
unsigned NumOfCols = ValueCols.size();
- for (Record *CurKeyInstr : KeyInstrVec) {
- std::vector<Record *> ColInstrVec(NumOfCols);
+ for (const Record *CurKeyInstr : KeyInstrVec) {
+ std::vector<const Record *> ColInstrVec(NumOfCols);
// Find the column instruction based on the constraints for the column.
for (unsigned ColIdx = 0; ColIdx < NumOfCols; ColIdx++) {
- ListInit *CurValueCol = ValueCols[ColIdx];
- Record *ColInstr = getInstrForColumn(CurKeyInstr, CurValueCol);
+ const ListInit *CurValueCol = ValueCols[ColIdx];
+ const Record *ColInstr = getInstrForColumn(CurKeyInstr, CurValueCol);
ColInstrVec[ColIdx] = ColInstr;
}
MapTable[CurKeyInstr] = ColInstrVec;
@@ -297,14 +296,14 @@ void MapTableEmitter::buildMapTable() {
// Find column instruction based on the constraints for that column.
//===----------------------------------------------------------------------===//
-Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr,
- ListInit *CurValueCol) {
- ListInit *RowFields = InstrMapDesc.getRowFields();
- std::vector<Init *> KeyValue;
+const Record *MapTableEmitter::getInstrForColumn(const Record *KeyInstr,
+ const ListInit *CurValueCol) {
+ const ListInit *RowFields = InstrMapDesc.getRowFields();
+ std::vector<const Init *> KeyValue;
// Construct KeyValue using KeyInstr's values for RowFields.
- for (Init *RowField : RowFields->getValues()) {
- Init *KeyInstrVal = KeyInstr->getValue(RowField)->getValue();
+ for (const Init *RowField : RowFields->getValues()) {
+ const Init *KeyInstrVal = KeyInstr->getValue(RowField)->getValue();
KeyValue.push_back(KeyInstrVal);
}
@@ -312,20 +311,20 @@ Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr,
// in RowInstrMap. We search through these instructions to find a match
// for the current column, i.e., the instruction which has the same values
// as CurValueCol for all the fields in ColFields.
- const std::vector<Record *> &RelatedInstrVec = RowInstrMap[KeyValue];
+ ArrayRef<const Record *> RelatedInstrVec = RowInstrMap[KeyValue];
- ListInit *ColFields = InstrMapDesc.getColFields();
- Record *MatchInstr = nullptr;
+ const ListInit *ColFields = InstrMapDesc.getColFields();
+ const Record *MatchInstr = nullptr;
- for (llvm::Record *CurInstr : RelatedInstrVec) {
+ for (const Record *CurInstr : RelatedInstrVec) {
bool MatchFound = true;
for (unsigned j = 0, endCF = ColFields->size(); (j < endCF) && MatchFound;
j++) {
- Init *ColFieldJ = ColFields->getElement(j);
- Init *CurInstrInit = CurInstr->getValue(ColFieldJ)->getValue();
+ const Init *ColFieldJ = ColFields->getElement(j);
+ const Init *CurInstrInit = CurInstr->getValue(ColFieldJ)->getValue();
std::string CurInstrVal = CurInstrInit->getAsUnquotedString();
- Init *ColFieldJVallue = CurValueCol->getElement(j);
- MatchFound = (CurInstrVal == ColFieldJVallue->getAsUnquotedString());
+ const Init *ColFieldJVallue = CurValueCol->getElement(j);
+ MatchFound = CurInstrVal == ColFieldJVallue->getAsUnquotedString();
}
if (MatchFound) {
@@ -333,7 +332,7 @@ Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr,
// Already had a match
// Error if multiple matches are found for a column.
std::string KeyValueStr;
- for (Init *Value : KeyValue) {
+ for (const Init *Value : KeyValue) {
if (!KeyValueStr.empty())
KeyValueStr += ", ";
KeyValueStr += Value->getAsString();
@@ -357,11 +356,10 @@ Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr,
//===----------------------------------------------------------------------===//
unsigned MapTableEmitter::emitBinSearchTable(raw_ostream &OS) {
-
ArrayRef<const CodeGenInstruction *> NumberedInstructions =
Target.getInstructionsByEnumValue();
StringRef Namespace = Target.getInstNamespace();
- const std::vector<ListInit *> &ValueCols = InstrMapDesc.getValueCols();
+ ArrayRef<const ListInit *> ValueCols = InstrMapDesc.getValueCols();
unsigned NumCol = ValueCols.size();
unsigned TotalNumInstr = NumberedInstructions.size();
unsigned TableSize = 0;
@@ -372,7 +370,7 @@ unsigned MapTableEmitter::emitBinSearchTable(raw_ostream &OS) {
OS << "Table[][" << NumCol + 1 << "] = {\n";
for (unsigned i = 0; i < TotalNumInstr; i++) {
const Record *CurInstr = NumberedInstructions[i]->TheDef;
- std::vector<Record *> ColInstrs = MapTable[CurInstr];
+ ArrayRef<const Record *> ColInstrs = MapTable[CurInstr];
std::string OutStr;
unsigned RelExists = 0;
if (!ColInstrs.empty()) {
@@ -434,8 +432,8 @@ void MapTableEmitter::emitBinSearch(raw_ostream &OS, unsigned TableSize) {
void MapTableEmitter::emitMapFuncBody(raw_ostream &OS, unsigned TableSize) {
- ListInit *ColFields = InstrMapDesc.getColFields();
- const std::vector<ListInit *> &ValueCols = InstrMapDesc.getValueCols();
+ const ListInit *ColFields = InstrMapDesc.getColFields();
+ ArrayRef<const ListInit *> ValueCols = InstrMapDesc.getValueCols();
// Emit binary search algorithm to locate instructions in the
// relation table. If found, return opcode value from the appropriate column
@@ -444,7 +442,7 @@ void MapTableEmitter::emitMapFuncBody(raw_ostream &OS, unsigned TableSize) {
if (ValueCols.size() > 1) {
for (unsigned i = 0, e = ValueCols.size(); i < e; i++) {
- ListInit *ColumnI = ValueCols[i];
+ const ListInit *ColumnI = ValueCols[i];
OS << " if (";
for (unsigned j = 0, ColSize = ColumnI->size(); j < ColSize; ++j) {
std::string ColName = ColFields->getElement(j)->getAsUnquotedString();
@@ -476,8 +474,8 @@ void MapTableEmitter::emitTablesWithFunc(raw_ostream &OS) {
// since first column is used for the key instructions), then we also need
// to pass another input to indicate the column to be selected.
- ListInit *ColFields = InstrMapDesc.getColFields();
- const std::vector<ListInit *> &ValueCols = InstrMapDesc.getValueCols();
+ const ListInit *ColFields = InstrMapDesc.getColFields();
+ ArrayRef<const ListInit *> ValueCols = InstrMapDesc.getValueCols();
OS << "// " << InstrMapDesc.getName() << "\nLLVM_READONLY\n";
OS << "int " << InstrMapDesc.getName() << "(uint16_t Opcode";
if (ValueCols.size() > 1) {
@@ -499,23 +497,20 @@ void MapTableEmitter::emitTablesWithFunc(raw_ostream &OS) {
// Emit enums for the column fields across all the instruction maps.
//===----------------------------------------------------------------------===//
-static void emitEnums(raw_ostream &OS, RecordKeeper &Records) {
-
- std::vector<Record *> InstrMapVec;
- InstrMapVec = Records.getAllDerivedDefinitions("InstrMapping");
- std::map<std::string, std::vector<Init *>> ColFieldValueMap;
+static void emitEnums(raw_ostream &OS, const RecordKeeper &Records) {
+ std::map<std::string, std::vector<const Init *>> ColFieldValueMap;
// Iterate over all InstrMapping records and create a map between column
// fields and their possible values across all records.
- for (Record *CurMap : InstrMapVec) {
- ListInit *ColFields;
- ColFields = CurMap->getValueAsListInit("ColFields");
- ListInit *List = CurMap->getValueAsListInit("ValueCols");
- std::vector<ListInit *> ValueCols;
+ for (const Record *CurMap :
+ Records.getAllDerivedDefinitions("InstrMapping")) {
+ const ListInit *ColFields = CurMap->getValueAsListInit("ColFields");
+ const ListInit *List = CurMap->getValueAsListInit("ValueCols");
+ std::vector<const ListInit *> ValueCols;
unsigned ListSize = List->size();
for (unsigned j = 0; j < ListSize; j++) {
- auto *ListJ = cast<ListInit>(List->getElement(j));
+ const auto *ListJ = cast<ListInit>(List->getElement(j));
if (ListJ->size() != ColFields->size())
PrintFatalError("Record `" + CurMap->getName() +
@@ -533,12 +528,10 @@ static void emitEnums(raw_ostream &OS, RecordKeeper &Records) {
}
}
- for (auto &Entry : ColFieldValueMap) {
- std::vector<Init *> FieldValues = Entry.second;
-
+ for (auto &[EnumName, FieldValues] : ColFieldValueMap) {
// Delete duplicate entries from ColFieldValueMap
for (unsigned i = 0; i < FieldValues.size() - 1; i++) {
- Init *CurVal = FieldValues[i];
+ const Init *CurVal = FieldValues[i];
for (unsigned j = i + 1; j < FieldValues.size(); j++) {
if (CurVal == FieldValues[j]) {
FieldValues.erase(FieldValues.begin() + j);
@@ -548,28 +541,24 @@ static void emitEnums(raw_ostream &OS, RecordKeeper &Records) {
}
// Emit enumerated values for the column fields.
- OS << "enum " << Entry.first << " {\n";
- for (unsigned i = 0, endFV = FieldValues.size(); i < endFV; i++) {
- OS << "\t" << Entry.first << "_" << FieldValues[i]->getAsUnquotedString();
- if (i != endFV - 1)
- OS << ",\n";
- else
- OS << "\n};\n\n";
- }
+ OS << "enum " << EnumName << " {\n";
+ ListSeparator LS(",\n");
+ for (const Init *Field : FieldValues)
+ OS << LS << "\t" << EnumName << "_" << Field->getAsUnquotedString();
+ OS << "\n};\n\n";
}
}
-namespace llvm {
//===----------------------------------------------------------------------===//
// Parse 'InstrMapping' records and use the information to form relationship
// between instructions. These relations are emitted as a tables along with the
// functions to query them.
//===----------------------------------------------------------------------===//
-void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) {
+void llvm::EmitMapTable(const RecordKeeper &Records, raw_ostream &OS) {
CodeGenTarget Target(Records);
StringRef NameSpace = Target.getInstNamespace();
- std::vector<Record *> InstrMapVec;
- InstrMapVec = Records.getAllDerivedDefinitions("InstrMapping");
+ ArrayRef<const Record *> InstrMapVec =
+ Records.getAllDerivedDefinitions("InstrMapping");
if (InstrMapVec.empty())
return;
@@ -585,7 +574,7 @@ void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) {
// Iterate over all instruction mapping records and construct relationship
// maps based on the information specified there.
//
- for (Record *CurMap : InstrMapVec) {
+ for (const Record *CurMap : InstrMapVec) {
MapTableEmitter IMap(Target, Records, CurMap);
// Build RowInstrMap to group instructions based on their values for
@@ -604,5 +593,3 @@ void EmitMapTable(RecordKeeper &Records, raw_ostream &OS) {
OS << "} // end namespace llvm\n";
OS << "#endif // GET_INSTRMAP_INFO\n\n";
}
-
-} // namespace llvm
diff --git a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp
index 33d1da2f848b..de2cb67b1f1d 100644
--- a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp
@@ -271,8 +271,7 @@ void CodeGenSchedModels::checkSTIPredicates() const {
// Disallow InstructionEquivalenceClasses with an empty instruction list.
for (const Record *R :
Records.getAllDerivedDefinitions("InstructionEquivalenceClass")) {
- RecVec Opcodes = R->getValueAsListOfDefs("Opcodes");
- if (Opcodes.empty()) {
+ if (R->getValueAsListOfDefs("Opcodes").empty()) {
PrintFatalError(R->getLoc(), "Invalid InstructionEquivalenceClass "
"defined with an empty opcode list.");
}
@@ -311,13 +310,13 @@ static void processSTIPredicate(STIPredicateFunction &Fn,
// definitions. Each unique opcode will be associated with an OpcodeInfo
// object.
for (const Record *Def : Fn.getDefinitions()) {
- RecVec Classes = Def->getValueAsListOfDefs("Classes");
+ ConstRecVec Classes = Def->getValueAsListOfConstDefs("Classes");
for (const Record *EC : Classes) {
const Record *Pred = EC->getValueAsDef("Predicate");
if (!Predicate2Index.contains(Pred))
Predicate2Index[Pred] = NumUniquePredicates++;
- RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes");
+ ConstRecVec Opcodes = EC->getValueAsListOfConstDefs("Opcodes");
for (const Record *Opcode : Opcodes) {
if (!Opcode2Index.contains(Opcode)) {
Opcode2Index[Opcode] = OpcodeMappings.size();
@@ -342,14 +341,14 @@ static void processSTIPredicate(STIPredicateFunction &Fn,
// Construct a OpcodeInfo object for every unique opcode declared by an
// InstructionEquivalenceClass definition.
for (const Record *Def : Fn.getDefinitions()) {
- RecVec Classes = Def->getValueAsListOfDefs("Classes");
+ ConstRecVec Classes = Def->getValueAsListOfConstDefs("Classes");
const Record *SchedModel = Def->getValueAsDef("SchedModel");
unsigned ProcIndex = ProcModelMap.find(SchedModel)->second;
APInt ProcMask(ProcModelMap.size(), 0);
ProcMask.setBit(ProcIndex);
for (const Record *EC : Classes) {
- RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes");
+ ConstRecVec Opcodes = EC->getValueAsListOfConstDefs("Opcodes");
std::vector<int64_t> OpIndices =
EC->getValueAsListOfInts("OperandIndices");
@@ -579,8 +578,7 @@ static void scanSchedRW(const Record *RWDef, ConstRecVec &RWDefs,
// Visit each variant (guarded by a different predicate).
for (const Record *Variant : RWDef->getValueAsListOfDefs("Variants")) {
// Visit each RW in the sequence selected by the current variant.
- RecVec Selected = Variant->getValueAsListOfDefs("Selected");
- for (Record *SelDef : Selected)
+ for (const Record *SelDef : Variant->getValueAsListOfDefs("Selected"))
scanSchedRW(SelDef, RWDefs, RWSet);
}
}
@@ -601,8 +599,7 @@ void CodeGenSchedModels::collectSchedRW() {
const Record *SchedDef = Inst->TheDef;
if (SchedDef->isValueUnset("SchedRW"))
continue;
- RecVec RWs = SchedDef->getValueAsListOfDefs("SchedRW");
- for (Record *RW : RWs) {
+ for (const Record *RW : SchedDef->getValueAsListOfDefs("SchedRW")) {
if (RW->isSubClassOf("SchedWrite"))
scanSchedRW(RW, SWDefs, RWSet);
else {
@@ -614,8 +611,8 @@ void CodeGenSchedModels::collectSchedRW() {
// Find all ReadWrites referenced by InstRW.
for (const Record *InstRWDef : Records.getAllDerivedDefinitions("InstRW")) {
// For all OperandReadWrites.
- RecVec RWDefs = InstRWDef->getValueAsListOfDefs("OperandReadWrites");
- for (Record *RWDef : RWDefs) {
+ for (const Record *RWDef :
+ InstRWDef->getValueAsListOfDefs("OperandReadWrites")) {
if (RWDef->isSubClassOf("SchedWrite"))
scanSchedRW(RWDef, SWDefs, RWSet);
else {
@@ -627,8 +624,8 @@ void CodeGenSchedModels::collectSchedRW() {
// Find all ReadWrites referenced by ItinRW.
for (const Record *ItinRWDef : Records.getAllDerivedDefinitions("ItinRW")) {
// For all OperandReadWrites.
- RecVec RWDefs = ItinRWDef->getValueAsListOfDefs("OperandReadWrites");
- for (Record *RWDef : RWDefs) {
+ for (const Record *RWDef :
+ ItinRWDef->getValueAsListOfDefs("OperandReadWrites")) {
if (RWDef->isSubClassOf("SchedWrite"))
scanSchedRW(RWDef, SWDefs, RWSet);
else {
@@ -672,7 +669,7 @@ void CodeGenSchedModels::collectSchedRW() {
for (CodeGenSchedRW &CGRW : SchedWrites) {
if (!CGRW.IsSequence)
continue;
- findRWs(CGRW.TheDef->getValueAsListOfDefs("Writes"), CGRW.Sequence,
+ findRWs(CGRW.TheDef->getValueAsListOfConstDefs("Writes"), CGRW.Sequence,
/*IsRead=*/false);
}
// Initialize Aliases vectors.
@@ -726,9 +723,10 @@ unsigned CodeGenSchedModels::getSchedRWIdx(const Record *Def,
return I == RWVec.end() ? 0 : std::distance(RWVec.begin(), I);
}
-static void splitSchedReadWrites(const RecVec &RWDefs, RecVec &WriteDefs,
- RecVec &ReadDefs) {
- for (Record *RWDef : RWDefs) {
+static void splitSchedReadWrites(const ConstRecVec &RWDefs,
+ ConstRecVec &WriteDefs,
+ ConstRecVec &ReadDefs) {
+ for (const Record *RWDef : RWDefs) {
if (RWDef->isSubClassOf("SchedWrite"))
WriteDefs.push_back(RWDef);
else {
@@ -739,19 +737,19 @@ static void splitSchedReadWrites(const RecVec &RWDefs, RecVec &WriteDefs,
}
// Split the SchedReadWrites defs and call findRWs for each list.
-void CodeGenSchedModels::findRWs(const RecVec &RWDefs, IdxVec &Writes,
+void CodeGenSchedModels::findRWs(const ConstRecVec &RWDefs, IdxVec &Writes,
IdxVec &Reads) const {
- RecVec WriteDefs;
- RecVec ReadDefs;
+ ConstRecVec WriteDefs;
+ ConstRecVec ReadDefs;
splitSchedReadWrites(RWDefs, WriteDefs, ReadDefs);
findRWs(WriteDefs, Writes, false);
findRWs(ReadDefs, Reads, true);
}
// Call getSchedRWIdx for all elements in a sequence of SchedRW defs.
-void CodeGenSchedModels::findRWs(const RecVec &RWDefs, IdxVec &RWs,
+void CodeGenSchedModels::findRWs(const ConstRecVec &RWDefs, IdxVec &RWs,
bool IsRead) const {
- for (Record *RWDef : RWDefs) {
+ for (const Record *RWDef : RWDefs) {
unsigned Idx = getSchedRWIdx(RWDef, IsRead);
assert(Idx && "failed to collect SchedReadWrite");
RWs.push_back(Idx);
@@ -859,7 +857,8 @@ void CodeGenSchedModels::collectSchedClasses() {
Record *ItinDef = Inst->TheDef->getValueAsDef("Itinerary");
IdxVec Writes, Reads;
if (!Inst->TheDef->isValueUnset("SchedRW"))
- findRWs(Inst->TheDef->getValueAsListOfDefs("SchedRW"), Writes, Reads);
+ findRWs(Inst->TheDef->getValueAsListOfConstDefs("SchedRW"), Writes,
+ Reads);
// ProcIdx == 0 indicates the class applies to all processors.
unsigned SCIdx = addSchedClass(ItinDef, Writes, Reads, /*ProcIndices*/ {0});
@@ -921,7 +920,8 @@ void CodeGenSchedModels::collectSchedClasses() {
<< InstName);
IdxVec Writes;
IdxVec Reads;
- findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
+ findRWs(RWDef->getValueAsListOfConstDefs("OperandReadWrites"), Writes,
+ Reads);
LLVM_DEBUG({
for (unsigned WIdx : Writes)
dbgs() << " " << SchedWrites[WIdx].Name;
@@ -950,10 +950,9 @@ CodeGenSchedModels::getSchedClassIdx(const CodeGenInstruction &Inst) const {
}
std::string
-CodeGenSchedModels::createSchedClassName(Record *ItinClassDef,
+CodeGenSchedModels::createSchedClassName(const Record *ItinClassDef,
ArrayRef<unsigned> OperWrites,
ArrayRef<unsigned> OperReads) {
-
std::string Name;
if (ItinClassDef && ItinClassDef->getName() != "NoItinerary")
Name = std::string(ItinClassDef->getName());
@@ -983,7 +982,7 @@ CodeGenSchedModels::createSchedClassName(const ConstRecVec &InstDefs) {
/// Add an inferred sched class from an itinerary class and per-operand list of
/// SchedWrites and SchedReads. ProcIndices contains the set of IDs of
/// processors that may utilize this class.
-unsigned CodeGenSchedModels::addSchedClass(Record *ItinClassDef,
+unsigned CodeGenSchedModels::addSchedClass(const Record *ItinClassDef,
ArrayRef<unsigned> OperWrites,
ArrayRef<unsigned> OperReads,
ArrayRef<unsigned> ProcIndices) {
@@ -1131,7 +1130,8 @@ void CodeGenSchedModels::collectProcItins() {
if (!ProcModel.hasItineraries())
continue;
- RecVec ItinRecords = ProcModel.ItinsDef->getValueAsListOfDefs("IID");
+ ConstRecVec ItinRecords =
+ ProcModel.ItinsDef->getValueAsListOfConstDefs("IID");
assert(!ItinRecords.empty() && "ProcModel.hasItineraries is incorrect");
// Populate ItinDefList with Itinerary records.
@@ -1139,7 +1139,7 @@ void CodeGenSchedModels::collectProcItins() {
// Insert each itinerary data record in the correct position within
// the processor model's ItinDefList.
- for (Record *ItinData : ItinRecords) {
+ for (const Record *ItinData : ItinRecords) {
const Record *ItinDef = ItinData->getValueAsDef("TheClass");
bool FoundClass = false;
@@ -1217,14 +1217,15 @@ void CodeGenSchedModels::inferSchedClasses() {
}
/// Infer classes from per-processor itinerary resources.
-void CodeGenSchedModels::inferFromItinClass(Record *ItinClassDef,
+void CodeGenSchedModels::inferFromItinClass(const Record *ItinClassDef,
unsigned FromClassIdx) {
for (unsigned PIdx = 0, PEnd = ProcModels.size(); PIdx != PEnd; ++PIdx) {
const CodeGenProcModel &PM = ProcModels[PIdx];
// For all ItinRW entries.
bool HasMatch = false;
for (const Record *Rec : PM.ItinRWDefs) {
- RecVec Matched = Rec->getValueAsListOfDefs("MatchedItinClasses");
+ ConstRecVec Matched =
+ Rec->getValueAsListOfConstDefs("MatchedItinClasses");
if (!llvm::is_contained(Matched, ItinClassDef))
continue;
if (HasMatch)
@@ -1233,7 +1234,8 @@ void CodeGenSchedModels::inferFromItinClass(Record *ItinClassDef,
" in ItinResources for " + PM.ModelName);
HasMatch = true;
IdxVec Writes, Reads;
- findRWs(Rec->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
+ findRWs(Rec->getValueAsListOfConstDefs("OperandReadWrites"), Writes,
+ Reads);
inferFromRW(Writes, Reads, FromClassIdx, PIdx);
}
}
@@ -1255,7 +1257,7 @@ void CodeGenSchedModels::inferFromInstRWs(unsigned SCIdx) {
if (II == IE)
continue;
IdxVec Writes, Reads;
- findRWs(Rec->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
+ findRWs(Rec->getValueAsListOfConstDefs("OperandReadWrites"), Writes, Reads);
unsigned PIdx = getProcModel(Rec->getValueAsDef("SchedModel")).Index;
inferFromRW(Writes, Reads, SCIdx, PIdx); // May mutate SchedClasses.
SchedClasses[SCIdx].InstRWProcIndices.insert(PIdx);
@@ -1348,7 +1350,8 @@ bool PredTransitions::mutuallyExclusive(Record *PredDef,
const CodeGenSchedRW &SchedRW = SchedModels.getSchedRW(PC.RWIdx, PC.IsRead);
assert(SchedRW.HasVariants && "PredCheck must refer to a SchedVariant");
- RecVec Variants = SchedRW.TheDef->getValueAsListOfDefs("Variants");
+ ConstRecVec Variants =
+ SchedRW.TheDef->getValueAsListOfConstDefs("Variants");
if (any_of(Variants, [PredDef](const Record *R) {
return R->getValueAsDef("Predicate") == PredDef;
})) {
@@ -1414,8 +1417,8 @@ void PredTransitions::getIntersectingVariants(
}
if (VarProcIdx == 0 || VarProcIdx == TransVec[TransIdx].ProcIndex) {
// Push each variant. Assign TransVecIdx later.
- const RecVec VarDefs = SchedRW.TheDef->getValueAsListOfDefs("Variants");
- for (Record *VarDef : VarDefs)
+ for (const Record *VarDef :
+ SchedRW.TheDef->getValueAsListOfDefs("Variants"))
Variants.emplace_back(VarDef, SchedRW.Index, VarProcIdx, 0);
if (VarProcIdx == 0)
GenericRW = true;
@@ -1446,8 +1449,7 @@ void PredTransitions::getIntersectingVariants(
SchedModels.getSchedRW((*AI)->getValueAsDef("AliasRW"));
if (AliasRW.HasVariants) {
- const RecVec VarDefs = AliasRW.TheDef->getValueAsListOfDefs("Variants");
- for (Record *VD : VarDefs)
+ for (const Record *VD : AliasRW.TheDef->getValueAsListOfDefs("Variants"))
Variants.emplace_back(VD, AliasRW.Index, AliasProcIdx, 0);
}
if (AliasRW.IsSequence)
@@ -1495,7 +1497,8 @@ void PredTransitions::pushVariant(const TransVariant &VInfo, bool IsRead) {
if (VInfo.VarOrSeqDef->isSubClassOf("SchedVar")) {
Record *PredDef = VInfo.VarOrSeqDef->getValueAsDef("Predicate");
Trans.PredTerm.emplace_back(IsRead, VInfo.RWIdx, PredDef);
- RecVec SelectedDefs = VInfo.VarOrSeqDef->getValueAsListOfDefs("Selected");
+ ConstRecVec SelectedDefs =
+ VInfo.VarOrSeqDef->getValueAsListOfConstDefs("Selected");
SchedModels.findRWs(SelectedDefs, SelectedRWs, IsRead);
} else {
assert(VInfo.VarOrSeqDef->isSubClassOf("WriteSequence") &&
@@ -1761,12 +1764,14 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
// Check if any processor resource group contains all resource records in
// SubUnits.
-bool CodeGenSchedModels::hasSuperGroup(RecVec &SubUnits, CodeGenProcModel &PM) {
+bool CodeGenSchedModels::hasSuperGroup(ConstRecVec &SubUnits,
+ CodeGenProcModel &PM) {
for (const Record *ProcResourceDef : PM.ProcResourceDefs) {
if (!ProcResourceDef->isSubClassOf("ProcResGroup"))
continue;
- RecVec SuperUnits = ProcResourceDef->getValueAsListOfDefs("Resources");
- RecIter RI = SubUnits.begin(), RE = SubUnits.end();
+ ConstRecVec SuperUnits =
+ ProcResourceDef->getValueAsListOfConstDefs("Resources");
+ auto RI = SubUnits.begin(), RE = SubUnits.end();
for (; RI != RE; ++RI) {
if (!is_contained(SuperUnits, *RI)) {
break;
@@ -1783,13 +1788,13 @@ void CodeGenSchedModels::verifyProcResourceGroups(CodeGenProcModel &PM) {
for (unsigned i = 0, e = PM.ProcResourceDefs.size(); i < e; ++i) {
if (!PM.ProcResourceDefs[i]->isSubClassOf("ProcResGroup"))
continue;
- RecVec CheckUnits =
- PM.ProcResourceDefs[i]->getValueAsListOfDefs("Resources");
+ ConstRecVec CheckUnits =
+ PM.ProcResourceDefs[i]->getValueAsListOfConstDefs("Resources");
for (unsigned j = i + 1; j < e; ++j) {
if (!PM.ProcResourceDefs[j]->isSubClassOf("ProcResGroup"))
continue;
- RecVec OtherUnits =
- PM.ProcResourceDefs[j]->getValueAsListOfDefs("Resources");
+ ConstRecVec OtherUnits =
+ PM.ProcResourceDefs[j]->getValueAsListOfConstDefs("Resources");
if (std::find_first_of(CheckUnits.begin(), CheckUnits.end(),
OtherUnits.begin(),
OtherUnits.end()) != CheckUnits.end()) {
@@ -1828,7 +1833,7 @@ void CodeGenSchedModels::collectRegisterFiles() {
"Invalid RegisterFile with zero physical registers");
}
- RecVec RegisterClasses = RF->getValueAsListOfDefs("RegClasses");
+ ConstRecVec RegisterClasses = RF->getValueAsListOfConstDefs("RegClasses");
std::vector<int64_t> RegisterCosts = RF->getValueAsListOfInts("RegCosts");
ListInit *MoveElimInfo = RF->getValueAsListInit("AllowMoveElimination");
for (unsigned I = 0, E = RegisterClasses.size(); I < E; ++I) {
@@ -1866,7 +1871,8 @@ void CodeGenSchedModels::collectProcResources() {
Record *RWModelDef = RW->getValueAsDef("SchedModel");
unsigned PIdx = getProcModel(RWModelDef).Index;
IdxVec Writes, Reads;
- findRWs(RW->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
+ findRWs(RW->getValueAsListOfConstDefs("OperandReadWrites"), Writes,
+ Reads);
collectRWResources(Writes, Reads, PIdx);
}
@@ -2004,13 +2010,13 @@ void CodeGenSchedModels::checkCompleteness() {
}
// Collect itinerary class resources for each processor.
-void CodeGenSchedModels::collectItinProcResources(Record *ItinClassDef) {
+void CodeGenSchedModels::collectItinProcResources(const Record *ItinClassDef) {
for (unsigned PIdx = 0, PEnd = ProcModels.size(); PIdx != PEnd; ++PIdx) {
const CodeGenProcModel &PM = ProcModels[PIdx];
// For all ItinRW entries.
bool HasMatch = false;
for (const Record *R : PM.ItinRWDefs) {
- RecVec Matched = R->getValueAsListOfDefs("MatchedItinClasses");
+ ConstRecVec Matched = R->getValueAsListOfConstDefs("MatchedItinClasses");
if (!llvm::is_contained(Matched, ItinClassDef))
continue;
if (HasMatch)
@@ -2019,7 +2025,7 @@ void CodeGenSchedModels::collectItinProcResources(Record *ItinClassDef) {
" in ItinResources for " + PM.ModelName);
HasMatch = true;
IdxVec Writes, Reads;
- findRWs(R->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
+ findRWs(R->getValueAsListOfConstDefs("OperandReadWrites"), Writes, Reads);
collectRWResources(Writes, Reads, PIdx);
}
}
@@ -2139,8 +2145,8 @@ void CodeGenSchedModels::addWriteRes(const Record *ProcWriteResDef,
WRDefs.push_back(ProcWriteResDef);
// Visit ProcResourceKinds referenced by the newly discovered WriteRes.
- RecVec ProcResDefs = ProcWriteResDef->getValueAsListOfDefs("ProcResources");
- for (const Record *ProcResDef : ProcResDefs) {
+ for (const Record *ProcResDef :
+ ProcWriteResDef->getValueAsListOfDefs("ProcResources")) {
addProcResource(ProcResDef, ProcModels[PIdx], ProcWriteResDef->getLoc());
}
}
@@ -2186,7 +2192,7 @@ bool CodeGenProcModel::isUnsupported(const CodeGenInstruction &Inst) const {
bool CodeGenProcModel::hasReadOfWrite(const Record *WriteDef) const {
for (auto &RADef : ReadAdvanceDefs) {
- RecVec ValidWrites = RADef->getValueAsListOfDefs("ValidWrites");
+ ConstRecVec ValidWrites = RADef->getValueAsListOfConstDefs("ValidWrites");
if (is_contained(ValidWrites, WriteDef))
return true;
}
diff --git a/llvm/utils/TableGen/Common/CodeGenSchedule.h b/llvm/utils/TableGen/Common/CodeGenSchedule.h
index ff85ac396859..f43c856b274c 100644
--- a/llvm/utils/TableGen/Common/CodeGenSchedule.h
+++ b/llvm/utils/TableGen/Common/CodeGenSchedule.h
@@ -33,8 +33,6 @@ class CodeGenTarget;
class CodeGenSchedModels;
class CodeGenInstruction;
-using RecVec = std::vector<Record *>;
-using RecIter = RecVec::const_iterator;
using ConstRecVec = std::vector<const Record *>;
using ConstRecIter = ConstRecVec::const_iterator;
@@ -132,7 +130,7 @@ struct CodeGenSchedTransition {
struct CodeGenSchedClass {
unsigned Index;
std::string Name;
- Record *ItinClassDef;
+ const Record *ItinClassDef;
IdxVec Writes;
IdxVec Reads;
@@ -149,10 +147,11 @@ struct CodeGenSchedClass {
// InstRWs processor indices. Filled in inferFromInstRWs
DenseSet<unsigned> InstRWProcIndices;
- CodeGenSchedClass(unsigned Index, std::string Name, Record *ItinClassDef)
+ CodeGenSchedClass(unsigned Index, std::string Name,
+ const Record *ItinClassDef)
: Index(Index), Name(std::move(Name)), ItinClassDef(ItinClassDef) {}
- bool isKeyEqual(Record *IC, ArrayRef<unsigned> W,
+ bool isKeyEqual(const Record *IC, ArrayRef<unsigned> W,
ArrayRef<unsigned> R) const {
return ItinClassDef == IC && ArrayRef(Writes) == W && ArrayRef(Reads) == R;
}
@@ -172,10 +171,10 @@ struct CodeGenSchedClass {
/// registers used by the register renamer. Register costs are defined at
/// register class granularity.
struct CodeGenRegisterCost {
- Record *RCDef;
+ const Record *RCDef;
unsigned Cost;
bool AllowMoveElimination;
- CodeGenRegisterCost(Record *RC, unsigned RegisterCost,
+ CodeGenRegisterCost(const Record *RC, unsigned RegisterCost,
bool AllowMoveElim = false)
: RCDef(RC), Cost(RegisterCost), AllowMoveElimination(AllowMoveElim) {}
CodeGenRegisterCost(const CodeGenRegisterCost &) = default;
@@ -231,7 +230,7 @@ struct CodeGenProcModel {
// Array of InstrItinData records indexed by a CodeGenSchedClass index.
// This list is empty if the Processor has no value for Itineraries.
// Initialized by collectProcItins().
- RecVec ItinDefList;
+ ConstRecVec ItinDefList;
// Map itinerary classes to per-operand resources.
// This list is empty if no ItinRW refers to this Processor.
@@ -239,7 +238,7 @@ struct CodeGenProcModel {
// List of unsupported feature.
// This list is empty if the Processor has no UnsupportedFeatures.
- RecVec UnsupportedFeaturesDefs;
+ ConstRecVec UnsupportedFeaturesDefs;
// All read/write resources associated with this processor.
ConstRecVec WriteResDefs;
@@ -530,13 +529,13 @@ public:
const CodeGenSchedRW &getSchedRW(unsigned Idx, bool IsRead) const {
return IsRead ? getSchedRead(Idx) : getSchedWrite(Idx);
}
- CodeGenSchedRW &getSchedRW(Record *Def) {
+ CodeGenSchedRW &getSchedRW(const Record *Def) {
bool IsRead = Def->isSubClassOf("SchedRead");
unsigned Idx = getSchedRWIdx(Def, IsRead);
return const_cast<CodeGenSchedRW &>(IsRead ? getSchedRead(Idx)
: getSchedWrite(Idx));
}
- const CodeGenSchedRW &getSchedRW(Record *Def) const {
+ const CodeGenSchedRW &getSchedRW(const Record *Def) const {
return const_cast<CodeGenSchedModels &>(*this).getSchedRW(Def);
}
@@ -564,13 +563,13 @@ public:
unsigned numInstrSchedClasses() const { return NumInstrSchedClasses; }
- void findRWs(const RecVec &RWDefs, IdxVec &Writes, IdxVec &Reads) const;
- void findRWs(const RecVec &RWDefs, IdxVec &RWs, bool IsRead) const;
+ void findRWs(const ConstRecVec &RWDefs, IdxVec &Writes, IdxVec &Reads) const;
+ void findRWs(const ConstRecVec &RWDefs, IdxVec &RWs, bool IsRead) const;
void expandRWSequence(unsigned RWIdx, IdxVec &RWSeq, bool IsRead) const;
void expandRWSeqForProc(unsigned RWIdx, IdxVec &RWSeq, bool IsRead,
const CodeGenProcModel &ProcModel) const;
- unsigned addSchedClass(Record *ItinDef, ArrayRef<unsigned> OperWrites,
+ unsigned addSchedClass(const Record *ItinDef, ArrayRef<unsigned> OperWrites,
ArrayRef<unsigned> OperReads,
ArrayRef<unsigned> ProcIndices);
@@ -603,7 +602,7 @@ private:
void collectOptionalProcessorInfo();
- std::string createSchedClassName(Record *ItinClassDef,
+ std::string createSchedClassName(const Record *ItinClassDef,
ArrayRef<unsigned> OperWrites,
ArrayRef<unsigned> OperReads);
std::string createSchedClassName(const ConstRecVec &InstDefs);
@@ -629,15 +628,15 @@ private:
void inferFromRW(ArrayRef<unsigned> OperWrites, ArrayRef<unsigned> OperReads,
unsigned FromClassIdx, ArrayRef<unsigned> ProcIndices);
- void inferFromItinClass(Record *ItinClassDef, unsigned FromClassIdx);
+ void inferFromItinClass(const Record *ItinClassDef, unsigned FromClassIdx);
void inferFromInstRWs(unsigned SCIdx);
- bool hasSuperGroup(RecVec &SubUnits, CodeGenProcModel &PM);
+ bool hasSuperGroup(ConstRecVec &SubUnits, CodeGenProcModel &PM);
void verifyProcResourceGroups(CodeGenProcModel &PM);
void collectProcResources();
- void collectItinProcResources(Record *ItinClassDef);
+ void collectItinProcResources(const Record *ItinClassDef);
void collectRWResources(unsigned RWIdx, bool IsRead,
ArrayRef<unsigned> ProcIndices);
diff --git a/llvm/utils/TableGen/CompressInstEmitter.cpp b/llvm/utils/TableGen/CompressInstEmitter.cpp
index 06801e93f4f4..f46ceb517422 100644
--- a/llvm/utils/TableGen/CompressInstEmitter.cpp
+++ b/llvm/utils/TableGen/CompressInstEmitter.cpp
@@ -92,7 +92,7 @@ class CompressInstEmitter {
// Integer immediate value.
int64_t Imm;
// Physical register.
- Record *Reg;
+ const Record *Reg;
} Data;
// Tied operand index within the instruction.
int TiedOpIdx = -1;
@@ -103,7 +103,7 @@ class CompressInstEmitter {
// The destination instruction to transform to.
CodeGenInstruction Dest;
// Required target features to enable pattern.
- std::vector<Record *> PatReqFeatures;
+ std::vector<const Record *> PatReqFeatures;
// Maps operands in the Source Instruction to
// the corresponding Dest instruction operand.
IndexedMap<OpData> SourceOperandMap;
@@ -112,38 +112,40 @@ class CompressInstEmitter {
IndexedMap<OpData> DestOperandMap;
bool IsCompressOnly;
- CompressPat(CodeGenInstruction &S, CodeGenInstruction &D,
- std::vector<Record *> RF, IndexedMap<OpData> &SourceMap,
+ CompressPat(const CodeGenInstruction &S, const CodeGenInstruction &D,
+ std::vector<const Record *> RF, IndexedMap<OpData> &SourceMap,
IndexedMap<OpData> &DestMap, bool IsCompressOnly)
: Source(S), Dest(D), PatReqFeatures(RF), SourceOperandMap(SourceMap),
DestOperandMap(DestMap), IsCompressOnly(IsCompressOnly) {}
};
enum EmitterType { Compress, Uncompress, CheckCompress };
- RecordKeeper &Records;
- CodeGenTarget Target;
+ const RecordKeeper &Records;
+ const CodeGenTarget Target;
SmallVector<CompressPat, 4> CompressPatterns;
- void addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Inst,
+ void addDagOperandMapping(const Record *Rec, const DagInit *Dag,
+ const CodeGenInstruction &Inst,
IndexedMap<OpData> &OperandMap, bool IsSourceInst);
- void evaluateCompressPat(Record *Compress);
+ void evaluateCompressPat(const Record *Compress);
void emitCompressInstEmitter(raw_ostream &OS, EmitterType EType);
bool validateTypes(const Record *DagOpType, const Record *InstOpType,
bool IsSourceInst);
bool validateRegister(const Record *Reg, const Record *RegClass);
- void createDagOperandMapping(Record *Rec, StringMap<unsigned> &SourceOperands,
+ void createDagOperandMapping(const Record *Rec,
+ StringMap<unsigned> &SourceOperands,
StringMap<unsigned> &DestOperands,
- DagInit *SourceDag, DagInit *DestDag,
+ const DagInit *SourceDag, const DagInit *DestDag,
IndexedMap<OpData> &SourceOperandMap);
- void createInstOperandMapping(Record *Rec, DagInit *SourceDag,
- DagInit *DestDag,
+ void createInstOperandMapping(const Record *Rec, const DagInit *SourceDag,
+ const DagInit *DestDag,
IndexedMap<OpData> &SourceOperandMap,
IndexedMap<OpData> &DestOperandMap,
StringMap<unsigned> &SourceOperands,
- CodeGenInstruction &DestInst);
+ const CodeGenInstruction &DestInst);
public:
- CompressInstEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+ CompressInstEmitter(const RecordKeeper &R) : Records(R), Target(R) {}
void run(raw_ostream &OS);
};
@@ -156,7 +158,7 @@ bool CompressInstEmitter::validateRegister(const Record *Reg,
"RegClass record should be a RegisterClass");
const CodeGenRegisterClass &RC = Target.getRegisterClass(RegClass);
const CodeGenRegister *R = Target.getRegisterByName(Reg->getName().lower());
- assert((R != nullptr) && "Register not defined!!");
+ assert(R != nullptr && "Register not defined!!");
return RC.contains(R);
}
@@ -199,8 +201,9 @@ bool CompressInstEmitter::validateTypes(const Record *DagOpType,
/// operands and fixed registers it expects the Dag operand type to be contained
/// in the instantiated instruction operand type. For immediate operands and
/// immediates no validation checks are enforced at pattern validation time.
-void CompressInstEmitter::addDagOperandMapping(Record *Rec, DagInit *Dag,
- CodeGenInstruction &Inst,
+void CompressInstEmitter::addDagOperandMapping(const Record *Rec,
+ const DagInit *Dag,
+ const CodeGenInstruction &Inst,
IndexedMap<OpData> &OperandMap,
bool IsSourceInst) {
// TiedCount keeps track of the number of operands skipped in Inst
@@ -218,7 +221,7 @@ void CompressInstEmitter::addDagOperandMapping(Record *Rec, DagInit *Dag,
TiedCount++;
continue;
}
- if (DefInit *DI = dyn_cast<DefInit>(Dag->getArg(I - TiedCount))) {
+ if (const DefInit *DI = dyn_cast<DefInit>(Dag->getArg(I - TiedCount))) {
if (DI->getDef()->isSubClassOf("Register")) {
// Check if the fixed register belongs to the Register class.
if (!validateRegister(DI->getDef(), Inst.Operands[I].Rec))
@@ -267,7 +270,7 @@ void CompressInstEmitter::addDagOperandMapping(Record *Rec, DagInit *Dag,
}
// Verify the Dag operand count is enough to build an instruction.
-static bool verifyDagOpCount(CodeGenInstruction &Inst, DagInit *Dag,
+static bool verifyDagOpCount(const CodeGenInstruction &Inst, const DagInit *Dag,
bool IsSource) {
if (Dag->getNumArgs() == Inst.Operands.size())
return true;
@@ -297,7 +300,7 @@ static bool verifyDagOpCount(CodeGenInstruction &Inst, DagInit *Dag,
return true;
}
-static bool validateArgsTypes(Init *Arg1, Init *Arg2) {
+static bool validateArgsTypes(const Init *Arg1, const Init *Arg2) {
return cast<DefInit>(Arg1)->getDef() == cast<DefInit>(Arg2)->getDef();
}
@@ -307,9 +310,9 @@ static bool validateArgsTypes(Init *Arg1, Init *Arg2) {
// mapping $rs1 --> 0, $rs2 ---> 1. If the operand appears twice in the (tied)
// same Dag we use the last occurrence for indexing.
void CompressInstEmitter::createDagOperandMapping(
- Record *Rec, StringMap<unsigned> &SourceOperands,
- StringMap<unsigned> &DestOperands, DagInit *SourceDag, DagInit *DestDag,
- IndexedMap<OpData> &SourceOperandMap) {
+ const Record *Rec, StringMap<unsigned> &SourceOperands,
+ StringMap<unsigned> &DestOperands, const DagInit *SourceDag,
+ const DagInit *DestDag, IndexedMap<OpData> &SourceOperandMap) {
for (unsigned I = 0; I < DestDag->getNumArgs(); ++I) {
// Skip fixed immediates and registers, they were handled in
// addDagOperandMapping.
@@ -354,9 +357,9 @@ void CompressInstEmitter::createDagOperandMapping(
/// output instructions. Validate that operands defined in the input are
/// used in the output pattern while populating the maps.
void CompressInstEmitter::createInstOperandMapping(
- Record *Rec, DagInit *SourceDag, DagInit *DestDag,
+ const Record *Rec, const DagInit *SourceDag, const DagInit *DestDag,
IndexedMap<OpData> &SourceOperandMap, IndexedMap<OpData> &DestOperandMap,
- StringMap<unsigned> &SourceOperands, CodeGenInstruction &DestInst) {
+ StringMap<unsigned> &SourceOperands, const CodeGenInstruction &DestInst) {
// TiedCount keeps track of the number of operands skipped in Inst
// operands list to get to the corresponding Dag operand.
unsigned TiedCount = 0;
@@ -423,14 +426,14 @@ void CompressInstEmitter::createInstOperandMapping(
/// and generate warning.
/// - Immediate operand type in Dag Input differs from the corresponding Source
/// Instruction type and generate a warning.
-void CompressInstEmitter::evaluateCompressPat(Record *Rec) {
+void CompressInstEmitter::evaluateCompressPat(const Record *Rec) {
// Validate input Dag operands.
DagInit *SourceDag = Rec->getValueAsDag("Input");
assert(SourceDag && "Missing 'Input' in compress pattern!");
LLVM_DEBUG(dbgs() << "Input: " << *SourceDag << "\n");
// Checking we are transforming from compressed to uncompressed instructions.
- Record *SourceOperator = SourceDag->getOperatorAsDef(Rec->getLoc());
+ const Record *SourceOperator = SourceDag->getOperatorAsDef(Rec->getLoc());
CodeGenInstruction SourceInst(SourceOperator);
verifyDagOpCount(SourceInst, SourceDag, true);
@@ -439,7 +442,7 @@ void CompressInstEmitter::evaluateCompressPat(Record *Rec) {
assert(DestDag && "Missing 'Output' in compress pattern!");
LLVM_DEBUG(dbgs() << "Output: " << *DestDag << "\n");
- Record *DestOperator = DestDag->getOperatorAsDef(Rec->getLoc());
+ const Record *DestOperator = DestDag->getOperatorAsDef(Rec->getLoc());
CodeGenInstruction DestInst(DestOperator);
verifyDagOpCount(DestInst, DestDag, false);
@@ -475,9 +478,9 @@ void CompressInstEmitter::evaluateCompressPat(Record *Rec) {
DestOperandMap, SourceOperands, DestInst);
// Get the target features for the CompressPat.
- std::vector<Record *> PatReqFeatures;
- std::vector<Record *> RF = Rec->getValueAsListOfDefs("Predicates");
- copy_if(RF, std::back_inserter(PatReqFeatures), [](Record *R) {
+ std::vector<const Record *> PatReqFeatures;
+ std::vector<const Record *> RF = Rec->getValueAsListOfConstDefs("Predicates");
+ copy_if(RF, std::back_inserter(PatReqFeatures), [](const Record *R) {
return R->getValueAsBit("AssemblerMatcherPredicate");
});
@@ -489,8 +492,8 @@ void CompressInstEmitter::evaluateCompressPat(Record *Rec) {
static void
getReqFeatures(std::set<std::pair<bool, StringRef>> &FeaturesSet,
std::set<std::set<std::pair<bool, StringRef>>> &AnyOfFeatureSets,
- const std::vector<Record *> &ReqFeatures) {
- for (auto &R : ReqFeatures) {
+ ArrayRef<const Record *> ReqFeatures) {
+ for (const Record *R : ReqFeatures) {
const DagInit *D = R->getValueAsDag("AssemblerCondDag");
std::string CombineType = D->getOperator()->getAsString();
if (CombineType != "any_of" && CombineType != "all_of")
@@ -542,8 +545,8 @@ static unsigned getPredicates(DenseMap<const Record *, unsigned> &PredicateMap,
return 0;
}
-static void printPredicates(const std::vector<const Record *> &Predicates,
- StringRef Name, raw_ostream &OS) {
+static void printPredicates(ArrayRef<const Record *> Predicates, StringRef Name,
+ raw_ostream &OS) {
for (unsigned I = 0; I < Predicates.size(); ++I) {
StringRef Pred = Predicates[I]->getValueAsString(Name);
OS << " case " << I + 1 << ": {\n"
@@ -565,7 +568,7 @@ static void mergeCondAndCode(raw_ostream &CombinedStream, StringRef CondStr,
void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS,
EmitterType EType) {
- Record *AsmWriter = Target.getAsmWriter();
+ const Record *AsmWriter = Target.getAsmWriter();
if (!AsmWriter->getValueAsInt("PassSubtarget"))
PrintFatalError(AsmWriter->getLoc(),
"'PassSubtarget' is false. SubTargetInfo object is needed "
@@ -683,9 +686,10 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS,
getReqFeatures(FeaturesSet, AnyOfFeatureSets, CompressPat.PatReqFeatures);
// Add Dest instruction required features.
- std::vector<Record *> ReqFeatures;
- std::vector<Record *> RF = Dest.TheDef->getValueAsListOfDefs("Predicates");
- copy_if(RF, std::back_inserter(ReqFeatures), [](Record *R) {
+ std::vector<const Record *> ReqFeatures;
+ std::vector<const Record *> RF =
+ Dest.TheDef->getValueAsListOfConstDefs("Predicates");
+ copy_if(RF, std::back_inserter(ReqFeatures), [](const Record *R) {
return R->getValueAsBit("AssemblerMatcherPredicate");
});
getReqFeatures(FeaturesSet, AnyOfFeatureSets, ReqFeatures);
@@ -738,7 +742,7 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS,
<< ").getImm() == " << SourceOperandMap[OpNo].Data.Imm << ") &&\n";
break;
case OpData::Reg: {
- Record *Reg = SourceOperandMap[OpNo].Data.Reg;
+ const Record *Reg = SourceOperandMap[OpNo].Data.Reg;
CondStream.indent(6)
<< "(MI.getOperand(" << OpNo << ").isReg()) &&\n"
<< " (MI.getOperand(" << OpNo << ").getReg() == " << TargetName
@@ -827,7 +831,7 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS,
case OpData::Reg: {
if (CompressOrUncompress) {
// Fixed register has been validated at pattern validation time.
- Record *Reg = DestOperandMap[OpNo].Data.Reg;
+ const Record *Reg = DestOperandMap[OpNo].Data.Reg;
CodeStream.indent(6)
<< "OutInst.addOperand(MCOperand::createReg(" << TargetName
<< "::" << Reg->getName() << "));\n";
@@ -891,11 +895,9 @@ void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &OS,
}
void CompressInstEmitter::run(raw_ostream &OS) {
- std::vector<Record *> Insts = Records.getAllDerivedDefinitions("CompressPat");
-
// Process the CompressPat definitions, validating them as we do so.
- for (unsigned I = 0, E = Insts.size(); I != E; ++I)
- evaluateCompressPat(Insts[I]);
+ for (const Record *Pat : Records.getAllDerivedDefinitions("CompressPat"))
+ evaluateCompressPat(Pat);
// Emit file header.
emitSourceFileHeader("Compress instruction Source Fragment", OS, Records);
diff --git a/llvm/utils/TableGen/DAGISelEmitter.cpp b/llvm/utils/TableGen/DAGISelEmitter.cpp
index 6c72103f6251..2cceb22afdb9 100644
--- a/llvm/utils/TableGen/DAGISelEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelEmitter.cpp
@@ -25,11 +25,11 @@ namespace {
/// DAGISelEmitter - The top-level class which coordinates construction
/// and emission of the instruction selector.
class DAGISelEmitter {
- RecordKeeper &Records; // Just so we can get at the timing functions.
- CodeGenDAGPatterns CGP;
+ const RecordKeeper &Records; // Just so we can get at the timing functions.
+ const CodeGenDAGPatterns CGP;
public:
- explicit DAGISelEmitter(RecordKeeper &R) : Records(R), CGP(R) {}
+ explicit DAGISelEmitter(const RecordKeeper &R) : Records(R), CGP(R) {}
void run(raw_ostream &OS);
};
} // End anonymous namespace
@@ -81,8 +81,8 @@ namespace {
// In particular, we want to match maximal patterns first and lowest cost within
// a particular complexity first.
struct PatternSortingPredicate {
- PatternSortingPredicate(CodeGenDAGPatterns &cgp) : CGP(cgp) {}
- CodeGenDAGPatterns &CGP;
+ PatternSortingPredicate(const CodeGenDAGPatterns &cgp) : CGP(cgp) {}
+ const CodeGenDAGPatterns &CGP;
bool operator()(const PatternToMatch *LHS, const PatternToMatch *RHS) {
const TreePatternNode &LT = LHS->getSrcPattern();
diff --git a/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/llvm/utils/TableGen/DAGISelMatcherGen.cpp
index 5cb393ae7a53..e159cf1bbefd 100644
--- a/llvm/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherGen.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
/// getRegisterValueType - Look up and return the ValueType of the specified
/// register. If the register is a member of multiple register classes, they
/// must all have the same type.
-static MVT::SimpleValueType getRegisterValueType(Record *R,
+static MVT::SimpleValueType getRegisterValueType(const Record *R,
const CodeGenTarget &T) {
bool FoundRC = false;
MVT::SimpleValueType VT = MVT::Other;
@@ -91,7 +91,7 @@ class MatcherGen {
/// PhysRegInputs - List list has an entry for each explicitly specified
/// physreg input to the pattern. The first elt is the Register node, the
/// second is the recorded slot number the input pattern match saved it in.
- SmallVector<std::pair<Record *, unsigned>, 2> PhysRegInputs;
+ SmallVector<std::pair<const Record *, unsigned>, 2> PhysRegInputs;
/// Matcher - This is the top level of the generated matcher, the result.
Matcher *TheMatcher;
@@ -220,13 +220,13 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode &N) {
return;
}
- DefInit *DI = dyn_cast<DefInit>(N.getLeafValue());
+ const DefInit *DI = dyn_cast<DefInit>(N.getLeafValue());
if (!DI) {
errs() << "Unknown leaf kind: " << N << "\n";
abort();
}
- Record *LeafRec = DI->getDef();
+ const Record *LeafRec = DI->getDef();
// A ValueType leaf node can represent a register when named, or itself when
// unnamed.
@@ -673,7 +673,7 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode &N,
// If this is an explicit register reference, handle it.
if (DefInit *DI = dyn_cast<DefInit>(N.getLeafValue())) {
- Record *Def = DI->getDef();
+ const Record *Def = DI->getDef();
if (Def->isSubClassOf("Register")) {
const CodeGenRegister *Reg = CGP.getTargetInfo().getRegBank().getReg(Def);
AddMatcher(new EmitRegisterMatcher(Reg, N.getSimpleType(0)));
@@ -690,7 +690,7 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode &N,
if (Def->getName() == "undef_tied_input") {
MVT::SimpleValueType ResultVT = N.getSimpleType(0);
auto IDOperandNo = NextRecordedOperandNo++;
- Record *ImpDef = Def->getRecords().getDef("IMPLICIT_DEF");
+ const Record *ImpDef = Def->getRecords().getDef("IMPLICIT_DEF");
CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(ImpDef);
AddMatcher(new EmitNodeMatcher(II, ResultVT, std::nullopt, false, false,
false, false, -1, IDOperandNo));
@@ -907,11 +907,11 @@ void MatcherGen::EmitResultInstructionAsOperand(
if (isRoot && !Pattern.getDstRegs().empty()) {
// If the root came from an implicit def in the instruction handling stuff,
// don't re-add it.
- Record *HandledReg = nullptr;
+ const Record *HandledReg = nullptr;
if (II.HasOneImplicitDefWithKnownVT(CGT) != MVT::Other)
HandledReg = II.ImplicitDefs[0];
- for (Record *Reg : Pattern.getDstRegs()) {
+ for (const Record *Reg : Pattern.getDstRegs()) {
if (!Reg->isSubClassOf("Register") || Reg == HandledReg)
continue;
ResultVTs.push_back(getRegisterValueType(Reg, CGT));
@@ -1042,7 +1042,7 @@ void MatcherGen::EmitResultCode() {
if (!Pattern.getDstRegs().empty()) {
// If the root came from an implicit def in the instruction handling stuff,
// don't re-add it.
- Record *HandledReg = nullptr;
+ const Record *HandledReg = nullptr;
const TreePatternNode &DstPat = Pattern.getDstPattern();
if (!DstPat.isLeaf() && DstPat.getOperator()->isSubClassOf("Instruction")) {
const CodeGenTarget &CGT = CGP.getTargetInfo();
@@ -1052,7 +1052,7 @@ void MatcherGen::EmitResultCode() {
HandledReg = II.ImplicitDefs[0];
}
- for (Record *Reg : Pattern.getDstRegs()) {
+ for (const Record *Reg : Pattern.getDstRegs()) {
if (!Reg->isSubClassOf("Register") || Reg == HandledReg)
continue;
++NumSrcResults;
diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp
index 18620b2a073f..7d274a1cf632 100644
--- a/llvm/utils/TableGen/DFAEmitter.cpp
+++ b/llvm/utils/TableGen/DFAEmitter.cpp
@@ -170,7 +170,7 @@ void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; }
namespace {
-using Action = std::variant<Record *, unsigned, std::string>;
+using Action = std::variant<const Record *, unsigned, std::string>;
using ActionTuple = std::vector<Action>;
class Automaton;
@@ -356,7 +356,7 @@ void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) {
ListSeparator LS;
for (const auto &SingleAction : AT) {
OS << LS;
- if (const auto *R = std::get_if<Record *>(&SingleAction))
+ if (const auto *R = std::get_if<const Record *>(&SingleAction))
OS << (*R)->getName();
else if (const auto *S = std::get_if<std::string>(&SingleAction))
OS << '"' << *S << '"';
diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
index 55cb39c9de5f..42155e78d0a2 100644
--- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -61,7 +61,7 @@ struct ScheduleClass {
class DFAPacketizerEmitter {
private:
std::string TargetName;
- RecordKeeper &Records;
+ const RecordKeeper &Records;
UniqueVector<ResourceVector> UniqueResources;
std::vector<ScheduleClass> ScheduleClasses;
@@ -69,17 +69,18 @@ private:
std::map<unsigned, uint64_t> ComboBitToBitsMap;
public:
- DFAPacketizerEmitter(RecordKeeper &R);
+ DFAPacketizerEmitter(const RecordKeeper &R);
// Construct a map of function unit names to bits.
int collectAllFuncUnits(ArrayRef<const CodeGenProcModel *> ProcModels);
// Construct a map from a combo function unit bit to the bits of all included
// functional units.
- int collectAllComboFuncs(ArrayRef<Record *> ComboFuncList);
+ int collectAllComboFuncs(ArrayRef<const Record *> ComboFuncList);
- ResourceVector getResourcesForItinerary(Record *Itinerary);
- void createScheduleClasses(unsigned ItineraryIdx, const RecVec &Itineraries);
+ ResourceVector getResourcesForItinerary(const Record *Itinerary);
+ void createScheduleClasses(unsigned ItineraryIdx,
+ ArrayRef<const Record *> Itineraries);
// Emit code for a subset of itineraries.
void emitForItineraries(raw_ostream &OS,
@@ -90,7 +91,7 @@ public:
};
} // end anonymous namespace
-DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R)
+DFAPacketizerEmitter::DFAPacketizerEmitter(const RecordKeeper &R)
: TargetName(std::string(CodeGenTarget(R).getName())), Records(R) {}
int DFAPacketizerEmitter::collectAllFuncUnits(
@@ -107,7 +108,7 @@ int DFAPacketizerEmitter::collectAllFuncUnits(
int totalFUs = 0;
// Parse functional units for all the itineraries.
for (const Record *Proc : ProcItinList) {
- std::vector<Record *> FUs = Proc->getValueAsListOfDefs("FU");
+ std::vector<const Record *> FUs = Proc->getValueAsListOfConstDefs("FU");
LLVM_DEBUG(dbgs() << " FU:"
<< " (" << FUs.size() << " FUs) " << Proc->getName());
@@ -129,7 +130,7 @@ int DFAPacketizerEmitter::collectAllFuncUnits(
}
int DFAPacketizerEmitter::collectAllComboFuncs(
- ArrayRef<Record *> ComboFuncList) {
+ ArrayRef<const Record *> ComboFuncList) {
LLVM_DEBUG(dbgs() << "-------------------------------------------------------"
"----------------------\n");
LLVM_DEBUG(dbgs() << "collectAllComboFuncs");
@@ -137,8 +138,8 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
int numCombos = 0;
for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) {
- Record *Func = ComboFuncList[i];
- std::vector<Record *> FUs = Func->getValueAsListOfDefs("CFD");
+ const Record *Func = ComboFuncList[i];
+ std::vector<const Record *> FUs = Func->getValueAsListOfConstDefs("CFD");
LLVM_DEBUG(dbgs() << " CFD:" << i << " (" << FUs.size() << " combo FUs) "
<< Func->getName() << "\n");
@@ -147,16 +148,16 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
for (unsigned j = 0, N = FUs.size(); j < N; ++j) {
assert((j < DFA_MAX_RESOURCES) &&
"Exceeded maximum number of DFA resources");
- Record *FuncData = FUs[j];
- Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc");
- const std::vector<Record *> &FuncList =
- FuncData->getValueAsListOfDefs("FuncList");
+ const Record *FuncData = FUs[j];
+ const Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc");
+ const std::vector<const Record *> FuncList =
+ FuncData->getValueAsListOfConstDefs("FuncList");
const std::string &ComboFuncName = std::string(ComboFunc->getName());
uint64_t ComboBit = FUNameToBitsMap[ComboFuncName];
uint64_t ComboResources = ComboBit;
LLVM_DEBUG(dbgs() << " combo: " << ComboFuncName << ":0x"
<< Twine::utohexstr(ComboResources) << "\n");
- for (auto *K : FuncList) {
+ for (const Record *K : FuncList) {
std::string FuncName = std::string(K->getName());
uint64_t FuncResources = FUNameToBitsMap[FuncName];
LLVM_DEBUG(dbgs() << " " << FuncName << ":0x"
@@ -174,12 +175,12 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
}
ResourceVector
-DFAPacketizerEmitter::getResourcesForItinerary(Record *Itinerary) {
+DFAPacketizerEmitter::getResourcesForItinerary(const Record *Itinerary) {
ResourceVector Resources;
assert(Itinerary);
- for (Record *StageDef : Itinerary->getValueAsListOfDefs("Stages")) {
+ for (const Record *StageDef : Itinerary->getValueAsListOfDefs("Stages")) {
uint64_t StageResources = 0;
- for (Record *Unit : StageDef->getValueAsListOfDefs("Units")) {
+ for (const Record *Unit : StageDef->getValueAsListOfDefs("Units")) {
StageResources |= FUNameToBitsMap[std::string(Unit->getName())];
}
if (StageResources != 0)
@@ -188,10 +189,10 @@ DFAPacketizerEmitter::getResourcesForItinerary(Record *Itinerary) {
return Resources;
}
-void DFAPacketizerEmitter::createScheduleClasses(unsigned ItineraryIdx,
- const RecVec &Itineraries) {
+void DFAPacketizerEmitter::createScheduleClasses(
+ unsigned ItineraryIdx, ArrayRef<const Record *> Itineraries) {
unsigned Idx = 0;
- for (Record *Itinerary : Itineraries) {
+ for (const Record *Itinerary : Itineraries) {
if (!Itinerary) {
ScheduleClasses.push_back({ItineraryIdx, Idx++, 0, ResourceVector{}});
continue;
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 20164e1368ee..a4b549509286 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -325,8 +325,7 @@ static std::string getAttributeMaskString(const SmallVector<Record *> Recs) {
}
/// Emit a mapping of DXIL opcode to opname
-static void emitDXILOpCodes(std::vector<DXILOperationDesc> &Ops,
- raw_ostream &OS) {
+static void emitDXILOpCodes(ArrayRef<DXILOperationDesc> Ops, raw_ostream &OS) {
OS << "#ifdef DXIL_OPCODE\n";
for (const DXILOperationDesc &Op : Ops)
OS << "DXIL_OPCODE(" << Op.OpCode << ", " << Op.OpName << ")\n";
@@ -336,23 +335,20 @@ static void emitDXILOpCodes(std::vector<DXILOperationDesc> &Ops,
}
/// Emit a list of DXIL op classes
-static void emitDXILOpClasses(RecordKeeper &Records, raw_ostream &OS) {
+static void emitDXILOpClasses(const RecordKeeper &Records, raw_ostream &OS) {
OS << "#ifdef DXIL_OPCLASS\n";
- std::vector<Record *> OpClasses =
- Records.getAllDerivedDefinitions("DXILOpClass");
- for (Record *OpClass : OpClasses)
+ for (const Record *OpClass : Records.getAllDerivedDefinitions("DXILOpClass"))
OS << "DXIL_OPCLASS(" << OpClass->getName() << ")\n";
OS << "#undef DXIL_OPCLASS\n";
OS << "#endif\n\n";
}
/// Emit a list of DXIL op parameter types
-static void emitDXILOpParamTypes(RecordKeeper &Records, raw_ostream &OS) {
+static void emitDXILOpParamTypes(const RecordKeeper &Records, raw_ostream &OS) {
OS << "#ifdef DXIL_OP_PARAM_TYPE\n";
- std::vector<Record *> OpClasses =
- Records.getAllDerivedDefinitions("DXILOpParamType");
- for (Record *OpClass : OpClasses)
- OS << "DXIL_OP_PARAM_TYPE(" << OpClass->getName() << ")\n";
+ for (const Record *OpParamType :
+ Records.getAllDerivedDefinitions("DXILOpParamType"))
+ OS << "DXIL_OP_PARAM_TYPE(" << OpParamType->getName() << ")\n";
OS << "#undef DXIL_OP_PARAM_TYPE\n";
OS << "#endif\n\n";
}
@@ -378,7 +374,7 @@ static void emitDXILOpFunctionTypes(ArrayRef<DXILOperationDesc> Ops,
/// Emit map of DXIL operation to LLVM or DirectX intrinsic
/// \param A vector of DXIL Ops
/// \param Output stream
-static void emitDXILIntrinsicMap(std::vector<DXILOperationDesc> &Ops,
+static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
raw_ostream &OS) {
OS << "#ifdef DXIL_OP_INTRINSIC\n";
OS << "\n";
@@ -396,14 +392,14 @@ static void emitDXILIntrinsicMap(std::vector<DXILOperationDesc> &Ops,
/// Emit DXIL operation table
/// \param A vector of DXIL Ops
/// \param Output stream
-static void emitDXILOperationTable(std::vector<DXILOperationDesc> &Ops,
+static void emitDXILOperationTable(ArrayRef<DXILOperationDesc> Ops,
raw_ostream &OS) {
// Collect Names.
SequenceToOffsetTable<std::string> OpClassStrings;
SequenceToOffsetTable<std::string> OpStrings;
StringSet<> ClassSet;
- for (auto &Op : Ops) {
+ for (const auto &Op : Ops) {
OpStrings.add(Op.OpName);
if (ClassSet.insert(Op.OpClass).second)
@@ -421,7 +417,7 @@ static void emitDXILOperationTable(std::vector<DXILOperationDesc> &Ops,
OS << " static const OpCodeProperty OpCodeProps[] = {\n";
std::string Prefix = "";
- for (auto &Op : Ops) {
+ for (const auto &Op : Ops) {
OS << Prefix << " { dxil::OpCode::" << Op.OpName << ", "
<< OpStrings.get(Op.OpName) << ", OpCodeClass::" << Op.OpClass << ", "
<< OpClassStrings.get(Op.OpClass.data()) << ", "
@@ -469,14 +465,15 @@ static void emitDXILOperationTable(std::vector<DXILOperationDesc> &Ops,
OS << "}\n\n";
}
-static void emitDXILOperationTableDataStructs(RecordKeeper &Records,
+static void emitDXILOperationTableDataStructs(const RecordKeeper &Records,
raw_ostream &OS) {
// Get Shader stage records
- std::vector<Record *> ShaderKindRecs =
+ std::vector<const Record *> ShaderKindRecs =
Records.getAllDerivedDefinitions("DXILShaderStage");
// Sort records by name
- llvm::sort(ShaderKindRecs,
- [](Record *A, Record *B) { return A->getName() < B->getName(); });
+ llvm::sort(ShaderKindRecs, [](const Record *A, const Record *B) {
+ return A->getName() < B->getName();
+ });
OS << "// Valid shader kinds\n\n";
// Choose the type of enum ShaderKind based on the number of stages declared.
@@ -508,22 +505,21 @@ static void emitDXILOperationTableDataStructs(RecordKeeper &Records,
/// Entry function call that invokes the functionality of this TableGen backend
/// \param Records TableGen records of DXIL Operations defined in DXIL.td
/// \param OS output stream
-static void EmitDXILOperation(RecordKeeper &Records, raw_ostream &OS) {
+static void EmitDXILOperation(const RecordKeeper &Records, raw_ostream &OS) {
OS << "// Generated code, do not edit.\n";
OS << "\n";
// Get all DXIL Ops property records
- std::vector<Record *> OpIntrProps =
- Records.getAllDerivedDefinitions("DXILOp");
std::vector<DXILOperationDesc> DXILOps;
- for (auto *Record : OpIntrProps) {
- DXILOps.emplace_back(DXILOperationDesc(Record));
+ for (const Record *R : Records.getAllDerivedDefinitions("DXILOp")) {
+ DXILOps.emplace_back(DXILOperationDesc(R));
}
// Sort by opcode.
- llvm::sort(DXILOps, [](DXILOperationDesc &A, DXILOperationDesc &B) {
- return A.OpCode < B.OpCode;
- });
+ llvm::sort(DXILOps,
+ [](const DXILOperationDesc &A, const DXILOperationDesc &B) {
+ return A.OpCode < B.OpCode;
+ });
int PrevOp = -1;
- for (DXILOperationDesc &Desc : DXILOps) {
+ for (const DXILOperationDesc &Desc : DXILOps) {
if (Desc.OpCode == PrevOp)
PrintFatalError(Twine("Duplicate opcode: ") + Twine(Desc.OpCode));
PrevOp = Desc.OpCode;
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index b5da37b51346..edecb9067bcc 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -155,12 +155,12 @@ raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
}
class DecoderEmitter {
- RecordKeeper &RK;
+ const RecordKeeper &RK;
std::vector<EncodingAndInst> NumberedEncodings;
public:
- DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace)
- : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)) {}
+ DecoderEmitter(const RecordKeeper &R, const std::string &PredicateNamespace)
+ : RK(R), Target(R), PredicateNamespace(PredicateNamespace) {}
// Emit the decoder state machine table.
void emitTable(formatted_raw_ostream &o, DecoderTable &Table,
@@ -181,7 +181,7 @@ private:
CodeGenTarget Target;
public:
- std::string PredicateNamespace;
+ const std::string &PredicateNamespace;
};
} // end anonymous namespace
@@ -1302,7 +1302,7 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
bool IsFirstEmission = true;
for (unsigned i = 0; i < Predicates->size(); ++i) {
- Record *Pred = Predicates->getElementAsRecord(i);
+ const Record *Pred = Predicates->getElementAsRecord(i);
if (!Pred->getValue("AssemblerMatcherPredicate"))
continue;
@@ -1320,10 +1320,10 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
}
bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
- ListInit *Predicates =
+ const ListInit *Predicates =
AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
for (unsigned i = 0; i < Predicates->size(); ++i) {
- Record *Pred = Predicates->getElementAsRecord(i);
+ const Record *Pred = Predicates->getElementAsRecord(i);
if (!Pred->getValue("AssemblerMatcherPredicate"))
continue;
@@ -1868,7 +1868,7 @@ static std::string findOperandDecoderMethod(const Record *Record) {
std::string Decoder;
const RecordVal *DecoderString = Record->getValue("DecoderMethod");
- StringInit *String =
+ const StringInit *String =
DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
if (String) {
Decoder = std::string(String->getValue());
@@ -2010,7 +2010,7 @@ static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits,
}
static unsigned
-populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
+populateInstruction(const CodeGenTarget &Target, const Record &EncodingDef,
const CodeGenInstruction &CGI, unsigned Opc,
std::map<unsigned, std::vector<OperandInfo>> &Operands,
bool IsVarLenInst) {
@@ -2089,12 +2089,12 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
DagInit *SubArgDag = dyn_cast<DagInit>(OpInit);
if (SubArgDag)
OpInit = SubArgDag->getOperator();
- Record *OpTypeRec = cast<DefInit>(OpInit)->getDef();
+ const Record *OpTypeRec = cast<DefInit>(OpInit)->getDef();
// Lookup the sub-operands from the operand type record (note that only
// Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp).
- DagInit *SubOps = OpTypeRec->isSubClassOf("Operand")
- ? OpTypeRec->getValueAsDag("MIOperandInfo")
- : nullptr;
+ const DagInit *SubOps = OpTypeRec->isSubClassOf("Operand")
+ ? OpTypeRec->getValueAsDag("MIOperandInfo")
+ : nullptr;
// Lookup the decoder method and construct a new OperandInfo to hold our
// result.
@@ -2549,7 +2549,7 @@ namespace llvm {
handleHwModesUnrelatedEncodings(NumberedInstruction, HwModeNames,
NamespacesWithHwModes, NumberedEncodings);
}
- for (const auto &NumberedAlias :
+ for (const Record *NumberedAlias :
RK.getAllDerivedDefinitions("AdditionalEncoding"))
NumberedEncodings.emplace_back(
NumberedAlias,
diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp
index 01df873ece1f..af05496a7b6a 100644
--- a/llvm/utils/TableGen/FastISelEmitter.cpp
+++ b/llvm/utils/TableGen/FastISelEmitter.cpp
@@ -272,7 +272,7 @@ struct OperandsSignature {
DefInit *OpDI = dyn_cast<DefInit>(Op.getLeafValue());
if (!OpDI)
return false;
- Record *OpLeafRec = OpDI->getDef();
+ const Record *OpLeafRec = OpDI->getDef();
// For now, the only other thing we accept is register operands.
const CodeGenRegisterClass *RC = nullptr;
@@ -407,7 +407,7 @@ class FastISelMap {
public:
explicit FastISelMap(StringRef InstNS);
- void collectPatterns(CodeGenDAGPatterns &CGP);
+ void collectPatterns(const CodeGenDAGPatterns &CGP);
void printImmediatePredicates(raw_ostream &OS);
void printFunctionDefinitions(raw_ostream &OS);
@@ -417,7 +417,8 @@ private:
};
} // End anonymous namespace
-static std::string getOpcodeName(const Record *Op, CodeGenDAGPatterns &CGP) {
+static std::string getOpcodeName(const Record *Op,
+ const CodeGenDAGPatterns &CGP) {
return std::string(CGP.getSDNodeInfo(Op).getEnumName());
}
@@ -437,7 +438,7 @@ static std::string PhyRegForNode(TreePatternNode &Op,
if (!Op.isLeaf())
return PhysReg;
- Record *OpLeafRec = cast<DefInit>(Op.getLeafValue())->getDef();
+ const Record *OpLeafRec = cast<DefInit>(Op.getLeafValue())->getDef();
if (!OpLeafRec->isSubClassOf("Register"))
return PhysReg;
@@ -448,7 +449,7 @@ static std::string PhyRegForNode(TreePatternNode &Op,
return PhysReg;
}
-void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) {
+void FastISelMap::collectPatterns(const CodeGenDAGPatterns &CGP) {
const CodeGenTarget &Target = CGP.getTargetInfo();
// Scan through all the patterns and record the simple ones.
@@ -864,8 +865,8 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) {
// TODO: SignaturesWithConstantForms should be empty here.
}
-static void EmitFastISel(RecordKeeper &RK, raw_ostream &OS) {
- CodeGenDAGPatterns CGP(RK);
+static void EmitFastISel(const RecordKeeper &RK, raw_ostream &OS) {
+ const CodeGenDAGPatterns CGP(RK);
const CodeGenTarget &Target = CGP.getTargetInfo();
emitSourceFileHeader("\"Fast\" Instruction Selector for the " +
Target.getName().str() + " target",
diff --git a/llvm/utils/TableGen/InstrDocsEmitter.cpp b/llvm/utils/TableGen/InstrDocsEmitter.cpp
index f53428ecdffe..d32cfa235454 100644
--- a/llvm/utils/TableGen/InstrDocsEmitter.cpp
+++ b/llvm/utils/TableGen/InstrDocsEmitter.cpp
@@ -61,9 +61,9 @@ static std::string escapeForRST(StringRef Str) {
return Result;
}
-static void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
- CodeGenDAGPatterns CDP(RK);
- CodeGenTarget &Target = CDP.getTargetInfo();
+static void EmitInstrDocs(const RecordKeeper &RK, raw_ostream &OS) {
+ const CodeGenDAGPatterns CDP(RK);
+ const CodeGenTarget &Target = CDP.getTargetInfo();
unsigned VariantCount = Target.getAsmParserVariantCount();
// Page title.
@@ -86,7 +86,7 @@ static void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
// Assembly string(s).
if (!II->AsmString.empty()) {
for (unsigned VarNum = 0; VarNum < VariantCount; ++VarNum) {
- Record *AsmVariant = Target.getAsmParserVariant(VarNum);
+ const Record *AsmVariant = Target.getAsmParserVariant(VarNum);
OS << "Assembly string";
if (VariantCount != 1)
OS << " (" << AsmVariant->getValueAsString("Name") << ")";
@@ -167,7 +167,7 @@ static void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
// names of both the compound operand and the basic operands it
// contains.
for (unsigned SubOpIdx = 0; SubOpIdx < Op.MINumOperands; ++SubOpIdx) {
- Record *SubRec =
+ const Record *SubRec =
cast<DefInit>(Op.MIOperandInfo->getArg(SubOpIdx))->getDef();
StringRef SubOpName = Op.MIOperandInfo->getArgNameStr(SubOpIdx);
StringRef SubOpTypeName = SubRec->getName();
@@ -198,7 +198,7 @@ static void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
if (!II->ImplicitDefs.empty()) {
OS << "Implicit defs: ";
ListSeparator LS;
- for (Record *Def : II->ImplicitDefs)
+ for (const Record *Def : II->ImplicitDefs)
OS << LS << "``" << Def->getName() << "``";
OS << "\n\n";
}
@@ -207,18 +207,18 @@ static void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
if (!II->ImplicitUses.empty()) {
OS << "Implicit uses: ";
ListSeparator LS;
- for (Record *Use : II->ImplicitUses)
+ for (const Record *Use : II->ImplicitUses)
OS << LS << "``" << Use->getName() << "``";
OS << "\n\n";
}
// Predicates.
- std::vector<Record *> Predicates =
- II->TheDef->getValueAsListOfDefs("Predicates");
+ std::vector<const Record *> Predicates =
+ II->TheDef->getValueAsListOfConstDefs("Predicates");
if (!Predicates.empty()) {
OS << "Predicates: ";
ListSeparator LS;
- for (Record *P : Predicates)
+ for (const Record *P : Predicates)
OS << LS << "``" << P->getName() << "``";
OS << "\n\n";
}
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index 4e2138d15fde..5830cdae7096 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -672,7 +672,8 @@ void InstrInfoEmitter::emitLogicalOperandTypeMappings(
void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS,
StringRef TargetName) {
- RecVec TIIPredicates = Records.getAllDerivedDefinitions("TIIPredicate");
+ ArrayRef<const Record *> TIIPredicates =
+ Records.getAllDerivedDefinitions("TIIPredicate");
OS << "#ifdef GET_INSTRINFO_MC_HELPER_DECLS\n";
OS << "#undef GET_INSTRINFO_MC_HELPER_DECLS\n\n";
diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
index f61a05861981..c4f238b67476 100644
--- a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
+++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp
@@ -52,36 +52,37 @@ using namespace llvm;
namespace {
class MacroFusionPredicatorEmitter {
- RecordKeeper &Records;
- CodeGenTarget Target;
+ const RecordKeeper &Records;
+ const CodeGenTarget Target;
- void emitMacroFusionDecl(ArrayRef<Record *> Fusions, PredicateExpander &PE,
- raw_ostream &OS);
- void emitMacroFusionImpl(ArrayRef<Record *> Fusions, PredicateExpander &PE,
- raw_ostream &OS);
- void emitPredicates(ArrayRef<Record *> FirstPredicate, bool IsCommutable,
- PredicateExpander &PE, raw_ostream &OS);
- void emitFirstPredicate(Record *SecondPredicate, bool IsCommutable,
+ void emitMacroFusionDecl(ArrayRef<const Record *> Fusions,
+ PredicateExpander &PE, raw_ostream &OS);
+ void emitMacroFusionImpl(ArrayRef<const Record *> Fusions,
+ PredicateExpander &PE, raw_ostream &OS);
+ void emitPredicates(ArrayRef<const Record *> FirstPredicate,
+ bool IsCommutable, PredicateExpander &PE,
+ raw_ostream &OS);
+ void emitFirstPredicate(const Record *SecondPredicate, bool IsCommutable,
PredicateExpander &PE, raw_ostream &OS);
- void emitSecondPredicate(Record *SecondPredicate, bool IsCommutable,
+ void emitSecondPredicate(const Record *SecondPredicate, bool IsCommutable,
PredicateExpander &PE, raw_ostream &OS);
- void emitBothPredicate(Record *Predicates, bool IsCommutable,
+ void emitBothPredicate(const Record *Predicates, bool IsCommutable,
PredicateExpander &PE, raw_ostream &OS);
public:
- MacroFusionPredicatorEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+ MacroFusionPredicatorEmitter(const RecordKeeper &R) : Records(R), Target(R) {}
void run(raw_ostream &OS);
};
} // End anonymous namespace.
void MacroFusionPredicatorEmitter::emitMacroFusionDecl(
- ArrayRef<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) {
+ ArrayRef<const Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) {
OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n";
OS << "#undef GET_" << Target.getName() << "_MACRO_FUSION_PRED_DECL\n\n";
OS << "namespace llvm {\n";
- for (Record *Fusion : Fusions) {
+ for (const Record *Fusion : Fusions) {
OS << "bool is" << Fusion->getName() << "(const TargetInstrInfo &, "
<< "const TargetSubtargetInfo &, "
<< "const MachineInstr *, "
@@ -93,14 +94,14 @@ void MacroFusionPredicatorEmitter::emitMacroFusionDecl(
}
void MacroFusionPredicatorEmitter::emitMacroFusionImpl(
- ArrayRef<Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) {
+ ArrayRef<const Record *> Fusions, PredicateExpander &PE, raw_ostream &OS) {
OS << "#ifdef GET_" << Target.getName() << "_MACRO_FUSION_PRED_IMPL\n";
OS << "#undef GET_" << Target.getName() << "_MACRO_FUSION_PRED_IMPL\n\n";
OS << "namespace llvm {\n";
- for (Record *Fusion : Fusions) {
- std::vector<Record *> Predicates =
- Fusion->getValueAsListOfDefs("Predicates");
+ for (const Record *Fusion : Fusions) {
+ std::vector<const Record *> Predicates =
+ Fusion->getValueAsListOfConstDefs("Predicates");
bool IsCommutable = Fusion->getValueAsBit("IsCommutable");
OS << "bool is" << Fusion->getName() << "(\n";
@@ -121,12 +122,11 @@ void MacroFusionPredicatorEmitter::emitMacroFusionImpl(
OS << "\n#endif\n";
}
-void MacroFusionPredicatorEmitter::emitPredicates(ArrayRef<Record *> Predicates,
- bool IsCommutable,
- PredicateExpander &PE,
- raw_ostream &OS) {
- for (Record *Predicate : Predicates) {
- Record *Target = Predicate->getValueAsDef("Target");
+void MacroFusionPredicatorEmitter::emitPredicates(
+ ArrayRef<const Record *> Predicates, bool IsCommutable,
+ PredicateExpander &PE, raw_ostream &OS) {
+ for (const Record *Predicate : Predicates) {
+ const Record *Target = Predicate->getValueAsDef("Target");
if (Target->getName() == "first_fusion_target")
emitFirstPredicate(Predicate, IsCommutable, PE, OS);
else if (Target->getName() == "second_fusion_target")
@@ -139,7 +139,7 @@ void MacroFusionPredicatorEmitter::emitPredicates(ArrayRef<Record *> Predicates,
}
}
-void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate,
+void MacroFusionPredicatorEmitter::emitFirstPredicate(const Record *Predicate,
bool IsCommutable,
PredicateExpander &PE,
raw_ostream &OS) {
@@ -172,7 +172,7 @@ void MacroFusionPredicatorEmitter::emitFirstPredicate(Record *Predicate,
}
}
-void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate,
+void MacroFusionPredicatorEmitter::emitSecondPredicate(const Record *Predicate,
bool IsCommutable,
PredicateExpander &PE,
raw_ostream &OS) {
@@ -223,7 +223,7 @@ void MacroFusionPredicatorEmitter::emitSecondPredicate(Record *Predicate,
}
}
-void MacroFusionPredicatorEmitter::emitBothPredicate(Record *Predicate,
+void MacroFusionPredicatorEmitter::emitBothPredicate(const Record *Predicate,
bool IsCommutable,
PredicateExpander &PE,
raw_ostream &OS) {
@@ -277,9 +277,7 @@ void MacroFusionPredicatorEmitter::run(raw_ostream &OS) {
PE.setByRef(false);
PE.setExpandForMC(false);
- std::vector<Record *> Fusions = Records.getAllDerivedDefinitions("Fusion");
- // Sort macro fusions by name.
- sort(Fusions, LessRecord());
+ ArrayRef<const Record *> Fusions = Records.getAllDerivedDefinitions("Fusion");
emitMacroFusionDecl(Fusions, PE, OS);
OS << "\n";
emitMacroFusionImpl(Fusions, PE, OS);
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index e076832674bd..63e70698d7cd 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -123,7 +123,7 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS, CodeGenTarget &Target,
if (!Namespace.empty())
OS << "namespace " << Namespace << " {\n";
- OS << "enum {\n NoRegister,\n";
+ OS << "enum : unsigned {\n NoRegister,\n";
for (const auto &Reg : Registers)
OS << " " << Reg.getName() << " = " << Reg.EnumValue << ",\n";
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 394e2eb42c15..c568f6747f4f 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -284,7 +284,7 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS,
<< "\"" << CommandLineName << "\", "
<< "\"" << Desc << "\", " << Target << "::" << Name << ", ";
- RecVec ImpliesList = Feature->getValueAsListOfDefs("Implies");
+ ConstRecVec ImpliesList = Feature->getValueAsListOfConstDefs("Implies");
printFeatureMask(OS, ImpliesList, FeatureMap);
@@ -320,8 +320,9 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS,
for (const Record *Processor : ProcessorList) {
StringRef Name = Processor->getValueAsString("Name");
- RecVec FeatureList = Processor->getValueAsListOfDefs("Features");
- RecVec TuneFeatureList = Processor->getValueAsListOfDefs("TuneFeatures");
+ ConstRecVec FeatureList = Processor->getValueAsListOfConstDefs("Features");
+ ConstRecVec TuneFeatureList =
+ Processor->getValueAsListOfConstDefs("TuneFeatures");
// Emit as "{ "cpu", "description", 0, { f1 , f2 , ... fn } },".
OS << " { "
@@ -366,7 +367,7 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
ItinString += " { " + itostr(Cycles) + ", ";
// Get unit list
- RecVec UnitList = Stage->getValueAsListOfDefs("Units");
+ ConstRecVec UnitList = Stage->getValueAsListOfConstDefs("Units");
// For each unit
for (unsigned j = 0, M = UnitList.size(); j < M;) {
@@ -444,7 +445,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(
if (!ItinsDefSet.insert(ProcModel.ItinsDef).second)
continue;
- RecVec FUs = ProcModel.ItinsDef->getValueAsListOfDefs("FU");
+ ConstRecVec FUs = ProcModel.ItinsDef->getValueAsListOfConstDefs("FU");
if (FUs.empty())
continue;
@@ -458,7 +459,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(
OS << "} // end namespace " << Name << "FU\n";
- RecVec BPs = ProcModel.ItinsDef->getValueAsListOfDefs("BP");
+ ConstRecVec BPs = ProcModel.ItinsDef->getValueAsListOfConstDefs("BP");
if (!BPs.empty()) {
OS << "\n// Pipeline forwarding paths for itineraries \"" << Name
<< "\"\n"
@@ -682,8 +683,7 @@ void SubtargetEmitter::EmitProcessorResourceSubUnits(
const Record *PRDef = ProcModel.ProcResourceDefs[i];
if (!PRDef->isSubClassOf("ProcResGroup"))
continue;
- RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources");
- for (const Record *RUDef : ResUnits) {
+ for (const Record *RUDef : PRDef->getValueAsListOfDefs("Resources")) {
const Record *RU =
SchedModels.findProcResUnits(RUDef, ProcModel, PRDef->getLoc());
for (unsigned J = 0; J < RU->getValueAsInt("NumUnits"); ++J) {
@@ -842,8 +842,7 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
const unsigned SubUnitsBeginOffset = SubUnitsOffset;
int BufferSize = PRDef->getValueAsInt("BufferSize");
if (PRDef->isSubClassOf("ProcResGroup")) {
- RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources");
- for (const Record *RU : ResUnits) {
+ for (const Record *RU : PRDef->getValueAsListOfDefs("Resources")) {
NumUnits += RU->getValueAsInt("NumUnits");
SubUnitsOffset += RU->getValueAsInt("NumUnits");
}
@@ -1028,7 +1027,7 @@ void SubtargetEmitter::ExpandProcResources(
for (const Record *PR : PM.ProcResourceDefs) {
if (PR == PRDef || !PR->isSubClassOf("ProcResGroup"))
continue;
- RecVec SuperResources = PR->getValueAsListOfDefs("Resources");
+ ConstRecVec SuperResources = PR->getValueAsListOfConstDefs("Resources");
ConstRecIter SubI = SubResources.begin(), SubE = SubResources.end();
for (; SubI != SubE; ++SubI) {
if (!is_contained(SuperResources, *SubI)) {
@@ -1105,16 +1104,18 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
if (RWDef) {
Writes.clear();
Reads.clear();
- SchedModels.findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"),
- Writes, Reads);
+ SchedModels.findRWs(
+ RWDef->getValueAsListOfConstDefs("OperandReadWrites"), Writes,
+ Reads);
}
}
if (Writes.empty()) {
// Check this processor's itinerary class resources.
for (const Record *I : ProcModel.ItinRWDefs) {
- RecVec Matched = I->getValueAsListOfDefs("MatchedItinClasses");
+ ConstRecVec Matched =
+ I->getValueAsListOfConstDefs("MatchedItinClasses");
if (is_contained(Matched, SC.ItinClassDef)) {
- SchedModels.findRWs(I->getValueAsListOfDefs("OperandReadWrites"),
+ SchedModels.findRWs(I->getValueAsListOfConstDefs("OperandReadWrites"),
Writes, Reads);
break;
}
@@ -1274,7 +1275,8 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
SCDesc.NumMicroOps = MCSchedClassDesc::InvalidNumMicroOps;
break;
}
- RecVec ValidWrites = ReadAdvance->getValueAsListOfDefs("ValidWrites");
+ ConstRecVec ValidWrites =
+ ReadAdvance->getValueAsListOfConstDefs("ValidWrites");
IdxVec WriteIDs;
if (ValidWrites.empty())
WriteIDs.push_back(0);
diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h
index e0d12abaaa03..fc3b87370766 100644
--- a/llvm/utils/TableGen/TableGenBackends.h
+++ b/llvm/utils/TableGen/TableGenBackends.h
@@ -61,7 +61,7 @@ namespace llvm {
class raw_ostream;
class RecordKeeper;
-void EmitMapTable(RecordKeeper &RK, raw_ostream &OS);
+void EmitMapTable(const RecordKeeper &RK, raw_ostream &OS);
// Defined in DecoderEmitter.cpp
void EmitDecoder(RecordKeeper &RK, raw_ostream &OS,
diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
index e9436ab16e44..7373494e8b12 100644
--- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
+++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp
@@ -19,28 +19,23 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
-namespace llvm {
-
static constexpr int WebAssemblyInstructionTableSize = 256;
-void emitWebAssemblyDisassemblerTables(
+void llvm::emitWebAssemblyDisassemblerTables(
raw_ostream &OS,
- const ArrayRef<const CodeGenInstruction *> &NumberedInstructions) {
+ ArrayRef<const CodeGenInstruction *> NumberedInstructions) {
// First lets organize all opcodes by (prefix) byte. Prefix 0 is the
// starting table.
std::map<unsigned,
std::map<unsigned, std::pair<unsigned, const CodeGenInstruction *>>>
OpcodeTable;
for (unsigned I = 0; I != NumberedInstructions.size(); ++I) {
- auto &CGI = *NumberedInstructions[I];
- auto &Def = *CGI.TheDef;
+ const CodeGenInstruction &CGI = *NumberedInstructions[I];
+ const Record &Def = *CGI.TheDef;
if (!Def.getValue("Inst"))
continue;
- auto &Inst = *Def.getValueAsBitsInit("Inst");
- RecordKeeper &RK = Inst.getRecordKeeper();
- unsigned Opc = static_cast<unsigned>(
- cast<IntInit>(Inst.convertInitializerTo(IntRecTy::get(RK)))
- ->getValue());
+ const BitsInit &Inst = *Def.getValueAsBitsInit("Inst");
+ unsigned Opc = static_cast<unsigned>(*Inst.convertInitializerToInt());
if (Opc == 0xFFFFFFFF)
continue; // No opcode defined.
assert(Opc <= 0xFFFFFF);
@@ -97,14 +92,14 @@ void emitWebAssemblyDisassemblerTables(
OS << "};\n\n";
std::vector<std::string> OperandTable, CurOperandList;
// Output one table per prefix.
- for (auto &PrefixPair : OpcodeTable) {
- if (PrefixPair.second.empty())
+ for (const auto &[Prefix, Table] : OpcodeTable) {
+ if (Table.empty())
continue;
- OS << "WebAssemblyInstruction InstructionTable" << PrefixPair.first;
+ OS << "WebAssemblyInstruction InstructionTable" << Prefix;
OS << "[] = {\n";
for (unsigned I = 0; I < WebAssemblyInstructionTableSize; I++) {
- auto InstIt = PrefixPair.second.find(I);
- if (InstIt != PrefixPair.second.end()) {
+ auto InstIt = Table.find(I);
+ if (InstIt != Table.end()) {
// Regular instruction.
assert(InstIt->second.second);
auto &CGI = *InstIt->second.second;
@@ -144,7 +139,7 @@ void emitWebAssemblyDisassemblerTables(
} else {
auto PrefixIt = OpcodeTable.find(I);
// If we have a non-empty table for it that's not 0, this is a prefix.
- if (PrefixIt != OpcodeTable.end() && I && !PrefixPair.first) {
+ if (PrefixIt != OpcodeTable.end() && I && !Prefix) {
OS << " { 0, ET_Prefix, 0, 0";
} else {
OS << " { 0, ET_Unused, 0, 0";
@@ -163,15 +158,11 @@ void emitWebAssemblyDisassemblerTables(
// Create a table of all extension tables:
OS << "struct { uint8_t Prefix; const WebAssemblyInstruction *Table; }\n";
OS << "PrefixTable[] = {\n";
- for (auto &PrefixPair : OpcodeTable) {
- if (PrefixPair.second.empty() || !PrefixPair.first)
+ for (const auto &[Prefix, Table] : OpcodeTable) {
+ if (Table.empty() || !Prefix)
continue;
- OS << " { " << PrefixPair.first << ", InstructionTable"
- << PrefixPair.first;
- OS << " },\n";
+ OS << " { " << Prefix << ", InstructionTable" << Prefix << " },\n";
}
OS << " { 0, nullptr }\n};\n\n";
OS << "} // end namespace llvm\n";
}
-
-} // namespace llvm
diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h
index aba3a4bfd302..2d814cf0675a 100644
--- a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h
+++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h
@@ -22,8 +22,7 @@ class CodeGenInstruction;
class raw_ostream;
void emitWebAssemblyDisassemblerTables(
- raw_ostream &OS,
- const ArrayRef<const CodeGenInstruction *> &NumberedInstructions);
+ raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions);
} // namespace llvm
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 8952c8e0a1c6..dfa10f74974c 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -63,8 +63,8 @@ static bool isExplicitUnalign(const CodeGenInstruction *Inst) {
}
class X86FoldTablesEmitter {
- RecordKeeper &Records;
- CodeGenTarget Target;
+ const RecordKeeper &Records;
+ const CodeGenTarget Target;
// Represents an entry in the folding table
class X86FoldTableEntry {
@@ -196,7 +196,7 @@ class X86FoldTablesEmitter {
FoldTable BroadcastTable4;
public:
- X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+ X86FoldTablesEmitter(const RecordKeeper &R) : Records(R), Target(R) {}
// run - Generate the 6 X86 memory fold tables.
void run(raw_ostream &OS);
@@ -670,7 +670,7 @@ void X86FoldTablesEmitter::run(raw_ostream &OS) {
// added into memory fold tables.
auto RegInstsForBroadcast = RegInsts;
- Record *AsmWriter = Target.getAsmWriter();
+ const Record *AsmWriter = Target.getAsmWriter();
unsigned Variant = AsmWriter->getValueAsInt("Variant");
auto FixUp = [&](const CodeGenInstruction *RegInst) {
StringRef RegInstName = RegInst->TheDef->getName();
@@ -721,8 +721,8 @@ void X86FoldTablesEmitter::run(raw_ostream &OS) {
// Add the manually mapped instructions listed above.
for (const ManualMapEntry &Entry : ManualMapSet) {
- Record *RegInstIter = Records.getDef(Entry.RegInstStr);
- Record *MemInstIter = Records.getDef(Entry.MemInstStr);
+ const Record *RegInstIter = Records.getDef(Entry.RegInstStr);
+ const Record *MemInstIter = Records.getDef(Entry.MemInstStr);
updateTables(&(Target.getInstruction(RegInstIter)),
&(Target.getInstruction(MemInstIter)), Entry.Strategy, true);
diff --git a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp
index 0abe353a9a57..f68c727cbe92 100644
--- a/llvm/utils/TableGen/X86InstrMappingEmitter.cpp
+++ b/llvm/utils/TableGen/X86InstrMappingEmitter.cpp
@@ -26,8 +26,8 @@ using namespace X86Disassembler;
namespace {
class X86InstrMappingEmitter {
- RecordKeeper &Records;
- CodeGenTarget Target;
+ const RecordKeeper &Records;
+ const CodeGenTarget Target;
// Hold all pontentially compressible EVEX instructions
std::vector<const CodeGenInstruction *> PreCompressionInsts;
@@ -44,7 +44,7 @@ class X86InstrMappingEmitter {
PredicateInstMap PredicateInsts;
public:
- X86InstrMappingEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+ X86InstrMappingEmitter(const RecordKeeper &R) : Records(R), Target(R) {}
// run - Output X86 EVEX compression tables.
void run(raw_ostream &OS);
@@ -63,8 +63,8 @@ private:
void printClassDef(raw_ostream &OS);
// Prints the given table as a C++ array of type X86TableEntry under the guard
// \p Macro.
- void printTable(const std::vector<Entry> &Table, StringRef Name,
- StringRef Macro, raw_ostream &OS);
+ void printTable(ArrayRef<Entry> Table, StringRef Name, StringRef Macro,
+ raw_ostream &OS);
};
void X86InstrMappingEmitter::printClassDef(raw_ostream &OS) {
@@ -90,9 +90,8 @@ static void printMacroEnd(StringRef Macro, raw_ostream &OS) {
OS << "#endif // " << Macro << "\n\n";
}
-void X86InstrMappingEmitter::printTable(const std::vector<Entry> &Table,
- StringRef Name, StringRef Macro,
- raw_ostream &OS) {
+void X86InstrMappingEmitter::printTable(ArrayRef<Entry> Table, StringRef Name,
+ StringRef Macro, raw_ostream &OS) {
printMacroBegin(Macro, OS);
OS << "static const X86TableEntry " << Name << "[] = {\n";
@@ -220,7 +219,7 @@ void X86InstrMappingEmitter::emitCompressEVEXTable(
assert(NewRec && "Instruction not found!");
NewInst = &Target.getInstruction(NewRec);
} else if (Name.ends_with("_EVEX")) {
- if (auto *NewRec = Records.getDef(Name.drop_back(5)))
+ if (const auto *NewRec = Records.getDef(Name.drop_back(5)))
NewInst = &Target.getInstruction(NewRec);
} else if (Name.ends_with("_ND"))
// Leave it to ND2NONND table.
@@ -319,7 +318,7 @@ void X86InstrMappingEmitter::emitND2NonNDTable(
if (!isInteresting(Rec) || NoCompressSet.find(Name) != NoCompressSet.end())
continue;
if (ManualMap.find(Name) != ManualMap.end()) {
- auto *NewRec = Records.getDef(ManualMap.at(Rec->getName()));
+ const auto *NewRec = Records.getDef(ManualMap.at(Rec->getName()));
assert(NewRec && "Instruction not found!");
auto &NewInst = Target.getInstruction(NewRec);
Table.push_back(std::pair(Inst, &NewInst));
@@ -328,10 +327,10 @@ void X86InstrMappingEmitter::emitND2NonNDTable(
if (!Name.ends_with("_ND"))
continue;
- auto *NewRec = Records.getDef(Name.drop_back(3));
+ const auto *NewRec = Records.getDef(Name.drop_back(3));
if (!NewRec)
continue;
- auto &NewInst = Target.getInstruction(NewRec);
+ const auto &NewInst = Target.getInstruction(NewRec);
if (isRegisterOperand(NewInst.Operands[0].Rec))
Table.push_back(std::pair(Inst, &NewInst));
}
@@ -353,15 +352,15 @@ void X86InstrMappingEmitter::emitSSE2AVXTable(
if (!isInteresting(Rec))
continue;
if (ManualMap.find(Name) != ManualMap.end()) {
- auto *NewRec = Records.getDef(ManualMap.at(Rec->getName()));
+ const auto *NewRec = Records.getDef(ManualMap.at(Rec->getName()));
assert(NewRec && "Instruction not found!");
- auto &NewInst = Target.getInstruction(NewRec);
+ const auto &NewInst = Target.getInstruction(NewRec);
Table.push_back(std::pair(Inst, &NewInst));
continue;
}
std::string NewName = ("V" + Name).str();
- auto *AVXRec = Records.getDef(NewName);
+ const auto *AVXRec = Records.getDef(NewName);
if (!AVXRec)
continue;
auto &AVXInst = Target.getInstruction(AVXRec);
diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def
index d76c404722b0..bc539d792f38 100644
--- a/llvm/utils/TableGen/X86ManualInstrMapping.def
+++ b/llvm/utils/TableGen/X86ManualInstrMapping.def
@@ -32,6 +32,7 @@ NOCOMP(VPSRAQZ128ri)
NOCOMP(VPSRAQZ128rm)
NOCOMP(VPSRAQZ128rr)
NOCOMP(VSCALEFPSZ128rm)
+NOCOMP(VMOVZPDILo2PDIZrr)
NOCOMP(VDBPSADBWZ256rmi)
NOCOMP(VDBPSADBWZ256rri)
NOCOMP(VPMAXSQZ256rm)
diff --git a/llvm/utils/TableGen/X86MnemonicTables.cpp b/llvm/utils/TableGen/X86MnemonicTables.cpp
index d9ceed40f7c7..ddbfb2af9869 100644
--- a/llvm/utils/TableGen/X86MnemonicTables.cpp
+++ b/llvm/utils/TableGen/X86MnemonicTables.cpp
@@ -22,10 +22,10 @@ using namespace llvm;
namespace {
class X86MnemonicTablesEmitter {
- CodeGenTarget Target;
+ const CodeGenTarget Target;
public:
- X86MnemonicTablesEmitter(RecordKeeper &R) : Target(R) {}
+ X86MnemonicTablesEmitter(const RecordKeeper &R) : Target(R) {}
// Output X86 mnemonic tables.
void run(raw_ostream &OS);
@@ -34,15 +34,13 @@ public:
void X86MnemonicTablesEmitter::run(raw_ostream &OS) {
emitSourceFileHeader("X86 Mnemonic tables", OS);
OS << "namespace llvm {\nnamespace X86 {\n\n";
- Record *AsmWriter = Target.getAsmWriter();
+ const Record *AsmWriter = Target.getAsmWriter();
unsigned Variant = AsmWriter->getValueAsInt("Variant");
// Hold all instructions grouped by mnemonic
StringMap<SmallVector<const CodeGenInstruction *, 0>> MnemonicToCGInstrMap;
- ArrayRef<const CodeGenInstruction *> NumberedInstructions =
- Target.getInstructionsByEnumValue();
- for (const CodeGenInstruction *I : NumberedInstructions) {
+ for (const CodeGenInstruction *I : Target.getInstructionsByEnumValue()) {
const Record *Def = I->TheDef;
// Filter non-X86 instructions.
if (!Def->isSubClassOf("X86Inst"))
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index a1e67e3ea692..4386e8361712 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -154,14 +154,13 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
UID(uid), Spec(&tables.specForUID(uid)) {
// Check for 64-bit inst which does not require REX
// FIXME: Is there some better way to check for In64BitMode?
- std::vector<Record *> Predicates = Rec->getValueAsListOfDefs("Predicates");
- for (unsigned i = 0, e = Predicates.size(); i != e; ++i) {
- if (Predicates[i]->getName().contains("Not64Bit") ||
- Predicates[i]->getName().contains("In32Bit")) {
+ for (const Record *Predicate : Rec->getValueAsListOfConstDefs("Predicates")) {
+ if (Predicate->getName().contains("Not64Bit") ||
+ Predicate->getName().contains("In32Bit")) {
Is32Bit = true;
break;
}
- if (Predicates[i]->getName().contains("In64Bit")) {
+ if (Predicate->getName().contains("In64Bit")) {
Is64Bit = true;
break;
}
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn
index 2227ad42cf40..4f9ba335859b 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/test/BUILD.gn
@@ -83,7 +83,6 @@ group("test") {
"//clang-tools-extra/unittests",
"//clang/lib/Headers",
"//clang/tools/c-index-test",
- "//clang/tools/clang-rename",
"//clang/tools/driver:symlinks",
"//llvm/tools/llvm-bcanalyzer",
"//llvm/utils/FileCheck",
diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
index 87a2e771dda6..cba7867854df 100644
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -142,6 +142,7 @@ copy("Headers") {
"avx10_2_512satcvtintrin.h",
"avx10_2bf16intrin.h",
"avx10_2convertintrin.h",
+ "avx10_2copyintrin.h",
"avx10_2minmaxintrin.h",
"avx10_2niintrin.h",
"avx10_2satcvtdsintrin.h",
diff --git a/llvm/utils/gn/secondary/clang/test/BUILD.gn b/llvm/utils/gn/secondary/clang/test/BUILD.gn
index 1ec94a419f56..1d5b8025a12a 100644
--- a/llvm/utils/gn/secondary/clang/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/test/BUILD.gn
@@ -171,7 +171,6 @@ group("test") {
"//clang/tools/clang-installapi",
"//clang/tools/clang-offload-bundler",
"//clang/tools/clang-refactor",
- "//clang/tools/clang-rename",
"//clang/tools/clang-repl",
"//clang/tools/clang-scan-deps",
"//clang/tools/diagtool",
diff --git a/llvm/utils/gn/secondary/clang/tools/clang-rename/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/clang-rename/BUILD.gn
deleted file mode 100644
index 1c517b989345..000000000000
--- a/llvm/utils/gn/secondary/clang/tools/clang-rename/BUILD.gn
+++ /dev/null
@@ -1,14 +0,0 @@
-executable("clang-rename") {
- configs += [ "//llvm/utils/gn/build:clang_code" ]
- deps = [
- "//clang/lib/Basic",
- "//clang/lib/Frontend",
- "//clang/lib/Rewrite",
- "//clang/lib/Tooling",
- "//clang/lib/Tooling/Core",
- "//clang/lib/Tooling/Refactoring",
- "//llvm/lib/Option",
- "//llvm/lib/Support",
- ]
- sources = [ "ClangRename.cpp" ]
-}
diff --git a/llvm/utils/gn/secondary/clang/unittests/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/BUILD.gn
index a6a4a5708341..4aa844ac5a3c 100644
--- a/llvm/utils/gn/secondary/clang/unittests/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/unittests/BUILD.gn
@@ -16,7 +16,6 @@ group("unittests") {
"InstallAPI:InstallAPITests",
"Interpreter:ClangReplInterpreterTests",
"Lex:LexTests",
- "Rename:ClangRenameTests",
"Rewrite:RewriteTests",
"Sema:SemaTests",
"Serialization:SerializationTests",
diff --git a/llvm/utils/gn/secondary/clang/unittests/Rename/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Rename/BUILD.gn
deleted file mode 100644
index 10c922424186..000000000000
--- a/llvm/utils/gn/secondary/clang/unittests/Rename/BUILD.gn
+++ /dev/null
@@ -1,28 +0,0 @@
-import("//third-party/unittest/unittest.gni")
-
-unittest("ClangRenameTests") {
- configs += [ "//llvm/utils/gn/build:clang_code" ]
-
- # We'd like clang/unittests/Tooling/RewriterTestContext.h in the test.
- include_dirs = [ "../.." ]
-
- deps = [
- "//clang/lib/AST",
- "//clang/lib/ASTMatchers",
- "//clang/lib/Basic",
- "//clang/lib/Format",
- "//clang/lib/Frontend",
- "//clang/lib/Rewrite",
- "//clang/lib/Tooling",
- "//clang/lib/Tooling/Core",
- "//clang/lib/Tooling/Refactoring",
- "//llvm/lib/Support",
- ]
- sources = [
- "RenameAliasTest.cpp",
- "RenameClassTest.cpp",
- "RenameEnumTest.cpp",
- "RenameFunctionTest.cpp",
- "RenameMemberTest.cpp",
- ]
-}
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn
index 1e2c0cdc8630..2fd3b9a7dd3d 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn
@@ -275,8 +275,8 @@ static_library("builtins") {
sources -= [ "fp_mode.c" ]
sources += [
"cpu_model/x86.c",
- "i386/fp_mode.c",
"extendbfsf2.c",
+ "i386/fp_mode.c",
"truncdfbf2.c",
"truncsfbf2.c",
]
diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
index 02532f63dd67..8a7bb4a27923 100644
--- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
@@ -101,7 +101,6 @@ write_cmake_config("config") {
"HAVE_PTHREAD_SET_NAME_NP=",
"HAVE_SIGNAL_H=1",
"HAVE_SYS_STAT_H=1",
- "HAVE_SYS_TYPES_H=1",
"HAVE_VALGRIND_VALGRIND_H=",
"HAVE__ALLOCA=",
"HAVE___ALLOCA=",
@@ -228,7 +227,6 @@ write_cmake_config("config") {
"HAVE_SYSCONF=",
"HAVE_SYS_IOCTL_H=",
"HAVE_SYS_MMAN_H=",
- "HAVE_SYS_PARAM_H=",
"HAVE_SYS_RESOURCE_H=",
"HAVE_SYS_TIME_H=",
"HAVE_TERMIOS_H=",
@@ -264,7 +262,6 @@ write_cmake_config("config") {
"HAVE_SYSCONF=1",
"HAVE_SYS_IOCTL_H=1",
"HAVE_SYS_MMAN_H=1",
- "HAVE_SYS_PARAM_H=1",
"HAVE_SYS_RESOURCE_H=1",
"HAVE_SYS_TIME_H=1",
"HAVE_TERMIOS_H=1",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn
index 0900872d4449..853cf341e284 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn
@@ -15,6 +15,7 @@ static_library("Vectorize") {
"SLPVectorizer.cpp",
"SandboxVectorizer/DependencyGraph.cpp",
"SandboxVectorizer/Passes/BottomUpVec.cpp",
+ "SandboxVectorizer/Region.cpp",
"SandboxVectorizer/SandboxVectorizer.cpp",
"VPlan.cpp",
"VPlanAnalysis.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn
index f9f519cf65da..a91cb838c5e2 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn
@@ -9,5 +9,9 @@ unittest("SandboxVectorizerTests") {
"//llvm/lib/TargetParser",
"//llvm/lib/Transforms/Vectorize",
]
- sources = [ "DependencyGraphTest.cpp" ]
+ sources = [
+ "DependencyGraphTest.cpp",
+ "LegalityTest.cpp",
+ "RegionTest.cpp",
+ ]
}
diff --git a/llvm/utils/yaml-bench/YAMLBench.cpp b/llvm/utils/yaml-bench/YAMLBench.cpp
index 50e55538a011..4dc6caeb6fdb 100644
--- a/llvm/utils/yaml-bench/YAMLBench.cpp
+++ b/llvm/utils/yaml-bench/YAMLBench.cpp
@@ -56,17 +56,6 @@ cl::opt<cl::boolOrDefault>
UseColor("use-color", cl::desc("Emit colored output (default=autodetect)"),
cl::init(cl::BOU_UNSET));
-struct indent {
- unsigned distance;
- indent(unsigned d) : distance(d) {}
-};
-
-static raw_ostream &operator <<(raw_ostream &os, const indent &in) {
- for (unsigned i = 0; i < in.distance; ++i)
- os << " ";
- return os;
-}
-
/// Pretty print a tag by replacing tag:yaml.org,2002: with !!.
static std::string prettyTag(yaml::Node *N) {
std::string Tag = N->getVerbatimTag();
diff --git a/mlir/include/mlir-c/Pass.h b/mlir/include/mlir-c/Pass.h
index 35db138305d1..2218ec0f47d1 100644
--- a/mlir/include/mlir-c/Pass.h
+++ b/mlir/include/mlir-c/Pass.h
@@ -74,9 +74,11 @@ mlirPassManagerGetAsOpPassManager(MlirPassManager passManager);
MLIR_CAPI_EXPORTED MlirLogicalResult
mlirPassManagerRunOnOp(MlirPassManager passManager, MlirOperation op);
-/// Enable mlir-print-ir-after-all.
-MLIR_CAPI_EXPORTED void
-mlirPassManagerEnableIRPrinting(MlirPassManager passManager);
+/// Enable IR printing.
+MLIR_CAPI_EXPORTED void mlirPassManagerEnableIRPrinting(
+ MlirPassManager passManager, bool printBeforeAll, bool printAfterAll,
+ bool printModuleScope, bool printAfterOnlyOnChange,
+ bool printAfterOnlyOnFailure);
/// Enable / disable verify-each.
MLIR_CAPI_EXPORTED void
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
index 67baa8777a6f..8eb711962583 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -73,10 +73,20 @@ void populateGpuBreakDownSubgroupReducePatterns(
/// Collect a set of patterns to lower `gpu.subgroup_reduce` into `gpu.shuffle`
/// ops over `shuffleBitwidth` scalar types. Assumes that the subgroup has
/// `subgroupSize` lanes. Uses the butterfly shuffle algorithm.
+///
+/// The patterns populated by this function will ignore ops with the
+/// `cluster_size` attribute.
+/// `populateGpuLowerClusteredSubgroupReduceToShufflePatterns` is the opposite.
void populateGpuLowerSubgroupReduceToShufflePatterns(
RewritePatternSet &patterns, unsigned subgroupSize,
unsigned shuffleBitwidth = 32, PatternBenefit benefit = 1);
+/// Disjoint counterpart of `populateGpuLowerSubgroupReduceToShufflePatterns`
+/// that only matches `gpu.subgroup_reduce` ops with a `cluster_size`.
+void populateGpuLowerClusteredSubgroupReduceToShufflePatterns(
+ RewritePatternSet &patterns, unsigned subgroupSize,
+ unsigned shuffleBitwidth = 32, PatternBenefit benefit = 1);
+
/// Collect all patterns to rewrite ops within the GPU dialect.
inline void populateGpuRewritePatterns(RewritePatternSet &patterns) {
populateGpuAllReducePatterns(patterns);
diff --git a/mlir/lib/Bindings/Python/Pass.cpp b/mlir/lib/Bindings/Python/Pass.cpp
index a68421b61641..1d0e5ce2115a 100644
--- a/mlir/lib/Bindings/Python/Pass.cpp
+++ b/mlir/lib/Bindings/Python/Pass.cpp
@@ -74,10 +74,17 @@ void mlir::python::populatePassManagerSubmodule(py::module &m) {
"Releases (leaks) the backing pass manager (testing)")
.def(
"enable_ir_printing",
- [](PyPassManager &passManager) {
- mlirPassManagerEnableIRPrinting(passManager.get());
+ [](PyPassManager &passManager, bool printBeforeAll,
+ bool printAfterAll, bool printModuleScope, bool printAfterChange,
+ bool printAfterFailure) {
+ mlirPassManagerEnableIRPrinting(
+ passManager.get(), printBeforeAll, printAfterAll,
+ printModuleScope, printAfterChange, printAfterFailure);
},
- "Enable mlir-print-ir-after-all.")
+ "print_before_all"_a = false, "print_after_all"_a = true,
+ "print_module_scope"_a = false, "print_after_change"_a = false,
+ "print_after_failure"_a = false,
+ "Enable IR printing, default as mlir-print-ir-after-all.")
.def(
"enable_verifier",
[](PyPassManager &passManager, bool enable) {
diff --git a/mlir/lib/CAPI/IR/Pass.cpp b/mlir/lib/CAPI/IR/Pass.cpp
index d242baae99c0..a6c9fbd08d45 100644
--- a/mlir/lib/CAPI/IR/Pass.cpp
+++ b/mlir/lib/CAPI/IR/Pass.cpp
@@ -44,8 +44,21 @@ MlirLogicalResult mlirPassManagerRunOnOp(MlirPassManager passManager,
return wrap(unwrap(passManager)->run(unwrap(op)));
}
-void mlirPassManagerEnableIRPrinting(MlirPassManager passManager) {
- return unwrap(passManager)->enableIRPrinting();
+void mlirPassManagerEnableIRPrinting(MlirPassManager passManager,
+ bool printBeforeAll, bool printAfterAll,
+ bool printModuleScope,
+ bool printAfterOnlyOnChange,
+ bool printAfterOnlyOnFailure) {
+ auto shouldPrintBeforePass = [printBeforeAll](Pass *, Operation *) {
+ return printBeforeAll;
+ };
+ auto shouldPrintAfterPass = [printAfterAll](Pass *, Operation *) {
+ return printAfterAll;
+ };
+ return unwrap(passManager)
+ ->enableIRPrinting(shouldPrintBeforePass, shouldPrintAfterPass,
+ printModuleScope, printAfterOnlyOnChange,
+ printAfterOnlyOnFailure);
}
void mlirPassManagerEnableVerifier(MlirPassManager passManager, bool enable) {
diff --git a/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp b/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp
index b166f1cd469a..185f824351a2 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp
@@ -210,13 +210,21 @@ Value createSubgroupShuffleReduction(OpBuilder &builder, Location loc,
struct ScalarSubgroupReduceToShuffles final
: OpRewritePattern<gpu::SubgroupReduceOp> {
ScalarSubgroupReduceToShuffles(MLIRContext *ctx, unsigned subgroupSize,
- unsigned shuffleBitwidth,
+ unsigned shuffleBitwidth, bool matchClustered,
PatternBenefit benefit)
: OpRewritePattern(ctx, benefit), subgroupSize(subgroupSize),
- shuffleBitwidth(shuffleBitwidth) {}
+ shuffleBitwidth(shuffleBitwidth), matchClustered(matchClustered) {}
LogicalResult matchAndRewrite(gpu::SubgroupReduceOp op,
PatternRewriter &rewriter) const override {
+ if (op.getClusterSize().has_value() != matchClustered) {
+ return rewriter.notifyMatchFailure(
+ op, llvm::formatv("op is {0}clustered but pattern is configured to "
+ "only match {1}clustered ops",
+ matchClustered ? "non-" : "",
+ matchClustered ? "" : "non-"));
+ }
+
auto ci = getAndValidateClusterInfo(op, subgroupSize);
if (failed(ci))
return failure();
@@ -262,19 +270,28 @@ struct ScalarSubgroupReduceToShuffles final
private:
unsigned subgroupSize = 0;
unsigned shuffleBitwidth = 0;
+ bool matchClustered = false;
};
/// Lowers vector gpu subgroup reductions to a series of shuffles.
struct VectorSubgroupReduceToShuffles final
: OpRewritePattern<gpu::SubgroupReduceOp> {
VectorSubgroupReduceToShuffles(MLIRContext *ctx, unsigned subgroupSize,
- unsigned shuffleBitwidth,
+ unsigned shuffleBitwidth, bool matchClustered,
PatternBenefit benefit)
: OpRewritePattern(ctx, benefit), subgroupSize(subgroupSize),
- shuffleBitwidth(shuffleBitwidth) {}
+ shuffleBitwidth(shuffleBitwidth), matchClustered(matchClustered) {}
LogicalResult matchAndRewrite(gpu::SubgroupReduceOp op,
PatternRewriter &rewriter) const override {
+ if (op.getClusterSize().has_value() != matchClustered) {
+ return rewriter.notifyMatchFailure(
+ op, llvm::formatv("op is {0}clustered but pattern is configured to "
+ "only match {1}clustered ops",
+ matchClustered ? "non-" : "",
+ matchClustered ? "" : "non-"));
+ }
+
auto ci = getAndValidateClusterInfo(op, subgroupSize);
if (failed(ci))
return failure();
@@ -343,6 +360,7 @@ struct VectorSubgroupReduceToShuffles final
private:
unsigned subgroupSize = 0;
unsigned shuffleBitwidth = 0;
+ bool matchClustered = false;
};
} // namespace
@@ -358,5 +376,14 @@ void mlir::populateGpuLowerSubgroupReduceToShufflePatterns(
RewritePatternSet &patterns, unsigned subgroupSize,
unsigned shuffleBitwidth, PatternBenefit benefit) {
patterns.add<ScalarSubgroupReduceToShuffles, VectorSubgroupReduceToShuffles>(
- patterns.getContext(), subgroupSize, shuffleBitwidth, benefit);
+ patterns.getContext(), subgroupSize, shuffleBitwidth,
+ /*matchClustered=*/false, benefit);
+}
+
+void mlir::populateGpuLowerClusteredSubgroupReduceToShufflePatterns(
+ RewritePatternSet &patterns, unsigned subgroupSize,
+ unsigned shuffleBitwidth, PatternBenefit benefit) {
+ patterns.add<ScalarSubgroupReduceToShuffles, VectorSubgroupReduceToShuffles>(
+ patterns.getContext(), subgroupSize, shuffleBitwidth,
+ /*matchClustered=*/true, benefit);
}
diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index c7ed158aabb6..32182c083a8a 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -584,9 +584,8 @@ private:
struct InProgressAliasInfo {
InProgressAliasInfo()
: aliasDepth(0), isType(false), canBeDeferred(false) {}
- InProgressAliasInfo(StringRef alias, bool isType, bool canBeDeferred)
- : alias(alias), aliasDepth(1), isType(isType),
- canBeDeferred(canBeDeferred) {}
+ InProgressAliasInfo(StringRef alias)
+ : alias(alias), aliasDepth(1), isType(false), canBeDeferred(false) {}
bool operator<(const InProgressAliasInfo &rhs) const {
// Order first by depth, then by attr/type kind, and then by name.
@@ -1096,6 +1095,8 @@ std::pair<size_t, size_t> AliasInitializer::visitImpl(
// Try to generate an alias for this value.
generateAlias(value, it->second, canBeDeferred);
+ it->second.isType = std::is_base_of_v<Type, T>;
+ it->second.canBeDeferred = canBeDeferred;
// Print the value, capturing any nested elements that require aliases.
SmallVector<size_t> childAliases;
@@ -1153,8 +1154,7 @@ void AliasInitializer::generateAlias(T symbol, InProgressAliasInfo &alias,
sanitizeIdentifier(nameBuffer, tempBuffer, /*allowedPunctChars=*/"$_-",
/*allowTrailingDigit=*/false);
name = name.copy(aliasAllocator);
- alias = InProgressAliasInfo(name, /*isType=*/std::is_base_of_v<Type, T>,
- canBeDeferred);
+ alias = InProgressAliasInfo(name);
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi b/mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi
index c072d5e0fb86..5d115e8222d7 100644
--- a/mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi
+++ b/mlir/python/mlir/_mlir_libs/_mlir/passmanager.pyi
@@ -16,7 +16,14 @@ class PassManager:
def __init__(self, context: Optional[_ir.Context] = None) -> None: ...
def _CAPICreate(self) -> object: ...
def _testing_release(self) -> None: ...
- def enable_ir_printing(self) -> None: ...
+ def enable_ir_printing(
+ self,
+ print_before_all: bool = False,
+ print_after_all: bool = True,
+ print_module_scope: bool = False,
+ print_after_change: bool = False,
+ print_after_failure: bool = False,
+ ) -> None: ...
def enable_verifier(self, enable: bool) -> None: ...
@staticmethod
def parse(pipeline: str, context: Optional[_ir.Context] = None) -> PassManager: ...
diff --git a/mlir/test/IR/print-attr-type-aliases.mlir b/mlir/test/IR/print-attr-type-aliases.mlir
index 27c5a75addbb..e878d862076c 100644
--- a/mlir/test/IR/print-attr-type-aliases.mlir
+++ b/mlir/test/IR/print-attr-type-aliases.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -mlir-print-debuginfo | FileCheck %s
// Verify printer of type & attr aliases.
-// RUN: mlir-opt %s -split-input-file | mlir-opt -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -mlir-print-debuginfo | mlir-opt -split-input-file -mlir-print-debuginfo | FileCheck %s
// CHECK-DAG: #test2Ealias = "alias_test:dot_in_name"
"test.op"() {alias_test = "alias_test:dot_in_name"} : () -> ()
@@ -32,16 +32,16 @@
// CHECK-DAG: tensor<32x!test_ui8_>
"test.op"() : () -> tensor<32x!test.int<unsigned, 8>>
-// CHECK-DAG: #loc = loc("nested")
-// CHECK-DAG: #loc1 = loc("test.mlir":10:8)
-// CHECK-DAG: #loc2 = loc(fused<#loc>[#loc1])
+// CHECK-DAG: #[[LOC_NESTED:.+]] = loc("nested")
+// CHECK-DAG: #[[LOC_RAW:.+]] = loc("test.mlir":10:8)
+// CHECK-DAG: = loc(fused<#[[LOC_NESTED]]>[#[[LOC_RAW]]])
"test.op"() {alias_test = loc(fused<loc("nested")>["test.mlir":10:8])} : () -> ()
// -----
// Check proper ordering of intermixed attribute/type aliases.
// CHECK: !tuple = tuple<
-// CHECK: #loc1 = loc(fused<!tuple
+// CHECK: = loc(fused<!tuple
"test.op"() {alias_test = loc(fused<tuple<i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32>>["test.mlir":10:8])} : () -> ()
// -----
@@ -54,7 +54,7 @@
// -----
// Check that we don't print aliases for things that aren't printed.
-// CHECK: #loc1 = loc(fused<memref<1xi32>
+// CHECK: = loc(fused<memref<1xi32>
// CHECK-NOT: #map
"test.op"() {alias_test = loc(fused<memref<1xi32, affine_map<(d0) -> (d0)>>>["test.mlir":10:8])} : () -> ()
@@ -71,3 +71,16 @@
"test.op"() {attr = #test.conditional_alias<#unalias_me>} : () -> ()
// CHECK-NEXT: #test.conditional_alias<#test2Ealias>
"test.op"() {attr = #test.conditional_alias<#keep_aliased>} : () -> ()
+
+// -----
+
+// Check that a deferred no_alias attr can be un-deferred.
+
+#keep_aliased = "alias_test:dot_in_name"
+#cond_alias = #test.conditional_alias<#keep_aliased>
+#no_alias = loc(fused<#cond_alias>["test.mlir":1:1])
+
+// CHECK: #[[TEST_ALIAS:.+]] = "alias_test:dot_in_name"
+// CHECK: fused<#test.conditional_alias<#[[TEST_ALIAS]]>
+// CHECK: "test.op"
+"test.op"() {attr = #no_alias} : () -> () loc(fused<#no_alias>["test.mlir":0:0])
diff --git a/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp b/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp
index 99a914506b01..74d057c0b7b6 100644
--- a/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp
+++ b/mlir/test/lib/Dialect/GPU/TestGpuRewrite.cpp
@@ -78,9 +78,12 @@ struct TestGpuSubgroupReduceLoweringPass
populateGpuBreakDownSubgroupReducePatterns(patterns,
/*maxShuffleBitwidth=*/32,
PatternBenefit(2));
- if (expandToShuffles)
+ if (expandToShuffles) {
populateGpuLowerSubgroupReduceToShufflePatterns(
patterns, /*subgroupSize=*/32, /*shuffleBitwidth=*/32);
+ populateGpuLowerClusteredSubgroupReduceToShufflePatterns(
+ patterns, /*subgroupSize=*/32, /*shuffleBitwidth=*/32);
+ }
(void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
}
diff --git a/mlir/test/python/execution_engine.py b/mlir/test/python/execution_engine.py
index 1cdda63eefe3..7c375ce81de0 100644
--- a/mlir/test/python/execution_engine.py
+++ b/mlir/test/python/execution_engine.py
@@ -1,4 +1,4 @@
-# RUN: %PYTHON %s 2>&1 | FileCheck %s
+# RUN: env MLIR_RUNNER_UTILS=%mlir_runner_utils MLIR_C_RUNNER_UTILS=%mlir_c_runner_utils %PYTHON %s 2>&1 | FileCheck %s
# REQUIRES: host-supports-jit
import gc, sys, os, tempfile
from mlir.ir import *
@@ -7,6 +7,12 @@ from mlir.execution_engine import *
from mlir.runtime import *
from ml_dtypes import bfloat16, float8_e5m2
+MLIR_RUNNER_UTILS = os.getenv(
+ "MLIR_RUNNER_UTILS", "../../../../lib/libmlir_runner_utils.so"
+)
+MLIR_C_RUNNER_UTILS = os.getenv(
+ "MLIR_C_RUNNER_UTILS", "../../../../lib/libmlir_c_runner_utils.so"
+)
# Log everything to stderr and flush so that we have a unified stream to match
# errors/info emitted by MLIR to stderr.
@@ -700,8 +706,8 @@ def testSharedLibLoad():
]
else:
shared_libs = [
- "../../../../lib/libmlir_runner_utils.so",
- "../../../../lib/libmlir_c_runner_utils.so",
+ MLIR_RUNNER_UTILS,
+ MLIR_C_RUNNER_UTILS,
]
execution_engine = ExecutionEngine(
@@ -743,8 +749,8 @@ def testNanoTime():
]
else:
shared_libs = [
- "../../../../lib/libmlir_runner_utils.so",
- "../../../../lib/libmlir_c_runner_utils.so",
+ MLIR_RUNNER_UTILS,
+ MLIR_C_RUNNER_UTILS,
]
execution_engine = ExecutionEngine(
diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py
index 43af80b53166..749670325623 100644
--- a/mlir/test/python/pass_manager.py
+++ b/mlir/test/python/pass_manager.py
@@ -300,14 +300,40 @@ def testPrintIrAfterAll():
pm = PassManager.parse("builtin.module(canonicalize)")
ctx.enable_multithreading(False)
pm.enable_ir_printing()
- # CHECK: // -----// IR Dump Before Canonicalizer (canonicalize) ('builtin.module' operation) //----- //
+ # CHECK: // -----// IR Dump After Canonicalizer (canonicalize) //----- //
+ # CHECK: module {
+ # CHECK: func.func @main() {
+ # CHECK: return
+ # CHECK: }
+ # CHECK: }
+ pm.run(module)
+
+
+# CHECK-LABEL: TEST: testPrintIrBeforeAndAfterAll
+@run
+def testPrintIrBeforeAndAfterAll():
+ with Context() as ctx:
+ module = ModuleOp.parse(
+ """
+ module {
+ func.func @main() {
+ %0 = arith.constant 10
+ return
+ }
+ }
+ """
+ )
+ pm = PassManager.parse("builtin.module(canonicalize)")
+ ctx.enable_multithreading(False)
+ pm.enable_ir_printing(print_before_all=True, print_after_all=True)
+ # CHECK: // -----// IR Dump Before Canonicalizer (canonicalize) //----- //
# CHECK: module {
# CHECK: func.func @main() {
# CHECK: %[[C10:.*]] = arith.constant 10 : i64
# CHECK: return
# CHECK: }
# CHECK: }
- # CHECK: // -----// IR Dump After Canonicalizer (canonicalize) ('builtin.module' operation) //----- //
+ # CHECK: // -----// IR Dump After Canonicalizer (canonicalize) //----- //
# CHECK: module {
# CHECK: func.func @main() {
# CHECK: return
diff --git a/mlir/test/tblgen-to-irdl/CMathDialect.td b/mlir/test/tblgen-to-irdl/CMathDialect.td
index 454543e074c4..abda7ca41e9d 100644
--- a/mlir/test/tblgen-to-irdl/CMathDialect.td
+++ b/mlir/test/tblgen-to-irdl/CMathDialect.td
@@ -19,12 +19,14 @@ class CMath_Op<string mnemonic, list<Trait> traits = []>
def f32Orf64Type : Or<[CPred<"::llvm::isa<::mlir::F32>">,
CPred<"::llvm::isa<::mlir::F64>">]>;
+// CHECK: irdl.type @"!complex"
def CMath_ComplexType : CMath_Type<"ComplexType", "complex"> {
let parameters = (ins f32Orf64Type:$elementType);
+ let assemblyFormat = "`<` $elementType `>`";
}
// CHECK: irdl.operation @identity {
-// CHECK-NEXT: %0 = irdl.base "!cmath.complex"
+// CHECK-NEXT: %0 = irdl.base @cmath::@"!complex"
// CHECK-NEXT: irdl.results(%0)
// CHECK-NEXT: }
def CMath_IdentityOp : CMath_Op<"identity"> {
@@ -32,9 +34,9 @@ def CMath_IdentityOp : CMath_Op<"identity"> {
}
// CHECK: irdl.operation @mul {
-// CHECK-NEXT: %0 = irdl.base "!cmath.complex"
-// CHECK-NEXT: %1 = irdl.base "!cmath.complex"
-// CHECK-NEXT: %2 = irdl.base "!cmath.complex"
+// CHECK-NEXT: %0 = irdl.base @cmath::@"!complex"
+// CHECK-NEXT: %1 = irdl.base @cmath::@"!complex"
+// CHECK-NEXT: %2 = irdl.base @cmath::@"!complex"
// CHECK-NEXT: irdl.operands(%0, %1)
// CHECK-NEXT: irdl.results(%2)
// CHECK-NEXT: }
@@ -45,7 +47,7 @@ def CMath_MulOp : CMath_Op<"mul"> {
// CHECK: irdl.operation @norm {
// CHECK-NEXT: %0 = irdl.any
-// CHECK-NEXT: %1 = irdl.base "!cmath.complex"
+// CHECK-NEXT: %1 = irdl.base @cmath::@"!complex"
// CHECK-NEXT: irdl.operands(%0)
// CHECK-NEXT: irdl.results(%1)
// CHECK-NEXT: }
diff --git a/mlir/test/tblgen-to-irdl/TestDialect.td b/mlir/test/tblgen-to-irdl/TestDialect.td
index 2622c8177607..4fea3d8576e9 100644
--- a/mlir/test/tblgen-to-irdl/TestDialect.td
+++ b/mlir/test/tblgen-to-irdl/TestDialect.td
@@ -16,8 +16,11 @@ class Test_Type<string name, string typeMnemonic, list<Trait> traits = []>
class Test_Op<string mnemonic, list<Trait> traits = []>
: Op<Test_Dialect, mnemonic, traits>;
+// CHECK: irdl.type @"!singleton_a"
def Test_SingletonAType : Test_Type<"SingletonAType", "singleton_a"> {}
+// CHECK: irdl.type @"!singleton_b"
def Test_SingletonBType : Test_Type<"SingletonBType", "singleton_b"> {}
+// CHECK: irdl.type @"!singleton_c"
def Test_SingletonCType : Test_Type<"SingletonCType", "singleton_c"> {}
@@ -26,7 +29,7 @@ def Test_AndOp : Test_Op<"and"> {
let arguments = (ins AllOfType<[Test_SingletonAType, AnyType]>:$in);
}
// CHECK-LABEL: irdl.operation @and {
-// CHECK-NEXT: %[[v0:[^ ]*]] = irdl.base "!test.singleton_a"
+// CHECK-NEXT: %[[v0:[^ ]*]] = irdl.base @test::@"!singleton_a"
// CHECK-NEXT: %[[v1:[^ ]*]] = irdl.any
// CHECK-NEXT: %[[v2:[^ ]*]] = irdl.all_of(%[[v0]], %[[v1]])
// CHECK-NEXT: irdl.operands(%[[v2]])
@@ -79,9 +82,9 @@ def Test_OrOp : Test_Op<"or"> {
let arguments = (ins AnyTypeOf<[Test_SingletonAType, Test_SingletonBType, Test_SingletonCType]>:$in);
}
// CHECK-LABEL: irdl.operation @or {
-// CHECK-NEXT: %[[v0:[^ ]*]] = irdl.base "!test.singleton_a"
-// CHECK-NEXT: %[[v1:[^ ]*]] = irdl.base "!test.singleton_b"
-// CHECK-NEXT: %[[v2:[^ ]*]] = irdl.base "!test.singleton_c"
+// CHECK-NEXT: %[[v0:[^ ]*]] = irdl.base @test::@"!singleton_a"
+// CHECK-NEXT: %[[v1:[^ ]*]] = irdl.base @test::@"!singleton_b"
+// CHECK-NEXT: %[[v2:[^ ]*]] = irdl.base @test::@"!singleton_c"
// CHECK-NEXT: %[[v3:[^ ]*]] = irdl.any_of(%[[v0]], %[[v1]], %[[v2]])
// CHECK-NEXT: irdl.operands(%[[v3]])
// CHECK-NEXT: }
@@ -114,8 +117,8 @@ def Test_VariadicityOp : Test_Op<"variadicity"> {
Test_SingletonCType:$required);
}
// CHECK-LABEL: irdl.operation @variadicity {
-// CHECK-NEXT: %[[v0:[^ ]*]] = irdl.base "!test.singleton_a"
-// CHECK-NEXT: %[[v1:[^ ]*]] = irdl.base "!test.singleton_b"
-// CHECK-NEXT: %[[v2:[^ ]*]] = irdl.base "!test.singleton_c"
+// CHECK-NEXT: %[[v0:[^ ]*]] = irdl.base @test::@"!singleton_a"
+// CHECK-NEXT: %[[v1:[^ ]*]] = irdl.base @test::@"!singleton_b"
+// CHECK-NEXT: %[[v2:[^ ]*]] = irdl.base @test::@"!singleton_c"
// CHECK-NEXT: irdl.operands(variadic %[[v0]], optional %[[v1]], %[[v2]])
// CHECK-NEXT: }
diff --git a/mlir/tools/tblgen-to-irdl/OpDefinitionsGen.cpp b/mlir/tools/tblgen-to-irdl/OpDefinitionsGen.cpp
index dd0d98de496e..45957bafc378 100644
--- a/mlir/tools/tblgen-to-irdl/OpDefinitionsGen.cpp
+++ b/mlir/tools/tblgen-to-irdl/OpDefinitionsGen.cpp
@@ -177,6 +177,15 @@ Value createConstraint(OpBuilder &builder, tblgen::Constraint constraint) {
}
if (predRec.isSubClassOf("TypeDef")) {
+ auto dialect = predRec.getValueAsDef("dialect")->getValueAsString("name");
+ if (dialect == selectedDialect) {
+ std::string combined = ("!" + predRec.getValueAsString("mnemonic")).str();
+ SmallVector<FlatSymbolRefAttr> nested = {
+ SymbolRefAttr::get(ctx, combined)};
+ auto typeSymbol = SymbolRefAttr::get(ctx, dialect, nested);
+ auto op = builder.create<irdl::BaseOp>(UnknownLoc::get(ctx), typeSymbol);
+ return op.getOutput();
+ }
std::string typeName = ("!" + predRec.getValueAsString("typeName")).str();
auto op = builder.create<irdl::BaseOp>(UnknownLoc::get(ctx),
StringAttr::get(ctx, typeName));
@@ -250,6 +259,12 @@ static StringRef getOperatorName(tblgen::Operator &tblgenOp) {
return opName;
}
+/// Returns the name of the type without the dialect prefix.
+static StringRef getTypeName(tblgen::TypeDef &tblgenType) {
+ StringRef opName = tblgenType.getDef()->getValueAsString("mnemonic");
+ return opName;
+}
+
/// Extract an operation to IRDL.
irdl::OperationOp createIRDLOperation(OpBuilder &builder,
tblgen::Operator &tblgenOp) {
@@ -300,6 +315,19 @@ irdl::OperationOp createIRDLOperation(OpBuilder &builder,
return op;
}
+irdl::TypeOp createIRDLType(OpBuilder &builder, tblgen::TypeDef &tblgenType) {
+ MLIRContext *ctx = builder.getContext();
+ StringRef typeName = getTypeName(tblgenType);
+ std::string combined = ("!" + typeName).str();
+
+ irdl::TypeOp op = builder.create<irdl::TypeOp>(
+ UnknownLoc::get(ctx), StringAttr::get(ctx, combined));
+
+ op.getBody().emplaceBlock();
+
+ return op;
+}
+
static irdl::DialectOp createIRDLDialect(OpBuilder &builder) {
MLIRContext *ctx = builder.getContext();
return builder.create<irdl::DialectOp>(UnknownLoc::get(ctx),
@@ -322,6 +350,14 @@ static bool emitDialectIRDLDefs(const RecordKeeper &recordKeeper,
// Set insertion point to start of DialectOp.
builder = builder.atBlockBegin(&dialect.getBody().emplaceBlock());
+ for (const Record *type :
+ recordKeeper.getAllDerivedDefinitionsIfDefined("TypeDef")) {
+ tblgen::TypeDef tblgenType(type);
+ if (tblgenType.getDialect().getName() != selectedDialect)
+ continue;
+ createIRDLType(builder, tblgenType);
+ }
+
for (const Record *def :
recordKeeper.getAllDerivedDefinitionsIfDefined("Op")) {
tblgen::Operator tblgenOp(def);
diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg
index 9ddef42cf903..514bb89e0b64 100644
--- a/offload/test/lit.cfg
+++ b/offload/test/lit.cfg
@@ -182,7 +182,12 @@ def remove_suffix_if_present(name):
return name
def add_libraries(source):
- return source + " " + config.llvm_library_intdir + "/libomptarget.devicertl.a"
+ if config.libomptarget_has_libc:
+ return source + " -Xoffload-linker " + "-lc " + \
+ "-Xoffload-linker " + "-lm " + \
+ config.llvm_library_intdir + "/libomptarget.devicertl.a"
+ else:
+ return source + " " + config.llvm_library_intdir + "/libomptarget.devicertl.a"
# Add platform targets
host_targets = [
diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
index f106694841ce..2dd54b5116d9 100644
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -426,7 +426,11 @@ if(WIN32)
endforeach()
else()
- install(TARGETS omp ${export_to_llvmexports} ${LIBOMP_INSTALL_KIND} DESTINATION "${OPENMP_INSTALL_LIBDIR}")
+ if(${CMAKE_SYSTEM_NAME} MATCHES "AIX")
+ install(FILES ${LIBOMP_LIBRARY_DIR}/libomp.a DESTINATION "${OPENMP_INSTALL_LIBDIR}" COMPONENT runtime)
+ else()
+ install(TARGETS omp ${export_to_llvmexports} ${LIBOMP_INSTALL_KIND} DESTINATION "${OPENMP_INSTALL_LIBDIR}")
+ endif()
if(${LIBOMP_INSTALL_ALIASES})
# Create aliases (symlinks) of the library for backwards compatibility
diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
index 1d0ba8bd4d58..b39fb8f6795e 100644
--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel
@@ -26,7 +26,6 @@ exports_files([
"tools/clang-format/clang-format.el",
"tools/clang-format/clang-format-test.el",
"tools/clang-format/clang-format.py",
- "tools/clang-rename/clang-rename.el",
"tools/extra/clang-include-fixer/tool/clang-include-fixer.el",
"tools/extra/clang-include-fixer/tool/clang-include-fixer-test.el",
])
@@ -2591,20 +2590,6 @@ cc_binary(
)
cc_binary(
- name = "clang-rename",
- srcs = glob(["tools/clang-rename/*.cpp"]),
- stamp = 0,
- deps = [
- ":basic",
- ":frontend",
- ":rewrite",
- ":tooling",
- ":tooling_refactoring",
- "//llvm:Support",
- ],
-)
-
-cc_binary(
name = "clang-repl",
srcs = glob(["tools/clang-repl/*.cpp"]),
stamp = 0,
diff --git a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel
index 884a6055cf4e..e8c7106b2875 100644
--- a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel
@@ -299,32 +299,6 @@ cc_library(
)
cc_test(
- name = "rename_tests",
- size = "small",
- timeout = "moderate",
- srcs = glob(
- [
- "Rename/*.cpp",
- "Rename/*.h",
- ],
- allow_empty = False,
- ),
- shard_count = 20,
- deps = [
- ":rename_tests_tooling_hdrs",
- "//clang:ast_matchers",
- "//clang:basic",
- "//clang:format",
- "//clang:frontend",
- "//clang:tooling",
- "//clang:tooling_refactoring",
- "//llvm:Support",
- "//third-party/unittest:gtest",
- "//third-party/unittest:gtest_main",
- ],
-)
-
-cc_test(
name = "rewrite_tests",
size = "small",
srcs = glob(
diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h
index 15696c346bff..74b4eca0889a 100644
--- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h
+++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h
@@ -198,9 +198,6 @@
/* Define to 1 if you have the <sys/mman.h> header file. */
#define HAVE_SYS_MMAN_H 1
-/* Define to 1 if you have the <sys/param.h> header file. */
-#define HAVE_SYS_PARAM_H 1
-
/* Define to 1 if you have the <sys/resource.h> header file. */
#define HAVE_SYS_RESOURCE_H 1
@@ -216,9 +213,6 @@
/* Define to 1 if stat struct has st_mtim member. */
/* HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC defined in Bazel */
-/* Define to 1 if you have the <sys/types.h> header file. */
-#define HAVE_SYS_TYPES_H 1
-
/* Define to 1 if you have the <termios.h> header file. */
#define HAVE_TERMIOS_H 1
diff --git a/utils/bazel/llvm_configs/config.h.cmake b/utils/bazel/llvm_configs/config.h.cmake
index d71ff40144c0..4c9404d95daf 100644
--- a/utils/bazel/llvm_configs/config.h.cmake
+++ b/utils/bazel/llvm_configs/config.h.cmake
@@ -191,9 +191,6 @@
/* Define to 1 if you have the <sys/mman.h> header file. */
#cmakedefine HAVE_SYS_MMAN_H ${HAVE_SYS_MMAN_H}
-/* Define to 1 if you have the <sys/param.h> header file. */
-#cmakedefine HAVE_SYS_PARAM_H ${HAVE_SYS_PARAM_H}
-
/* Define to 1 if you have the <sys/resource.h> header file. */
#cmakedefine HAVE_SYS_RESOURCE_H ${HAVE_SYS_RESOURCE_H}
@@ -209,9 +206,6 @@
/* Define to 1 if stat struct has st_mtim member. */
#cmakedefine HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC ${HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC}
-/* Define to 1 if you have the <sys/types.h> header file. */
-#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}
-
/* Define to 1 if you have the <termios.h> header file. */
#cmakedefine HAVE_TERMIOS_H ${HAVE_TERMIOS_H}