summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAiden Grossman <aidengrossman@google.com>2025-04-14 07:38:01 +0000
committerAiden Grossman <aidengrossman@google.com>2025-04-14 07:38:01 +0000
commit0b43a0423bbaa22384d522050a295eb564116d95 (patch)
tree18b4111bcb0563e9f7279666299318617dacc017
parentb7163d6a2490a7517cb8a526e36a877a4fd7bede (diff)
parent97bc9137e545423334b00d60ab64855ccc434c3a (diff)
[𝘀𝗽𝗿] changes introduced through rebaseusers/boomanaiden154/main.githubci-upload-ninja_log-as-an-artifact
Created using spr 1.3.4 [skip ci]
-rw-r--r--clang-tools-extra/clangd/CMakeLists.txt8
-rw-r--r--clang-tools-extra/clangd/test/CMakeLists.txt7
-rw-r--r--clang-tools-extra/clangd/test/lit.site.cfg.py.in1
-rw-r--r--clang/docs/LanguageExtensions.rst16
-rw-r--r--clang/docs/ReleaseNotes.rst5
-rw-r--r--clang/include/clang/Basic/Builtins.td12
-rw-r--r--clang/include/clang/Driver/Options.td7
-rw-r--r--clang/include/clang/Driver/ToolChain.h4
-rw-r--r--clang/include/clang/Support/RISCVVIntrinsicUtils.h22
-rw-r--r--clang/lib/AST/ByteCode/Disasm.cpp177
-rw-r--r--clang/lib/AST/ByteCode/Interp.cpp2
-rw-r--r--clang/lib/AST/ByteCode/Interp.h5
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp16
-rw-r--r--clang/lib/CodeGen/Targets/AMDGPU.cpp6
-rw-r--r--clang/lib/Driver/ToolChain.cpp26
-rw-r--r--clang/lib/Driver/ToolChains/AIX.cpp8
-rw-r--r--clang/lib/Driver/ToolChains/AIX.h3
-rw-r--r--clang/lib/Driver/ToolChains/AMDGPU.cpp23
-rw-r--r--clang/lib/Driver/ToolChains/Arch/ARM.cpp18
-rw-r--r--clang/lib/Driver/ToolChains/PPCLinux.cpp16
-rw-r--r--clang/lib/Driver/ToolChains/PPCLinux.h3
-rw-r--r--clang/lib/Driver/ToolChains/ROCm.h19
-rw-r--r--clang/lib/Format/TokenAnnotator.cpp3
-rw-r--r--clang/lib/Format/WhitespaceManager.cpp9
-rw-r--r--clang/lib/Sema/SemaChecking.cpp2
-rw-r--r--clang/lib/Sema/SemaRISCV.cpp3
-rw-r--r--clang/lib/Sema/SemaTemplateInstantiateDecl.cpp10
-rw-r--r--clang/lib/Sema/TreeTransform.h11
-rw-r--r--clang/lib/Serialization/ASTWriter.cpp69
-rw-r--r--clang/lib/Support/RISCVVIntrinsicUtils.cpp105
-rw-r--r--clang/test/AST/ByteCode/codegen.cpp11
-rw-r--r--clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c6
-rw-r--r--clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c6
-rw-r--r--clang/test/CodeGen/attr-target-x86.c8
-rw-r--r--clang/test/CodeGen/builtin-maxnum-minnum.c171
-rw-r--r--clang/test/CodeGenOpenCL/addr-space-struct-arg.cl156
-rw-r--r--clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl56
-rw-r--r--clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl24
-rw-r--r--clang/test/CodeGenOpenCL/opencl-kernel-call.cl70
-rw-r--r--clang/test/Driver/amdgpu-openmp-toolchain.c4
-rw-r--r--clang/test/Driver/arm-fpu-selection.s36
-rw-r--r--clang/test/Driver/armv7-default-neon.s16
-rw-r--r--clang/test/Driver/armv7s-default-vfpv4.s13
-rw-r--r--clang/test/Driver/armv8.1m.main.s51
-rw-r--r--clang/test/Driver/hip-device-libs.hip41
-rw-r--r--clang/test/Driver/rocm-device-libs.cl19
-rw-r--r--clang/test/Modules/relocatable-modules.cpp54
-rw-r--r--clang/unittests/Format/FormatTest.cpp13
-rw-r--r--clang/utils/TableGen/ClangOpcodesEmitter.cpp14
-rw-r--r--clang/utils/TableGen/RISCVVEmitter.cpp8
-rw-r--r--flang/test/Driver/flang-ld-powerpc.f90121
-rw-r--r--flang/test/Driver/linker-flags.f9020
-rw-r--r--flang/test/Driver/omp-driver-offload.f902
-rw-r--r--libcxx/include/__bit/popcount.h43
-rw-r--r--libcxx/include/__stop_token/atomic_unique_lock.h2
-rw-r--r--libunwind/docs/BuildingLibunwind.rst2
-rw-r--r--lldb/include/lldb/Symbol/SymbolContext.h7
-rw-r--r--lldb/include/lldb/Target/Language.h2
-rw-r--r--lldb/source/Core/FormatEntity.cpp161
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp37
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h2
-rw-r--r--lldb/source/Symbol/SymbolContext.cpp30
-rw-r--r--lldb/source/Target/Language.cpp2
-rw-r--r--lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp4
-rw-r--r--lldb/test/API/functionalities/tail_call_frames/inlining_and_tail_calls/main.cpp10
-rw-r--r--lldb/test/API/python_api/target/read-instructions-flavor/TestTargetReadInstructionsFlavor.py1
-rw-r--r--lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test2
-rw-r--r--lldb/test/Shell/Settings/TestFrameFormatName.test8
-rw-r--r--lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp4
-rw-r--r--llvm/cmake/modules/TableGen.cmake4
-rw-r--r--llvm/include/llvm/ADT/APFloat.h1
-rw-r--r--llvm/include/llvm/CodeGen/BranchRelaxation.h25
-rw-r--r--llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h30
-rw-r--r--llvm/include/llvm/CodeGen/TargetRegisterInfo.h5
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h6
-rw-r--r--llvm/include/llvm/InitializePasses.h4
-rw-r--r--llvm/include/llvm/MC/MCAsmBackend.h22
-rw-r--r--llvm/include/llvm/MC/MCAssembler.h10
-rw-r--r--llvm/include/llvm/Passes/CodeGenPassBuilder.h2
-rw-r--r--llvm/include/llvm/Passes/MachinePassRegistry.def3
-rw-r--r--llvm/include/llvm/TableGen/Main.h5
-rw-r--r--llvm/include/llvm/TableGen/StringToOffsetTable.h104
-rw-r--r--llvm/lib/Analysis/PHITransAddr.cpp3
-rw-r--r--llvm/lib/Analysis/ScalarEvolution.cpp2
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/WinException.cpp6
-rw-r--r--llvm/lib/CodeGen/BranchRelaxation.cpp32
-rw-r--r--llvm/lib/CodeGen/CodeGen.cpp4
-rw-r--r--llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp5
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineLICM.cpp3
-rw-r--r--llvm/lib/CodeGen/RegisterCoalescer.cpp3
-rw-r--r--llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp45
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp2
-rw-r--r--llvm/lib/CodeGen/WindowScheduler.cpp3
-rw-r--r--llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp5
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp6
-rw-r--r--llvm/lib/MC/MCAsmBackend.cpp7
-rw-r--r--llvm/lib/MC/MCAssembler.cpp110
-rw-r--r--llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp3
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp2
-rw-r--r--llvm/lib/Passes/PassBuilderPipelines.cpp16
-rw-r--r--llvm/lib/Passes/StandardInstrumentations.cpp10
-rw-r--r--llvm/lib/Support/APFloat.cpp19
-rw-r--r--llvm/lib/TableGen/CMakeLists.txt1
-rw-r--r--llvm/lib/TableGen/Main.cpp9
-rw-r--r--llvm/lib/TableGen/StringToOffsetTable.cpp120
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp15
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp30
-rw-r--r--llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp7
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp26
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp34
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp10
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp46
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h5
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h3
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp15
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h8
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp9
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp8
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h3
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp23
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp31
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h13
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp16
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp3
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp5
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp30
-rw-r--r--llvm/lib/Target/X86/X86.td2
-rw-r--r--llvm/lib/TargetParser/X86TargetParser.cpp3
-rw-r--r--llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp3
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp9
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp12
-rw-r--r--llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp17
-rw-r--r--llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp29
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp3
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp15
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h50
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp8
-rw-r--r--llvm/test/Analysis/ScalarEvolution/pr135531.ll19
-rw-r--r--llvm/test/CodeGen/AArch64/aarch64-sve-fill-spill-pair.ll283
-rw-r--r--llvm/test/CodeGen/AArch64/branch-relax-block-size.mir1
-rw-r--r--llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir2
-rw-r--r--llvm/test/CodeGen/AArch64/peephole-orr.mir45
-rw-r--r--llvm/test/CodeGen/AArch64/sve-vls-ldst-opt.mir74
-rw-r--r--llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir1
-rw-r--r--llvm/test/CodeGen/AMDGPU/promote-alloca-non-byte-sizes.ll204
-rw-r--r--llvm/test/CodeGen/AMDGPU/swdev380865.ll9
-rw-r--r--llvm/test/CodeGen/PowerPC/pr43527.ll22
-rw-r--r--llvm/test/CodeGen/PowerPC/pr48519.ll5
-rw-r--r--llvm/test/CodeGen/PowerPC/sms-grp-order.ll30
-rw-r--r--llvm/test/CodeGen/RISCV/emit-x8-as-fp.ll121
-rw-r--r--llvm/test/CodeGen/X86/fake-use-remove-loads.mir2
-rw-r--r--llvm/test/CodeGen/X86/pr134602.ll25
-rw-r--r--llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s23
-rw-r--r--llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s68
-rw-r--r--llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s28
-rw-r--r--llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s69
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s21
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s69
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s42
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s15
-rw-r--r--llvm/test/MC/Disassembler/RISCV/emit-x8-as-fp.txt22
-rw-r--r--llvm/test/MC/RISCV/emit-x8-as-fp.s42
-rw-r--r--llvm/test/MC/RISCV/function-call-invalid.s2
-rw-r--r--llvm/test/MC/RISCV/tail-call-invalid.s1
-rw-r--r--llvm/test/Other/new-pm-lto-prelink-samplepgo-inline-threshold.ll (renamed from llvm/test/Other/new-pm-thinlto-prelink-samplepgo-inline-threshold.ll)5
-rw-r--r--llvm/test/Transforms/GVN/pr65447.ll83
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll3
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll105
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll22
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll72
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll18
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll605
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll3
-rw-r--r--llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll3
-rw-r--r--llvm/test/Transforms/InstCombine/array.ll73
-rw-r--r--llvm/test/Transforms/InstCombine/fabs-as-int.ll4
-rw-r--r--llvm/test/Transforms/InstCombine/fneg-as-int.ll4
-rw-r--r--llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll5
-rw-r--r--llvm/test/Transforms/InstCombine/sincospi.ll24
-rw-r--r--llvm/test/Transforms/LICM/pr50367.ll41
-rw-r--r--llvm/test/Transforms/LICM/pr59324.ll27
-rw-r--r--llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll242
-rw-r--r--llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll58
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll209
-rw-r--r--llvm/test/Transforms/ObjCARC/contract.ll16
-rw-r--r--llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll11
-rw-r--r--llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll7
-rw-r--r--llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll5
-rw-r--r--llvm/unittests/ADT/APFloatTest.cpp9
-rw-r--r--llvm/utils/TableGen/AsmMatcherEmitter.cpp1
-rw-r--r--llvm/utils/TableGen/Basic/SequenceToOffsetTable.h3
-rw-r--r--llvm/utils/TableGen/Basic/TableGen.cpp9
-rw-r--r--llvm/utils/TableGen/SDNodeInfoEmitter.cpp1
-rw-r--r--llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn1
-rw-r--r--mlir/docs/DefiningDialects/Operations.md17
-rw-r--r--mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td14
-rw-r--r--mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td8
-rw-r--r--mlir/include/mlir/IR/EnumAttr.td136
-rw-r--r--mlir/include/mlir/IR/Properties.td19
-rw-r--r--mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp16
-rw-r--r--mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp5
-rw-r--r--mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp65
-rw-r--r--mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp49
-rw-r--r--mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-unsupported.mlir13
-rw-r--r--mlir/test/Dialect/GPU/shuffle-rewrite.mlir11
-rw-r--r--mlir/test/Dialect/Tosa/canonicalize.mlir15
-rw-r--r--mlir/test/IR/enum-attr-invalid.mlir75
-rw-r--r--mlir/test/IR/enum-attr-roundtrip.mlir45
-rw-r--r--mlir/test/lib/Dialect/Test/TestOps.td46
223 files changed, 4484 insertions, 1996 deletions
diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
index 6f10afe4a562..a1e9da41b4b3 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -210,6 +210,9 @@ if (CLANGD_ENABLE_REMOTE)
include(AddGRPC)
endif()
+option(CLANGD_BUILD_DEXP "Build the dexp tool as part of Clangd" ON)
+llvm_canonicalize_cmake_booleans(CLANGD_BUILD_DEXP)
+
if(CLANG_INCLUDE_TESTS)
add_subdirectory(test)
add_subdirectory(unittests)
@@ -220,4 +223,7 @@ option(CLANGD_ENABLE_REMOTE "Use gRPC library to enable remote index support for
set(GRPC_INSTALL_PATH "" CACHE PATH "Path to gRPC library manual installation.")
add_subdirectory(index/remote)
-add_subdirectory(index/dex/dexp)
+
+if(CLANGD_BUILD_DEXP)
+ add_subdirectory(index/dex/dexp)
+endif()
diff --git a/clang-tools-extra/clangd/test/CMakeLists.txt b/clang-tools-extra/clangd/test/CMakeLists.txt
index b51f461a4986..42fc3506641f 100644
--- a/clang-tools-extra/clangd/test/CMakeLists.txt
+++ b/clang-tools-extra/clangd/test/CMakeLists.txt
@@ -3,8 +3,6 @@ set(CLANGD_TEST_DEPS
ClangdTests
clangd-indexer
split-file
- # No tests for it, but we should still make sure they build.
- dexp
)
if(CLANGD_BUILD_XPC)
@@ -12,6 +10,11 @@ if(CLANGD_BUILD_XPC)
list(APPEND CLANGD_TEST_DEPS ClangdXpcUnitTests)
endif()
+if(CLANGD_BUILD_DEXP)
+ # No tests for it, but we should still make sure they build.
+ list(APPEND CLANGD_TEST_DEPS dexp)
+endif()
+
if(CLANGD_ENABLE_REMOTE)
list(APPEND CLANGD_TEST_DEPS clangd-index-server clangd-index-server-monitor)
endif()
diff --git a/clang-tools-extra/clangd/test/lit.site.cfg.py.in b/clang-tools-extra/clangd/test/lit.site.cfg.py.in
index 1fe7c8d0f324..a0bb3561e19e 100644
--- a/clang-tools-extra/clangd/test/lit.site.cfg.py.in
+++ b/clang-tools-extra/clangd/test/lit.site.cfg.py.in
@@ -15,6 +15,7 @@ config.llvm_shlib_dir = "@SHLIBDIR@"
config.clangd_source_dir = "@CMAKE_CURRENT_SOURCE_DIR@/.."
config.clangd_binary_dir = "@CMAKE_CURRENT_BINARY_DIR@/.."
config.clangd_build_xpc = @CLANGD_BUILD_XPC@
+config.clangd_build_dexp = @CLANGD_BUILD_DEXP@
config.clangd_enable_remote = @CLANGD_ENABLE_REMOTE@
config.clangd_tidy_checks = @CLANGD_TIDY_CHECKS@
config.have_zlib = @LLVM_ENABLE_ZLIB@
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 3b8a9cac6587..971ab50cc9a6 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -818,7 +818,23 @@ of different sizes and signs is forbidden in binary and ternary builtins.
T __builtin_elementwise_fmod(T x, T y) return The floating-point remainder of (x/y) whose sign floating point types
matches the sign of x.
T __builtin_elementwise_max(T x, T y) return x or y, whichever is larger integer and floating point types
+ For floating point types, follows semantics of maxNum
+ in IEEE 754-2008. See `LangRef
+ <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
+ for the comparison.
T __builtin_elementwise_min(T x, T y) return x or y, whichever is smaller integer and floating point types
+ For floating point types, follows semantics of minNum
+ in IEEE 754-2008. See `LangRef
+ <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
+ for the comparison.
+ T __builtin_elementwise_maxnum(T x, T y) return x or y, whichever is larger. Follows IEEE 754-2008 floating point types
+ semantics (maxNum) with +0.0>-0.0. See `LangRef
+ <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
+ for the comparison.
+ T __builtin_elementwise_minnum(T x, T y) return x or y, whichever is smaller. Follows IEEE 754-2008 floating point types
+ semantics (minNum) with +0.0>-0.0. See `LangRef
+ <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
+ for the comparison.
T __builtin_elementwise_add_sat(T x, T y) return the sum of x and y, clamped to the range of integer types
representable values for the signed/unsigned integer type.
T __builtin_elementwise_sub_sat(T x, T y) return the difference of x and y, clamped to the range of integer types
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 11f62bc881b0..fd9b9a80e993 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -38,6 +38,9 @@ Potentially Breaking Changes
- Fix missing diagnostics for uses of declarations when performing typename access,
such as when performing member access on a '[[deprecated]]' type alias.
(#GH58547)
+- For ARM targets when compiling assembly files, the features included in the selected CPU
+ or Architecture's FPU are included. If you wish not to use a specific feature,
+ the relevant ``+no`` option will need to be amended to the command line option.
C/C++ Language Potentially Breaking Changes
-------------------------------------------
@@ -191,6 +194,7 @@ Non-comprehensive list of changes in this release
- Support parsing the `cc` operand modifier and alias it to the `c` modifier (#GH127719).
- Added `__builtin_elementwise_exp10`.
- For AMDPGU targets, added `__builtin_v_cvt_off_f32_i4` that maps to the `v_cvt_off_f32_i4` instruction.
+- Added `__builtin_elementwise_minnum` and `__builtin_elementwise_maxnum`.
New Compiler Flags
------------------
@@ -511,6 +515,7 @@ X86 Support
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
+- For ARM targets, cc1as now considers the FPU's features for the selected CPU or Architecture.
Android Support
^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 868e5b92acdc..74a11257b373 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1304,6 +1304,18 @@ def ElementwiseMin : Builtin {
let Prototype = "void(...)";
}
+def ElementwiseMaxNum : Builtin {
+ let Spellings = ["__builtin_elementwise_maxnum"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
+def ElementwiseMinNum : Builtin {
+ let Spellings = ["__builtin_elementwise_minnum"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
def ElementwiseMaximum : Builtin {
let Spellings = ["__builtin_elementwise_maximum"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index c1020b234b13..affc076a876a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6881,6 +6881,13 @@ let Flags = [TargetSpecific] in {
defm android_pad_segment : BooleanFFlag<"android-pad-segment">, Group<f_Group>;
} // let Flags = [TargetSpecific]
+def shared_libflangrt : Flag<["-"], "shared-libflangrt">,
+ HelpText<"Link the flang-rt shared library">, Group<Link_Group>,
+ Visibility<[FlangOption]>, Flags<[NoArgumentUnused]>;
+def static_libflangrt : Flag<["-"], "static-libflangrt">,
+ HelpText<"Link the flang-rt static library">, Group<Link_Group>,
+ Visibility<[FlangOption]>, Flags<[NoArgumentUnused]>;
+
//===----------------------------------------------------------------------===//
// FLangOption + NoXarchOption
//===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 076e4296c309..d0059673d6a6 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -521,6 +521,10 @@ public:
addFortranRuntimeLibraryPath(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
+ /// Add the path for libflang_rt.runtime.a
+ void addFlangRTLibPath(const llvm::opt::ArgList &Args,
+ llvm::opt::ArgStringList &CmdArgs) const;
+
const char *getCompilerRTArgString(const llvm::opt::ArgList &Args,
StringRef Component,
FileType Type = ToolChain::FT_Static,
diff --git a/clang/include/clang/Support/RISCVVIntrinsicUtils.h b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
index 8f2a4f54a1b7..00a79a0fcb5d 100644
--- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h
+++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
@@ -11,6 +11,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/Bitset.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include <cstdint>
@@ -376,6 +377,8 @@ enum PolicyScheme : uint8_t {
HasPolicyOperand,
};
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, enum PolicyScheme PS);
+
// TODO refactor RVVIntrinsic class design after support all intrinsic
// combination. This represents an instantiation of an intrinsic with a
// particular type and prototype
@@ -507,6 +510,23 @@ enum RVVRequire {
RVV_REQ_NUM,
};
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, enum RVVRequire Require);
+
+struct RequiredExtensionBits {
+ llvm::Bitset<RVV_REQ_NUM> Bits;
+ RequiredExtensionBits() {}
+ RequiredExtensionBits(std::initializer_list<RVVRequire> Init) {
+ for (auto I : Init)
+ Bits.set(I);
+ }
+
+ void set(unsigned I) { Bits.set(I); }
+ bool operator[](unsigned I) const { return Bits[I]; }
+};
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const RequiredExtensionBits &Exts);
+
// Raw RVV intrinsic info, used to expand later.
// This struct is highly compact for minimized code size.
struct RVVIntrinsicRecord {
@@ -518,7 +538,7 @@ struct RVVIntrinsicRecord {
const char *OverloadedName;
// Required target features for this intrinsic.
- uint32_t RequiredExtensions[(RVV_REQ_NUM + 31) / 32];
+ RequiredExtensionBits RequiredExtensions;
// Prototype for this intrinsic, index of RVVSignatureTable.
uint16_t PrototypeIndex;
diff --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp
index 12c434029562..d4c9ce6050b8 100644
--- a/clang/lib/AST/ByteCode/Disasm.cpp
+++ b/clang/lib/AST/ByteCode/Disasm.cpp
@@ -33,39 +33,74 @@
using namespace clang;
using namespace clang::interp;
-template <typename T> inline static T ReadArg(Program &P, CodePtr &OpPC) {
+template <typename T>
+inline static std::string printArg(Program &P, CodePtr &OpPC) {
if constexpr (std::is_pointer_v<T>) {
uint32_t ID = OpPC.read<uint32_t>();
- return reinterpret_cast<T>(P.getNativePointer(ID));
+ std::string Result;
+ llvm::raw_string_ostream SS(Result);
+ SS << reinterpret_cast<T>(P.getNativePointer(ID));
+ return Result;
} else {
- return OpPC.read<T>();
+ std::string Result;
+ llvm::raw_string_ostream SS(Result);
+ auto Arg = OpPC.read<T>();
+ SS << Arg;
+ return Result;
}
}
-template <> inline Floating ReadArg<Floating>(Program &P, CodePtr &OpPC) {
- Floating F = Floating::deserialize(*OpPC);
+template <> inline std::string printArg<Floating>(Program &P, CodePtr &OpPC) {
+ auto F = Floating::deserialize(*OpPC);
OpPC += align(F.bytesToSerialize());
- return F;
+
+ std::string Result;
+ llvm::raw_string_ostream SS(Result);
+ SS << F;
+ return Result;
}
template <>
-inline IntegralAP<false> ReadArg<IntegralAP<false>>(Program &P, CodePtr &OpPC) {
- IntegralAP<false> I = IntegralAP<false>::deserialize(*OpPC);
- OpPC += align(I.bytesToSerialize());
- return I;
-}
+inline std::string printArg<IntegralAP<false>>(Program &P, CodePtr &OpPC) {
+ auto F = IntegralAP<false>::deserialize(*OpPC);
+ OpPC += align(F.bytesToSerialize());
+ std::string Result;
+ llvm::raw_string_ostream SS(Result);
+ SS << F;
+ return Result;
+}
template <>
-inline IntegralAP<true> ReadArg<IntegralAP<true>>(Program &P, CodePtr &OpPC) {
- IntegralAP<true> I = IntegralAP<true>::deserialize(*OpPC);
- OpPC += align(I.bytesToSerialize());
- return I;
+inline std::string printArg<IntegralAP<true>>(Program &P, CodePtr &OpPC) {
+ auto F = IntegralAP<true>::deserialize(*OpPC);
+ OpPC += align(F.bytesToSerialize());
+
+ std::string Result;
+ llvm::raw_string_ostream SS(Result);
+ SS << F;
+ return Result;
}
-template <> inline FixedPoint ReadArg<FixedPoint>(Program &P, CodePtr &OpPC) {
- FixedPoint I = FixedPoint::deserialize(*OpPC);
- OpPC += align(I.bytesToSerialize());
- return I;
+template <> inline std::string printArg<FixedPoint>(Program &P, CodePtr &OpPC) {
+ auto F = FixedPoint::deserialize(*OpPC);
+ OpPC += align(F.bytesToSerialize());
+
+ std::string Result;
+ llvm::raw_string_ostream SS(Result);
+ SS << F;
+ return Result;
+}
+
+static bool isJumpOpcode(Opcode Op) {
+ return Op == OP_Jmp || Op == OP_Jf || Op == OP_Jt;
+}
+
+static size_t getNumDisplayWidth(size_t N) {
+ unsigned L = 1u, M = 10u;
+ while (M <= N && ++L != std::numeric_limits<size_t>::digits10 + 1)
+ M *= 10u;
+
+ return L;
}
LLVM_DUMP_METHOD void Function::dump() const { dump(llvm::errs()); }
@@ -80,23 +115,115 @@ LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const {
OS << "rvo: " << hasRVO() << "\n";
OS << "this arg: " << hasThisPointer() << "\n";
- auto PrintName = [&OS](const char *Name) {
- OS << Name;
- long N = 30 - strlen(Name);
- if (N > 0)
- OS.indent(N);
+ struct OpText {
+ size_t Addr;
+ std::string Op;
+ bool IsJump;
+ llvm::SmallVector<std::string> Args;
};
+ auto PrintName = [](const char *Name) -> std::string {
+ return std::string(Name);
+ };
+
+ llvm::SmallVector<OpText> Code;
+ size_t LongestAddr = 0;
+ size_t LongestOp = 0;
+
for (CodePtr Start = getCodeBegin(), PC = Start; PC != getCodeEnd();) {
size_t Addr = PC - Start;
+ OpText Text;
auto Op = PC.read<Opcode>();
- OS << llvm::format("%8d", Addr) << " ";
+ Text.Addr = Addr;
+ Text.IsJump = isJumpOpcode(Op);
switch (Op) {
#define GET_DISASM
#include "Opcodes.inc"
#undef GET_DISASM
}
+ Code.push_back(Text);
+ LongestOp = std::max(Text.Op.size(), LongestOp);
+ LongestAddr = std::max(getNumDisplayWidth(Addr), LongestAddr);
}
+
+ // Record jumps and their targets.
+ struct JmpData {
+ size_t From;
+ size_t To;
+ };
+ llvm::SmallVector<JmpData> Jumps;
+ for (auto &Text : Code) {
+ if (Text.IsJump)
+ Jumps.push_back({Text.Addr, Text.Addr + std::stoi(Text.Args[0]) +
+ align(sizeof(Opcode)) +
+ align(sizeof(int32_t))});
+ }
+
+ llvm::SmallVector<std::string> Text;
+ Text.reserve(Code.size());
+ size_t LongestLine = 0;
+ // Print code to a string, one at a time.
+ for (auto C : Code) {
+ std::string Line;
+ llvm::raw_string_ostream LS(Line);
+ LS << C.Addr;
+ LS.indent(LongestAddr - getNumDisplayWidth(C.Addr) + 4);
+ LS << C.Op;
+ LS.indent(LongestOp - C.Op.size() + 4);
+ for (auto &Arg : C.Args) {
+ LS << Arg << ' ';
+ }
+ Text.push_back(Line);
+ LongestLine = std::max(Line.size(), LongestLine);
+ }
+
+ assert(Code.size() == Text.size());
+
+ auto spaces = [](unsigned N) -> std::string {
+ std::string S;
+ for (unsigned I = 0; I != N; ++I)
+ S += ' ';
+ return S;
+ };
+
+ // Now, draw the jump lines.
+ for (auto &J : Jumps) {
+ if (J.To > J.From) {
+ bool FoundStart = false;
+ for (size_t LineIndex = 0; LineIndex != Text.size(); ++LineIndex) {
+ Text[LineIndex] += spaces(LongestLine - Text[LineIndex].size());
+
+ if (Code[LineIndex].Addr == J.From) {
+ Text[LineIndex] += " --+";
+ FoundStart = true;
+ } else if (Code[LineIndex].Addr == J.To) {
+ Text[LineIndex] += " <-+";
+ break;
+ } else if (FoundStart) {
+ Text[LineIndex] += " |";
+ }
+ }
+ LongestLine += 5;
+ } else {
+ bool FoundStart = false;
+ for (ssize_t LineIndex = Text.size() - 1; LineIndex >= 0; --LineIndex) {
+ Text[LineIndex] += spaces(LongestLine - Text[LineIndex].size());
+ if (Code[LineIndex].Addr == J.From) {
+ Text[LineIndex] += " --+";
+ FoundStart = true;
+ } else if (Code[LineIndex].Addr == J.To) {
+ Text[LineIndex] += " <-+";
+ break;
+ } else if (FoundStart) {
+ Text[LineIndex] += " |";
+ }
+ }
+ LongestLine += 5;
+ }
+ }
+
+ for (auto &Line : Text)
+ OS << Line << '\n';
}
LLVM_DUMP_METHOD void Program::dump() const { dump(llvm::errs()); }
diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp
index 0afd772c73b8..3e1f36da8925 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -307,7 +307,7 @@ bool isConstexprUnknown(const Pointer &P) {
if (P.isDummy())
return false;
const VarDecl *VD = P.block()->getDescriptor()->asVarDecl();
- return VD && VD->hasLocalStorage();
+ return VD && VD->hasLocalStorage() && !isa<ParmVarDecl>(VD);
}
bool CheckBCPResult(InterpState &S, const Pointer &Ptr) {
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 4e84dcc8d551..b4e15b3ffbe6 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -771,6 +771,11 @@ bool IncDecHelper(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
bool CanOverflow) {
assert(!Ptr.isDummy());
+ if (!S.inConstantContext()) {
+ if (isConstexprUnknown(Ptr))
+ return false;
+ }
+
if constexpr (std::is_same_v<T, Boolean>) {
if (!S.getLangOpts().CPlusPlus14)
return Invalid(S, OpPC);
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index fe55dfffc1cb..1e4e055e04af 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3818,6 +3818,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}
+ case Builtin::BI__builtin_elementwise_maxnum: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maxnum, Op0,
+ Op1, nullptr, "elt.maxnum");
+ return RValue::get(Result);
+ }
+
+ case Builtin::BI__builtin_elementwise_minnum: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minnum, Op0,
+ Op1, nullptr, "elt.minnum");
+ return RValue::get(Result);
+ }
+
case Builtin::BI__builtin_elementwise_maximum: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index db2a2c574064..bcf039d9f268 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -198,14 +198,10 @@ ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
/*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
}
- // FIXME: Should also use this for OpenCL, but it requires addressing the
- // problem of kernels being called.
- //
// FIXME: This doesn't apply the optimization of coercing pointers in structs
// to global address space when using byref. This would require implementing a
// new kind of coercion of the in-memory type when for indirect arguments.
- if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
- isAggregateTypeForABI(Ty)) {
+ if (LTy == OrigLTy && isAggregateTypeForABI(Ty)) {
return ABIArgInfo::getIndirectAliased(
getContext().getTypeAlignInChars(Ty),
getContext().getTargetAddressSpace(LangAS::opencl_constant),
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 36d0ae34dec8..97317579c8a5 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -744,9 +744,12 @@ std::string ToolChain::buildCompilerRTBasename(const llvm::opt::ArgList &Args,
Suffix = IsITANMSVCWindows ? ".lib" : ".a";
break;
case ToolChain::FT_Shared:
- Suffix = TT.isOSWindows()
- ? (TT.isWindowsGNUEnvironment() ? ".dll.a" : ".lib")
- : ".so";
+ if (TT.isOSWindows())
+ Suffix = TT.isWindowsGNUEnvironment() ? ".dll.a" : ".lib";
+ else if (TT.isOSAIX())
+ Suffix = ".a";
+ else
+ Suffix = ".so";
break;
}
@@ -816,8 +819,7 @@ void ToolChain::addFortranRuntimeLibs(const ArgList &Args,
if (AsNeeded)
addAsNeededOption(*this, Args, CmdArgs, /*as_needed=*/false);
}
- CmdArgs.push_back("-lflang_rt.runtime");
- addArchSpecificRPath(*this, Args, CmdArgs);
+ addFlangRTLibPath(Args, CmdArgs);
// needs libexecinfo for backtrace functions
if (getTriple().isOSFreeBSD() || getTriple().isOSNetBSD() ||
@@ -850,6 +852,20 @@ void ToolChain::addFortranRuntimeLibraryPath(const llvm::opt::ArgList &Args,
CmdArgs.push_back(Args.MakeArgString("-L" + DefaultLibPath));
}
+void ToolChain::addFlangRTLibPath(const ArgList &Args,
+ llvm::opt::ArgStringList &CmdArgs) const {
+ // Link static flang_rt.runtime.a or shared flang_rt.runtime.so.
+ // On AIX, default to static flang-rt.
+ if (Args.hasFlag(options::OPT_static_libflangrt,
+ options::OPT_shared_libflangrt, getTriple().isOSAIX()))
+ CmdArgs.push_back(
+ getCompilerRTArgString(Args, "runtime", ToolChain::FT_Static, true));
+ else {
+ CmdArgs.push_back("-lflang_rt.runtime");
+ addArchSpecificRPath(*this, Args, CmdArgs);
+ }
+}
+
// Android target triples contain a target version. If we don't have libraries
// for the exact target version, we should fall back to the next newest version
// or a versionless path, if any.
diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp
index 26b9d4c772be..5dc80bc5a3d2 100644
--- a/clang/lib/Driver/ToolChains/AIX.cpp
+++ b/clang/lib/Driver/ToolChains/AIX.cpp
@@ -608,14 +608,6 @@ void AIX::addProfileRTLibs(const llvm::opt::ArgList &Args,
ToolChain::addProfileRTLibs(Args, CmdArgs);
}
-void AIX::addFortranRuntimeLibs(const ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) const {
- // Link flang_rt.runtime.a. On AIX, the static and shared library are all
- // named .a
- CmdArgs.push_back(
- getCompilerRTArgString(Args, "runtime", ToolChain::FT_Static, true));
-}
-
ToolChain::CXXStdlibType AIX::GetDefaultCXXStdlibType() const {
return ToolChain::CST_Libcxx;
}
diff --git a/clang/lib/Driver/ToolChains/AIX.h b/clang/lib/Driver/ToolChains/AIX.h
index 17e8370cd121..8f130f6b5454 100644
--- a/clang/lib/Driver/ToolChains/AIX.h
+++ b/clang/lib/Driver/ToolChains/AIX.h
@@ -87,9 +87,6 @@ public:
void addProfileRTLibs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const override;
- void addFortranRuntimeLibs(const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) const override;
-
CXXStdlibType GetDefaultCXXStdlibType() const override;
RuntimeLibType GetDefaultRuntimeLibType() const override;
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index d1de3b91c352..35ca019795dd 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -65,6 +65,10 @@ void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
FiniteOnly.Off = FilePath;
} else if (BaseName == "oclc_finite_only_on") {
FiniteOnly.On = FilePath;
+ } else if (BaseName == "oclc_daz_opt_on") {
+ DenormalsAreZero.On = FilePath;
+ } else if (BaseName == "oclc_daz_opt_off") {
+ DenormalsAreZero.Off = FilePath;
} else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
CorrectlyRoundedSqrt.On = FilePath;
} else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
@@ -881,6 +885,10 @@ void ROCMToolChain::addClangTargetOptions(
return;
bool Wave64 = isWave64(DriverArgs, Kind);
+ // TODO: There are way too many flags that change this. Do we need to check
+ // them all?
+ bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
+ getDefaultDenormsAreZeroForTarget(Kind);
bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
bool UnsafeMathOpt =
@@ -901,7 +909,7 @@ void ROCMToolChain::addClangTargetOptions(
// Add the generic set of libraries.
BCLibs.append(RocmInstallation->getCommonBitcodeLibs(
- DriverArgs, LibDeviceFile, Wave64, FiniteOnly, UnsafeMathOpt,
+ DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false));
for (auto [BCFile, Internalize] : BCLibs) {
@@ -940,8 +948,9 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs(
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
RocmInstallationDetector::getCommonBitcodeLibs(
const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
- bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt,
- DeviceLibABIVersion ABIVer, bool GPUSan, bool isOpenMP) const {
+ bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
+ bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan,
+ bool isOpenMP) const {
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs;
auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
@@ -960,6 +969,7 @@ RocmInstallationDetector::getCommonBitcodeLibs(
AddBCLib(getOCKLPath());
else if (GPUSan && isOpenMP)
AddBCLib(getOCKLPath(), false);
+ AddBCLib(getDenormalsAreZeroPath(DAZ));
AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath));
AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath));
AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt));
@@ -987,6 +997,11 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
return {};
// If --hip-device-lib is not set, add the default bitcode libraries.
+ // TODO: There are way too many flags that change this. Do we need to check
+ // them all?
+ bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
+ options::OPT_fno_gpu_flush_denormals_to_zero,
+ getDefaultDenormsAreZeroForTarget(Kind));
bool FiniteOnly = DriverArgs.hasFlag(
options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false);
bool UnsafeMathOpt =
@@ -1006,7 +1021,7 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
getSanitizerArgs(DriverArgs).needsAsanRt();
return RocmInstallation->getCommonBitcodeLibs(
- DriverArgs, LibDeviceFile, Wave64, FiniteOnly, UnsafeMathOpt,
+ DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP);
}
diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index e50cb3836f2c..5084058b3fef 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -679,21 +679,17 @@ llvm::ARM::FPUKind arm::getARMTargetFeatures(const Driver &D,
CPUArgFPUKind != llvm::ARM::FK_INVALID ? CPUArgFPUKind : ArchArgFPUKind;
(void)llvm::ARM::getFPUFeatures(FPUKind, Features);
} else {
- bool Generic = true;
- if (!ForAS) {
- std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple);
- if (CPU != "generic")
- Generic = false;
- llvm::ARM::ArchKind ArchKind =
- arm::getLLVMArchKindForARM(CPU, ArchName, Triple);
- FPUKind = llvm::ARM::getDefaultFPU(CPU, ArchKind);
- (void)llvm::ARM::getFPUFeatures(FPUKind, Features);
- }
+ std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple);
+ bool Generic = CPU == "generic";
if (Generic && (Triple.isOSWindows() || Triple.isOSDarwin()) &&
getARMSubArchVersionNumber(Triple) >= 7) {
FPUKind = llvm::ARM::parseFPU("neon");
- (void)llvm::ARM::getFPUFeatures(FPUKind, Features);
+ } else {
+ llvm::ARM::ArchKind ArchKind =
+ arm::getLLVMArchKindForARM(CPU, ArchName, Triple);
+ FPUKind = llvm::ARM::getDefaultFPU(CPU, ArchKind);
}
+ (void)llvm::ARM::getFPUFeatures(FPUKind, Features);
}
// Now we've finished accumulating features from arch, cpu and fpu,
diff --git a/clang/lib/Driver/ToolChains/PPCLinux.cpp b/clang/lib/Driver/ToolChains/PPCLinux.cpp
index 575e88c6ab12..0ed0f91ad166 100644
--- a/clang/lib/Driver/ToolChains/PPCLinux.cpp
+++ b/clang/lib/Driver/ToolChains/PPCLinux.cpp
@@ -12,7 +12,6 @@
#include "clang/Driver/Options.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/VirtualFileSystem.h"
using namespace clang::driver;
using namespace clang::driver::toolchains;
@@ -102,18 +101,3 @@ bool PPCLinuxToolChain::SupportIEEEFloat128(
return GlibcSupportsFloat128((Twine(D.DyldPrefix) + Linker).str()) &&
!(D.CCCIsCXX() && HasUnsupportedCXXLib);
}
-
-void PPCLinuxToolChain::addFortranRuntimeLibs(
- const ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const {
- // Link static flang_rt.runtime.a or shared flang_rt.runtime.so
- const char *Path;
- if (getVFS().exists(Twine(Path = getCompilerRTArgString(
- Args, "runtime", ToolChain::FT_Static, true))))
- CmdArgs.push_back(Path);
- else if (getVFS().exists(
- Twine(Path = getCompilerRTArgString(
- Args, "runtime", ToolChain::FT_Shared, true))))
- CmdArgs.push_back(Path);
- else
- CmdArgs.push_back("-lflang_rt.runtime");
-}
diff --git a/clang/lib/Driver/ToolChains/PPCLinux.h b/clang/lib/Driver/ToolChains/PPCLinux.h
index 910df3d16e6a..63adaff6be9c 100644
--- a/clang/lib/Driver/ToolChains/PPCLinux.h
+++ b/clang/lib/Driver/ToolChains/PPCLinux.h
@@ -24,9 +24,6 @@ public:
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
- void addFortranRuntimeLibs(const llvm::opt::ArgList &Args,
- llvm::opt::ArgStringList &CmdArgs) const override;
-
private:
bool SupportIEEEFloat128(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args) const;
diff --git a/clang/lib/Driver/ToolChains/ROCm.h b/clang/lib/Driver/ToolChains/ROCm.h
index f002b386e11c..2a09da011489 100644
--- a/clang/lib/Driver/ToolChains/ROCm.h
+++ b/clang/lib/Driver/ToolChains/ROCm.h
@@ -137,6 +137,7 @@ private:
ConditionalLibrary WavefrontSize64;
ConditionalLibrary FiniteOnly;
ConditionalLibrary UnsafeMath;
+ ConditionalLibrary DenormalsAreZero;
ConditionalLibrary CorrectlyRoundedSqrt;
// Maps ABI version to library path. The version number is in the format of
@@ -151,7 +152,8 @@ private:
bool allGenericLibsValid() const {
return !OCML.empty() && !OCKL.empty() && !OpenCL.empty() &&
WavefrontSize64.isValid() && FiniteOnly.isValid() &&
- UnsafeMath.isValid() && CorrectlyRoundedSqrt.isValid();
+ UnsafeMath.isValid() && DenormalsAreZero.isValid() &&
+ CorrectlyRoundedSqrt.isValid();
}
void scanLibDevicePath(llvm::StringRef Path);
@@ -173,12 +175,11 @@ public:
/// Get file paths of default bitcode libraries common to AMDGPU based
/// toolchains.
- llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
- getCommonBitcodeLibs(const llvm::opt::ArgList &DriverArgs,
- StringRef LibDeviceFile, bool Wave64, bool FiniteOnly,
- bool UnsafeMathOpt, bool FastRelaxedMath,
- bool CorrectSqrt, DeviceLibABIVersion ABIVer,
- bool GPUSan, bool isOpenMP) const;
+ llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> getCommonBitcodeLibs(
+ const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile,
+ bool Wave64, bool DAZ, bool FiniteOnly, bool UnsafeMathOpt,
+ bool FastRelaxedMath, bool CorrectSqrt, DeviceLibABIVersion ABIVer,
+ bool GPUSan, bool isOpenMP) const;
/// Check file paths of default bitcode libraries common to AMDGPU based
/// toolchains. \returns false if there are invalid or missing files.
bool checkCommonBitcodeLibs(StringRef GPUArch, StringRef LibDeviceFile,
@@ -244,6 +245,10 @@ public:
return UnsafeMath.get(Enabled);
}
+ StringRef getDenormalsAreZeroPath(bool Enabled) const {
+ return DenormalsAreZero.get(Enabled);
+ }
+
StringRef getCorrectlyRoundedSqrtPath(bool Enabled) const {
return CorrectlyRoundedSqrt.get(Enabled);
}
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 82dc403538c4..ef5f07e2c62e 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -5569,7 +5569,8 @@ static bool isAllmanBrace(const FormatToken &Tok) {
// Returns 'true' if 'Tok' is a function argument.
static bool IsFunctionArgument(const FormatToken &Tok) {
return Tok.MatchingParen && Tok.MatchingParen->Next &&
- Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren);
+ Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren,
+ tok::r_brace);
}
static bool
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index b1e43f0313db..cfaadf07edfd 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -1012,13 +1012,8 @@ void WhitespaceManager::alignConsecutiveDeclarations() {
AlignTokens(
Style,
[&](Change const &C) {
- if (Style.AlignConsecutiveDeclarations.AlignFunctionPointers) {
- for (const auto *Prev = C.Tok->Previous; Prev; Prev = Prev->Previous)
- if (Prev->is(tok::equal))
- return false;
- if (C.Tok->is(TT_FunctionTypeLParen))
- return true;
- }
+ if (C.Tok->is(TT_FunctionTypeLParen))
+ return Style.AlignConsecutiveDeclarations.AlignFunctionPointers;
if (C.Tok->is(TT_FunctionDeclarationName))
return Style.AlignConsecutiveDeclarations.AlignFunctionDeclarations;
if (C.Tok->isNot(TT_StartOfName))
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index bffd0dd461d3..13bc2bd14621 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2762,6 +2762,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
// These builtins restrict the element type to floating point
// types only, and take in two arguments.
+ case Builtin::BI__builtin_elementwise_minnum:
+ case Builtin::BI__builtin_elementwise_maxnum:
case Builtin::BI__builtin_elementwise_minimum:
case Builtin::BI__builtin_elementwise_maximum:
case Builtin::BI__builtin_elementwise_atan2:
diff --git a/clang/lib/Sema/SemaRISCV.cpp b/clang/lib/Sema/SemaRISCV.cpp
index 746609604d1b..b9f843b1920a 100644
--- a/clang/lib/Sema/SemaRISCV.cpp
+++ b/clang/lib/Sema/SemaRISCV.cpp
@@ -232,8 +232,7 @@ void RISCVIntrinsicManagerImpl::ConstructRVVIntrinsics(
for (auto &Record : Recs) {
// Check requirements.
if (llvm::any_of(FeatureCheckList, [&](const auto &Item) {
- return ((Record.RequiredExtensions[Item.second / 32] &
- (1U << (Item.second % 32))) != 0) &&
+ return Record.RequiredExtensions[Item.second] &&
!TI.hasFeature(Item.first);
}))
continue;
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 108d7e1dbaeb..5c80077f294c 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1280,12 +1280,10 @@ void OpenACCDeclClauseInstantiator::VisitDevicePtrClause(
const OpenACCDevicePtrClause &C) {
llvm::SmallVector<Expr *> VarList = VisitVarList(C.getVarList());
// Ensure each var is a pointer type.
- VarList.erase(std::remove_if(VarList.begin(), VarList.end(),
- [&](Expr *E) {
- return SemaRef.OpenACC().CheckVarIsPointerType(
- OpenACCClauseKind::DevicePtr, E);
- }),
- VarList.end());
+ llvm::erase_if(VarList, [&](Expr *E) {
+ return SemaRef.OpenACC().CheckVarIsPointerType(OpenACCClauseKind::DevicePtr,
+ E);
+ });
ParsedClause.setVarListDetails(VarList, OpenACCModifierKind::Invalid);
if (SemaRef.OpenACC().CheckDeclareClause(ParsedClause,
OpenACCModifierKind::Invalid))
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 21e250e172d5..bb58ec49612c 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -11992,13 +11992,10 @@ void OpenACCClauseTransform<Derived>::VisitDetachClause(
llvm::SmallVector<Expr *> VarList = VisitVarList(C.getVarList());
// Ensure each var is a pointer type.
- VarList.erase(
- std::remove_if(VarList.begin(), VarList.end(),
- [&](Expr *E) {
- return Self.getSema().OpenACC().CheckVarIsPointerType(
- OpenACCClauseKind::Detach, E);
- }),
- VarList.end());
+ llvm::erase_if(VarList, [&](Expr *E) {
+ return Self.getSema().OpenACC().CheckVarIsPointerType(
+ OpenACCClauseKind::Detach, E);
+ });
ParsedClause.setVarListDetails(VarList, OpenACCModifierKind::Invalid);
NewClause = OpenACCDetachClause::Create(
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index a48c05061626..95b5718f1d14 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1493,42 +1493,45 @@ void ASTWriter::WriteControlBlock(Preprocessor &PP, StringRef isysroot) {
unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
RecordData::value_type Record[] = {MODULE_NAME};
Stream.EmitRecordWithBlob(AbbrevCode, Record, WritingModule->Name);
- }
- if (WritingModule && WritingModule->Directory) {
- SmallString<128> BaseDir;
- if (PP.getHeaderSearchInfo().getHeaderSearchOpts().ModuleFileHomeIsCwd) {
- // Use the current working directory as the base path for all inputs.
- auto CWD = FileMgr.getOptionalDirectoryRef(".");
- BaseDir.assign(CWD->getName());
- } else {
- BaseDir.assign(WritingModule->Directory->getName());
- }
- cleanPathForOutput(FileMgr, BaseDir);
-
- // If the home of the module is the current working directory, then we
- // want to pick up the cwd of the build process loading the module, not
- // our cwd, when we load this module.
- if (!PP.getHeaderSearchInfo().getHeaderSearchOpts().ModuleFileHomeIsCwd &&
- (!PP.getHeaderSearchInfo()
- .getHeaderSearchOpts()
- .ModuleMapFileHomeIsCwd ||
- WritingModule->Directory->getName() != ".")) {
- // Module directory.
- auto Abbrev = std::make_shared<BitCodeAbbrev>();
- Abbrev->Add(BitCodeAbbrevOp(MODULE_DIRECTORY));
- Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Directory
- unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
+ auto BaseDir = [&]() -> std::optional<SmallString<128>> {
+ if (PP.getHeaderSearchInfo().getHeaderSearchOpts().ModuleFileHomeIsCwd) {
+ // Use the current working directory as the base path for all inputs.
+ auto CWD = FileMgr.getOptionalDirectoryRef(".");
+ return CWD->getName();
+ }
+ if (WritingModule->Directory) {
+ return WritingModule->Directory->getName();
+ }
+ return std::nullopt;
+ }();
+ if (BaseDir) {
+ cleanPathForOutput(FileMgr, *BaseDir);
+
+ // If the home of the module is the current working directory, then we
+ // want to pick up the cwd of the build process loading the module, not
+ // our cwd, when we load this module.
+ if (!PP.getHeaderSearchInfo().getHeaderSearchOpts().ModuleFileHomeIsCwd &&
+ (!PP.getHeaderSearchInfo()
+ .getHeaderSearchOpts()
+ .ModuleMapFileHomeIsCwd ||
+ WritingModule->Directory->getName() != ".")) {
+ // Module directory.
+ auto Abbrev = std::make_shared<BitCodeAbbrev>();
+ Abbrev->Add(BitCodeAbbrevOp(MODULE_DIRECTORY));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Directory
+ unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev));
+
+ RecordData::value_type Record[] = {MODULE_DIRECTORY};
+ Stream.EmitRecordWithBlob(AbbrevCode, Record, *BaseDir);
+ }
- RecordData::value_type Record[] = {MODULE_DIRECTORY};
- Stream.EmitRecordWithBlob(AbbrevCode, Record, BaseDir);
+ // Write out all other paths relative to the base directory if possible.
+ BaseDirectory.assign(BaseDir->begin(), BaseDir->end());
+ } else if (!isysroot.empty()) {
+ // Write out paths relative to the sysroot if possible.
+ BaseDirectory = std::string(isysroot);
}
-
- // Write out all other paths relative to the base directory if possible.
- BaseDirectory.assign(BaseDir.begin(), BaseDir.end());
- } else if (!isysroot.empty()) {
- // Write out paths relative to the sysroot if possible.
- BaseDirectory = std::string(isysroot);
}
// Module map file
diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp
index e44fbb018183..6378596ef31e 100644
--- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp
+++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp
@@ -1196,36 +1196,91 @@ SmallVector<PrototypeDescriptor> parsePrototypes(StringRef Prototypes) {
return PrototypeDescriptors;
}
+#define STRINGIFY(NAME) \
+ case NAME: \
+ OS << #NAME; \
+ break;
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, enum PolicyScheme PS) {
+ switch (PS) {
+ STRINGIFY(SchemeNone)
+ STRINGIFY(HasPassthruOperand)
+ STRINGIFY(HasPolicyOperand)
+ }
+ return OS;
+}
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, enum RVVRequire Require) {
+ switch (Require) {
+ STRINGIFY(RVV_REQ_RV64)
+ STRINGIFY(RVV_REQ_Zvfhmin)
+ STRINGIFY(RVV_REQ_Xsfvcp)
+ STRINGIFY(RVV_REQ_Xsfvfnrclipxfqf)
+ STRINGIFY(RVV_REQ_Xsfvfwmaccqqq)
+ STRINGIFY(RVV_REQ_Xsfvqmaccdod)
+ STRINGIFY(RVV_REQ_Xsfvqmaccqoq)
+ STRINGIFY(RVV_REQ_Zvbb)
+ STRINGIFY(RVV_REQ_Zvbc)
+ STRINGIFY(RVV_REQ_Zvkb)
+ STRINGIFY(RVV_REQ_Zvkg)
+ STRINGIFY(RVV_REQ_Zvkned)
+ STRINGIFY(RVV_REQ_Zvknha)
+ STRINGIFY(RVV_REQ_Zvknhb)
+ STRINGIFY(RVV_REQ_Zvksed)
+ STRINGIFY(RVV_REQ_Zvksh)
+ STRINGIFY(RVV_REQ_Zvfbfwma)
+ STRINGIFY(RVV_REQ_Zvfbfmin)
+ STRINGIFY(RVV_REQ_Zvfh)
+ STRINGIFY(RVV_REQ_Experimental)
+ default:
+ llvm_unreachable("Unsupported RVVRequire!");
+ break;
+ }
+ return OS;
+}
+
+#undef STRINGIFY
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const RequiredExtensionBits &Exts) {
+ OS << "{";
+ ListSeparator LS;
+ for (unsigned I = 0; I < RVV_REQ_NUM; I++)
+ if (Exts[I])
+ OS << LS << static_cast<RVVRequire>(I);
+ OS << "}";
+ return OS;
+}
+
raw_ostream &operator<<(raw_ostream &OS, const RVVIntrinsicRecord &Record) {
OS << "{";
- OS << "\"" << Record.Name << "\",";
+ OS << "/*Name=*/\"" << Record.Name << "\", ";
if (Record.OverloadedName == nullptr ||
StringRef(Record.OverloadedName).empty())
- OS << "nullptr,";
+ OS << "/*OverloadedName=*/nullptr, ";
else
- OS << "\"" << Record.OverloadedName << "\",";
- OS << "{";
- for (uint32_t Exts : Record.RequiredExtensions)
- OS << Exts << ',';
- OS << "},";
- OS << Record.PrototypeIndex << ",";
- OS << Record.SuffixIndex << ",";
- OS << Record.OverloadedSuffixIndex << ",";
- OS << (int)Record.PrototypeLength << ",";
- OS << (int)Record.SuffixLength << ",";
- OS << (int)Record.OverloadedSuffixSize << ",";
- OS << (int)Record.TypeRangeMask << ",";
- OS << (int)Record.Log2LMULMask << ",";
- OS << (int)Record.NF << ",";
- OS << (int)Record.HasMasked << ",";
- OS << (int)Record.HasVL << ",";
- OS << (int)Record.HasMaskedOffOperand << ",";
- OS << (int)Record.HasTailPolicy << ",";
- OS << (int)Record.HasMaskPolicy << ",";
- OS << (int)Record.HasFRMRoundModeOp << ",";
- OS << (int)Record.IsTuple << ",";
- OS << (int)Record.UnMaskedPolicyScheme << ",";
- OS << (int)Record.MaskedPolicyScheme << ",";
+ OS << "/*OverloadedName=*/\"" << Record.OverloadedName << "\", ";
+ OS << "/*RequiredExtensions=*/" << Record.RequiredExtensions << ", ";
+ OS << "/*PrototypeIndex=*/" << Record.PrototypeIndex << ", ";
+ OS << "/*SuffixIndex=*/" << Record.SuffixIndex << ", ";
+ OS << "/*OverloadedSuffixIndex=*/" << Record.OverloadedSuffixIndex << ", ";
+ OS << "/*PrototypeLength=*/" << (int)Record.PrototypeLength << ", ";
+ OS << "/*SuffixLength=*/" << (int)Record.SuffixLength << ", ";
+ OS << "/*OverloadedSuffixSize=*/" << (int)Record.OverloadedSuffixSize << ", ";
+ OS << "/*TypeRangeMask=*/" << (int)Record.TypeRangeMask << ", ";
+ OS << "/*Log2LMULMask=*/" << (int)Record.Log2LMULMask << ", ";
+ OS << "/*NF=*/" << (int)Record.NF << ", ";
+ OS << "/*HasMasked=*/" << (int)Record.HasMasked << ", ";
+ OS << "/*HasVL=*/" << (int)Record.HasVL << ", ";
+ OS << "/*HasMaskedOffOperand=*/" << (int)Record.HasMaskedOffOperand << ", ";
+ OS << "/*HasTailPolicy=*/" << (int)Record.HasTailPolicy << ", ";
+ OS << "/*HasMaskPolicy=*/" << (int)Record.HasMaskPolicy << ", ";
+ OS << "/*HasFRMRoundModeOp=*/" << (int)Record.HasFRMRoundModeOp << ", ";
+ OS << "/*IsTuple=*/" << (int)Record.IsTuple << ", ";
+ OS << "/*UnMaskedPolicyScheme=*/" << (PolicyScheme)Record.UnMaskedPolicyScheme
+ << ", ";
+ OS << "/*MaskedPolicyScheme=*/" << (PolicyScheme)Record.MaskedPolicyScheme
+ << ", ";
OS << "},\n";
return OS;
}
diff --git a/clang/test/AST/ByteCode/codegen.cpp b/clang/test/AST/ByteCode/codegen.cpp
index ea2c812f30f6..7c853a20362b 100644
--- a/clang/test/AST/ByteCode/codegen.cpp
+++ b/clang/test/AST/ByteCode/codegen.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -o - %s -fexperimental-new-constant-interpreter | FileCheck %s
#ifdef __SIZEOF_INT128__
@@ -95,3 +95,12 @@ void f(A *a) {
// CHECK: call void @_ZN1AD1Ev(
A::E e3 = A().Foo;
}
+
+int notdead() {
+ auto l = [c=0]() mutable {
+ return c++ < 5 ? 10 : 12;
+ };
+ return l();
+}
+// CHECK: _ZZ7notdeadvEN3$_0clEv
+// CHECK: ret i32 %cond
diff --git a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index bbed683ac1fd..c3d0541229fa 100644
--- a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -52,9 +52,8 @@ vec2048 x2048 = {0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 2 , 1, 0,
typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
// CHECK128-LABEL: define{{.*}} <16 x i8> @f2(<16 x i8> noundef %x)
// CHECK128-NEXT: entry:
-// CHECK128-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
// CHECK128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[X:%.*]], i64 0)
-// CHECK128-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
+// CHECK128-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
// CHECK128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[TMP1]], i64 0)
// CHECK128-NEXT: ret <16 x i8> [[CASTFIXEDSVE]]
@@ -62,9 +61,8 @@ typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<[[#div(VBITS,8)]] x i8>) align 16 captures(none) initializes((0, [[#div(VBITS,8)]])) %agg.result, ptr noundef readonly captures(none) %0)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, ptr [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8(<vscale x 16 x i8> poison, <[[#div(VBITS,8)]] x i8> [[X]], i64 0)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <[[#div(VBITS,8)]] x i8> @llvm.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> [[TMP2]], i64 0)
// CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, [[TBAA6]]
// CHECK-NEXT: ret void
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c
index 6bf56bdea505..ca3480d62725 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c
@@ -7,14 +7,12 @@
// CHECK-LABEL: @test_svrdffr(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true))
// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z12test_svrdffrv(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true))
// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t test_svrdffr()
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index c92aad633082..e5067c1c3b07 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -56,7 +56,7 @@ void f_default2(void) {
__attribute__((target("avx, sse4.2, arch= ivybridge")))
void f_avx_sse4_2_ivybridge_2(void) {}
-// CHECK: [[f_no_aes_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes"
+// CHECK: [[f_no_aes_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
__attribute__((target("no-aes, arch=ivybridge")))
void f_no_aes_ivybridge(void) {}
@@ -98,11 +98,11 @@ void f_x86_64_v3(void) {}
__attribute__((target("arch=x86-64-v4")))
void f_x86_64_v4(void) {}
-// CHECK: [[f_avx10_1_256]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512"
+// CHECK: [[f_avx10_1_256]] = {{.*}}"target-cpu"="i686" "target-features"="+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512"
__attribute__((target("avx10.1-256")))
void f_avx10_1_256(void) {}
-// CHECK: [[f_avx10_1_512]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave"
+// CHECK: [[f_avx10_1_512]] = {{.*}}"target-cpu"="i686" "target-features"="+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
__attribute__((target("avx10.1-512")))
void f_avx10_1_512(void) {}
@@ -112,4 +112,4 @@ void f_prefer_256_bit(void) {}
// CHECK: [[f_no_prefer_256_bit]] = {{.*}}"target-features"="{{.*}}-prefer-256-bit
__attribute__((target("no-prefer-256-bit")))
-void f_no_prefer_256_bit(void) {} \ No newline at end of file
+void f_no_prefer_256_bit(void) {}
diff --git a/clang/test/CodeGen/builtin-maxnum-minnum.c b/clang/test/CodeGen/builtin-maxnum-minnum.c
new file mode 100644
index 000000000000..69cec72495d3
--- /dev/null
+++ b/clang/test/CodeGen/builtin-maxnum-minnum.c
@@ -0,0 +1,171 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -x c++ -std=c++20 -disable-llvm-passes -O3 -triple x86_64 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK
+
+typedef _Float16 half8 __attribute__((ext_vector_type(8)));
+typedef __bf16 bf16x8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef double double2 __attribute__((ext_vector_type(2)));
+typedef long double ldouble2 __attribute__((ext_vector_type(2)));
+
+// CHECK-LABEL: define dso_local noundef <8 x half> @_Z7pfmin16Dv8_DF16_S_(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[ELT_MINNUM]]
+//
+half8 pfmin16(half8 a, half8 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <8 x bfloat> @_Z8pfmin16bDv8_DF16bS_(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16
+// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]])
+// CHECK-NEXT: ret <8 x bfloat> [[ELT_MINNUM]]
+//
+bf16x8 pfmin16b(bf16x8 a, bf16x8 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z7pfmin32Dv4_fS_(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[ELT_MINNUM]]
+//
+float4 pfmin32(float4 a, float4 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <2 x double> @_Z7pfmin64Dv2_dS_(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: ret <2 x double> [[ELT_MINNUM]]
+//
+double2 pfmin64(double2 a, double2 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <2 x x86_fp80> @_Z7pfmin80Dv2_eS_(
+// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]])
+// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]]
+//
+ldouble2 pfmin80(ldouble2 a, ldouble2 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+
+// CHECK-LABEL: define dso_local noundef <8 x half> @_Z7pfmax16Dv8_DF16_S_(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[ELT_MAXNUM]]
+//
+half8 pfmax16(half8 a, half8 b) {
+ return __builtin_elementwise_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <8 x bfloat> @_Z8pfmax16bDv8_DF16bS_(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16
+// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]])
+// CHECK-NEXT: ret <8 x bfloat> [[ELT_MAXNUM]]
+//
+bf16x8 pfmax16b(bf16x8 a, bf16x8 b) {
+ return __builtin_elementwise_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z7pfmax32Dv4_fS_(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[ELT_MAXNUM]]
+//
+float4 pfmax32(float4 a, float4 b) {
+ return __builtin_elementwise_maxnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <2 x double> @_Z7pfmax64Dv2_dS_(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: ret <2 x double> [[ELT_MAXNUM]]
+//
+double2 pfmax64(double2 a, double2 b) {
+ return __builtin_elementwise_maxnum(a, b);
+}
+
+// CHECK-LABEL: define dso_local noundef <2 x x86_fp80> @_Z7pfmax80Dv2_eS_(
+// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MINNUM:%.*]] = call <2 x x86_fp80> @llvm.minnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]])
+// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINNUM]]
+//
+ldouble2 pfmax80(ldouble2 a, ldouble2 b) {
+ return __builtin_elementwise_minnum(a, b);
+}
+
+//.
+// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"Simple C++ TBAA"}
+//.
diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 789aae7a5c34..49604c6c5e61 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -546,12 +546,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
-// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
-// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
+// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false)
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]]
// AMDGCN-NEXT: ret void
//
@@ -596,20 +594,15 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
-// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
-// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
-// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
-// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
-// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
-// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
-// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]]
+// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
+// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
+// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP1]], align 8
+// AMDGCN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
+// AMDGCN-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8
+// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR4]]
// AMDGCN-NEXT: ret void
//
//
@@ -630,15 +623,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
-// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
-// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
-// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
-// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
+// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false)
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]]
// AMDGCN-NEXT: ret void
//
@@ -868,15 +856,13 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
-// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN20-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
-// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
-// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
-// AMDGCN20-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
-// AMDGCN20-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5)
-// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]]
+// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
+// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
+// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false)
+// AMDGCN20-NEXT: [[U_ASCAST:%.*]] = addrspacecast ptr [[U]] to ptr addrspace(5)
+// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U_ASCAST]]) #[[ATTR4]]
// AMDGCN20-NEXT: ret void
//
//
@@ -927,21 +913,16 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
-// AMDGCN20-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN20-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
-// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
-// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
-// AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8
-// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
-// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
-// AMDGCN20-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8
-// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN20-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8
-// AMDGCN20-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
-// AMDGCN20-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
-// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]]
+// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
+// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
+// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
+// AMDGCN20-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 0
+// AMDGCN20-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8
+// AMDGCN20-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
+// AMDGCN20-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[TMP3]], align 8
+// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR4]]
// AMDGCN20-NEXT: ret void
//
//
@@ -963,18 +944,13 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
-// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN20-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
-// AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
-// AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
-// AMDGCN20-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
-// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
-// AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
-// AMDGCN20-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8
-// AMDGCN20-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5)
-// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]]
+// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
+// AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
+// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false)
+// AMDGCN20-NEXT: [[U_ASCAST:%.*]] = addrspacecast ptr [[U]] to ptr addrspace(5)
+// AMDGCN20-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U_ASCAST]]) #[[ATTR4]]
// AMDGCN20-NEXT: ret void
//
//
@@ -1408,12 +1384,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
-// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN30-GVAR-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
-// AMDGCN30-GVAR-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
+// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false)
// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]]
// AMDGCN30-GVAR-NEXT: ret void
//
@@ -1458,20 +1432,15 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
-// AMDGCN30-GVAR-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN30-GVAR-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
-// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
-// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
-// AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
-// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
-// AMDGCN30-GVAR-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN30-GVAR-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
-// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]]
+// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
+// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
+// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP1]], align 8
+// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
+// AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8
+// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR4]]
// AMDGCN30-GVAR-NEXT: ret void
//
//
@@ -1492,15 +1461,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
-// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN30-GVAR-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
// AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
-// AMDGCN30-GVAR-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
-// AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
-// AMDGCN30-GVAR-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
+// AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false)
// AMDGCN30-GVAR-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]]
// AMDGCN30-GVAR-NEXT: ret void
//
@@ -1699,12 +1663,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
-// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN30-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
-// AMDGCN30-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
+// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false)
// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR4]]
// AMDGCN30-NEXT: ret void
//
@@ -1749,20 +1711,15 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
-// AMDGCN30-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN30-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
-// AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
-// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
-// AMDGCN30-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
-// AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN30-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
-// AMDGCN30-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN30-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
-// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]]
+// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
+// AMDGCN30-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
+// AMDGCN30-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP1]], align 8
+// AMDGCN30-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
+// AMDGCN30-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8
+// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR4]]
// AMDGCN30-NEXT: ret void
//
//
@@ -1783,15 +1740,10 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
-// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN30-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
// AMDGCN30-NEXT: [[ENTRY:.*:]]
// AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
-// AMDGCN30-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
-// AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
-// AMDGCN30-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
+// AMDGCN30-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false)
// AMDGCN30-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR4]]
// AMDGCN30-NEXT: ret void
//
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
index 6dc488c40da7..7d0a66bac146 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 --include-generated-funcs
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefix=AMDGCN %s
typedef int int2 __attribute__((ext_vector_type(2)));
@@ -330,15 +330,13 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
-// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
-// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
-// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
-// AMDGCN-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5)
-// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]]
+// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
+// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
+// AMDGCN-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false)
+// AMDGCN-NEXT: [[U_ASCAST:%.*]] = addrspacecast ptr [[U]] to ptr addrspace(5)
+// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U_ASCAST]]) #[[ATTR4]]
// AMDGCN-NEXT: ret void
//
//
@@ -389,21 +387,16 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
-// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
-// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
-// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8
-// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
-// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
-// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8
-// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8
-// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
-// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
-// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR4]]
+// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
+// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
+// AMDGCN-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
+// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 0
+// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8
+// AMDGCN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
+// AMDGCN-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[TMP3]], align 8
+// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR4]]
// AMDGCN-NEXT: ret void
//
//
@@ -425,18 +418,13 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
//
//
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
-// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
+// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
-// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
-// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
-// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
-// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
-// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8
-// AMDGCN-NEXT: [[U1_ASCAST:%.*]] = addrspacecast ptr [[U1]] to ptr addrspace(5)
-// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U1_ASCAST]]) #[[ATTR4]]
+// AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
+// AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
+// AMDGCN-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false)
+// AMDGCN-NEXT: [[U_ASCAST:%.*]] = addrspacecast ptr [[U]] to ptr addrspace(5)
+// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U_ASCAST]]) #[[ATTR4]]
// AMDGCN-NEXT: ret void
//
//
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 4d09fc3ffb70..06d3cdb01deb 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -214,7 +214,8 @@ typedef struct struct_4regs
int w;
} struct_4regs;
-// CHECK: void @kernel_empty_struct_arg(%struct.empty_struct %s.coerce)
+// CHECK: void @kernel_empty_struct_arg(ptr addrspace(4) noundef readnone byref(%struct.empty_struct) align 1 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_empty_struct_arg()
__kernel void kernel_empty_struct_arg(empty_struct s) { }
// CHECK: void @kernel_single_element_struct_arg(i32 %arg1.coerce)
@@ -223,28 +224,35 @@ __kernel void kernel_single_element_struct_arg(single_element_struct_arg_t arg1)
// CHECK: void @kernel_nested_single_element_struct_arg(i32 %arg1.coerce)
__kernel void kernel_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { }
-// CHECK: void @kernel_struct_arg(%struct.struct_arg %arg1.coerce)
+// CHECK: void @kernel_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2)
__kernel void kernel_struct_arg(struct_arg_t arg1) { }
-// CHECK: void @kernel_struct_padding_arg(%struct.struct_padding_arg %arg1.coerce)
+// CHECK: void @kernel_struct_padding_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_padding_arg) align 8 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1)
__kernel void kernel_struct_padding_arg(struct_padding_arg arg1) { }
-// CHECK: void @kernel_test_struct_of_arrays_arg(%struct.struct_of_arrays_arg %arg1.coerce)
+// CHECK: void @kernel_test_struct_of_arrays_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_arrays_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_test_struct_of_arrays_arg([2 x i32] %arg1.coerce0, float %arg1.coerce1, [4 x i32] %arg1.coerce2, [3 x float] %arg1.coerce3, i32 %arg1.coerce4)
__kernel void kernel_test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) { }
-// CHECK: void @kernel_struct_of_structs_arg(%struct.struct_of_structs_arg %arg1.coerce)
+// CHECK: void @kernel_struct_of_structs_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_structs_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_struct_of_structs_arg(i32 %arg1.coerce0, float %arg1.coerce1, %struct.struct_arg %arg1.coerce2, i32 %arg1.coerce3)
__kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { }
// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce)
__kernel void test_kernel_transparent_union_arg(transparent_u u) { }
-// CHECK: void @kernel_single_array_element_struct_arg(%struct.single_array_element_struct_arg %arg1.coerce)
+// CHECK: void @kernel_single_array_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_array_element_struct_arg) align 4 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_single_array_element_struct_arg([4 x i32] %arg1.coerce)
__kernel void kernel_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { }
-// CHECK: void @kernel_single_struct_element_struct_arg(%struct.single_struct_element_struct_arg %arg1.coerce)
+// CHECK: void @kernel_single_struct_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_struct_element_struct_arg) align 8 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_single_struct_element_struct_arg(%struct.inner %arg1.coerce)
__kernel void kernel_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { }
-// CHECK: void @kernel_different_size_type_pair_arg(%struct.different_size_type_pair %arg1.coerce)
+// CHECK: void @kernel_different_size_type_pair_arg(ptr addrspace(4) noundef readonly byref(%struct.different_size_type_pair) align 8 captures(none) {{%.+}})
+// CHECK: void @__clang_ocl_kern_imp_kernel_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
__kernel void kernel_different_size_type_pair_arg(different_size_type_pair arg1) { }
// CHECK: define{{.*}} void @func_f32_arg(float noundef %arg)
diff --git a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
index cdbe510b723b..a5b2bee127bd 100644
--- a/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
+++ b/clang/test/CodeGenOpenCL/opencl-kernel-call.cl
@@ -676,12 +676,10 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
-// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] {
+// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
-// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
+// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false)
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeOneMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
@@ -698,20 +696,15 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
-// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] {
+// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
-// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
-// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
-// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
-// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
-// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
-// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR5]]
+// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
+// AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
+// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP1]], align 8
+// AMDGCN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
+// AMDGCN-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8
+// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelTwoMember(<2 x i32> [[TMP2]], <2 x i32> [[TMP4]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
@@ -734,15 +727,10 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
-// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] {
+// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META13]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
-// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
-// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
-// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
-// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
+// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(4) align 8 [[TMP0]], i64 480, i1 false)
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_KernelLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
@@ -815,28 +803,23 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern2(
-// AMDGCN-SAME: <2 x i32> [[STRUCTONEMEM_COERCE:%.*]], ptr addrspace(1) noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], [[STRUCT_STRUCTTWOMEMBER:%.*]] [[STRUCTTWOMEM_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] {
+// AMDGCN-SAME: <2 x i32> [[STRUCTONEMEM_COERCE:%.*]], ptr addrspace(1) noundef align 8 [[GLOBAL_STRUCTONEMEM:%.*]], ptr addrspace(4) noundef byref([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META22:![0-9]+]] !kernel_arg_access_qual [[META23:![0-9]+]] !kernel_arg_type [[META24:![0-9]+]] !kernel_arg_base_type [[META24]] !kernel_arg_type_qual [[META25:![0-9]+]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[STRUCTONEMEM:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[STRUCTTWOMEM:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[GLOBAL_STRUCTONEMEM_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0
// AMDGCN-NEXT: store <2 x i32> [[STRUCTONEMEM_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
-// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[STRUCTTWOMEM_COERCE]], 0
-// AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
-// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1
-// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[STRUCTTWOMEM_COERCE]], 1
-// AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
+// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[STRUCTTWOMEM]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
// AMDGCN-NEXT: store ptr addrspace(1) [[GLOBAL_STRUCTONEMEM]], ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8
-// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8
+// AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[GLOBAL_STRUCTONEMEM_ADDR]], align 8
// AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[STRUCTONEMEM]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
-// AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
-// AMDGCN-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1
-// AMDGCN-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP8]], align 8
-// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(<2 x i32> [[TMP5]], ptr addrspace(1) noundef align 8 [[TMP4]], <2 x i32> [[TMP7]], <2 x i32> [[TMP9]]) #[[ATTR5]]
+// AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
+// AMDGCN-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 0
+// AMDGCN-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP3]], align 8
+// AMDGCN-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[STRUCTTWOMEM]], i32 0, i32 1
+// AMDGCN-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP5]], align 8
+// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern2(<2 x i32> [[TMP2]], ptr addrspace(1) noundef align 8 [[TMP1]], <2 x i32> [[TMP4]], <2 x i32> [[TMP6]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
//
@@ -875,19 +858,12 @@ kernel void caller_kern3( struct LargeStructOneMember largeStructOneMem, struct
//
// AMDGCN: Function Attrs: convergent noinline norecurse nounwind optnone
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @caller_kern3(
-// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[LARGESTRUCTONEMEM_COERCE:%.*]], [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[LARGESTRUCTTWOMEM_COERCE:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META26:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27:![0-9]+]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] {
+// AMDGCN-SAME: ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]], ptr addrspace(4) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP1:%.*]]) #[[ATTR2]] !kernel_arg_addr_space [[META26:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META27:![0-9]+]] !kernel_arg_base_type [[META27]] !kernel_arg_type_qual [[META11]] {
// AMDGCN-NEXT: entry:
// AMDGCN-NEXT: [[LARGESTRUCTONEMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT: [[LARGESTRUCTTWOMEM:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[LARGESTRUCTONEMEM]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[LARGESTRUCTONEMEM_COERCE]], 0
-// AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
-// AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[LARGESTRUCTTWOMEM]], i32 0, i32 0
-// AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[LARGESTRUCTTWOMEM_COERCE]], 0
-// AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
-// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[LARGESTRUCTTWOMEM]], i32 0, i32 1
-// AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[LARGESTRUCTTWOMEM_COERCE]], 1
-// AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP5]], ptr addrspace(5) [[TMP4]], align 8
+// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(4) align 8 [[TMP0]], i64 800, i1 false)
+// AMDGCN-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 [[LARGESTRUCTTWOMEM]], ptr addrspace(4) align 8 [[TMP1]], i64 480, i1 false)
// AMDGCN-NEXT: call void @__clang_ocl_kern_imp_caller_kern3(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[LARGESTRUCTONEMEM]], ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[LARGESTRUCTTWOMEM]]) #[[ATTR5]]
// AMDGCN-NEXT: ret void
//
diff --git a/clang/test/Driver/amdgpu-openmp-toolchain.c b/clang/test/Driver/amdgpu-openmp-toolchain.c
index 743f4e74696c..1091e6e372ac 100644
--- a/clang/test/Driver/amdgpu-openmp-toolchain.c
+++ b/clang/test/Driver/amdgpu-openmp-toolchain.c
@@ -54,12 +54,12 @@
// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx803 \
// RUN: --no-offloadlib --offloadlib --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE
-// CHECK-LIB-DEVICE: "-cc1" {{.*}}ocml.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
+// CHECK-LIB-DEVICE: "-cc1" {{.*}}ocml.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx803 -nogpulib \
// RUN: --offloadlib --no-offloadlib --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-LIB-DEVICE-NOGPULIB
-// CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" {{.*}}ocml.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
+// CHECK-LIB-DEVICE-NOGPULIB-NOT: "-cc1" {{.*}}ocml.bc"{{.*}}oclc_daz_opt_on.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_803.bc"
// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \
// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
diff --git a/clang/test/Driver/arm-fpu-selection.s b/clang/test/Driver/arm-fpu-selection.s
new file mode 100644
index 000000000000..6af374d04905
--- /dev/null
+++ b/clang/test/Driver/arm-fpu-selection.s
@@ -0,0 +1,36 @@
+// REQUIRES: arm-registered-target
+/// Ensures that when targeting an ARM target with an Asm file, clang
+/// collects the features from the FPU. This is critical in the
+/// activation of NEON for supported targets. The Cortex-R52 will be
+/// used and tested for VFP and NEON Support
+
+// RUN: %clang -target arm-none-eabi -mcpu=cortex-r52 -c %s -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK-STDERR %s --allow-empty
+// RUN: %clang -target arm-none-eabi -mcpu=cortex-r52 -c %s -o /dev/null -### 2>&1 | FileCheck --check-prefix=CHECK-TARGET-FEATURES %s
+
+/// Check that no errors or warnings are present when assembling using cc1as.
+// CHECK-STDERR-NOT: error:
+// CHECK-STDERR-NOT: warning:
+
+/// Check that NEON and VFPV5 have been activated when using Cortex-R52 when using cc1as
+// CHECK-TARGET-FEATURES: "-target-feature" "+vfp2sp"
+// CHECK-TARGET-FEATURES: "-target-feature" "+vfp3"
+// CHECK-TARGET-FEATURES: "-target-feature" "+fp-armv8"
+// CHECK-TARGET-FEATURES: "-target-feature" "+fp-armv8d16"
+// CHECK-TARGET-FEATURES: "-target-feature" "+fp-armv8d16sp"
+// CHECK-TARGET-FEATURES: "-target-feature" "+fp-armv8sp"
+// CHECK-TARGET-FEATURES: "-target-feature" "+neon"
+
+ vadd.f32 s0, s1, s2
+ vadd.f64 d0, d1, d2
+ vcvt.u32.f32 s0, s0, #1
+ vcvt.u32.f64 d0, d0, #1
+ vcvtb.f32.f16 s0, s1
+ vcvtb.f64.f16 d0, s1
+ vfma.f32 s0, s1, s2
+ vfma.f64 d0, d1, d2
+ vcvta.u32.f32 s0, s1
+ vcvta.u32.f64 s0, d1
+ vadd.f32 q0, q1, q2
+ vcvt.f32.f16 q0, d1
+ vfma.f32 q0, q1, q2
+ vcvta.u32.f32 q0, q1
diff --git a/clang/test/Driver/armv7-default-neon.s b/clang/test/Driver/armv7-default-neon.s
new file mode 100644
index 000000000000..2015f0bc429d
--- /dev/null
+++ b/clang/test/Driver/armv7-default-neon.s
@@ -0,0 +1,16 @@
+/// Ensure that we can assemble NEON by just specifying an armv7
+/// Apple or Windows target.
+
+// REQUIRES: arm-registered-target
+// RUN: %clang -c -target armv7-apple-darwin -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-STDERR %s --allow-empty
+// RUN: %clang -c -target armv7-apple-darwin -o /dev/null %s -### 2>&1 | FileCheck --check-prefix=CHECK-TARGET-FEATURES %s
+// RUN: %clang -c -target armv7-windows -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-STDERR %s --allow-empty
+// RUN: %clang -c -target armv7-windows -o /dev/null %s -### 2>&1 | FileCheck --check-prefix=CHECK-TARGET-FEATURES %s
+
+/// Check that no errors or warnings are present when assembling using cc1as.
+// CHECK-STDERR-NOT: error:
+// CHECK-STDERR-NOT: warning:
+
+// CHECK-TARGET-FEATURES: "-target-feature" "+neon"
+
+vadd.i32 q0, q0, q0
diff --git a/clang/test/Driver/armv7s-default-vfpv4.s b/clang/test/Driver/armv7s-default-vfpv4.s
new file mode 100644
index 000000000000..3e16503e6316
--- /dev/null
+++ b/clang/test/Driver/armv7s-default-vfpv4.s
@@ -0,0 +1,13 @@
+/// Ensure that we can assemble VFPv4 by just specifying an armv7s target.
+
+// REQUIRES: arm-registered-target
+// RUN: %clang -c -target armv7s-apple-darwin -o /dev/null %s 2>&1 | FileCheck --check-prefix=CHECK-STDERR %s --allow-empty
+// RUN: %clang -c -target armv7s-apple-darwin -o /dev/null %s -### 2>&1 | FileCheck --check-prefix=CHECK-TARGET-FEATURES %s
+
+/// Check that no errors or warnings are present when assembling using cc1as.
+// CHECK-STDERR-NOT: error:
+// CHECK-STDERR-NOT: warning:
+
+// CHECK-TARGET-FEATURES: "-target-feature" "+vfp4"
+
+vfma.f32 q1, q2, q3
diff --git a/clang/test/Driver/armv8.1m.main.s b/clang/test/Driver/armv8.1m.main.s
index 8fc94cf772fa..a660e56c2d1e 100644
--- a/clang/test/Driver/armv8.1m.main.s
+++ b/clang/test/Driver/armv8.1m.main.s
@@ -8,21 +8,21 @@
# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+fp -o /dev/null %s 2>%t
# RUN: FileCheck --check-prefix=ERROR-V81M_FP < %t %s
# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+nofp -o /dev/null %s 2>%t
-# RUN: FileCheck --check-prefix=ERROR-V81M_FP < %t %s
+# RUN: FileCheck --check-prefix=ERROR-V81M_NOFP < %t %s
# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+fp.dp -o /dev/null %s 2>%t
# RUN: FileCheck --check-prefix=ERROR-V81M_FPDP < %t %s
# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+nofp.dp -o /dev/null %s 2>%t
-# RUN: FileCheck --check-prefix=ERROR-V81M_FPDP < %t %s
+# RUN: FileCheck --check-prefix=ERROR-V81M_NOFPDP < %t %s
# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve -o /dev/null %s 2>%t
# RUN: FileCheck --check-prefix=ERROR-V81M_MVE < %t %s
# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+nomve -o /dev/null %s 2>%t
-# RUN: FileCheck --check-prefix=ERROR-V81M_MVE < %t %s
+# RUN: FileCheck --check-prefix=ERROR-V81M_NOMVE < %t %s
# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve+fp -o /dev/null %s 2>%t
# RUN: FileCheck --check-prefix=ERROR-V81M_MVE_FP < %t %s
# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+mve.fp -o /dev/null %s 2>%t
# RUN: FileCheck --check-prefix=ERROR-V81M_MVEFP < %t %s
# RUN: not %clang -c -target arm-none-none-eabi -march=armv8.1-m.main+nomve.fp -o /dev/null %s 2>%t
-# RUN: FileCheck --check-prefix=ERROR-V81M_MVEFP < %t %s
+# RUN: FileCheck --check-prefix=ERROR-V81M_NOMVEFP < %t %s
.syntax unified
.thumb
@@ -35,28 +35,41 @@ qadd r0, r1, r2
# ERROR-V8M: :[[@LINE-1]]:1: error
# ERROR-V81M: :[[@LINE-2]]:1: error
# ERROR-V81M_FP: :[[@LINE-3]]:1: error
-# ERROR-V81M_FPDP: :[[@LINE-4]]:1: error
+# ERROR-V81M_NOFP: :[[@LINE-4]]:1: error
+# ERROR-V81M_FPDP: :[[@LINE-5]]:1: error
+# ERROR-V81M_NOFPDP: :[[@LINE-6]]:1: error
+# ERROR-V81M_NOMVE: :[[@LINE-7]]:1: error
+# ERROR-V81M_NOMVEFP: :[[@LINE-8]]:1: error
vadd.f16 s0, s1, s2
# ERROR-V8M: :[[@LINE-1]]:1: error
-# ERROR-V81M: :[[@LINE-2]]:1: error
-# ERROR-V81M_DSP: :[[@LINE-3]]:1: error
-# ERROR-V81M_MVE: :[[@LINE-4]]:1: error
+# ERROR-V81M_NOFP: :[[@LINE-2]]:1: error
vabs.f32 s0, s1
-# ERROR-V8M: :[[@LINE-1]]:1: error
-# ERROR-V81M: :[[@LINE-2]]:1: error
-# ERROR-V81M_DSP: :[[@LINE-3]]:1: error
-# ERROR-V81M_MVE: :[[@LINE-4]]:1: error
+# ERROR-V81M_NOFP: :[[@LINE-1]]:1: error
-vcmp.f64 d0,d1
+vabs.s32 q0, q1
# ERROR-V8M: :[[@LINE-1]]:1: error
# ERROR-V81M: :[[@LINE-2]]:1: error
# ERROR-V81M_DSP: :[[@LINE-3]]:1: error
# ERROR-V81M_FP: :[[@LINE-4]]:1: error
-# ERROR-V81M_MVE: :[[@LINE-5]]:1: error
-# ERROR-V81M_MVE_FP: :[[@LINE-6]]:1: error
-# ERROR-V81M_MVEFP: :[[@LINE-7]]:1: error
+# ERROR-V81M_NOFP: :[[@LINE-5]]:1: error
+# ERROR-V81M_FPDP: :[[@LINE-6]]:1: error
+# ERROR-V81M_NOFPDP: :[[@LINE-7]]:1: error
+# ERROR-V81M_NOMVE: :[[@LINE-8]]:1: error
+# ERROR-V81M_NOMVEFP: :[[@LINE-9]]:1: error
+
+vcmp.f64 d0,d1
+# ERROR-V81M: :[[@LINE-1]]:1: error
+# ERROR-V81M_DSP: :[[@LINE-2]]:1: error
+# ERROR-V81M_FP: :[[@LINE-3]]:1: error
+# ERROR-V81M_NOFP: :[[@LINE-4]]:1: error
+# ERROR-V81M_NOFPDP: :[[@LINE-5]]:1: error
+# ERROR-V81M_MVE: :[[@LINE-6]]:1: error
+# ERROR-V81M_NOMVE: :[[@LINE-7]]:1: error
+# ERROR-V81M_MVE_FP: :[[@LINE-8]]:1: error
+# ERROR-V81M_MVEFP: :[[@LINE-9]]:1: error
+# ERROR-V81M_NOMVEFP: :[[@LINE-10]]:1: error
asrl r0, r1, r2
# ERROR-V8M: :[[@LINE-1]]:1: error
@@ -64,6 +77,9 @@ asrl r0, r1, r2
# ERROR-V81M_DSP: :[[@LINE-3]]:1: error
# ERROR-V81M_FP: :[[@LINE-4]]:1: error
# ERROR-V81M_FPDP: :[[@LINE-5]]:1: error
+# ERROR-V81M_NOFPDP: :[[@LINE-6]]:1: error
+# ERROR-V81M_NOMVE: :[[@LINE-7]]:1: error
+# ERROR-V81M_NOMVEFP: :[[@LINE-8]]:1: error
vcadd.i8 q0, q1, q2, #90
# ERROR-V8M: :[[@LINE-1]]:1: error
@@ -71,3 +87,6 @@ vcadd.i8 q0, q1, q2, #90
# ERROR-V81M_DSP: :[[@LINE-3]]:1: error
# ERROR-V81M_FP: :[[@LINE-4]]:1: error
# ERROR-V81M_FPDP: :[[@LINE-5]]:1: error
+# ERROR-V81M_NOFPDP: :[[@LINE-6]]:1: error
+# ERROR-V81M_NOMVE: :[[@LINE-7]]:1: error
+# ERROR-V81M_NOMVEFP: :[[@LINE-8]]:1: error
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index b3829114138c..effce40d67eb 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -6,7 +6,7 @@
// RUN: --cuda-gpu-arch=gfx803 \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR
// Test subtarget with flushing off by ddefault.
@@ -14,7 +14,7 @@
// RUN: --cuda-gpu-arch=gfx900 \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD,ROCMDIR
// Test explicit flag, opposite of target default.
@@ -23,7 +23,7 @@
// RUN: -fgpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR
// Test explicit flag, opposite of target default.
@@ -32,7 +32,7 @@
// RUN: -fno-gpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD,ROCMDIR
// Test explicit flag, same as target default.
@@ -41,7 +41,7 @@
// RUN: -fno-gpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD,ROCMDIR
// Test explicit flag, same as target default.
@@ -50,7 +50,7 @@
// RUN: -fgpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR
// Test last flag wins, not flushing
@@ -59,7 +59,7 @@
// RUN: -fgpu-flush-denormals-to-zero -fno-gpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD,ROCMDIR
// RUN: %clang -### --target=x86_64-linux-gnu \
@@ -67,7 +67,7 @@
// RUN: -fgpu-flush-denormals-to-zero -fno-gpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,NOFLUSHD,ROCMDIR
// RUN: %clang -### --target=x86_64-linux-gnu \
@@ -75,7 +75,7 @@
// RUN: -fno-gpu-flush-denormals-to-zero -fgpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR
// RUN: %clang -### --target=x86_64-linux-gnu \
@@ -83,21 +83,21 @@
// RUN: -fno-gpu-flush-denormals-to-zero -fgpu-flush-denormals-to-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR
// Test finding device lib in resource dir
// RUN: %clang -### --target=x86_64-linux-gnu \
// RUN: --offload-arch=gfx803 -nogpuinc \
// RUN: -resource-dir=%S/Inputs/rocm_resource_dir \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,RESDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,RESDIR
// Test --hip-device-lib-path flag
// RUN: %clang -### --target=x86_64-linux-gnu \
// RUN: --cuda-gpu-arch=gfx803 -nogpuinc \
// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR
// Test --hip-device-lib-path wins over -resource-dir
// RUN: %clang -### --target=x86_64-linux-gnu \
@@ -105,7 +105,7 @@
// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \
// RUN: -resource-dir=%S/Inputs/rocm_resource_dir \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,ROCMDIR
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,FLUSHD,ROCMDIR
// Test environment variable HIP_DEVICE_LIB_PATH
// RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm/amdgcn/bitcode \
@@ -213,6 +213,9 @@
// ALL-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]ockl.bc"
+// FLUSHD-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_daz_opt_on.bc"
+// NOFLUSHD-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_daz_opt_off.bc"
+
// ALL-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_unsafe_math_off.bc"
// ALL-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_finite_only_off.bc"
// ALL-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_correctly_rounded_sqrt_on.bc"
@@ -220,19 +223,23 @@
// ALL-SAME: "-mlink-builtin-bitcode" "[[DEVICELIB_DIR]]oclc_isa_version_{{[0-9]+}}.bc"
// INST-SAME: "-mlink-builtin-bitcode" "{{.*}}instrument.bc"
-// FAST: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc"
+// FAST: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
+// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc"
// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_on.bc"
// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc"
-// FINITE: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc"
+// FINITE: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
+// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc"
// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_on.bc"
// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc"
-// UNSAFE: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc"
+// UNSAFE: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
+// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc"
// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc"
// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc"
-// DIVSQRT: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc"
+// DIVSQRT: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
+// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc"
// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc"
// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_off.bc"
diff --git a/clang/test/Driver/rocm-device-libs.cl b/clang/test/Driver/rocm-device-libs.cl
index 7aee10bf1556..f9766e6fa4d9 100644
--- a/clang/test/Driver/rocm-device-libs.cl
+++ b/clang/test/Driver/rocm-device-libs.cl
@@ -6,7 +6,7 @@
// RUN: -x cl -mcpu=gfx900 \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
-// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900,WAVE64 %s
+// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s
@@ -15,7 +15,7 @@
// RUN: -x cl -mcpu=gfx803 \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
-// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX803,WAVE64 %s
+// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s
@@ -24,7 +24,7 @@
// RUN: -x cl -mcpu=fiji \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
-// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX803,WAVE64 %s
+// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s
@@ -33,7 +33,7 @@
// RUN: -cl-denorms-are-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
-// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,GFX900,WAVE64 %s
+// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DAZ,GFX900,WAVE64 %s
// RUN: %clang -### -target amdgcn-amd-amdhsa \
@@ -41,7 +41,7 @@
// RUN: -cl-denorms-are-zero \
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s \
-// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,GFX803,WAVE64 %s
+// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DAZ,GFX803,WAVE64 %s
@@ -124,13 +124,13 @@
// RUN: -x cl -mcpu=gfx900 \
// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \
// RUN: %S/opencl.cl \
-// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900,WAVE64 %s
+// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s
// Test environment variable HIP_DEVICE_LIB_PATH
// RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm/amdgcn/bitcode %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx900 \
// RUN: %S/opencl.cl \
-// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900,WAVE64 %s
+// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s
// RUN: %clang -### -target amdgcn-amd-amdhsa \
// RUN: -x cl -mcpu=gfx908:xnack+ -fsanitize=address \
@@ -150,6 +150,11 @@
// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/ocml.bc"
// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/ockl.bc"
+// GFX900-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_daz_opt_off.bc"
+// GFX803-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_daz_opt_on.bc"
+// GFX700-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_daz_opt_on.bc"
+// COMMON-DAZ-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_daz_opt_on.bc"
+
// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_unsafe_math_off.bc"
// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/amdgcn/bitcode/oclc_finite_only_off.bc"
diff --git a/clang/test/Modules/relocatable-modules.cpp b/clang/test/Modules/relocatable-modules.cpp
new file mode 100644
index 000000000000..c8d1e6d45566
--- /dev/null
+++ b/clang/test/Modules/relocatable-modules.cpp
@@ -0,0 +1,54 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+// RUN: cd %t
+
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header hu-01.h \
+// RUN: -fmodule-name=hu-01 -o hu-01.pcm
+
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header hu-02.h \
+// RUN: -Wno-experimental-header-units -fmodule-file=hu-01.pcm -o hu-02-abs.pcm
+
+// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header hu-02.h \
+// RUN: -Wno-experimental-header-units -fmodule-file=hu-01.pcm -o hu-02-rel.pcm \
+// RUN: -fmodule-file-home-is-cwd
+
+// RUN: %clang -module-file-info hu-02-abs.pcm | FileCheck %s --check-prefix=IMPORT-ABS -DPREFIX=%t
+// IMPORT-ABS: Imports module 'hu-01': [[PREFIX]]{{/|\\}}hu-01.pcm
+
+// RUN: %clang -module-file-info hu-02-rel.pcm | FileCheck %s --check-prefix=IMPORT-REL
+// IMPORT-REL: Imports module 'hu-01': hu-01.pcm
+
+// RUN: llvm-bcanalyzer --dump --disable-histogram %t/hu-02-abs.pcm \
+// RUN: | FileCheck %s --check-prefix=INPUT-ABS -DPREFIX=%t
+// INPUT-ABS: <INPUT_FILE {{.*}}/> blob data = '[[PREFIX]]{{/|\\}}hu-02.h'
+
+// RUN: llvm-bcanalyzer --dump --disable-histogram %t/hu-02-rel.pcm \
+// RUN: | FileCheck %s --check-prefix=INPUT-REL
+// INPUT-REL: <INPUT_FILE {{.*}}/> blob data = 'hu-02.h'
+
+//--- hu-01.h
+inline void f() {}
+
+//--- hu-02.h
+import "hu-01.h";
+
+inline void g() {
+ f();
+}
+
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.cppm -o %t/a-abs.pcm
+
+// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/a.cppm -o %t/a-rel.pcm \
+// RUN: -fmodule-file-home-is-cwd
+
+// RUN: llvm-bcanalyzer --dump --disable-histogram %t/a-abs.pcm \
+// RUN: | FileCheck %s --check-prefix=M-INPUT-ABS -DPREFIX=%t
+// M-INPUT-ABS: <INPUT_FILE {{.*}}/> blob data = '[[PREFIX]]{{/|\\}}a.cppm'
+
+// RUN: llvm-bcanalyzer --dump --disable-histogram %t/a-rel.pcm \
+// RUN: | FileCheck %s --check-prefix=M-INPUT-REL
+// M-INPUT-REL: <INPUT_FILE {{.*}}/> blob data = 'a.cppm'
+
+//--- a.cppm
+export module a;
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index bf3260c6216d..b62d49e17c83 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -20207,6 +20207,16 @@ TEST_F(FormatTest, AlignConsecutiveDeclarations) {
"double b();",
AlignmentLeft);
+ auto Style = AlignmentLeft;
+ Style.AlignConsecutiveDeclarations.AlignFunctionPointers = true;
+ Style.BinPackParameters = FormatStyle::BPPS_OnePerLine;
+ verifyFormat("int function_name(const wchar_t* title,\n"
+ " int x = 0,\n"
+ " long extraStyle = 0,\n"
+ " bool readOnly = false,\n"
+ " FancyClassType* module = nullptr);",
+ Style);
+
// PAS_Middle
FormatStyle AlignmentMiddle = Alignment;
AlignmentMiddle.PointerAlignment = FormatStyle::PAS_Middle;
@@ -20438,7 +20448,7 @@ TEST_F(FormatTest, AlignConsecutiveDeclarations) {
Alignment);
// See PR37175
- FormatStyle Style = getMozillaStyle();
+ Style = getMozillaStyle();
Style.AlignConsecutiveDeclarations.Enabled = true;
verifyFormat("DECOR1 /**/ int8_t /**/ DECOR2 /**/\n"
"foo(int a);",
@@ -23712,6 +23722,7 @@ TEST_F(FormatTest, FormatsLambdas) {
verifyFormat("function([]() { return b; })", MergeInline);
verifyFormat("function([]() { return b; }, a)", MergeInline);
verifyFormat("function(a, []() { return b; })", MergeInline);
+ verifyFormat("auto guard = foo{[&] { exit_status = true; }};", MergeInline);
// Check option "BraceWrapping.BeforeLambdaBody" and different state of
// AllowShortLambdasOnASingleLine
diff --git a/clang/utils/TableGen/ClangOpcodesEmitter.cpp b/clang/utils/TableGen/ClangOpcodesEmitter.cpp
index 64534a50877e..5d6d90994cf3 100644
--- a/clang/utils/TableGen/ClangOpcodesEmitter.cpp
+++ b/clang/utils/TableGen/ClangOpcodesEmitter.cpp
@@ -171,16 +171,12 @@ void ClangOpcodesEmitter::EmitDisasm(raw_ostream &OS, StringRef N,
OS << "#ifdef GET_DISASM\n";
Enumerate(R, N, [R, &OS](ArrayRef<const Record *>, const Twine &ID) {
OS << "case OP_" << ID << ":\n";
- OS << " PrintName(\"" << ID << "\");\n";
- OS << " OS << \"\\t\"";
+ OS << " Text.Op = PrintName(\"" << ID << "\");\n";
+ for (const auto *Arg : R->getValueAsListOfDefs("Args"))
+ OS << " Text.Args.push_back(printArg<" << Arg->getValueAsString("Name")
+ << ">(P, PC));\n";
- for (const auto *Arg : R->getValueAsListOfDefs("Args")) {
- OS << " << ReadArg<" << Arg->getValueAsString("Name") << ">(P, PC)";
- OS << " << \" \"";
- }
-
- OS << " << \"\\n\";\n";
- OS << " continue;\n";
+ OS << " break;\n";
});
OS << "#endif\n";
}
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 02e5e51f6d09..8d94ec3d920d 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -45,7 +45,7 @@ struct SemaRecord {
unsigned Log2LMULMask;
// Required extensions for this intrinsic.
- uint32_t RequiredExtensions[(RVV_REQ_NUM + 31) / 32];
+ RequiredExtensionBits RequiredExtensions;
// Prototype for this intrinsic.
SmallVector<PrototypeDescriptor> Prototype;
@@ -769,7 +769,6 @@ void RVVEmitter::createRVVIntrinsics(
SR.Log2LMULMask = Log2LMULMask;
- memset(SR.RequiredExtensions, 0, sizeof(SR.RequiredExtensions));
for (auto RequiredFeature : RequiredFeatures) {
unsigned RequireExt =
StringSwitch<RVVRequire>(RequiredFeature)
@@ -793,7 +792,7 @@ void RVVEmitter::createRVVIntrinsics(
.Case("Zvfbfmin", RVV_REQ_Zvfbfmin)
.Case("Zvfh", RVV_REQ_Zvfh)
.Case("Experimental", RVV_REQ_Experimental);
- SR.RequiredExtensions[RequireExt / 32] |= 1U << (RequireExt % 32);
+ SR.RequiredExtensions.set(RequireExt);
}
SR.NF = NF;
@@ -837,8 +836,7 @@ void RVVEmitter::createRVVIntrinsicRecords(std::vector<RVVIntrinsicRecord> &Out,
R.PrototypeLength = SR.Prototype.size();
R.SuffixLength = SR.Suffix.size();
R.OverloadedSuffixSize = SR.OverloadedSuffix.size();
- memcpy(R.RequiredExtensions, SR.RequiredExtensions,
- sizeof(R.RequiredExtensions));
+ R.RequiredExtensions = SR.RequiredExtensions;
R.TypeRangeMask = SR.TypeRangeMask;
R.Log2LMULMask = SR.Log2LMULMask;
R.NF = SR.NF;
diff --git a/flang/test/Driver/flang-ld-powerpc.f90 b/flang/test/Driver/flang-ld-powerpc.f90
index 9a6ee453a22e..a58cb1629cad 100644
--- a/flang/test/Driver/flang-ld-powerpc.f90
+++ b/flang/test/Driver/flang-ld-powerpc.f90
@@ -7,35 +7,100 @@
!! LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON, use
!! resource_dir_with_per_target_subdir as inputs.
-! Check powerpc64-ibm-aix 64-bit linking to static flang-rt
-! RUN: %flang %s -### 2>&1 \
+! Check powerpc64-ibm-aix 64-bit linking to static flang-rt by default
+! RUN: %flang -Werror %s -### 2>&1 \
! RUN: --target=powerpc64-ibm-aix \
! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \
-! RUN: | FileCheck %s --check-prefix=AIX64-LD-PER-TARGET
-
-! AIX64-LD-PER-TARGET-NOT: warning:
-! AIX64-LD-PER-TARGET: "-fc1" "-triple" "powerpc64-ibm-aix"
-! AIX64-LD-PER-TARGET-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
-! AIX64-LD-PER-TARGET: "{{.*}}ld{{(.exe)?}}"
-! AIX64-LD-PER-TARGET-NOT: "-bnso"
-! AIX64-LD-PER-TARGET-SAME: "-b64"
-! AIX64-LD-PER-TARGET-SAME: "-bpT:0x100000000" "-bpD:0x110000000"
-! AIX64-LD-PER-TARGET-SAME: "-lc"
-! AIX64-LD-PER-TARGET-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}powerpc64-ibm-aix{{/|\\\\}}libflang_rt.runtime.a"
-! AIX64-LD-PER-TARGET-SAME: "-lm"
-! AIX64-LD-PER-TARGET-SAME: "-lpthread"
-
-! Check powerpc64le-unknown-linux-gnu 64-bit linking to static flang-rt
-! RUN: %flang %s -### 2>&1 \
+! RUN: | FileCheck %s --check-prefix=AIX64-LD-PER-TARGET-DEFAULT
+
+! AIX64-LD-PER-TARGET-DEFAULT: "-fc1" "-triple" "powerpc64-ibm-aix"
+! AIX64-LD-PER-TARGET-DEFAULT-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+! AIX64-LD-PER-TARGET-DEFAULT: "{{.*}}ld{{(.exe)?}}"
+! AIX64-LD-PER-TARGET-DEFAULT-NOT: "-bnso"
+! AIX64-LD-PER-TARGET-DEFAULT-SAME: "-b64"
+! AIX64-LD-PER-TARGET-DEFAULT-SAME: "-bpT:0x100000000" "-bpD:0x110000000"
+! AIX64-LD-PER-TARGET-DEFAULT-SAME: "-lc"
+! AIX64-LD-PER-TARGET-DEFAULT-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}powerpc64-ibm-aix{{/|\\\\}}libflang_rt.runtime.a"
+! AIX64-LD-PER-TARGET-DEFAULT-SAME: "-lm"
+! AIX64-LD-PER-TARGET-DEFAULT-SAME: "-lpthread"
+
+
+! Check powerpc64-ibm-aix 64-bit linking to static flang-rt by option
+! RUN: %flang -static-libflangrt -Werror %s -### 2>&1 \
+! RUN: --target=powerpc64-ibm-aix \
+! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \
+! RUN: | FileCheck %s --check-prefix=AIX64-LD-PER-TARGET-STATIC
+
+! AIX64-LD-PER-TARGET-STATIC: "-fc1" "-triple" "powerpc64-ibm-aix"
+! AIX64-LD-PER-TARGET-STATIC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+! AIX64-LD-PER-TARGET-STATIC: "{{.*}}ld{{(.exe)?}}"
+! AIX64-LD-PER-TARGET-STATIC-NOT: "-bnso"
+! AIX64-LD-PER-TARGET-STATIC-SAME: "-b64"
+! AIX64-LD-PER-TARGET-STATIC-SAME: "-bpT:0x100000000" "-bpD:0x110000000"
+! AIX64-LD-PER-TARGET-STATIC-SAME: "-lc"
+! AIX64-LD-PER-TARGET-STATIC-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}powerpc64-ibm-aix{{/|\\\\}}libflang_rt.runtime.a"
+! AIX64-LD-PER-TARGET-STATIC-SAME: "-lm"
+! AIX64-LD-PER-TARGET-STATIC-SAME: "-lpthread"
+
+
+! Check powerpc64-ibm-aix 64-bit linking to shared flang-rt by option
+! RUN: %flang -shared-libflangrt -Werror %s -### 2>&1 \
+! RUN: --target=powerpc64-ibm-aix \
+! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \
+! RUN: | FileCheck %s --check-prefix=AIX64-LD-PER-TARGET-SHARED
+
+! AIX64-LD-PER-TARGET-SHARED: "-fc1" "-triple" "powerpc64-ibm-aix"
+! AIX64-LD-PER-TARGET-SHARED-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+! AIX64-LD-PER-TARGET-SHARED: "{{.*}}ld{{(.exe)?}}"
+! AIX64-LD-PER-TARGET-SHARED-NOT: "-bnso"
+! AIX64-LD-PER-TARGET-SHARED-SAME: "-b64"
+! AIX64-LD-PER-TARGET-SHARED-SAME: "-bpT:0x100000000" "-bpD:0x110000000"
+! AIX64-LD-PER-TARGET-SHARED-SAME: "-lc"
+! AIX64-LD-PER-TARGET-SHARED-SAME: "-lflang_rt.runtime"
+! AIX64-LD-PER-TARGET-SHARED-SAME: "-lm"
+! AIX64-LD-PER-TARGET-SHARED-SAME: "-lpthread"
+
+
+! Check powerpc64le-unknown-linux-gnu 64-bit linking to shared flang-rt by default
+! RUN: %flang -Werror %s -### 2>&1 \
+! RUN: --target=powerpc64le-unknown-linux-gnu \
+! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \
+! RUN: | FileCheck %s --check-prefixes=LOP64-LD-PER-TARGET-DEFAULT
+
+! LOP64-LD-PER-TARGET-DEFAULT: "-fc1" "-triple" "powerpc64le-unknown-linux-gnu"
+! LOP64-LD-PER-TARGET-DEFAULT-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+! LOP64-LD-PER-TARGET-DEFAULT: "{{.*}}ld{{(.exe)?}}"
+! LOP64-LD-PER-TARGET-DEFAULT-NOT: "-bnso"
+! LOP64-LD-PER-TARGET-DEFAULT-SAME: "-lflang_rt.runtime"
+! LOP64-LD-PER-TARGET-DEFAULT-SAME: "-lm"
+! LOP64-LD-PER-TARGET-DEFAULT-SAME: "-lc"
+
+
+! Check powerpc64le-unknown-linux-gnu 64-bit linking to static flang-rt by option
+! RUN: %flang -static-libflangrt -Werror %s -### 2>&1 \
! RUN: --target=powerpc64le-unknown-linux-gnu \
! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \
-! RUN: | FileCheck %s --check-prefixes=LOP64-LD-PER-TARGET
-
-! LOP64-LD-PER-TARGET-NOT: warning:
-! LOP64-LD-PER-TARGET: "-fc1" "-triple" "powerpc64le-unknown-linux-gnu"
-! LOP64-LD-PER-TARGET-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
-! LOP64-LD-PER-TARGET: "{{.*}}ld{{(.exe)?}}"
-! LOP64-LD-PER-TARGET-NOT: "-bnso"
-! LOP64-LD-PER-TARGET-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}powerpc64le-unknown-linux-gnu{{/|\\\\}}libflang_rt.runtime.a"
-! LOP64-LD-PER-TARGET-SAME: "-lm"
-! LOP64-LD-PER-TARGET-SAME: "-lc"
+! RUN: | FileCheck %s --check-prefixes=LOP64-LD-PER-TARGET-STATIC
+
+! LOP64-LD-PER-TARGET-STATIC: "-fc1" "-triple" "powerpc64le-unknown-linux-gnu"
+! LOP64-LD-PER-TARGET-STATIC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+! LOP64-LD-PER-TARGET-STATIC: "{{.*}}ld{{(.exe)?}}"
+! LOP64-LD-PER-TARGET-STATIC-NOT: "-bnso"
+! LOP64-LD-PER-TARGET-STATIC-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}powerpc64le-unknown-linux-gnu{{/|\\\\}}libflang_rt.runtime.a"
+! LOP64-LD-PER-TARGET-STATIC-SAME: "-lm"
+! LOP64-LD-PER-TARGET-STATIC-SAME: "-lc"
+
+
+! Check powerpc64le-unknown-linux-gnu 64-bit linking to shared flang-rt by option
+! RUN: %flang -shared-libflangrt -Werror %s -### 2>&1 \
+! RUN: --target=powerpc64le-unknown-linux-gnu \
+! RUN: -resource-dir=%S/../../../clang/test/Driver/Inputs/resource_dir_with_per_target_subdir \
+! RUN: | FileCheck %s --check-prefixes=LOP64-LD-PER-TARGET-SHARED
+
+! LOP64-LD-PER-TARGET-SHARED: "-fc1" "-triple" "powerpc64le-unknown-linux-gnu"
+! LOP64-LD-PER-TARGET-SHARED-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+! LOP64-LD-PER-TARGET-SHARED: "{{.*}}ld{{(.exe)?}}"
+! LOP64-LD-PER-TARGET-SHARED-NOT: "-bnso"
+! LOP64-LD-PER-TARGET-SHARED-SAME: "-lflang_rt.runtime"
+! LOP64-LD-PER-TARGET-SHARED-SAME: "-lm"
+! LOP64-LD-PER-TARGET-SHARED-SAME: "-lc"
diff --git a/flang/test/Driver/linker-flags.f90 b/flang/test/Driver/linker-flags.f90
index 20104276d2e4..ad48ea1b9e9b 100644
--- a/flang/test/Driver/linker-flags.f90
+++ b/flang/test/Driver/linker-flags.f90
@@ -2,15 +2,16 @@
! invocation. These libraries are added on top of other standard runtime
! libraries that the Clang driver will include.
-! RUN: %flang -### --target=ppc64le-linux-gnu %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,UNIX,UNIX-F128NONE
-! RUN: %flang -### --target=aarch64-apple-darwin %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,DARWIN,DARWIN-F128%f128-lib
+! RUN: %flang -### --target=ppc64le-linux-gnu %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,UNIX,UNIX-F128%f128-lib
! RUN: %flang -### --target=sparc-sun-solaris2.11 %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,UNIX,SOLARIS-F128%f128-lib
! RUN: %flang -### --target=x86_64-unknown-freebsd %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,BSD,BSD-F128%f128-lib
! RUN: %flang -### --target=x86_64-unknown-netbsd %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,BSD,BSD-F128%f128-lib
! RUN: %flang -### --target=x86_64-unknown-openbsd %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,BSD,BSD-F128%f128-lib
! RUN: %flang -### --target=x86_64-unknown-dragonfly %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,BSD,BSD-F128%f128-lib
+! RUN: %flang -### --target=aarch64-apple-darwin %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,DARWIN,DARWIN-F128%f128-lib
! RUN: %flang -### --target=x86_64-unknown-haiku %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,HAIKU,HAIKU-F128%f128-lib
! RUN: %flang -### --target=x86_64-windows-gnu %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,MINGW,MINGW-F128%f128-lib
+
! RUN: %flang -### -rtlib=compiler-rt --target=aarch64-linux-gnu %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,UNIX,COMPILER-RT
! NOTE: Clang's driver library, clangDriver, usually adds 'oldnames' on Windows,
@@ -18,6 +19,16 @@
! additional dependencies. Make sure its not added.
! RUN: %flang -### --target=aarch64-windows-msvc -fuse-ld= %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,MSVC --implicit-check-not oldnames
+! RUN: %flang -### --target=ppc64le-linux-gnu -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=UNIX-STATIC-FLANGRT
+! RUN: %flang -### --target=sparc-sun-solaris2.11 -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=UNIX-STATIC-FLANGRT
+! RUN: %flang -### --target=x86_64-unknown-freebsd -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=BSD-STATIC-FLANGRT
+! RUN: %flang -### --target=x86_64-unknown-netbsd -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=BSD-STATIC-FLANGRT
+! RUN: %flang -### --target=x86_64-unknown-openbsd -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=BSD-STATIC-FLANGRT
+! RUN: %flang -### --target=x86_64-unknown-dragonfly -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=CHECK,BSD-STATIC-FLANGRT
+! RUN: %flang -### --target=aarch64-apple-darwin -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=DARWIN-STATIC-FLANGRT
+! RUN: %flang -### --target=x86_64-unknown-haiku -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=HAIKU-STATIC-FLANGRT
+! RUN: %flang -### --target=x86_64-windows-gnu -static-libflangrt %S/Inputs/hello.f90 2>&1 | FileCheck %s --check-prefixes=MINGW-STATIC-FLANGRT
+
! Compiler invocation to generate the object file
! CHECK-LABEL: {{.*}} "-emit-obj"
! CHECK-SAME: "-o" "[[object_file:.*\.o]]" {{.*}}Inputs/hello.f90
@@ -35,6 +46,7 @@
! SOLARIS-F128LIBQUADMATH-SAME: "-lflang_rt.quadmath" "-z" "ignore" "-lquadmath" "-z" "record"
! UNIX-SAME: "-lflang_rt.runtime" "-lm"
! COMPILER-RT: "{{.*}}{{\\|/}}libclang_rt.builtins.a"
+! UNIX-STATIC-FLANGRT: "{{.*}}{{\\|/}}libflang_rt.runtime.a"
! BSD-LABEL: "{{.*}}ld{{(\.exe)?}}"
! BSD-SAME: "[[object_file]]"
@@ -42,24 +54,28 @@
! BSD-F128LIBQUADMATH-SAME: "-lflang_rt.quadmath" "--as-needed" "-lquadmath" "--no-as-needed"
! BSD-SAME: -lflang_rt.runtime
! BSD-SAME: -lexecinfo
+! BSD-STATIC-FLANGRT: "{{.*}}{{\\|/}}libflang_rt.runtime.a"
! DARWIN-LABEL: "{{.*}}ld{{(\.exe)?}}"
! DARWIN-SAME: "[[object_file]]"
! DARWIN-F128NONE-NOT: libflang_rt.quadmath
! DARWIN-F128LIBQUADMATH-SAME: "-lflang_rt.quadmath" "--as-needed" "-lquadmath" "--no-as-needed"
! DARWIN-SAME: -lflang_rt.runtime
+! DARWIN-STATIC-FLANGRT: "{{.*}}{{\\|/}}libclang_rt.runtime_osx.a"
! HAIKU-LABEL: "{{.*}}ld{{(\.exe)?}}"
! HAIKU-SAME: "[[object_file]]"
! HAIKU-F128NONE-NOT: libflang_rt.quadmath
! HAIKU-F128LIBQUADMATH-SAME: "-lflang_rt.quadmath" "--as-needed" "-lquadmath" "--no-as-needed"
! HAIKU-SAME: "-lflang_rt.runtime"
+! HAIKU-STATIC-FLANGRT: "{{.*}}{{\\|/}}libflang_rt.runtime.a"
! MINGW-LABEL: "{{.*}}ld{{(\.exe)?}}"
! MINGW-SAME: "[[object_file]]"
! MINGW-F128NONE-NOT: libflang_rt.quadmath
! MINGW-F128LIBQUADMATH-SAME: "-lflang_rt.quadmath" "--as-needed" "-lquadmath" "--no-as-needed"
! MINGW-SAME: -lflang_rt.runtime
+! MINGW-STATIC-FLANGRT: "{{.*}}{{\\|/}}libflang_rt.runtime.a"
! NOTE: This also matches lld-link (when CLANG_DEFAULT_LINKER=lld) and
! any .exe suffix that is added when resolving to the full path of
diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90
index 13d605484506..335bfad4b188 100644
--- a/flang/test/Driver/omp-driver-offload.f90
+++ b/flang/test/Driver/omp-driver-offload.f90
@@ -184,7 +184,7 @@
! RUN: %flang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx900 \
! RUN: --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode %s 2>&1 | \
! RUN: FileCheck %s --check-prefix=ROCM-DEVICE-LIB
-! ROCM-DEVICE-LIB: "-fc1" {{.*}}ocml.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_900.bc"
+! ROCM-DEVICE-LIB: "-fc1" {{.*}}ocml.bc"{{.*}}oclc_daz_opt_off.bc"{{.*}}oclc_unsafe_math_off.bc"{{.*}}oclc_finite_only_off.bc"{{.*}}oclc_correctly_rounded_sqrt_on.bc"{{.*}}oclc_wavefrontsize64_on.bc"{{.*}}oclc_isa_version_900.bc"
! Test -fopenmp-force-usm option without offload
! RUN: %flang -S -### %s -o %t 2>&1 \
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index b1d93caab008..622c8efd7938 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO: __builtin_popcountg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
-// refactor this code to exclusively use __builtin_popcountg.
-
#ifndef _LIBCPP___BIT_POPCOUNT_H
#define _LIBCPP___BIT_POPCOUNT_H
@@ -27,50 +24,10 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT {
- return __builtin_popcount(__x);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT {
- return __builtin_popcountl(__x);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT {
- return __builtin_popcountll(__x);
-}
-
-template <class _Tp>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __popcount_impl(_Tp __t) _NOEXCEPT {
- if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned int)) {
- return std::__libcpp_popcount(static_cast<unsigned int>(__t));
- } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long)) {
- return std::__libcpp_popcount(static_cast<unsigned long>(__t));
- } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long long)) {
- return std::__libcpp_popcount(static_cast<unsigned long long>(__t));
- } else {
-#if _LIBCPP_STD_VER == 11
- return __t != 0 ? std::__libcpp_popcount(static_cast<unsigned long long>(__t)) +
- std::__popcount_impl<_Tp>(__t >> numeric_limits<unsigned long long>::digits)
- : 0;
-#else
- int __ret = 0;
- while (__t != 0) {
- __ret += std::__libcpp_popcount(static_cast<unsigned long long>(__t));
- __t >>= std::numeric_limits<unsigned long long>::digits;
- }
- return __ret;
-#endif
- }
-}
-
template <class _Tp>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __popcount(_Tp __t) _NOEXCEPT {
static_assert(is_unsigned<_Tp>::value, "__popcount only works with unsigned types");
-#if __has_builtin(__builtin_popcountg) // TODO (LLVM 21): This can be dropped once we only support Clang >= 19.
return __builtin_popcountg(__t);
-#else
- return std::__popcount_impl(__t);
-#endif
}
#if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__stop_token/atomic_unique_lock.h b/libcxx/include/__stop_token/atomic_unique_lock.h
index a698260ac7bb..05e8f223167f 100644
--- a/libcxx/include/__stop_token/atomic_unique_lock.h
+++ b/libcxx/include/__stop_token/atomic_unique_lock.h
@@ -28,7 +28,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// and LockedBit is the value of State when the lock bit is set, e.g 1 << 2
template <class _State, _State _LockedBit>
class _LIBCPP_AVAILABILITY_SYNC __atomic_unique_lock {
- static_assert(std::__libcpp_popcount(static_cast<unsigned long long>(_LockedBit)) == 1,
+ static_assert(std::__popcount(static_cast<unsigned long long>(_LockedBit)) == 1,
"LockedBit must be an integer where only one bit is set");
std::atomic<_State>& __state_;
diff --git a/libunwind/docs/BuildingLibunwind.rst b/libunwind/docs/BuildingLibunwind.rst
index 8b4f1207d4ba..c231587fd502 100644
--- a/libunwind/docs/BuildingLibunwind.rst
+++ b/libunwind/docs/BuildingLibunwind.rst
@@ -91,7 +91,7 @@ libunwind specific options
.. option:: LIBUNWIND_ENABLE_WERROR:BOOL
- **Default**: ``ON``
+ **Default**: ``OFF``
Compile with -Werror
diff --git a/lldb/include/lldb/Symbol/SymbolContext.h b/lldb/include/lldb/Symbol/SymbolContext.h
index 8b6317c6f33c..4f8405f1f0db 100644
--- a/lldb/include/lldb/Symbol/SymbolContext.h
+++ b/lldb/include/lldb/Symbol/SymbolContext.h
@@ -307,6 +307,13 @@ public:
SymbolContext &next_frame_sc,
Address &inlined_frame_addr) const;
+ /// If available, will return the function name according to the specified
+ /// mangling preference. If this object represents an inlined function,
+ /// returns the name of the inlined function. Returns nullptr if no function
+ /// name could be determined.
+ const char *GetPossiblyInlinedFunctionName(
+ Mangled::NamePreference mangling_preference) const;
+
// Member variables
lldb::TargetSP target_sp; ///< The Target for a given query
lldb::ModuleSP module_sp; ///< The Module for a given query
diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h
index b699a90aff8e..da2c2cc451da 100644
--- a/lldb/include/lldb/Target/Language.h
+++ b/lldb/include/lldb/Target/Language.h
@@ -268,7 +268,7 @@ public:
// the reference has never been assigned
virtual bool IsUninitializedReference(ValueObject &valobj);
- virtual bool GetFunctionDisplayName(const SymbolContext *sc,
+ virtual bool GetFunctionDisplayName(const SymbolContext &sc,
const ExecutionContext *exe_ctx,
FunctionNameRepresentation representation,
Stream &s);
diff --git a/lldb/source/Core/FormatEntity.cpp b/lldb/source/Core/FormatEntity.cpp
index a9370595c11e..23e5999bd80c 100644
--- a/lldb/source/Core/FormatEntity.cpp
+++ b/lldb/source/Core/FormatEntity.cpp
@@ -1147,19 +1147,6 @@ static void PrettyPrintFunctionNameWithArgs(Stream &out_stream,
out_stream.PutChar(')');
}
-static void FormatInlinedBlock(Stream &out_stream, Block *block) {
- if (!block)
- return;
- Block *inline_block = block->GetContainingInlinedBlock();
- if (inline_block) {
- if (const InlineFunctionInfo *inline_info =
- inline_block->GetInlinedFunctionInfo()) {
- out_stream.PutCString(" [inlined] ");
- inline_info->GetName().Dump(&out_stream);
- }
- }
-}
-
static VariableListSP GetFunctionVariableList(const SymbolContext &sc) {
assert(sc.function);
@@ -1170,22 +1157,6 @@ static VariableListSP GetFunctionVariableList(const SymbolContext &sc) {
return sc.function->GetBlock(true).GetBlockVariableList(true);
}
-static char const *GetInlinedFunctionName(const SymbolContext &sc) {
- if (!sc.block)
- return nullptr;
-
- const Block *inline_block = sc.block->GetContainingInlinedBlock();
- if (!inline_block)
- return nullptr;
-
- const InlineFunctionInfo *inline_info =
- inline_block->GetInlinedFunctionInfo();
- if (!inline_info)
- return nullptr;
-
- return inline_info->GetName().AsCString(nullptr);
-}
-
static bool PrintFunctionNameWithArgs(Stream &s,
const ExecutionContext *exe_ctx,
const SymbolContext &sc) {
@@ -1194,16 +1165,11 @@ static bool PrintFunctionNameWithArgs(Stream &s,
ExecutionContextScope *exe_scope =
exe_ctx ? exe_ctx->GetBestExecutionContextScope() : nullptr;
- const char *cstr = sc.function->GetName().AsCString(nullptr);
+ const char *cstr =
+ sc.GetPossiblyInlinedFunctionName(Mangled::ePreferDemangled);
if (!cstr)
return false;
- if (const char *inlined_name = GetInlinedFunctionName(sc)) {
- s.PutCString(cstr);
- s.PutCString(" [inlined] ");
- cstr = inlined_name;
- }
-
VariableList args;
if (auto variable_list_sp = GetFunctionVariableList(sc))
variable_list_sp->AppendVariablesWithScope(eValueTypeVariableArgument,
@@ -1218,6 +1184,40 @@ static bool PrintFunctionNameWithArgs(Stream &s,
return true;
}
+static bool HandleFunctionNameWithArgs(Stream &s,const ExecutionContext *exe_ctx,
+ const SymbolContext &sc) {
+ Language *language_plugin = nullptr;
+ bool language_plugin_handled = false;
+ StreamString ss;
+ if (sc.function)
+ language_plugin = Language::FindPlugin(sc.function->GetLanguage());
+ else if (sc.symbol)
+ language_plugin = Language::FindPlugin(sc.symbol->GetLanguage());
+
+ if (language_plugin)
+ language_plugin_handled = language_plugin->GetFunctionDisplayName(
+ sc, exe_ctx, Language::FunctionNameRepresentation::eNameWithArgs, ss);
+
+ if (language_plugin_handled) {
+ s << ss.GetString();
+ return true;
+ }
+
+ if (sc.function)
+ return PrintFunctionNameWithArgs(s, exe_ctx, sc);
+
+ if (!sc.symbol)
+ return false;
+
+ const char *cstr = sc.symbol->GetName().AsCString(nullptr);
+ if (!cstr)
+ return false;
+
+ s.PutCString(cstr);
+
+ return true;
+}
+
bool FormatEntity::FormatStringRef(const llvm::StringRef &format_str, Stream &s,
const SymbolContext *sc,
const ExecutionContext *exe_ctx,
@@ -1719,63 +1719,24 @@ bool FormatEntity::Format(const Entry &entry, Stream &s,
if (language_plugin)
language_plugin_handled = language_plugin->GetFunctionDisplayName(
- sc, exe_ctx, Language::FunctionNameRepresentation::eName, ss);
+ *sc, exe_ctx, Language::FunctionNameRepresentation::eName, ss);
if (language_plugin_handled) {
s << ss.GetString();
return true;
- } else {
- const char *name = nullptr;
- if (sc->function)
- name = sc->function->GetName().AsCString(nullptr);
- else if (sc->symbol)
- name = sc->symbol->GetName().AsCString(nullptr);
-
- if (name) {
- s.PutCString(name);
- FormatInlinedBlock(s, sc->block);
- return true;
- }
}
- }
- return false;
- case Entry::Type::FunctionNameNoArgs: {
- if (!sc)
+ const char *name = sc->GetPossiblyInlinedFunctionName(
+ Mangled::NamePreference::ePreferDemangled);
+ if (!name)
return false;
- Language *language_plugin = nullptr;
- bool language_plugin_handled = false;
- StreamString ss;
- if (sc->function)
- language_plugin = Language::FindPlugin(sc->function->GetLanguage());
- else if (sc->symbol)
- language_plugin = Language::FindPlugin(sc->symbol->GetLanguage());
-
- if (language_plugin)
- language_plugin_handled = language_plugin->GetFunctionDisplayName(
- sc, exe_ctx, Language::FunctionNameRepresentation::eNameWithNoArgs,
- ss);
+ s.PutCString(name);
- if (language_plugin_handled) {
- s << ss.GetString();
- return true;
- } else {
- ConstString name;
- if (sc->function)
- name = sc->function->GetNameNoArguments();
- else if (sc->symbol)
- name = sc->symbol->GetNameNoArguments();
- if (name) {
- s.PutCString(name.GetCString());
- FormatInlinedBlock(s, sc->block);
- return true;
- }
- }
+ return true;
}
- return false;
- case Entry::Type::FunctionNameWithArgs: {
+ case Entry::Type::FunctionNameNoArgs: {
if (!sc)
return false;
@@ -1789,44 +1750,42 @@ bool FormatEntity::Format(const Entry &entry, Stream &s,
if (language_plugin)
language_plugin_handled = language_plugin->GetFunctionDisplayName(
- sc, exe_ctx, Language::FunctionNameRepresentation::eNameWithArgs, ss);
+ *sc, exe_ctx, Language::FunctionNameRepresentation::eNameWithNoArgs,
+ ss);
if (language_plugin_handled) {
s << ss.GetString();
return true;
}
- if (sc->function)
- return PrintFunctionNameWithArgs(s, exe_ctx, *sc);
-
- if (!sc->symbol)
+ const char *name = sc->GetPossiblyInlinedFunctionName(
+ Mangled::NamePreference::ePreferDemangledWithoutArguments);
+ if (!name)
return false;
- const char *cstr = sc->symbol->GetName().AsCString(nullptr);
- if (!cstr)
- return false;
+ s.PutCString(name);
- s.PutCString(cstr);
return true;
}
- case Entry::Type::FunctionMangledName: {
+ case Entry::Type::FunctionNameWithArgs: {
if (!sc)
return false;
- const char *name = nullptr;
- if (sc->symbol)
- name =
- sc->symbol->GetMangled().GetName(Mangled::ePreferMangled).AsCString();
- else if (sc->function)
- name = sc->function->GetMangled()
- .GetName(Mangled::ePreferMangled)
- .AsCString();
+ return HandleFunctionNameWithArgs(s, exe_ctx, *sc);
+ }
+ case Entry::Type::FunctionMangledName: {
+ if (!sc)
+ return false;
+
+ const char *name = sc->GetPossiblyInlinedFunctionName(
+ Mangled::NamePreference::ePreferMangled);
if (!name)
return false;
+
s.PutCString(name);
- FormatInlinedBlock(s, sc->block);
+
return true;
}
case Entry::Type::FunctionAddrOffset:
diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
index a6fdf66f13e4..9bd48ec55022 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
@@ -1707,22 +1707,6 @@ static VariableListSP GetFunctionVariableList(const SymbolContext &sc) {
return sc.function->GetBlock(true).GetBlockVariableList(true);
}
-static char const *GetInlinedFunctionName(const SymbolContext &sc) {
- if (!sc.block)
- return nullptr;
-
- const Block *inline_block = sc.block->GetContainingInlinedBlock();
- if (!inline_block)
- return nullptr;
-
- const InlineFunctionInfo *inline_info =
- inline_block->GetInlinedFunctionInfo();
- if (!inline_info)
- return nullptr;
-
- return inline_info->GetName().AsCString(nullptr);
-}
-
static bool PrintFunctionNameWithArgs(Stream &s,
const ExecutionContext *exe_ctx,
const SymbolContext &sc) {
@@ -1731,16 +1715,11 @@ static bool PrintFunctionNameWithArgs(Stream &s,
ExecutionContextScope *exe_scope =
exe_ctx ? exe_ctx->GetBestExecutionContextScope() : nullptr;
- const char *cstr = sc.function->GetName().AsCString(nullptr);
+ const char *cstr = sc.GetPossiblyInlinedFunctionName(
+ Mangled::NamePreference::ePreferDemangled);
if (!cstr)
return false;
- if (const char *inlined_name = GetInlinedFunctionName(sc)) {
- s.PutCString(cstr);
- s.PutCString(" [inlined] ");
- cstr = inlined_name;
- }
-
VariableList args;
if (auto variable_list_sp = GetFunctionVariableList(sc))
variable_list_sp->AppendVariablesWithScope(eValueTypeVariableArgument,
@@ -1757,20 +1736,18 @@ static bool PrintFunctionNameWithArgs(Stream &s,
}
bool CPlusPlusLanguage::GetFunctionDisplayName(
- const SymbolContext *sc, const ExecutionContext *exe_ctx,
+ const SymbolContext &sc, const ExecutionContext *exe_ctx,
FunctionNameRepresentation representation, Stream &s) {
switch (representation) {
case FunctionNameRepresentation::eNameWithArgs: {
- assert(sc);
-
// Print the function name with arguments in it
- if (sc->function)
- return PrintFunctionNameWithArgs(s, exe_ctx, *sc);
+ if (sc.function)
+ return PrintFunctionNameWithArgs(s, exe_ctx, sc);
- if (!sc->symbol)
+ if (!sc.symbol)
return false;
- const char *cstr = sc->symbol->GetName().AsCString(nullptr);
+ const char *cstr = sc.symbol->GetName().AsCString(nullptr);
if (!cstr)
return false;
diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
index 623d481bf117..54f5a94388b9 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
+++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
@@ -138,7 +138,7 @@ public:
ConstString
GetDemangledFunctionNameWithoutArguments(Mangled mangled) const override;
- bool GetFunctionDisplayName(const SymbolContext *sc,
+ bool GetFunctionDisplayName(const SymbolContext &sc,
const ExecutionContext *exe_ctx,
FunctionNameRepresentation representation,
Stream &s) override;
diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp
index 183947a69436..a9626bbc3777 100644
--- a/lldb/source/Symbol/SymbolContext.cpp
+++ b/lldb/source/Symbol/SymbolContext.cpp
@@ -872,6 +872,36 @@ const Symbol *SymbolContext::FindBestGlobalDataSymbol(ConstString name,
return nullptr; // no error; we just didn't find anything
}
+char const *SymbolContext::GetPossiblyInlinedFunctionName(
+ Mangled::NamePreference mangling_preference) const {
+ const char *name = nullptr;
+ if (function)
+ name = function->GetMangled().GetName(mangling_preference).AsCString();
+ else if (symbol)
+ name = symbol->GetMangled().GetName(mangling_preference).AsCString();
+
+ if (!block)
+ return name;
+
+ const Block *inline_block = block->GetContainingInlinedBlock();
+ if (!inline_block)
+ return name;
+
+ const InlineFunctionInfo *inline_info =
+ inline_block->GetInlinedFunctionInfo();
+ if (!inline_info)
+ return name;
+
+ // If we do have an inlined frame name, return that.
+ if (char const *inline_name =
+ inline_info->GetMangled().GetName(mangling_preference).AsCString())
+ return inline_name;
+
+ // Sometimes an inline frame may not have mangling information,
+ // but does have a valid name.
+ return inline_info->GetName().AsCString();
+}
+
//
// SymbolContextSpecifier
//
diff --git a/lldb/source/Target/Language.cpp b/lldb/source/Target/Language.cpp
index a75894ffa4b3..86754c251cd9 100644
--- a/lldb/source/Target/Language.cpp
+++ b/lldb/source/Target/Language.cpp
@@ -510,7 +510,7 @@ bool Language::IsNilReference(ValueObject &valobj) { return false; }
bool Language::IsUninitializedReference(ValueObject &valobj) { return false; }
-bool Language::GetFunctionDisplayName(const SymbolContext *sc,
+bool Language::GetFunctionDisplayName(const SymbolContext &sc,
const ExecutionContext *exe_ctx,
FunctionNameRepresentation representation,
Stream &s) {
diff --git a/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp b/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp
index 7ad72b4880d7..64e2a5b47967 100644
--- a/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp
+++ b/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/main.cpp
@@ -70,8 +70,8 @@ __attribute__((noinline)) void func6(int &sink, int x) {
__attribute__((noinline)) void func7(int &sink, int x) {
//% self.filecheck("bt", "main.cpp", "-check-prefix=FUNC7-BT")
// FUNC7-BT: func7
- // FUNC7-BT-NEXT: [inlined] func8_inlined
- // FUNC7-BT-NEXT: [inlined] func9_inlined
+ // FUNC7-BT-NEXT: func8_inlined
+ // FUNC7-BT-NEXT: func9_inlined
// FUNC7-BT-NEXT: func10
use<int &, int>(sink, x);
use<int &, int>(dummy, 0);
diff --git a/lldb/test/API/functionalities/tail_call_frames/inlining_and_tail_calls/main.cpp b/lldb/test/API/functionalities/tail_call_frames/inlining_and_tail_calls/main.cpp
index 9829e0246fc2..0a7d365d776c 100644
--- a/lldb/test/API/functionalities/tail_call_frames/inlining_and_tail_calls/main.cpp
+++ b/lldb/test/API/functionalities/tail_call_frames/inlining_and_tail_calls/main.cpp
@@ -1,13 +1,13 @@
volatile int x;
+// clang-format off
void __attribute__((noinline)) tail_call_sink() {
x++; //% self.filecheck("bt", "main.cpp", "-check-prefix=TAIL-CALL-SINK")
// TAIL-CALL-SINK: frame #0: 0x{{[0-9a-f]+}} a.out`tail_call_sink() at main.cpp:[[@LINE-1]]:4
- // TAIL-CALL-SINK-NEXT: func3{{.*}} [artificial]
+ // TAIL-CALL-SINK-NEXT: inlinable_function_which_tail_calls() at main.cpp{{.*}} [artificial]
// TAIL-CALL-SINK-NEXT: main{{.*}}
-
- // TODO: The backtrace should include inlinable_function_which_tail_calls.
}
+// clang-format on
void __attribute__((always_inline)) inlinable_function_which_tail_calls() {
tail_call_sink();
@@ -17,13 +17,15 @@ void __attribute__((noinline)) func3() {
inlinable_function_which_tail_calls();
}
+// clang-format off
void __attribute__((always_inline)) inline_sink() {
x++; //% self.filecheck("bt", "main.cpp", "-check-prefix=INLINE-SINK")
- // INLINE-SINK: frame #0: 0x{{[0-9a-f]+}} a.out`func2() [inlined] inline_sink() at main.cpp:[[@LINE-1]]:4
+ // INLINE-SINK: frame #0: 0x{{[0-9a-f]+}} a.out`inline_sink() at main.cpp:[[@LINE-1]]:4
// INLINE-SINK-NEXT: func2{{.*}}
// INLINE-SINK-NEXT: func1{{.*}} [artificial]
// INLINE-SINK-NEXT: main{{.*}}
}
+// clang-format on
void __attribute__((noinline)) func2() { inline_sink(); /* inlined */ }
diff --git a/lldb/test/API/python_api/target/read-instructions-flavor/TestTargetReadInstructionsFlavor.py b/lldb/test/API/python_api/target/read-instructions-flavor/TestTargetReadInstructionsFlavor.py
index 12805985798d..f488d4f421c9 100644
--- a/lldb/test/API/python_api/target/read-instructions-flavor/TestTargetReadInstructionsFlavor.py
+++ b/lldb/test/API/python_api/target/read-instructions-flavor/TestTargetReadInstructionsFlavor.py
@@ -7,6 +7,7 @@ from lldbsuite.test.lldbtest import *
class TargetReadInstructionsFlavor(TestBase):
+ @skipIfDarwin
@skipIfWindows
@skipIf(archs=no_match(["x86_64", "x86", "i386"]))
def test_read_instructions_with_flavor(self):
diff --git a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test
index 0c3275c571b3..2ea6594643c9 100644
--- a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test
+++ b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test
@@ -12,5 +12,5 @@ run
frame recognizer info 0
# CHECK: frame 0 is recognized by Verbose Trap StackFrame Recognizer
frame info
-# CHECK: frame #0: {{.*}}`std::recursively_aborts(int) {{.*}} at verbose_trap-in-stl-max-depth.cpp
+# CHECK: frame #0: {{.*}}`__clang_trap_msg$Error$max depth at verbose_trap-in-stl-max-depth.cpp
q
diff --git a/lldb/test/Shell/Settings/TestFrameFormatName.test b/lldb/test/Shell/Settings/TestFrameFormatName.test
index caa3242527c6..110daceb47b4 100644
--- a/lldb/test/Shell/Settings/TestFrameFormatName.test
+++ b/lldb/test/Shell/Settings/TestFrameFormatName.test
@@ -30,7 +30,7 @@ c
c
# NAME_WITH_ARGS: frame Foo::returns_func_ptr<int>(this={{.*}}, (null)={{.*}})
c
-# NAME_WITH_ARGS: frame main [inlined] inlined_foo(str="bar")
+# NAME_WITH_ARGS: frame inlined_foo(str="bar")
q
#--- name.input
@@ -38,18 +38,18 @@ q
settings set -f frame-format "frame ${function.name}\n"
break set -n inlined_foo
run
-# NAME: frame main [inlined] inlined_foo(char const*)
+# NAME: frame inlined_foo(char const*)
#--- name_without_args.input
# RUN: %lldb -b -s %t/name_without_args.input %t.out | FileCheck %s --check-prefix=NAME_WITHOUT_ARGS
settings set -f frame-format "frame ${function.name-without-args}\n"
break set -n inlined_foo
run
-# NAME_WITHOUT_ARGS: frame main [inlined] inlined_foo(char const*)
+# NAME_WITHOUT_ARGS: frame inlined_foo
#--- mangled_name.input
# RUN: %lldb -b -s %t/mangled_name.input %t.out | FileCheck %s --check-prefix=MANGLED_NAME
settings set -f frame-format "frame ${function.mangled-name}\n"
break set -n inlined_foo
run
-# MANGLED_NAME: frame main [inlined] inlined_foo(char const*)
+# MANGLED_NAME: frame _Z11inlined_fooPKc
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp b/lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp
index 906f3d7dff0a..4a06e6350b00 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/inline_sites_live.cpp
@@ -23,11 +23,11 @@ int main(int argc, char** argv) {
}
// CHECK: * thread #1, {{.*}}stop reason = breakpoint 1
-// CHECK-NEXT: frame #0: {{.*}}`main [inlined] bar(param=2)
+// CHECK-NEXT: frame #0: {{.*}}`bar(param=2)
// CHECK: (lldb) expression param
// CHECK-NEXT: (int) $0 = 2
// CHECK: * thread #1, {{.*}}stop reason = breakpoint 2
-// CHECK-NEXT: frame #0: {{.*}}`main [inlined] foo(param=1)
+// CHECK-NEXT: frame #0: {{.*}}`foo(param=1)
// CHECK: (lldb) expression param
// CHECK-NEXT: (int) $1 = 1
// CHECK-NEXT: (lldb) expression local
diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
index ffcc718b4777..bf914c379e80 100644
--- a/llvm/cmake/modules/TableGen.cmake
+++ b/llvm/cmake/modules/TableGen.cmake
@@ -68,7 +68,9 @@ function(tablegen project ofn)
# char literals, instead. If we're cross-compiling, then conservatively assume
# that the source might be consumed by MSVC.
# [1] https://docs.microsoft.com/en-us/cpp/cpp/compiler-limits?view=vs-2017
- if (MSVC AND project STREQUAL LLVM)
+ # Don't pass this flag to mlir-src-sharder, since it doesn't support the
+ # flag, and it doesn't need it.
+ if (MSVC AND NOT "${project}" STREQUAL "MLIR_SRC_SHARDER")
list(APPEND LLVM_TABLEGEN_FLAGS "--long-string-literals=0")
endif()
if (CMAKE_GENERATOR MATCHES "Visual Studio")
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 25fc8a00f7dd..01e51accec9d 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -354,6 +354,7 @@ struct APFloatBase {
static bool semanticsHasInf(const fltSemantics &);
static bool semanticsHasNaN(const fltSemantics &);
static bool isIEEELikeFP(const fltSemantics &);
+ static bool hasSignBitInMSB(const fltSemantics &);
// Returns true if any number described by \p Src can be precisely represented
// by a normal (not subnormal) value in \p Dst.
diff --git a/llvm/include/llvm/CodeGen/BranchRelaxation.h b/llvm/include/llvm/CodeGen/BranchRelaxation.h
new file mode 100644
index 000000000000..2007cf05b3aa
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/BranchRelaxation.h
@@ -0,0 +1,25 @@
+//===- llvm/CodeGen/BranchRelaxation.h --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BRANCHRELAXATION_H
+#define LLVM_CODEGEN_BRANCHRELAXATION_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class BranchRelaxationPass : public PassInfoMixin<BranchRelaxationPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+ static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_BRANCHRELAXATION_H
diff --git a/llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h b/llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h
new file mode 100644
index 000000000000..bbd5b8b430bf
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h
@@ -0,0 +1,30 @@
+//===- llvm/CodeGen/RemoveLoadsIntoFakeUses.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REMOVELOADSINTOFAKEUSES_H
+#define LLVM_CODEGEN_REMOVELOADSINTOFAKEUSES_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class RemoveLoadsIntoFakeUsesPass
+ : public PassInfoMixin<RemoveLoadsIntoFakeUsesPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+
+ MachineFunctionProperties getRequiredProperties() const {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_REMOVELOADSINTOFAKEUSES_H
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 6180c53a9a94..ab3eaa92548c 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -467,10 +467,7 @@ public:
/// Returns true if Reg contains RegUnit.
bool hasRegUnit(MCRegister Reg, MCRegUnit RegUnit) const {
- for (MCRegUnit Unit : regunits(Reg))
- if (Unit == RegUnit)
- return true;
- return false;
+ return llvm::is_contained(regunits(Reg), RegUnit);
}
/// Returns the original SrcReg unless it is the target of a copy-like
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h
index fdc97249d8e5..aa47bd9cd2cd 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVDWARFReader.h
@@ -102,11 +102,7 @@ class LVDWARFReader final : public LVBinaryReader {
}
// Remove offset from global map.
- void removeGlobalOffset(LVOffset Offset) {
- LVOffsetElementMap::iterator Iter = GlobalOffsets.find(Offset);
- if (Iter != GlobalOffsets.end())
- GlobalOffsets.erase(Iter);
- }
+ void removeGlobalOffset(LVOffset Offset) { GlobalOffsets.erase(Offset); }
// Get the location information for DW_AT_data_member_location.
void processLocationMember(dwarf::Attribute Attr,
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 9fd5e7676b19..3242ccff7f87 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -61,7 +61,7 @@ void initializeBasicAAWrapperPassPass(PassRegistry &);
void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry &);
void initializeBranchFolderLegacyPass(PassRegistry &);
void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &);
-void initializeBranchRelaxationPass(PassRegistry &);
+void initializeBranchRelaxationLegacyPass(PassRegistry &);
void initializeBreakCriticalEdgesPass(PassRegistry &);
void initializeBreakFalseDepsPass(PassRegistry &);
void initializeCanonicalizeFreezeInLoopsPass(PassRegistry &);
@@ -267,7 +267,7 @@ void initializeRegionOnlyViewerPass(PassRegistry &);
void initializeRegionPrinterPass(PassRegistry &);
void initializeRegionViewerPass(PassRegistry &);
void initializeRegisterCoalescerLegacyPass(PassRegistry &);
-void initializeRemoveLoadsIntoFakeUsesPass(PassRegistry &);
+void initializeRemoveLoadsIntoFakeUsesLegacyPass(PassRegistry &);
void initializeRemoveRedundantDebugValuesLegacyPass(PassRegistry &);
void initializeRenameIndependentSubregsLegacyPass(PassRegistry &);
void initializeReplaceWithVeclibLegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index 5953de30c2eb..10eabd41e80f 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -109,11 +109,10 @@ public:
return false;
}
- virtual bool evaluateTargetFixup(const MCAssembler &Asm,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target,
- const MCSubtargetInfo *STI, uint64_t &Value,
- bool &WasForced) {
+ virtual bool evaluateTargetFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCFragment *DF, const MCValue &Target,
+ const MCSubtargetInfo *STI,
+ uint64_t &Value) {
llvm_unreachable("Need to implement hook if target has custom fixups");
}
@@ -153,11 +152,9 @@ public:
/// Target specific predicate for whether a given fixup requires the
/// associated instruction to be relaxed.
- virtual bool fixupNeedsRelaxationAdvanced(const MCAssembler &Asm,
- const MCFixup &Fixup, bool Resolved,
- uint64_t Value,
- const MCRelaxableFragment *DF,
- const bool WasForced) const;
+ virtual bool fixupNeedsRelaxationAdvanced(const MCAssembler &,
+ const MCFixup &, const MCValue &,
+ uint64_t, bool Resolved) const;
/// Simple predicate for targets where !Resolved implies requiring relaxation
virtual bool fixupNeedsRelaxation(const MCFixup &Fixup,
@@ -225,11 +222,6 @@ public:
return 0;
}
- /// Check whether a given symbol has been flagged with MICROMIPS flag.
- virtual bool isMicroMips(const MCSymbol *Sym) const {
- return false;
- }
-
bool isDarwinCanonicalPersonality(const MCSymbol *Sym) const;
};
diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h
index a68eb49fda28..57143e3d59b4 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -95,14 +95,11 @@ private:
/// evaluates to.
/// \param Value [out] On return, the value of the fixup as currently laid
/// out.
- /// \param WasForced [out] On return, the value in the fixup is set to the
- /// correct value if WasForced is true, even if evaluateFixup returns false.
- /// \return Whether the fixup value was fully resolved. This is true if the
- /// \p Value result is fixed, otherwise the value may change due to
+ /// \param RecordReloc Record relocation if needed.
/// relocation.
bool evaluateFixup(const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, const MCSubtargetInfo *STI,
- uint64_t &Value, bool &WasForced) const;
+ uint64_t &Value, bool RecordReloc) const;
/// Check whether a fixup can be satisfied, or whether it needs to be relaxed
/// (increased in size, in order to hold its value correctly).
@@ -127,9 +124,6 @@ private:
bool relaxCVDefRange(MCCVDefRangeFragment &DF);
bool relaxPseudoProbeAddr(MCPseudoProbeAddrFragment &DF);
- std::tuple<MCValue, uint64_t, bool>
- handleFixup(MCFragment &F, const MCFixup &Fixup, const MCSubtargetInfo *STI);
-
public:
/// Construct a new assembler instance.
//
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 25ca982916ff..6e2c2683730c 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -74,6 +74,7 @@
#include "llvm/CodeGen/RegUsageInfoPropagate.h"
#include "llvm/CodeGen/RegisterCoalescerPass.h"
#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h"
#include "llvm/CodeGen/RemoveRedundantDebugValues.h"
#include "llvm/CodeGen/RenameIndependentSubregs.h"
#include "llvm/CodeGen/ReplaceWithVeclib.h"
@@ -1003,6 +1004,7 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::addMachinePasses(
addPass(FuncletLayoutPass());
+ addPass(RemoveLoadsIntoFakeUsesPass());
addPass(StackMapLivenessPass());
addPass(LiveDebugValuesPass(
getTM<TargetMachine>().Options.ShouldEmitDebugEntryValues()));
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 232d5506f5b3..94febae16eee 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -137,6 +137,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", VirtRegMapAnalysis())
#ifndef MACHINE_FUNCTION_PASS
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
#endif
+MACHINE_FUNCTION_PASS("branch-relaxation", BranchRelaxationPass())
MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass())
MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass())
MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass())
@@ -181,6 +182,7 @@ MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass())
MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass())
MACHINE_FUNCTION_PASS("register-coalescer", RegisterCoalescerPass())
MACHINE_FUNCTION_PASS("rename-independent-subregs", RenameIndependentSubregsPass())
+MACHINE_FUNCTION_PASS("remove-loads-into-fake-uses", RemoveLoadsIntoFakeUsesPass())
MACHINE_FUNCTION_PASS("remove-redundant-debug-values", RemoveRedundantDebugValuesPass())
MACHINE_FUNCTION_PASS("require-all-machine-function-properties",
RequireAllMachineFunctionPropertiesPass())
@@ -310,7 +312,6 @@ DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass)
DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass)
DUMMY_MACHINE_FUNCTION_PASS("regallocscoringpass", RegAllocScoringPass)
DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass)
-DUMMY_MACHINE_FUNCTION_PASS("remove-loads-into-fake-uses", RemoveLoadsIntoFakeUsesPass)
DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass)
DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass)
DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", StackFrameLayoutAnalysisPass)
diff --git a/llvm/include/llvm/TableGen/Main.h b/llvm/include/llvm/TableGen/Main.h
index e8c60e286990..5f68be188de7 100644
--- a/llvm/include/llvm/TableGen/Main.h
+++ b/llvm/include/llvm/TableGen/Main.h
@@ -13,6 +13,7 @@
#ifndef LLVM_TABLEGEN_MAIN_H
#define LLVM_TABLEGEN_MAIN_H
+#include "llvm/Support/CommandLine.h"
#include <functional>
namespace llvm {
@@ -27,6 +28,10 @@ using TableGenMainFn = bool(raw_ostream &OS, const RecordKeeper &Records);
int TableGenMain(const char *argv0,
std::function<TableGenMainFn> MainFn = nullptr);
+/// Controls emitting large character arrays as strings or character arrays.
+/// Typically set to false when building with MSVC.
+extern cl::opt<bool> EmitLongStrLiterals;
+
} // end namespace llvm
#endif // LLVM_TABLEGEN_MAIN_H
diff --git a/llvm/include/llvm/TableGen/StringToOffsetTable.h b/llvm/include/llvm/TableGen/StringToOffsetTable.h
index e716411514bd..21795644d4bd 100644
--- a/llvm/include/llvm/TableGen/StringToOffsetTable.h
+++ b/llvm/include/llvm/TableGen/StringToOffsetTable.h
@@ -12,8 +12,6 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/raw_ostream.h"
#include <optional>
namespace llvm {
@@ -36,17 +34,7 @@ public:
bool empty() const { return StringOffset.empty(); }
size_t size() const { return AggregateString.size(); }
- unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
- auto [II, Inserted] = StringOffset.insert({Str, size()});
- if (Inserted) {
- // Add the string to the aggregate if this is the first time found.
- AggregateString.append(Str.begin(), Str.end());
- if (appendZero)
- AggregateString += '\0';
- }
-
- return II->second;
- }
+ unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true);
// Returns the offset of `Str` in the table if its preset, else return
// std::nullopt.
@@ -69,96 +57,10 @@ public:
// `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be
// valid identifiers to declare.
void EmitStringTableDef(raw_ostream &OS, const Twine &Name,
- const Twine &Indent = "") const {
- OS << formatv(R"(
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Woverlength-strings"
-#endif
-{0}static constexpr char {1}Storage[] = )",
- Indent, Name);
-
- // MSVC silently miscompiles string literals longer than 64k in some
- // circumstances. When the string table is longer, emit it as an array of
- // character literals.
- bool UseChars = AggregateString.size() > (64 * 1024);
- OS << (UseChars ? "{\n" : "\n");
-
- llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n");
- llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0'));
- // We should always have an empty string at the start, and because these are
- // null terminators rather than separators, we'll have one at the end as
- // well. Skip the end one.
- assert(Strings.front().empty() && "Expected empty initial string!");
- assert(Strings.back().empty() &&
- "Expected empty string at the end due to terminators!");
- Strings.pop_back();
- for (StringRef Str : Strings) {
- OS << LineSep << Indent << " ";
- // If we can, just emit this as a string literal to be concatenated.
- if (!UseChars) {
- OS << "\"";
- OS.write_escaped(Str);
- OS << "\\0\"";
- continue;
- }
-
- llvm::ListSeparator CharSep(", ");
- for (char C : Str) {
- OS << CharSep << "'";
- OS.write_escaped(StringRef(&C, 1));
- OS << "'";
- }
- OS << CharSep << "'\\0'";
- }
- OS << LineSep << Indent << (UseChars ? "};" : " ;");
-
- OS << formatv(R"(
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
-
-{0}static constexpr llvm::StringTable {1} =
-{0} {1}Storage;
-)",
- Indent, Name);
- }
+ const Twine &Indent = "") const;
// Emit the string as one single string.
- void EmitString(raw_ostream &O) const {
- // Escape the string.
- SmallString<256> EscapedStr;
- raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
-
- O << " \"";
- unsigned CharsPrinted = 0;
- for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
- if (CharsPrinted > 70) {
- O << "\"\n \"";
- CharsPrinted = 0;
- }
- O << EscapedStr[i];
- ++CharsPrinted;
-
- // Print escape sequences all together.
- if (EscapedStr[i] != '\\')
- continue;
-
- assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
- if (isDigit(EscapedStr[i + 1])) {
- assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
- "Expected 3 digit octal escape!");
- O << EscapedStr[++i];
- O << EscapedStr[++i];
- O << EscapedStr[++i];
- CharsPrinted += 3;
- } else {
- O << EscapedStr[++i];
- ++CharsPrinted;
- }
- }
- O << "\"";
- }
+ void EmitString(raw_ostream &O) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Analysis/PHITransAddr.cpp b/llvm/lib/Analysis/PHITransAddr.cpp
index e42113db4278..276708c2ebf7 100644
--- a/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/llvm/lib/Analysis/PHITransAddr.cpp
@@ -224,6 +224,9 @@ Value *PHITransAddr::translateSubExpr(Value *V, BasicBlock *CurBB,
// Scan to see if we have this GEP available.
Value *APHIOp = GEPOps[0];
+ if (isa<ConstantData>(APHIOp))
+ return nullptr;
+
for (User *U : APHIOp->users()) {
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
if (GEPI->getType() == GEP->getType() &&
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index c62ea1526981..d193c9e3210e 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -7841,7 +7841,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
unsigned GCD = std::min(MulZeros, TZ);
APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD);
SmallVector<const SCEV*, 4> MulOps;
- MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD)));
+ MulOps.push_back(getConstant(OpC->getAPInt().ashr(GCD)));
append_range(MulOps, LHSMul->operands().drop_front());
auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags());
ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt));
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index 062283975851..55d1350e446a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -162,8 +162,7 @@ void WinException::endFunction(const MachineFunction *MF) {
if (!MF->getEHContTargets().empty()) {
// Copy the function's EH Continuation targets to a module-level list.
- EHContTargets.insert(EHContTargets.end(), MF->getEHContTargets().begin(),
- MF->getEHContTargets().end());
+ llvm::append_range(EHContTargets, MF->getEHContTargets());
}
}
@@ -292,8 +291,7 @@ void WinException::endFuncletImpl() {
if (!MF->getEHContTargets().empty()) {
// Copy the function's EH Continuation targets to a module-level list.
- EHContTargets.insert(EHContTargets.end(), MF->getEHContTargets().begin(),
- MF->getEHContTargets().end());
+ llvm::append_range(EHContTargets, MF->getEHContTargets());
}
// Switch back to the funclet start .text section now that we are done
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index a762aab43ddd..fbdc784c928c 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/BranchRelaxation.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -44,7 +45,7 @@ STATISTIC(NumUnconditionalRelaxed, "Number of unconditional branches relaxed");
namespace {
-class BranchRelaxation : public MachineFunctionPass {
+class BranchRelaxation {
/// BasicBlockInfo - Information about the offset and size of a single
/// basic block.
struct BasicBlockInfo {
@@ -116,22 +117,30 @@ class BranchRelaxation : public MachineFunctionPass {
void verify();
public:
+ bool run(MachineFunction &MF);
+};
+
+class BranchRelaxationLegacy : public MachineFunctionPass {
+public:
static char ID;
- BranchRelaxation() : MachineFunctionPass(ID) {}
+ BranchRelaxationLegacy() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ return BranchRelaxation().run(MF);
+ }
StringRef getPassName() const override { return BRANCH_RELAX_NAME; }
};
} // end anonymous namespace
-char BranchRelaxation::ID = 0;
+char BranchRelaxationLegacy::ID = 0;
-char &llvm::BranchRelaxationPassID = BranchRelaxation::ID;
+char &llvm::BranchRelaxationPassID = BranchRelaxationLegacy::ID;
-INITIALIZE_PASS(BranchRelaxation, DEBUG_TYPE, BRANCH_RELAX_NAME, false, false)
+INITIALIZE_PASS(BranchRelaxationLegacy, DEBUG_TYPE, BRANCH_RELAX_NAME, false,
+ false)
/// verify - check BBOffsets, BBSizes, alignment of islands
void BranchRelaxation::verify() {
@@ -744,7 +753,16 @@ bool BranchRelaxation::relaxBranchInstructions() {
return Changed;
}
-bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
+PreservedAnalyses
+BranchRelaxationPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ if (!BranchRelaxation().run(MF))
+ return PreservedAnalyses::all();
+
+ return getMachineFunctionPassPreservedAnalyses();
+}
+
+bool BranchRelaxation::run(MachineFunction &mf) {
MF = &mf;
LLVM_DEBUG(dbgs() << "***** BranchRelaxation *****\n");
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 8b777ed2bbc9..b77cefca00b7 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -23,7 +23,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBasicBlockPathCloningPass(Registry);
initializeBasicBlockSectionsPass(Registry);
initializeBranchFolderLegacyPass(Registry);
- initializeBranchRelaxationPass(Registry);
+ initializeBranchRelaxationLegacyPass(Registry);
initializeBreakFalseDepsPass(Registry);
initializeCallBrPreparePass(Registry);
initializeCFGuardLongjmpPass(Registry);
@@ -117,7 +117,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRegUsageInfoCollectorLegacyPass(Registry);
initializeRegUsageInfoPropagationLegacyPass(Registry);
initializeRegisterCoalescerLegacyPass(Registry);
- initializeRemoveLoadsIntoFakeUsesPass(Registry);
+ initializeRemoveLoadsIntoFakeUsesLegacyPass(Registry);
initializeRemoveRedundantDebugValuesLegacyPass(Registry);
initializeRenameIndependentSubregsLegacyPass(Registry);
initializeSafeStackLegacyPassPass(Registry);
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index 4cd378f9aa59..a83982de14b2 100644
--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -2332,8 +2332,9 @@ void ComplexDeinterleavingGraph::replaceNodes() {
} else if (RootNode->Operation ==
ComplexDeinterleavingOperation::ReductionSingle) {
auto *RootInst = cast<Instruction>(RootNode->Real);
- ReductionInfo[RootInst].first->removeIncomingValue(BackEdge);
- DeadInstrRoots.push_back(ReductionInfo[RootInst].second);
+ auto &Info = ReductionInfo[RootInst];
+ Info.first->removeIncomingValue(BackEdge);
+ DeadInstrRoots.push_back(Info.second);
} else {
assert(R && "Unable to find replacement for RootInstruction");
DeadInstrRoots.push_back(RootInstruction);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index ac68eb55a6fd..ee271234d311 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1765,7 +1765,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
/* PadStrategy = */ TargetOpcode::G_ZEXT);
- Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
+ llvm::append_range(Xors, WidenedXors);
}
// Now, for each part we broke up, we know if they are equal/not equal
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 1ac1a770ae72..df3dd4196548 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -1487,8 +1487,7 @@ void MachineLICMImpl::InitializeLoadsHoistableLoops() {
auto *L = Worklist.pop_back_val();
AllowedToHoistLoads[L] = true;
LoopsInPreOrder.push_back(L);
- Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
- L->getSubLoops().end());
+ llvm::append_range(Worklist, L->getSubLoops());
}
// Going from the innermost to outermost loops, check if a loop has
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index dbd354f2ca2c..c27435aa2dae 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -3814,8 +3814,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// into an existing tracking collection, or insert a new one.
RegIt = RegToPHIIdx.find(CP.getDstReg());
if (RegIt != RegToPHIIdx.end())
- RegIt->second.insert(RegIt->second.end(), InstrNums.begin(),
- InstrNums.end());
+ llvm::append_range(RegIt->second, InstrNums);
else
RegToPHIIdx.insert({CP.getDstReg(), InstrNums});
}
diff --git a/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp b/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp
index 384a049acfe3..042fc13090ef 100644
--- a/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp
+++ b/llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp
@@ -22,11 +22,13 @@
///
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
@@ -41,12 +43,13 @@ using namespace llvm;
STATISTIC(NumLoadsDeleted, "Number of dead load instructions deleted");
STATISTIC(NumFakeUsesDeleted, "Number of FAKE_USE instructions deleted");
-class RemoveLoadsIntoFakeUses : public MachineFunctionPass {
+class RemoveLoadsIntoFakeUsesLegacy : public MachineFunctionPass {
public:
static char ID;
- RemoveLoadsIntoFakeUses() : MachineFunctionPass(ID) {
- initializeRemoveLoadsIntoFakeUsesPass(*PassRegistry::getPassRegistry());
+ RemoveLoadsIntoFakeUsesLegacy() : MachineFunctionPass(ID) {
+ initializeRemoveLoadsIntoFakeUsesLegacyPass(
+ *PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -66,21 +69,45 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
};
-char RemoveLoadsIntoFakeUses::ID = 0;
-char &llvm::RemoveLoadsIntoFakeUsesID = RemoveLoadsIntoFakeUses::ID;
+struct RemoveLoadsIntoFakeUses {
+ bool run(MachineFunction &MF);
+};
+
+char RemoveLoadsIntoFakeUsesLegacy::ID = 0;
+char &llvm::RemoveLoadsIntoFakeUsesID = RemoveLoadsIntoFakeUsesLegacy::ID;
-INITIALIZE_PASS_BEGIN(RemoveLoadsIntoFakeUses, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(RemoveLoadsIntoFakeUsesLegacy, DEBUG_TYPE,
"Remove Loads Into Fake Uses", false, false)
-INITIALIZE_PASS_END(RemoveLoadsIntoFakeUses, DEBUG_TYPE,
+INITIALIZE_PASS_END(RemoveLoadsIntoFakeUsesLegacy, DEBUG_TYPE,
"Remove Loads Into Fake Uses", false, false)
-bool RemoveLoadsIntoFakeUses::runOnMachineFunction(MachineFunction &MF) {
+bool RemoveLoadsIntoFakeUsesLegacy::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ return RemoveLoadsIntoFakeUses().run(MF);
+}
+
+PreservedAnalyses
+RemoveLoadsIntoFakeUsesPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ MFPropsModifier _(*this, MF);
+
+ if (!RemoveLoadsIntoFakeUses().run(MF))
+ return PreservedAnalyses::all();
+
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+bool RemoveLoadsIntoFakeUses::run(MachineFunction &MF) {
// Skip this pass if we would use VarLoc-based LDV, as there may be DBG_VALUE
// instructions of the restored values that would become invalid.
if (!MF.useDebugInstrRef())
return false;
// Only run this for functions that have fake uses.
- if (!MF.hasFakeUses() || skipFunction(MF.getFunction()))
+ if (!MF.hasFakeUses())
return false;
bool AnyChanges = false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 5210372dd935..83fade45d189 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -530,7 +530,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
NewOps.push_back(Op);
} else if (Op != OrigOp) {
// This is the first operand to change - add all operands so far.
- NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i);
+ llvm::append_range(NewOps, N->ops().take_front(i));
NewOps.push_back(Op);
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 083b984444bc..598de6b20775 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2320,7 +2320,7 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
SelOps.size());
Flags.setMemConstraint(ConstraintID);
Handles.emplace_back(CurDAG->getTargetConstant(Flags, DL, MVT::i32));
- Handles.insert(Handles.end(), SelOps.begin(), SelOps.end());
+ llvm::append_range(Handles, SelOps);
i += 2;
}
}
diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp
index 95c86a9ac266..2492dfc3ca55 100644
--- a/llvm/lib/CodeGen/WindowScheduler.cpp
+++ b/llvm/lib/CodeGen/WindowScheduler.cpp
@@ -679,8 +679,7 @@ MachineInstr *WindowScheduler::getOriMI(MachineInstr *NewMI) {
}
unsigned WindowScheduler::getOriStage(MachineInstr *OriMI, unsigned Offset) {
- assert(llvm::find(OriMIs, OriMI) != OriMIs.end() &&
- "Cannot find OriMI in OriMIs!");
+ assert(llvm::is_contained(OriMIs, OriMI) && "Cannot find OriMI in OriMIs!");
// If there is no instruction fold, all MI stages are 0.
if (Offset == SchedPhiNum)
return 0;
diff --git a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
index 80b7452a0b22..15e583ca7685 100644
--- a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
@@ -475,8 +475,9 @@ Error DebugObjectManagerPlugin::notifyEmitted(
FinalizePromise.set_value(MR.withResourceKeyDo([&](ResourceKey K) {
assert(PendingObjs.count(&MR) && "We still hold PendingObjsLock");
std::lock_guard<std::mutex> Lock(RegisteredObjsLock);
- RegisteredObjs[K].push_back(std::move(PendingObjs[&MR]));
- PendingObjs.erase(&MR);
+ auto It = PendingObjs.find(&MR);
+ RegisteredObjs[K].push_back(std::move(It->second));
+ PendingObjs.erase(It);
}));
});
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp
index 3bd6c1e5be2c..d95f2f602fbc 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h"
+#include "llvm/ADT/STLExtras.h"
namespace llvm {
namespace orc {
@@ -34,10 +35,7 @@ StringRef ELFThreadBSSSectionName = ".tbss";
StringRef ELFThreadDataSectionName = ".tdata";
bool isMachOInitializerSection(StringRef QualifiedName) {
- for (auto &InitSection : MachOInitSectionNames)
- if (InitSection == QualifiedName)
- return true;
- return false;
+ return llvm::is_contained(MachOInitSectionNames, QualifiedName);
}
bool isELFInitializerSection(StringRef SecName) {
diff --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp
index 23cc134f65b5..85a208ffbce0 100644
--- a/llvm/lib/MC/MCAsmBackend.cpp
+++ b/llvm/lib/MC/MCAsmBackend.cpp
@@ -115,11 +115,10 @@ bool MCAsmBackend::shouldForceRelocation(const MCAssembler &, const MCFixup &,
return Target.getSpecifier();
}
-bool MCAsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &Asm,
+bool MCAsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &,
const MCFixup &Fixup,
- bool Resolved, uint64_t Value,
- const MCRelaxableFragment *DF,
- const bool WasForced) const {
+ const MCValue &, uint64_t Value,
+ bool Resolved) const {
if (!Resolved)
return true;
return fixupNeedsRelaxation(Fixup, Value);
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index 934bdb40d530..4e925809d20b 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -138,7 +138,7 @@ bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const {
bool MCAssembler::evaluateFixup(const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, const MCSubtargetInfo *STI,
- uint64_t &Value, bool &WasForced) const {
+ uint64_t &Value, bool RecordReloc) const {
++stats::evaluateFixup;
// FIXME: This code has some duplication with recordRelocation. We should
@@ -150,47 +150,51 @@ bool MCAssembler::evaluateFixup(const MCFixup &Fixup, const MCFragment *DF,
const MCExpr *Expr = Fixup.getValue();
MCContext &Ctx = getContext();
Value = 0;
- WasForced = false;
if (!Expr->evaluateAsRelocatable(Target, this)) {
Ctx.reportError(Fixup.getLoc(), "expected relocatable expression");
return true;
}
- unsigned FixupFlags = getBackend().getFixupKindInfo(Fixup.getKind()).Flags;
- if (FixupFlags & MCFixupKindInfo::FKF_IsTarget)
- return getBackend().evaluateTargetFixup(*this, Fixup, DF, Target, STI,
- Value, WasForced);
-
- const MCSymbol *Add = Target.getAddSym();
- const MCSymbol *Sub = Target.getSubSym();
- bool IsPCRel = FixupFlags & MCFixupKindInfo::FKF_IsPCRel;
bool IsResolved = false;
- if (!IsPCRel) {
- IsResolved = Target.isAbsolute();
- } else if (Add && !Sub && !Add->isUndefined() && !Add->isAbsolute()) {
- IsResolved = (FixupFlags & MCFixupKindInfo::FKF_Constant) ||
- getWriter().isSymbolRefDifferenceFullyResolvedImpl(
- *this, *Add, *DF, false, true);
+ unsigned FixupFlags = getBackend().getFixupKindInfo(Fixup.getKind()).Flags;
+ if (FixupFlags & MCFixupKindInfo::FKF_IsTarget) {
+ IsResolved =
+ getBackend().evaluateTargetFixup(*this, Fixup, DF, Target, STI, Value);
+ } else {
+ const MCSymbol *Add = Target.getAddSym();
+ const MCSymbol *Sub = Target.getSubSym();
+ Value = Target.getConstant();
+ if (Add && Add->isDefined())
+ Value += getSymbolOffset(*Add);
+ if (Sub && Sub->isDefined())
+ Value -= getSymbolOffset(*Sub);
+
+ bool IsPCRel = FixupFlags & MCFixupKindInfo::FKF_IsPCRel;
+ bool ShouldAlignPC =
+ FixupFlags & MCFixupKindInfo::FKF_IsAlignedDownTo32Bits;
+ if (IsPCRel) {
+ uint64_t Offset = getFragmentOffset(*DF) + Fixup.getOffset();
+
+ // A number of ARM fixups in Thumb mode require that the effective PC
+ // address be determined as the 32-bit aligned version of the actual
+ // offset.
+ if (ShouldAlignPC)
+ Offset &= ~0x3;
+ Value -= Offset;
+
+ if (Add && !Sub && !Add->isUndefined() && !Add->isAbsolute()) {
+ IsResolved = (FixupFlags & MCFixupKindInfo::FKF_Constant) ||
+ getWriter().isSymbolRefDifferenceFullyResolvedImpl(
+ *this, *Add, *DF, false, true);
+ }
+ } else {
+ IsResolved = Target.isAbsolute();
+ assert(!ShouldAlignPC && "FKF_IsAlignedDownTo32Bits must be PC-relative");
+ }
}
- Value = Target.getConstant();
- if (Add && Add->isDefined())
- Value += getSymbolOffset(*Add);
- if (Sub && Sub->isDefined())
- Value -= getSymbolOffset(*Sub);
-
- bool ShouldAlignPC = FixupFlags & MCFixupKindInfo::FKF_IsAlignedDownTo32Bits;
- assert((ShouldAlignPC ? IsPCRel : true) &&
- "FKF_IsAlignedDownTo32Bits is only allowed on PC-relative fixups!");
-
- if (IsPCRel) {
- uint64_t Offset = getFragmentOffset(*DF) + Fixup.getOffset();
-
- // A number of ARM fixups in Thumb mode require that the effective PC
- // address be determined as the 32-bit aligned version of the actual offset.
- if (ShouldAlignPC) Offset &= ~0x3;
- Value -= Offset;
- }
+ if (!RecordReloc)
+ return IsResolved;
// .reloc directive and the backend might force the relocation.
// Backends that customize shouldForceRelocation generally just need the fixup
@@ -200,12 +204,12 @@ bool MCAssembler::evaluateFixup(const MCFixup &Fixup, const MCFragment *DF,
auto TargetVal = Target;
TargetVal.Cst = Value;
if (Fixup.getKind() >= FirstLiteralRelocationKind ||
- getBackend().shouldForceRelocation(*this, Fixup, TargetVal, STI)) {
+ getBackend().shouldForceRelocation(*this, Fixup, TargetVal, STI))
IsResolved = false;
- WasForced = true;
- }
}
-
+ if (!IsResolved)
+ getWriter().recordRelocation(const_cast<MCAssembler &>(*this), DF, Fixup,
+ Target, Value);
return IsResolved;
}
@@ -844,24 +848,6 @@ void MCAssembler::writeSectionData(raw_ostream &OS,
OS.tell() - Start == getSectionAddressSize(*Sec));
}
-std::tuple<MCValue, uint64_t, bool>
-MCAssembler::handleFixup(MCFragment &F, const MCFixup &Fixup,
- const MCSubtargetInfo *STI) {
- // Evaluate the fixup.
- MCValue Target;
- uint64_t FixedValue;
- bool WasForced;
- bool IsResolved =
- evaluateFixup(Fixup, &F, Target, STI, FixedValue, WasForced);
- if (!IsResolved) {
- // The fixup was unresolved, we need a relocation. Inform the object
- // writer of the relocation, and give it an opportunity to adjust the
- // fixup value if need be.
- getWriter().recordRelocation(*this, &F, Fixup, Target, FixedValue);
- }
- return std::make_tuple(Target, FixedValue, IsResolved);
-}
-
void MCAssembler::layout() {
assert(getBackendPtr() && "Expected assembler backend");
DEBUG_WITH_TYPE("mc-dump", {
@@ -987,10 +973,9 @@ void MCAssembler::layout() {
}
for (const MCFixup &Fixup : Fixups) {
uint64_t FixedValue;
- bool IsResolved;
MCValue Target;
- std::tie(Target, FixedValue, IsResolved) =
- handleFixup(Frag, Fixup, STI);
+ bool IsResolved = evaluateFixup(Fixup, &Frag, Target, STI, FixedValue,
+ /*RecordReloc=*/true);
getBackend().applyFixup(*this, Fixup, Target, Contents, FixedValue,
IsResolved, STI);
}
@@ -1012,11 +997,10 @@ bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup,
assert(getBackendPtr() && "Expected assembler backend");
MCValue Target;
uint64_t Value;
- bool WasForced;
bool Resolved = evaluateFixup(Fixup, DF, Target, DF->getSubtargetInfo(),
- Value, WasForced);
- return getBackend().fixupNeedsRelaxationAdvanced(*this, Fixup, Resolved,
- Value, DF, WasForced);
+ Value, /*RecordReloc=*/false);
+ return getBackend().fixupNeedsRelaxationAdvanced(*this, Fixup, Target, Value,
+ Resolved);
}
bool MCAssembler::fragmentNeedsRelaxation(const MCRelaxableFragment *F) const {
diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
index 5a785d7afd1e..b0ec215aec20 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
@@ -857,8 +857,7 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
"cannot change section address in a non-relocatable file");
StringMap<AddressUpdate> SectionsToUpdateAddress;
for (const SectionPatternAddressUpdate &PatternUpdate :
- make_range(Config.ChangeSectionAddress.rbegin(),
- Config.ChangeSectionAddress.rend())) {
+ reverse(Config.ChangeSectionAddress)) {
for (SectionBase &Sec : Obj.sections()) {
if (PatternUpdate.SectionPattern.matches(Sec.Name) &&
SectionsToUpdateAddress.try_emplace(Sec.Name, PatternUpdate.Update)
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index b923181e9726..215355827337 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -82,6 +82,7 @@
#include "llvm/CodeGen/AtomicExpand.h"
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/BranchFoldingPass.h"
+#include "llvm/CodeGen/BranchRelaxation.h"
#include "llvm/CodeGen/CallBrPrepare.h"
#include "llvm/CodeGen/CodeGenPrepare.h"
#include "llvm/CodeGen/ComplexDeinterleavingPass.h"
@@ -148,6 +149,7 @@
#include "llvm/CodeGen/RegUsageInfoPropagate.h"
#include "llvm/CodeGen/RegisterCoalescerPass.h"
#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h"
#include "llvm/CodeGen/RemoveRedundantDebugValues.h"
#include "llvm/CodeGen/RenameIndependentSubregs.h"
#include "llvm/CodeGen/SafeStack.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 300898bb092b..f172271be09a 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -926,14 +926,14 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
IP = getInlineParamsFromOptLevel(Level);
else
IP = getInlineParams(PTO.InlinerThreshold);
- // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
- // disable hot callsite inline (as much as possible [1]) because it makes
+ // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
+ // set hot-caller threshold to 0 to disable hot
+ // callsite inline (as much as possible [1]) because it makes
// profile annotation in the backend inaccurate.
//
// [1] Note the cost of a function could be below zero due to erased
// prologue / epilogue.
- if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
- PGOOpt->Action == PGOOptions::SampleUse)
+ if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
IP.HotCallSiteThreshold = 0;
if (PGOOpt)
@@ -1023,14 +1023,14 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
ModulePassManager MPM;
InlineParams IP = getInlineParamsFromOptLevel(Level);
- // For PreLinkThinLTO + SamplePGO, set hot-caller threshold to 0 to
- // disable hot callsite inline (as much as possible [1]) because it makes
+ // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
+ // set hot-caller threshold to 0 to disable hot
+ // callsite inline (as much as possible [1]) because it makes
// profile annotation in the backend inaccurate.
//
// [1] Note the cost of a function could be below zero due to erased
// prologue / epilogue.
- if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
- PGOOpt->Action == PGOOptions::SampleUse)
+ if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
IP.HotCallSiteThreshold = 0;
if (PGOOpt)
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index 4c59278eaaa0..dc1dd5d9c7f4 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -2001,14 +2001,12 @@ DotCfgDiff::DotCfgDiff(StringRef Title, const FuncDataT<DCData> &Before,
for (auto &A : After.getData()) {
StringRef Label = A.getKey();
const BlockDataT<DCData> &BD = A.getValue();
- unsigned C = NodePosition.count(Label);
- if (C == 0)
+ auto It = NodePosition.find(Label);
+ if (It == NodePosition.end())
// This only exists in the after IR. Create the node.
createNode(Label, BD, AfterColour);
- else {
- assert(C == 1 && "Unexpected multiple nodes.");
- Nodes[NodePosition[Label]].setCommon(BD);
- }
+ else
+ Nodes[It->second].setCommon(BD);
// Add in the edges between the nodes (as common or only in after).
for (StringMap<std::string>::const_iterator Sink = BD.getData().begin(),
E = BD.getData().end();
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index e058010f9d26..a7b9f259bfed 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -125,6 +125,9 @@ struct fltSemantics {
/* Whether this semantics can represent signed values */
bool hasSignedRepr = true;
+
+ /* Whether the sign bit of this semantics is the most significant bit */
+ bool hasSignBitInMSB = true;
};
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
@@ -144,9 +147,15 @@ static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
-static constexpr fltSemantics semFloat8E8M0FNU = {
- 127, -127, 1, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes,
- false, false};
+static constexpr fltSemantics semFloat8E8M0FNU = {127,
+ -127,
+ 1,
+ 8,
+ fltNonfiniteBehavior::NanOnly,
+ fltNanEncoding::AllOnes,
+ false,
+ false,
+ false};
static constexpr fltSemantics semFloat6E3M2FN = {
4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
@@ -358,6 +367,10 @@ bool APFloatBase::isIEEELikeFP(const fltSemantics &semantics) {
return SemanticsToEnum(semantics) <= S_IEEEquad;
}
+bool APFloatBase::hasSignBitInMSB(const fltSemantics &semantics) {
+ return semantics.hasSignBitInMSB;
+}
+
bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
const fltSemantics &Dst) {
// Exponent range must be larger.
diff --git a/llvm/lib/TableGen/CMakeLists.txt b/llvm/lib/TableGen/CMakeLists.txt
index 84815c773699..0f9284c8bb99 100644
--- a/llvm/lib/TableGen/CMakeLists.txt
+++ b/llvm/lib/TableGen/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMTableGen
Record.cpp
SetTheory.cpp
StringMatcher.cpp
+ StringToOffsetTable.cpp
TableGenBackend.cpp
TableGenBackendSkeleton.cpp
TGLexer.cpp
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 35600bf2f1f8..ea716215e067 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -64,6 +64,15 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
static cl::opt<bool>
TimePhases("time-phases", cl::desc("Time phases of parser and backend"));
+namespace llvm {
+cl::opt<bool> EmitLongStrLiterals(
+ "long-string-literals",
+ cl::desc("when emitting large string tables, prefer string literals over "
+ "comma-separated char literals. This can be a readability and "
+ "compile-time performance win, but upsets some compilers"),
+ cl::Hidden, cl::init(true));
+} // end namespace llvm
+
static cl::opt<bool> NoWarnOnUnusedTemplateArgs(
"no-warn-on-unused-template-args",
cl::desc("Disable unused template argument warnings."));
diff --git a/llvm/lib/TableGen/StringToOffsetTable.cpp b/llvm/lib/TableGen/StringToOffsetTable.cpp
new file mode 100644
index 000000000000..d73b5749ad7d
--- /dev/null
+++ b/llvm/lib/TableGen/StringToOffsetTable.cpp
@@ -0,0 +1,120 @@
+//===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/StringToOffsetTable.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Main.h"
+
+using namespace llvm;
+
+unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str,
+ bool appendZero) {
+ auto [II, Inserted] = StringOffset.insert({Str, size()});
+ if (Inserted) {
+ // Add the string to the aggregate if this is the first time found.
+ AggregateString.append(Str.begin(), Str.end());
+ if (appendZero)
+ AggregateString += '\0';
+ }
+
+ return II->second;
+}
+
+void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
+ const Twine &Indent) const {
+ OS << formatv(R"(
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Woverlength-strings"
+#endif
+{0}static constexpr char {1}Storage[] = )",
+ Indent, Name);
+
+ // MSVC silently miscompiles string literals longer than 64k in some
+ // circumstances. The build system sets EmitLongStrLiterals to false when it
+ // detects that it is targetting MSVC. When that option is false and the
+ // string table is longer than 64k, emit it as an array of character
+ // literals.
+ bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024);
+ OS << (UseChars ? "{\n" : "\n");
+
+ llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n");
+ llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0'));
+ // We should always have an empty string at the start, and because these are
+ // null terminators rather than separators, we'll have one at the end as
+ // well. Skip the end one.
+ assert(Strings.front().empty() && "Expected empty initial string!");
+ assert(Strings.back().empty() &&
+ "Expected empty string at the end due to terminators!");
+ Strings.pop_back();
+ for (StringRef Str : Strings) {
+ OS << LineSep << Indent << " ";
+ // If we can, just emit this as a string literal to be concatenated.
+ if (!UseChars) {
+ OS << "\"";
+ OS.write_escaped(Str);
+ OS << "\\0\"";
+ continue;
+ }
+
+ llvm::ListSeparator CharSep(", ");
+ for (char C : Str) {
+ OS << CharSep << "'";
+ OS.write_escaped(StringRef(&C, 1));
+ OS << "'";
+ }
+ OS << CharSep << "'\\0'";
+ }
+ OS << LineSep << Indent << (UseChars ? "};" : " ;");
+
+ OS << formatv(R"(
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+{0}static constexpr llvm::StringTable {1} =
+{0} {1}Storage;
+)",
+ Indent, Name);
+}
+
+void StringToOffsetTable::EmitString(raw_ostream &O) const {
+ // Escape the string.
+ SmallString<256> EscapedStr;
+ raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
+
+ O << " \"";
+ unsigned CharsPrinted = 0;
+ for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
+ if (CharsPrinted > 70) {
+ O << "\"\n \"";
+ CharsPrinted = 0;
+ }
+ O << EscapedStr[i];
+ ++CharsPrinted;
+
+ // Print escape sequences all together.
+ if (EscapedStr[i] != '\\')
+ continue;
+
+ assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
+ if (isDigit(EscapedStr[i + 1])) {
+ assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
+ "Expected 3 digit octal escape!");
+ O << EscapedStr[++i];
+ O << EscapedStr[++i];
+ O << EscapedStr[++i];
+ CharsPrinted += 3;
+ } else {
+ O << EscapedStr[++i];
+ ++CharsPrinted;
+ }
+ }
+ O << "\"";
+}
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 6bf6ce716783..68218e59961c 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3980,7 +3980,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
}
if (!InsertBeforeLR)
- CSI.insert(CSI.end(), VGSaves.begin(), VGSaves.end());
+ llvm::append_range(CSI, VGSaves);
}
Register LastReg = 0;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index d370f8c7ff6e..74217fad82a7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2760,6 +2760,9 @@ bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
case AArch64::LDRXpre:
case AArch64::LDURSWi:
case AArch64::LDRSWpre:
+ // SVE instructions.
+ case AArch64::LDR_ZXI:
+ case AArch64::STR_ZXI:
return true;
}
}
@@ -2912,6 +2915,18 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
return false;
}
+ // Pairing SVE fills/spills is only valid for little-endian targets that
+ // implement VLS 128.
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AArch64::LDR_ZXI:
+ case AArch64::STR_ZXI:
+ if (!Subtarget.isLittleEndian() ||
+ Subtarget.getSVEVectorSizeInBits() != 128)
+ return false;
+ }
+
// Check if this load/store has a hint to avoid pair formation.
// MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
if (isLdStPairSuppressed(MI))
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 06e633effe87..7c47492cf1a8 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -298,6 +298,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
case AArch64::STRXui:
case AArch64::STRXpre:
case AArch64::STURXi:
+ case AArch64::STR_ZXI:
case AArch64::LDRDui:
case AArch64::LDURDi:
case AArch64::LDRDpre:
@@ -316,6 +317,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
case AArch64::LDRSui:
case AArch64::LDURSi:
case AArch64::LDRSpre:
+ case AArch64::LDR_ZXI:
return Opc;
case AArch64::LDRSWui:
return AArch64::LDRWui;
@@ -361,6 +363,7 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
return AArch64::STPDpre;
case AArch64::STRQui:
case AArch64::STURQi:
+ case AArch64::STR_ZXI:
return AArch64::STPQi;
case AArch64::STRQpre:
return AArch64::STPQpre;
@@ -386,6 +389,7 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
return AArch64::LDPDpre;
case AArch64::LDRQui:
case AArch64::LDURQi:
+ case AArch64::LDR_ZXI:
return AArch64::LDPQi;
case AArch64::LDRQpre:
return AArch64::LDPQpre;
@@ -1225,6 +1229,16 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
(void)MIBSXTW;
LLVM_DEBUG(dbgs() << " Extend operand:\n ");
LLVM_DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
+ } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
+ // We are combining SVE fill/spill to LDP/STP, so we need to use the Q
+ // variant of the registers.
+ MachineOperand &MOp0 = MIB->getOperand(0);
+ MachineOperand &MOp1 = MIB->getOperand(1);
+ assert(AArch64::ZPRRegClass.contains(MOp0.getReg()) &&
+ AArch64::ZPRRegClass.contains(MOp1.getReg()) && "Invalid register.");
+ MOp0.setReg(AArch64::Q0 + (MOp0.getReg() - AArch64::Z0));
+ MOp1.setReg(AArch64::Q0 + (MOp1.getReg() - AArch64::Z0));
+ LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
} else {
LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
}
@@ -1499,6 +1513,12 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
if (OpcA == OpcB)
return !AArch64InstrInfo::isPreLdSt(FirstMI);
+ // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
+ // allow pairing them with other instructions.
+ if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
+ OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
+ return false;
+
// Two pre ld/st of different opcodes cannot be merged either
if (AArch64InstrInfo::isPreLdSt(FirstMI) && AArch64InstrInfo::isPreLdSt(MI))
return false;
@@ -2659,7 +2679,8 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
// Get the needed alignments to check them if
// ldp-aligned-only/stp-aligned-only features are opted.
uint64_t MemAlignment = MemOp->getAlign().value();
- uint64_t TypeAlignment = Align(MemOp->getSize().getValue()).value();
+ uint64_t TypeAlignment =
+ Align(MemOp->getSize().getValue().getKnownMinValue()).value();
if (MemAlignment < 2 * TypeAlignment) {
NumFailedAlignmentCheck++;
@@ -2820,11 +2841,18 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
}
// 3) Find loads and stores that can be merged into a single load or store
// pair instruction.
+ // When compiling for SVE 128, also try to combine SVE fill/spill
+ // instructions into LDP/STP.
// e.g.,
// ldr x0, [x2]
// ldr x1, [x2, #8]
// ; becomes
// ldp x0, x1, [x2]
+ // e.g.,
+ // ldr z0, [x2]
+ // ldr z1, [x2, #1, mul vl]
+ // ; becomes
+ // ldp q0, q1, [x2]
if (MBB.getParent()->getRegInfo().tracksLiveness()) {
DefinedInBB.clear();
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 54347b610c50..0ddd17cee134 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -261,15 +261,18 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
// A COPY from an FPR will become a FMOVSWr, so do so now so that we know
// that the upper bits are zero.
if (RC != &AArch64::FPR32RegClass &&
- ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
+ ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass &&
+ RC != &AArch64::ZPRRegClass) ||
SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
return false;
- Register CpySrc = SrcMI->getOperand(1).getReg();
+ Register CpySrc;
if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
TII->get(TargetOpcode::COPY), CpySrc)
.add(SrcMI->getOperand(1));
+ } else {
+ CpySrc = SrcMI->getOperand(1).getReg();
}
BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ca1a48690195..2b9d32f9208f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1492,9 +1492,8 @@ static bool isAllActivePredicate(Value *Pred) {
if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
Pred = UncastedPred;
-
- return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
- m_ConstantInt<AArch64SVEPredPattern::all>()));
+ auto *C = dyn_cast<Constant>(Pred);
+ return (C && C->isAllOnesValue());
}
// Use SVE intrinsic info to eliminate redundant operands and/or canonicalise
@@ -1701,14 +1700,7 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
IntrinsicInst &II) {
LLVMContext &Ctx = II.getContext();
- // Check that the predicate is all active
- auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
- if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
- return std::nullopt;
-
- const auto PTruePattern =
- cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
- if (PTruePattern != AArch64SVEPredPattern::all)
+ if (!isAllActivePredicate(II.getArgOperand(0)))
return std::nullopt;
// Check that we have a compare of zero..
@@ -2118,8 +2110,7 @@ instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
auto *OpPredicate = II.getOperand(0);
auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
if (BinOpCode == Instruction::BinaryOpsEnd ||
- !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
- m_ConstantInt<AArch64SVEPredPattern::all>())))
+ !isAllActivePredicate(OpPredicate))
return std::nullopt;
auto BinOp = IC.Builder.CreateBinOpFMF(
BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());
@@ -2641,6 +2632,13 @@ static std::optional<Instruction *> instCombineDMB(InstCombiner &IC,
return std::nullopt;
}
+static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC,
+ IntrinsicInst &II) {
+ if (match(II.getOperand(0), m_ConstantInt<AArch64SVEPredPattern::all>()))
+ return IC.replaceInstUsesWith(II, Constant::getAllOnesValue(II.getType()));
+ return std::nullopt;
+}
+
std::optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
@@ -2744,6 +2742,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineSVEDupqLane(IC, II);
case Intrinsic::aarch64_sve_insr:
return instCombineSVEInsr(IC, II);
+ case Intrinsic::aarch64_sve_ptrue:
+ return instCombinePTrue(IC, II);
}
return std::nullopt;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index 7b4d00c8214c..153b14ce6050 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -1491,12 +1491,10 @@ bool MFMAExpInterleaveOpt::analyzeDAG(const SIInstrInfo *TII) {
return isBitPack(Opc);
});
- auto *PackPred =
- std::find_if((*TempMFMA)->Preds.begin(), (*TempMFMA)->Preds.end(),
- [&isBitPack](SDep &Pred) {
- auto Opc = Pred.getSUnit()->getInstr()->getOpcode();
- return isBitPack(Opc);
- });
+ auto *PackPred = llvm::find_if((*TempMFMA)->Preds, [&isBitPack](SDep &Pred) {
+ auto Opc = Pred.getSUnit()->getInstr()->getOpcode();
+ return isBitPack(Opc);
+ });
if (PackPred == (*TempMFMA)->Preds.end())
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 94ecb6ba9a2b..6c01f6dd370f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -729,6 +729,11 @@ static bool isSupportedAccessType(FixedVectorType *VecTy, Type *AccessTy,
// complicated.
if (isa<FixedVectorType>(AccessTy)) {
TypeSize AccTS = DL.getTypeStoreSize(AccessTy);
+ // If the type size and the store size don't match, we would need to do more
+ // than just bitcast to translate between an extracted/insertable subvectors
+ // and the accessed value.
+ if (AccTS * 8 != DL.getTypeSizeInBits(AccessTy))
+ return false;
TypeSize VecTS = DL.getTypeStoreSize(VecTy->getElementType());
return AccTS.isKnownMultipleOf(VecTS);
}
@@ -813,15 +818,17 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
if (VectorType::isValidElementType(ElemTy) && NumElems > 0) {
unsigned ElementSize = DL->getTypeSizeInBits(ElemTy) / 8;
- unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy);
- // Expand vector if required to match padding of inner type,
- // i.e. odd size subvectors.
- // Storage size of new vector must match that of alloca for correct
- // behaviour of byte offsets and GEP computation.
- if (NumElems * ElementSize != AllocaSize)
- NumElems = AllocaSize / ElementSize;
- if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
- VectorTy = FixedVectorType::get(ElemTy, NumElems);
+ if (ElementSize > 0) {
+ unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy);
+ // Expand vector if required to match padding of inner type,
+ // i.e. odd size subvectors.
+ // Storage size of new vector must match that of alloca for correct
+ // behaviour of byte offsets and GEP computation.
+ if (NumElems * ElementSize != AllocaSize)
+ NumElems = AllocaSize / ElementSize;
+ if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
+ VectorTy = FixedVectorType::get(ElemTy, NumElems);
+ }
}
}
@@ -861,7 +868,14 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
LLVM_DEBUG(dbgs() << " Attempting promotion to: " << *VectorTy << "\n");
Type *VecEltTy = VectorTy->getElementType();
- unsigned ElementSize = DL->getTypeSizeInBits(VecEltTy) / 8;
+ unsigned ElementSizeInBits = DL->getTypeSizeInBits(VecEltTy);
+ if (ElementSizeInBits != DL->getTypeAllocSizeInBits(VecEltTy)) {
+ LLVM_DEBUG(dbgs() << " Cannot convert to vector if the allocation size "
+ "does not match the type's size\n");
+ return false;
+ }
+ unsigned ElementSize = ElementSizeInBits / 8;
+ assert(ElementSize > 0);
for (auto *U : Uses) {
Instruction *Inst = cast<Instruction>(U->getUser());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
index 43885587ad81..ca093be61d11 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp
@@ -1081,8 +1081,7 @@ void AMDGPUSwLowerLDS::lowerNonKernelLDSAccesses(
IRB.CreateLoad(IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS), BaseLoad);
for (GlobalVariable *GV : LDSGlobals) {
- const auto *GVIt =
- std::find(OrdereLDSGlobals.begin(), OrdereLDSGlobals.end(), GV);
+ const auto *GVIt = llvm::find(OrdereLDSGlobals, GV);
assert(GVIt != OrdereLDSGlobals.end());
uint32_t GVOffset = std::distance(OrdereLDSGlobals.begin(), GVIt);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 8f3138acaea0..b59e94085272 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -66,6 +66,7 @@
#include "llvm/Analysis/KernelInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/AtomicExpand.h"
+#include "llvm/CodeGen/BranchRelaxation.h"
#include "llvm/CodeGen/DeadMachineInstructionElim.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
@@ -2205,7 +2206,7 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
addPass(AMDGPUInsertDelayAluPass());
}
- // TODO: addPass(BranchRelaxationPass());
+ addPass(BranchRelaxationPass());
}
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 356040da9567..bd95bcd89e18 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4055,8 +4055,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
if (IsChainCallConv)
- Ops.insert(Ops.end(), ChainCallSpecialArgs.begin(),
- ChainCallSpecialArgs.end());
+ llvm::append_range(Ops, ChainCallSpecialArgs);
// Add argument registers to the end of the list so that they are known live
// into the call.
@@ -15526,9 +15525,9 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// Adjust the writemask in the node
SmallVector<SDValue, 12> Ops;
- Ops.insert(Ops.end(), Node->op_begin(), Node->op_begin() + DmaskIdx);
+ llvm::append_range(Ops, Node->ops().take_front(DmaskIdx));
Ops.push_back(DAG.getTargetConstant(NewDmask, SDLoc(Node), MVT::i32));
- Ops.insert(Ops.end(), Node->op_begin() + DmaskIdx + 1, Node->op_end());
+ llvm::append_range(Ops, Node->ops().drop_front(DmaskIdx + 1));
MVT SVT = Node->getValueType(0).getVectorElementType().getSimpleVT();
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index efdf642e29db..1673bfa15267 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -352,7 +352,7 @@ void SIMachineFunctionInfo::shiftWwmVGPRsToLowestRange(
// Replace the register in SpillPhysVGPRs. This is needed to look for free
// lanes while spilling special SGPRs like FP, BP, etc. during PEI.
- auto *RegItr = std::find(SpillPhysVGPRs.begin(), SpillPhysVGPRs.end(), Reg);
+ auto *RegItr = llvm::find(SpillPhysVGPRs, Reg);
if (RegItr != SpillPhysVGPRs.end()) {
unsigned Idx = std::distance(SpillPhysVGPRs.begin(), RegItr);
SpillPhysVGPRs[Idx] = NewReg;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 67b44d2a19ef..0611a97a3cdc 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -624,7 +624,9 @@ bool isMAC(unsigned Opc) {
Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
+ Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
+ Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 2d45c42e724d..f8790dd063ae 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2849,12 +2849,10 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
// Erase the entry into the DbgValueSinkCandidates for the DBG_VALUE
// that was moved.
auto DbgVar = createDebugVariableFromMachineInstr(DbgInstr);
- auto DbgIt = DbgValueSinkCandidates.find(DbgVar);
- // If the instruction is a DBG_VALUE_LIST, it may have already been
- // erased from the DbgValueSinkCandidates. Only erase if it exists in
- // the DbgValueSinkCandidates.
- if (DbgIt != DbgValueSinkCandidates.end())
- DbgValueSinkCandidates.erase(DbgIt);
+ // Erase DbgVar from DbgValueSinkCandidates if still present. If the
+ // instruction is a DBG_VALUE_LIST, it may have already been erased from
+ // DbgValueSinkCandidates.
+ DbgValueSinkCandidates.erase(DbgVar);
// Zero out original dbg instr
forEachDbgRegOperand(DbgInstr,
[&](MachineOperand &Op) { Op.setReg(0); });
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index bed15bdc274b..88bcf4cc9c6c 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -335,8 +335,36 @@ const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
return nullptr;
}
-bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
- uint64_t Value) const {
+static bool needsInterworking(const MCAssembler &Asm, const MCSymbol *Sym,
+ unsigned FixupKind) {
+ // Create relocations for unconditional branches to function symbols with
+ // different execution mode in ELF binaries.
+ if (!Sym || !Sym->isELF())
+ return false;
+ unsigned Type = cast<MCSymbolELF>(Sym)->getType();
+ if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) {
+ if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch))
+ return true;
+ if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br ||
+ FixupKind == ARM::fixup_arm_thumb_bl ||
+ FixupKind == ARM::fixup_t2_condbranch ||
+ FixupKind == ARM::fixup_t2_uncondbranch))
+ return true;
+ }
+ return false;
+}
+
+bool ARMAsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target,
+ uint64_t Value,
+ bool Resolved) const {
+ const MCSymbol *Sym = Target.getAddSym();
+ if (needsInterworking(Asm, Sym, Fixup.getTargetKind()))
+ return true;
+
+ if (!Resolved)
+ return true;
return reasonForFixupRelaxation(Fixup, Value);
}
@@ -973,18 +1001,8 @@ bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
}
// Create relocations for unconditional branches to function symbols with
// different execution mode in ELF binaries.
- if (Sym && Sym->isELF()) {
- unsigned Type = cast<MCSymbolELF>(Sym)->getType();
- if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) {
- if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch))
- return true;
- if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br ||
- FixupKind == ARM::fixup_arm_thumb_bl ||
- FixupKind == ARM::fixup_t2_condbranch ||
- FixupKind == ARM::fixup_t2_uncondbranch))
- return true;
- }
- }
+ if (needsInterworking(Asm, Sym, Fixup.getTargetKind()))
+ return true;
// We must always generate a relocation for BL/BLX instructions if we have
// a symbol to reference, as the linker relies on knowing the destination
// symbol's thumb-ness to get interworking right.
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 4e4df16d890c..57588d989d54 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -54,8 +54,9 @@ public:
const char *reasonForFixupRelaxation(const MCFixup &Fixup,
uint64_t Value) const;
- bool fixupNeedsRelaxation(const MCFixup &Fixup,
- uint64_t Value) const override;
+ bool fixupNeedsRelaxationAdvanced(const MCAssembler &,
+ const MCFixup &, const MCValue &, uint64_t,
+ bool) const override;
void relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const override;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
index ec11a78f8a7a..9d35626f449d 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cstddef>
#include <cstdint>
@@ -60,7 +61,7 @@ public:
/// Emit unwind raw opcodes
void EmitRaw(const SmallVectorImpl<uint8_t> &Opcodes) {
- Ops.insert(Ops.end(), Opcodes.begin(), Opcodes.end());
+ llvm::append_range(Ops, Opcodes);
OpBegins.push_back(OpBegins.back() + Opcodes.size());
}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
index ea7968f01ee4..792a55555a0d 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
@@ -171,16 +171,13 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
}
-bool CSKYAsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &Asm,
+bool CSKYAsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &,
const MCFixup &Fixup,
- bool Resolved, uint64_t Value,
- const MCRelaxableFragment *DF,
- const bool WasForced) const {
- // Return true if the symbol is actually unresolved.
- // Resolved could be always false when shouldForceRelocation return true.
- // We use !WasForced to indicate that the symbol is unresolved and not forced
- // by shouldForceRelocation.
- if (!Resolved && !WasForced)
+ const MCValue &,
+ uint64_t Value,
+ bool Resolved) const {
+ // Return true if the symbol is unresolved.
+ if (!Resolved)
return true;
int64_t Offset = int64_t(Value);
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
index 07c5065ea4b5..3ce2f37212dc 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
@@ -39,11 +39,9 @@ public:
bool mayNeedRelaxation(const MCInst &Inst,
const MCSubtargetInfo &STI) const override;
- bool fixupNeedsRelaxationAdvanced(const MCAssembler &Asm,
- const MCFixup &Fixup, bool Resolved,
- uint64_t Value,
- const MCRelaxableFragment *DF,
- const bool WasForced) const override;
+ bool fixupNeedsRelaxationAdvanced(const MCAssembler &,
+ const MCFixup &, const MCValue &, uint64_t,
+ bool) const override;
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
diff --git a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp
index 0c31e274a4ee..71bdfc6657c5 100644
--- a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp
@@ -160,11 +160,8 @@ bool HexagonTfrCleanup::updateImmMap(MachineInstr *MI, ImmediateMap &IMap) {
if (!Mo->isReg() || !Mo->isDef())
continue;
unsigned R = Mo->getReg();
- for (MCRegAliasIterator AR(R, TRI, true); AR.isValid(); ++AR) {
- ImmediateMap::iterator F = IMap.find(*AR);
- if (F != IMap.end())
- IMap.erase(F);
- }
+ for (MCRegAliasIterator AR(R, TRI, true); AR.isValid(); ++AR)
+ IMap.erase(*AR);
}
return true;
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 1c1454a41cc0..ae1dac57d929 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -41,6 +41,7 @@ class HexagonAsmBackend : public MCAsmBackend {
uint8_t OSABI;
StringRef CPU;
mutable uint64_t relaxedCnt;
+ mutable const MCInst *RelaxedMCB = nullptr;
std::unique_ptr <MCInstrInfo> MCII;
std::unique_ptr <MCInst *> RelaxTarget;
MCInst * Extender;
@@ -560,17 +561,17 @@ public:
/// \param Inst - The instruction to test.
bool mayNeedRelaxation(MCInst const &Inst,
const MCSubtargetInfo &STI) const override {
+ RelaxedMCB = &Inst;
return true;
}
/// fixupNeedsRelaxation - Target specific predicate for whether a given
/// fixup requires the associated instruction to be relaxed.
bool fixupNeedsRelaxationAdvanced(const MCAssembler &Asm,
- const MCFixup &Fixup, bool Resolved,
+ const MCFixup &Fixup, const MCValue &,
uint64_t Value,
- const MCRelaxableFragment *DF,
- const bool WasForced) const override {
- MCInst const &MCB = DF->getInst();
+ bool Resolved) const override {
+ MCInst const &MCB = *RelaxedMCB;
assert(HexagonMCInstrInfo::isBundle(MCB));
*RelaxTarget = nullptr;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 5f7974ab6cae..0c295997ab52 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -602,14 +602,6 @@ bool MipsAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
}
}
-bool MipsAsmBackend::isMicroMips(const MCSymbol *Sym) const {
- if (const auto *ElfSym = dyn_cast<const MCSymbolELF>(Sym)) {
- if (ElfSym->getOther() & ELF::STO_MIPS_MICROMIPS)
- return true;
- }
- return false;
-}
-
namespace {
class WindowsMipsAsmBackend : public MipsAsmBackend {
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 9752615422b6..1e8504aaf2aa 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -54,8 +54,6 @@ public:
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target,
const MCSubtargetInfo *STI) override;
-
- bool isMicroMips(const MCSymbol *Sym) const override;
}; // class MipsAsmBackend
} // namespace
diff --git a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
index d9beab7ec42e..8feae341893a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -50,8 +50,7 @@ public:
/// Check if the symbol has a mapping. Having a mapping means the handle is
/// replaced with a reference
bool checkImageHandleSymbol(StringRef Symbol) const {
- return ImageHandleList.end() !=
- std::find(ImageHandleList.begin(), ImageHandleList.end(), Symbol);
+ return llvm::is_contained(ImageHandleList, Symbol);
}
};
}
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 55f1a90b2a01..f246aee4da39 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -539,17 +539,16 @@ public:
// True if operand is a symbol with no modifiers, or a constant with no
// modifiers and isShiftedInt<N-1, 1>(Op).
template <int N> bool isBareSimmNLsb0() const {
- int64_t Imm;
- RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None;
if (!isImm())
return false;
- bool IsConstantImm = evaluateConstantImm(getImm(), Imm);
- bool IsValid;
- if (!IsConstantImm)
- IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK);
- else
- IsValid = isShiftedInt<N - 1, 1>(fixImmediateForRV32(Imm, isRV64Imm()));
- return IsValid && VK == RISCVMCExpr::VK_None;
+
+ int64_t Imm;
+ if (evaluateConstantImm(getImm(), Imm))
+ return isShiftedInt<N - 1, 1>(fixImmediateForRV32(Imm, isRV64Imm()));
+
+ RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None;
+ return RISCVAsmParser::classifySymbolRef(getImm(), VK) &&
+ VK == RISCVMCExpr::VK_None;
}
// True if operand is a symbol with no modifiers, or a constant with no
@@ -2079,9 +2078,6 @@ ParseStatus RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
- if (Identifier.consume_back("@plt"))
- return Error(getLoc(), "'@plt' operand not valid for instruction");
-
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
if (Sym->isVariable()) {
@@ -2129,8 +2125,9 @@ ParseStatus RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
Lex();
Lex();
StringRef PLT;
+ SMLoc Loc = getLoc();
if (getParser().parseIdentifier(PLT) || PLT != "plt")
- return ParseStatus::Failure;
+ return Error(Loc, "@ (except the deprecated/ignored @plt) is disallowed");
} else if (!getLexer().peekTok().is(AsmToken::EndOfStatement)) {
// Avoid parsing the register in `call rd, foo` as a call symbol.
return ParseStatus::NoMatch;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 49c8c6957aa3..b36b8bd3fb43 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -141,20 +141,19 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
return STI->hasFeature(RISCV::FeatureRelax) || ForceRelocs;
}
-bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(
- const MCAssembler &Asm, const MCFixup &Fixup, bool Resolved, uint64_t Value,
- const MCRelaxableFragment *DF, const bool WasForced) const {
+bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &,
+ const MCFixup &Fixup,
+ const MCValue &,
+ uint64_t Value,
+ bool Resolved) const {
if (!RelaxBranches)
return false;
int64_t Offset = int64_t(Value);
unsigned Kind = Fixup.getTargetKind();
- // Return true if the symbol is actually unresolved.
- // Resolved could be always false when shouldForceRelocation return true.
- // We use !WasForced to indicate that the symbol is unresolved and not forced
- // by shouldForceRelocation.
- if (!Resolved && !WasForced)
+ // Return true if the symbol is unresolved.
+ if (!Resolved)
return true;
switch (Kind) {
@@ -594,12 +593,9 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
}
-bool RISCVAsmBackend::evaluateTargetFixup(const MCAssembler &Asm,
- const MCFixup &Fixup,
- const MCFragment *DF,
- const MCValue &Target,
- const MCSubtargetInfo *STI,
- uint64_t &Value, bool &WasForced) {
+bool RISCVAsmBackend::evaluateTargetFixup(
+ const MCAssembler &Asm, const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, const MCSubtargetInfo *STI, uint64_t &Value) {
const MCFixup *AUIPCFixup;
const MCFragment *AUIPCDF;
MCValue AUIPCTarget;
@@ -646,12 +642,7 @@ bool RISCVAsmBackend::evaluateTargetFixup(const MCAssembler &Asm,
Value = Asm.getSymbolOffset(SA) + AUIPCTarget.getConstant();
Value -= Asm.getFragmentOffset(*AUIPCDF) + AUIPCFixup->getOffset();
- if (shouldForceRelocation(Asm, *AUIPCFixup, AUIPCTarget, STI)) {
- WasForced = true;
- return false;
- }
-
- return true;
+ return !shouldForceRelocation(Asm, *AUIPCFixup, AUIPCTarget, STI);
}
bool RISCVAsmBackend::handleAddSubRelocations(const MCAssembler &Asm,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index f5e8d340d9bc..5d585b4efc11 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -49,8 +49,8 @@ public:
bool evaluateTargetFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCFragment *DF, const MCValue &Target,
- const MCSubtargetInfo *STI, uint64_t &Value,
- bool &WasForced) override;
+ const MCSubtargetInfo *STI,
+ uint64_t &Value) override;
bool handleAddSubRelocations(const MCAssembler &Asm, const MCFragment &F,
const MCFixup &Fixup, const MCValue &Target,
@@ -68,12 +68,9 @@ public:
const MCValue &Target,
const MCSubtargetInfo *STI) override;
- bool fixupNeedsRelaxationAdvanced(const MCAssembler &Asm,
- const MCFixup &Fixup, bool Resolved,
- uint64_t Value,
- const MCRelaxableFragment *DF,
- const bool WasForced) const override;
-
+ bool fixupNeedsRelaxationAdvanced(const MCAssembler &,
+ const MCFixup &, const MCValue &, uint64_t,
+ bool) const override;
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 83ecf805489c..972bee5a0aa7 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -34,6 +34,10 @@ static cl::opt<bool>
cl::desc("Disable the emission of assembler pseudo instructions"),
cl::init(false), cl::Hidden);
+static cl::opt<bool> EmitX8AsFP("riscv-emit-x8-as-fp",
+ cl::desc("Emit x8 as fp instead of s0"),
+ cl::init(false), cl::Hidden);
+
// Print architectural register names rather than the ABI names (such as x2
// instead of sp).
// TODO: Make RISCVInstPrinter::getRegisterName non-static so that this can a
@@ -54,6 +58,11 @@ bool RISCVInstPrinter::applyTargetSpecificCLOption(StringRef Opt) {
ArchRegNames = true;
return true;
}
+ if (Opt == "emit-x8-as-fp") {
+ if (!ArchRegNames)
+ EmitX8AsFP = true;
+ return true;
+ }
return false;
}
@@ -311,6 +320,13 @@ void RISCVInstPrinter::printVMaskReg(const MCInst *MI, unsigned OpNo,
}
const char *RISCVInstPrinter::getRegisterName(MCRegister Reg) {
+ // When PrintAliases is enabled, and EmitX8AsFP is enabled, x8 will be printed
+ // as fp instead of s0. Note that these similar registers are not replaced:
+ // - X8_H: used for f16 register in zhinx
+ // - X8_W: used for f32 register in zfinx
+ // - X8_X9: used for GPR Pair
+ if (!ArchRegNames && EmitX8AsFP && Reg == RISCV::X8)
+ return "fp";
return getRegisterName(Reg, ArchRegNames ? RISCV::NoRegAltName
: RISCV::ABIRegAltName);
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 53e88aa48556..86702bbe58f0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -125,8 +125,7 @@ bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName,
if (Token.starts_with("+")) {
EnabledExtensions.insert(NameValuePair->second);
} else if (EnabledExtensions.count(NameValuePair->second)) {
- if (std::find(Tokens.begin(), Tokens.end(), "+" + ExtensionName.str()) !=
- Tokens.end())
+ if (llvm::is_contained(Tokens, "+" + ExtensionName.str()))
return O.error(
"Extension cannot be allowed and disallowed at the same time: " +
ExtensionName.str());
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
index 156b40eb43a3..68b24dbe9f00 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
@@ -55,7 +55,7 @@ void WebAssemblyAsmTypeCheck::funcDecl(const wasm::WasmSignature &Sig) {
void WebAssemblyAsmTypeCheck::localDecl(
const SmallVectorImpl<wasm::ValType> &Locals) {
- LocalTypes.insert(LocalTypes.end(), Locals.begin(), Locals.end());
+ llvm::append_range(LocalTypes, Locals);
}
void WebAssemblyAsmTypeCheck::dumpTypeStack(Twine Msg) {
@@ -357,8 +357,7 @@ bool WebAssemblyAsmTypeCheck::checkTryTable(SMLoc ErrorLoc,
Opcode == wasm::WASM_OPCODE_CATCH_REF) {
if (!getSignature(ErrorLoc, Inst.getOperand(OpIdx++),
wasm::WASM_SYMBOL_TYPE_TAG, Sig))
- SentTypes.insert(SentTypes.end(), Sig->Params.begin(),
- Sig->Params.end());
+ llvm::append_range(SentTypes, Sig->Params);
else
Error = true;
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 767818107de8..cb23487e6fbe 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -177,11 +177,9 @@ public:
bool mayNeedRelaxation(const MCInst &Inst,
const MCSubtargetInfo &STI) const override;
- bool fixupNeedsRelaxationAdvanced(const MCAssembler &Asm,
- const MCFixup &Fixup, bool Resolved,
- uint64_t Value,
- const MCRelaxableFragment *DF,
- const bool WasForced) const override;
+ bool fixupNeedsRelaxationAdvanced(const MCAssembler &,
+ const MCFixup &, const MCValue &, uint64_t,
+ bool) const override;
void relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const override;
@@ -731,22 +729,22 @@ bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
MI.getOperand(MI.getNumOperands() - 1 - SkipOperands).isExpr());
}
-bool X86AsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &Asm,
+bool X86AsmBackend::fixupNeedsRelaxationAdvanced(const MCAssembler &,
const MCFixup &Fixup,
- bool Resolved, uint64_t Value,
- const MCRelaxableFragment *DF,
- const bool WasForced) const {
+ const MCValue &Target,
+ uint64_t Value,
+ bool Resolved) const {
// If resolved, relax if the value is too big for a (signed) i8.
+ //
+ // Currently, `jmp local@plt` relaxes JMP even if the offset is small,
+ // different from gas.
if (Resolved)
- return !isInt<8>(Value);
+ return !isInt<8>(Value) || Target.getSpecifier();
// Otherwise, relax unless there is a @ABS8 specifier.
- if (Fixup.getKind() == FK_Data_1) {
- MCValue Target;
- if (Fixup.getValue()->evaluateAsRelocatable(Target, &Asm) &&
- Target.getAddSym() && Target.getSpecifier() == X86MCExpr::VK_ABS8)
- return false;
- }
+ if (Fixup.getKind() == FK_Data_1 && Target.getAddSym() &&
+ Target.getSpecifier() == X86MCExpr::VK_ABS8)
+ return false;
return true;
}
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 38761e1fd7ee..577428cad6d6 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -338,7 +338,7 @@ def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
"Support AVX10.1 up to 256-bit instruction",
[FeatureCDI, FeatureVBMI, FeatureIFMA, FeatureVNNI,
FeatureBF16, FeatureVPOPCNTDQ, FeatureVBMI2, FeatureBITALG,
- FeatureVAES, FeatureVPCLMULQDQ, FeatureFP16]>;
+ FeatureFP16]>;
def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
"Support AVX10.1 up to 512-bit instruction",
[FeatureAVX10_1, FeatureEVEX512]>;
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index e4b7ed7cf9b6..2ae6dd6b3d1e 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -637,8 +637,7 @@ constexpr FeatureBitset ImpliedFeaturesAVXVNNI = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVX10_1 =
FeatureAVX512CD | FeatureAVX512VBMI | FeatureAVX512IFMA |
FeatureAVX512VNNI | FeatureAVX512BF16 | FeatureAVX512VPOPCNTDQ |
- FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureVAES | FeatureVPCLMULQDQ |
- FeatureAVX512FP16;
+ FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16;
constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 =
FeatureAVX10_1 | FeatureEVEX512;
constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1;
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index df1f6fddeba6..a5e0251277d8 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -3807,8 +3807,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
// Make sure we don't pick a previously existing caller edge of this
// Node, which would be processed on a different iteration of the
// outer loop over the saved CallerEdges.
- if (std::find(CallerEdges.begin(), CallerEdges.end(), E) !=
- CallerEdges.end())
+ if (llvm::is_contained(CallerEdges, E))
continue;
// The CallerAllocTypeForAlloc and CalleeEdgeAllocTypesForCallerEdge
// are updated further below for all cases where we just invoked
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 19bf81137aab..f1b225c0f238 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2645,7 +2645,8 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
!Builder.GetInsertBlock()->getParent()->hasFnAttribute(
Attribute::NoImplicitFloat)) {
Type *EltTy = CastOp->getType()->getScalarType();
- if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) {
+ if (EltTy->isFloatingPointTy() &&
+ APFloat::hasSignBitInMSB(EltTy->getFltSemantics())) {
Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp);
return new BitCastInst(FAbs, I.getType());
}
@@ -4058,7 +4059,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
!Builder.GetInsertBlock()->getParent()->hasFnAttribute(
Attribute::NoImplicitFloat)) {
Type *EltTy = CastOp->getType()->getScalarType();
- if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) {
+ if (EltTy->isFloatingPointTy() &&
+ APFloat::hasSignBitInMSB(EltTy->getFltSemantics())) {
Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp);
Value *FNegFAbs = Builder.CreateFNeg(FAbs);
return new BitCastInst(FNegFAbs, I.getType());
@@ -4860,7 +4862,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
!Builder.GetInsertBlock()->getParent()->hasFnAttribute(
Attribute::NoImplicitFloat)) {
Type *EltTy = CastOp->getType()->getScalarType();
- if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) {
+ if (EltTy->isFloatingPointTy() &&
+ APFloat::hasSignBitInMSB(EltTy->getFltSemantics())) {
Value *FNeg = Builder.CreateFNeg(CastOp);
return new BitCastInst(FNeg, I.getType());
}
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index d872a381050c..889b43a843be 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2294,10 +2294,14 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
AliasSetTracker AST(BatchAA);
auto IsPotentiallyPromotable = [L](const Instruction *I) {
- if (const auto *SI = dyn_cast<StoreInst>(I))
- return L->isLoopInvariant(SI->getPointerOperand());
- if (const auto *LI = dyn_cast<LoadInst>(I))
- return L->isLoopInvariant(LI->getPointerOperand());
+ if (const auto *SI = dyn_cast<StoreInst>(I)) {
+ const Value *PtrOp = SI->getPointerOperand();
+ return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
+ }
+ if (const auto *LI = dyn_cast<LoadInst>(I)) {
+ const Value *PtrOp = LI->getPointerOperand();
+ return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
+ }
return false;
};
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 60619dbe2f58..950344722b5c 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -754,9 +754,7 @@ public:
/// Erase \p Inst from both ShapeMap (if an entry exists) and erase \p Inst
/// itself.
void eraseFromParentAndRemoveFromShapeMap(Instruction *Inst) {
- auto Iter = ShapeMap.find(Inst);
- if (Iter != ShapeMap.end())
- ShapeMap.erase(Iter);
+ ShapeMap.erase(Inst);
Inst->eraseFromParent();
}
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 2f5cf45a1d3d..f98a69380464 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -3287,20 +3287,21 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
// Seed the liveness for each individual block
for (BasicBlock &BB : F) {
Data.KillSet[&BB] = computeKillSet(&BB, GC);
- Data.LiveSet[&BB].clear();
- computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB], GC);
+ auto &LiveSet = Data.LiveSet[&BB];
+ LiveSet.clear();
+ computeLiveInValues(BB.rbegin(), BB.rend(), LiveSet, GC);
#ifndef NDEBUG
for (Value *Kill : Data.KillSet[&BB])
assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
#endif
- Data.LiveOut[&BB] = SetVector<Value *>();
- computeLiveOutSeed(&BB, Data.LiveOut[&BB], GC);
- Data.LiveIn[&BB] = Data.LiveSet[&BB];
- Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]);
- Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]);
- if (!Data.LiveIn[&BB].empty())
+ auto &Out = Data.LiveOut[&BB] = SetVector<Value *>();
+ computeLiveOutSeed(&BB, Out, GC);
+ auto &In = Data.LiveIn[&BB] = Data.LiveSet[&BB];
+ In.set_union(Out);
+ In.set_subtract(Data.KillSet[&BB]);
+ if (!In.empty())
Worklist.insert_range(predecessors(&BB));
}
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 41bf202230e2..e25ec6c3b2a5 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -111,20 +111,23 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
Value *Ret = nullptr;
- // Check to see if there is already a cast!
- for (User *U : V->users()) {
- if (U->getType() != Ty)
- continue;
- CastInst *CI = dyn_cast<CastInst>(U);
- if (!CI || CI->getOpcode() != Op)
- continue;
+ if (!isa<Constant>(V)) {
+ // Check to see if there is already a cast!
+ for (User *U : V->users()) {
+ if (U->getType() != Ty)
+ continue;
+ CastInst *CI = dyn_cast<CastInst>(U);
+ if (!CI || CI->getOpcode() != Op)
+ continue;
- // Found a suitable cast that is at IP or comes before IP. Use it. Note that
- // the cast must also properly dominate the Builder's insertion point.
- if (IP->getParent() == CI->getParent() && &*BIP != CI &&
- (&*IP == CI || CI->comesBefore(&*IP))) {
- Ret = CI;
- break;
+ // Found a suitable cast that is at IP or comes before IP. Use it. Note
+ // that the cast must also properly dominate the Builder's insertion
+ // point.
+ if (IP->getParent() == CI->getParent() && &*BIP != CI &&
+ (&*IP == CI || CI->comesBefore(&*IP))) {
+ Ret = CI;
+ break;
+ }
}
}
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index eac7e7c209c9..7f53aa7d4f73 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -4087,9 +4087,7 @@ bool llvm::foldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
- if (!Cond ||
- (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
- !isa<SelectInst>(Cond)) ||
+ if (!Cond || !isa<CmpInst, BinaryOperator, SelectInst, TruncInst>(Cond) ||
Cond->getParent() != BB || !Cond->hasOneUse())
return false;
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 2d0027d97601..4e37c587dc97 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -3002,6 +3002,9 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBa
return nullptr;
Value *Arg = CI->getArgOperand(0);
+ if (isa<ConstantData>(Arg))
+ return nullptr;
+
SmallVector<CallInst *, 1> SinCalls;
SmallVector<CallInst *, 1> CosCalls;
SmallVector<CallInst *, 1> SinCosCalls;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0acca63503af..a28cda9fe62b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1373,21 +1373,6 @@ public:
return false;
}
- /// Returns true if we're required to use a scalar epilogue for at least
- /// the final iteration of the original loop for all VFs in \p Range.
- /// A scalar epilogue must either be required for all VFs in \p Range or for
- /// none.
- bool requiresScalarEpilogue(VFRange Range) const {
- auto RequiresScalarEpilogue = [this](ElementCount VF) {
- return requiresScalarEpilogue(VF.isVector());
- };
- bool IsRequired = all_of(Range, RequiresScalarEpilogue);
- assert(
- (IsRequired || none_of(Range, RequiresScalarEpilogue)) &&
- "all VFs in range must agree on whether a scalar epilogue is required");
- return IsRequired;
- }
-
/// Returns true if a scalar epilogue is not allowed due to optsize or a
/// loop hint annotation.
bool isScalarEpilogueAllowed() const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 2cd23efcf3ea..7934c47ee5ba 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -84,13 +84,15 @@ struct deferredval_ty {
/// whichever value m_VPValue(X) populated.
inline deferredval_ty m_Deferred(VPValue *const &V) { return V; }
-/// Match a specified integer value or vector of all elements of that
-/// value. \p BitWidth optionally specifies the bitwidth the matched constant
-/// must have. If it is 0, the matched constant can have any bitwidth.
-template <unsigned BitWidth = 0> struct specific_intval {
- APInt Val;
+/// Match an integer constant or vector of constants if Pred::isValue returns
+/// true for the APInt. \p BitWidth optionally specifies the bitwidth the
+/// matched constant must have. If it is 0, the matched constant can have any
+/// bitwidth.
+template <typename Pred, unsigned BitWidth = 0> struct int_pred_ty {
+ Pred P;
- specific_intval(APInt V) : Val(std::move(V)) {}
+ int_pred_ty(Pred P) : P(std::move(P)) {}
+ int_pred_ty() : P() {}
bool match(VPValue *VPV) const {
if (!VPV->isLiveIn())
@@ -108,17 +110,45 @@ template <unsigned BitWidth = 0> struct specific_intval {
if (BitWidth != 0 && CI->getBitWidth() != BitWidth)
return false;
- return APInt::isSameValue(CI->getValue(), Val);
+ return P.isValue(CI->getValue());
}
};
+/// Match a specified integer value or vector of all elements of that
+/// value. \p BitWidth optionally specifies the bitwidth the matched constant
+/// must have. If it is 0, the matched constant can have any bitwidth.
+struct is_specific_int {
+ APInt Val;
+
+ is_specific_int(APInt Val) : Val(std::move(Val)) {}
+
+ bool isValue(const APInt &C) const { return APInt::isSameValue(Val, C); }
+};
+
+template <unsigned Bitwidth = 0>
+using specific_intval = int_pred_ty<is_specific_int, Bitwidth>;
+
inline specific_intval<0> m_SpecificInt(uint64_t V) {
- return specific_intval<0>(APInt(64, V));
+ return specific_intval<0>(is_specific_int(APInt(64, V)));
}
-inline specific_intval<1> m_False() { return specific_intval<1>(APInt(64, 0)); }
+inline specific_intval<1> m_False() {
+ return specific_intval<1>(is_specific_int(APInt(64, 0)));
+}
-inline specific_intval<1> m_True() { return specific_intval<1>(APInt(64, 1)); }
+inline specific_intval<1> m_True() {
+ return specific_intval<1>(is_specific_int(APInt(64, 1)));
+}
+
+struct is_all_ones {
+ bool isValue(const APInt &C) const { return C.isAllOnes(); }
+};
+
+/// Match an integer or vector with all bits set.
+/// For vectors, this includes constants with undefined elements.
+inline int_pred_ty<is_all_ones> m_AllOnes() {
+ return int_pred_ty<is_all_ones>();
+}
/// Matching combinators
template <typename LTy, typename RTy> struct match_combine_or {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 67a55aa67c97..dc9f953f7447 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -994,6 +994,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
+ // OR x, 1 -> 1.
+ if (match(&R, m_c_BinaryOr(m_VPValue(X), m_AllOnes()))) {
+ R.getVPSingleValue()->replaceAllUsesWith(
+ R.getOperand(0) == X ? R.getOperand(1) : R.getOperand(0));
+ R.eraseFromParent();
+ return;
+ }
+
if (match(&R, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
return R.getVPSingleValue()->replaceAllUsesWith(X);
diff --git a/llvm/test/Analysis/ScalarEvolution/pr135531.ll b/llvm/test/Analysis/ScalarEvolution/pr135531.ll
new file mode 100644
index 000000000000..e172d56d3a51
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/pr135531.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -disable-output -passes='print<scalar-evolution>' < %s 2>&1 | FileCheck %s
+
+define i32 @pr135511(i32 %x) {
+; CHECK-LABEL: 'pr135511'
+; CHECK-NEXT: Classifying expressions for: @pr135511
+; CHECK-NEXT: %and = and i32 %x, 16382
+; CHECK-NEXT: --> (2 * (zext i13 (trunc i32 (%x /u 2) to i13) to i32))<nuw><nsw> U: [0,16383) S: [0,16383)
+; CHECK-NEXT: %neg = sub nsw i32 0, %and
+; CHECK-NEXT: --> (-2 * (zext i13 (trunc i32 (%x /u 2) to i13) to i32))<nsw> U: [0,-1) S: [-16382,1)
+; CHECK-NEXT: %res = and i32 %neg, 268431360
+; CHECK-NEXT: --> (4096 * (zext i16 (trunc i32 ((-1 * (zext i13 (trunc i32 (%x /u 2) to i13) to i32))<nsw> /u 2048) to i16) to i32))<nuw><nsw> U: [0,268431361) S: [0,268431361)
+; CHECK-NEXT: Determining loop execution counts for: @pr135511
+;
+ %and = and i32 %x, 16382
+ %neg = sub nsw i32 0, %and
+ %res = and i32 %neg, 268431360
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sve-fill-spill-pair.ll b/llvm/test/CodeGen/AArch64/aarch64-sve-fill-spill-pair.ll
new file mode 100644
index 000000000000..503ead4eba2d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-sve-fill-spill-pair.ll
@@ -0,0 +1,283 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64_be-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefixes=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+sve,ldp-aligned-only -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefixes=CHECK-LDPALIGNEDONLY
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+sve,stp-aligned-only -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefixes=CHECK-STPALIGNEDONLY
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK-OFF
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256 < %s | FileCheck %s --check-prefixes=CHECK-OFF
+
+define void @nxv16i8(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: stp q0, q1, [x1]
+; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: nxv16i8:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ptrue p0.b
+; CHECK-BE-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-BE-NEXT: ld1b { z1.b }, p0/z, [x0, #1, mul vl]
+; CHECK-BE-NEXT: st1b { z0.b }, p0, [x1]
+; CHECK-BE-NEXT: st1b { z1.b }, p0, [x1, #1, mul vl]
+; CHECK-BE-NEXT: ret
+;
+; CHECK-LDPALIGNEDONLY-LABEL: nxv16i8:
+; CHECK-LDPALIGNEDONLY: // %bb.0:
+; CHECK-LDPALIGNEDONLY-NEXT: ldr z0, [x0]
+; CHECK-LDPALIGNEDONLY-NEXT: ldr z1, [x0, #1, mul vl]
+; CHECK-LDPALIGNEDONLY-NEXT: stp q0, q1, [x1]
+; CHECK-LDPALIGNEDONLY-NEXT: ret
+;
+; CHECK-STPALIGNEDONLY-LABEL: nxv16i8:
+; CHECK-STPALIGNEDONLY: // %bb.0:
+; CHECK-STPALIGNEDONLY-NEXT: ldp q0, q1, [x0]
+; CHECK-STPALIGNEDONLY-NEXT: str z0, [x1]
+; CHECK-STPALIGNEDONLY-NEXT: str z1, [x1, #1, mul vl]
+; CHECK-STPALIGNEDONLY-NEXT: ret
+;
+; CHECK-OFF-LABEL: nxv16i8:
+; CHECK-OFF: // %bb.0:
+; CHECK-OFF-NEXT: ldr z0, [x0]
+; CHECK-OFF-NEXT: ldr z1, [x0, #1, mul vl]
+; CHECK-OFF-NEXT: str z0, [x1]
+; CHECK-OFF-NEXT: str z1, [x1, #1, mul vl]
+; CHECK-OFF-NEXT: ret
+ %vscale = tail call i64 @llvm.vscale()
+ %vl = shl nuw nsw i64 %vscale, 4
+ %ldptr2 = getelementptr inbounds nuw i8, ptr %ldptr, i64 %vl
+ %stptr2 = getelementptr inbounds nuw i8, ptr %stptr, i64 %vl
+ %ld1 = load <vscale x 16 x i8>, ptr %ldptr, align 1
+ %ld2 = load <vscale x 16 x i8>, ptr %ldptr2, align 1
+ store <vscale x 16 x i8> %ld1, ptr %stptr, align 1
+ store <vscale x 16 x i8> %ld2, ptr %stptr2, align 1
+ ret void
+}
+
+define void @nxv16i8_max_range(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv16i8_max_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0, #-1024]
+; CHECK-NEXT: stp q0, q1, [x1, #1008]
+; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: nxv16i8_max_range:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: rdvl x8, #1
+; CHECK-BE-NEXT: mov x9, #-1008 // =0xfffffffffffffc10
+; CHECK-BE-NEXT: mov x10, #-1024 // =0xfffffffffffffc00
+; CHECK-BE-NEXT: lsr x8, x8, #4
+; CHECK-BE-NEXT: mov w11, #1008 // =0x3f0
+; CHECK-BE-NEXT: mov w12, #1024 // =0x400
+; CHECK-BE-NEXT: ptrue p0.b
+; CHECK-BE-NEXT: mul x9, x8, x9
+; CHECK-BE-NEXT: mul x10, x8, x10
+; CHECK-BE-NEXT: mul x11, x8, x11
+; CHECK-BE-NEXT: ld1b { z1.b }, p0/z, [x0, x9]
+; CHECK-BE-NEXT: mul x8, x8, x12
+; CHECK-BE-NEXT: ld1b { z0.b }, p0/z, [x0, x10]
+; CHECK-BE-NEXT: st1b { z0.b }, p0, [x1, x11]
+; CHECK-BE-NEXT: st1b { z1.b }, p0, [x1, x8]
+; CHECK-BE-NEXT: ret
+;
+; CHECK-LDPALIGNEDONLY-LABEL: nxv16i8_max_range:
+; CHECK-LDPALIGNEDONLY: // %bb.0:
+; CHECK-LDPALIGNEDONLY-NEXT: ldr z0, [x0, #-64, mul vl]
+; CHECK-LDPALIGNEDONLY-NEXT: ldr z1, [x0, #-63, mul vl]
+; CHECK-LDPALIGNEDONLY-NEXT: stp q0, q1, [x1, #1008]
+; CHECK-LDPALIGNEDONLY-NEXT: ret
+;
+; CHECK-STPALIGNEDONLY-LABEL: nxv16i8_max_range:
+; CHECK-STPALIGNEDONLY: // %bb.0:
+; CHECK-STPALIGNEDONLY-NEXT: ldp q0, q1, [x0, #-1024]
+; CHECK-STPALIGNEDONLY-NEXT: str z0, [x1, #63, mul vl]
+; CHECK-STPALIGNEDONLY-NEXT: str z1, [x1, #64, mul vl]
+; CHECK-STPALIGNEDONLY-NEXT: ret
+;
+; CHECK-OFF-LABEL: nxv16i8_max_range:
+; CHECK-OFF: // %bb.0:
+; CHECK-OFF-NEXT: ldr z0, [x0, #-64, mul vl]
+; CHECK-OFF-NEXT: ldr z1, [x0, #-63, mul vl]
+; CHECK-OFF-NEXT: str z0, [x1, #63, mul vl]
+; CHECK-OFF-NEXT: str z1, [x1, #64, mul vl]
+; CHECK-OFF-NEXT: ret
+ %vscale = tail call i64 @llvm.vscale()
+ %ldoff1 = mul i64 %vscale, -1024
+ %ldoff2 = mul i64 %vscale, -1008
+ %stoff1 = mul i64 %vscale, 1008
+ %stoff2 = mul i64 %vscale, 1024
+ %ldptr1 = getelementptr inbounds nuw i8, ptr %ldptr, i64 %ldoff1
+ %ldptr2 = getelementptr inbounds nuw i8, ptr %ldptr, i64 %ldoff2
+ %stptr1 = getelementptr inbounds nuw i8, ptr %stptr, i64 %stoff1
+ %stptr2 = getelementptr inbounds nuw i8, ptr %stptr, i64 %stoff2
+ %ld1 = load <vscale x 16 x i8>, ptr %ldptr1, align 1
+ %ld2 = load <vscale x 16 x i8>, ptr %ldptr2, align 1
+ store <vscale x 16 x i8> %ld1, ptr %stptr1, align 1
+ store <vscale x 16 x i8> %ld2, ptr %stptr2, align 1
+ ret void
+}
+
+define void @nxv16i8_outside_range(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv16i8_outside_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0, #-65, mul vl]
+; CHECK-NEXT: ldr z1, [x0, #-64, mul vl]
+; CHECK-NEXT: str z0, [x1, #64, mul vl]
+; CHECK-NEXT: str z1, [x1, #65, mul vl]
+; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: nxv16i8_outside_range:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: rdvl x8, #1
+; CHECK-BE-NEXT: mov x9, #-1040 // =0xfffffffffffffbf0
+; CHECK-BE-NEXT: mov x10, #-1024 // =0xfffffffffffffc00
+; CHECK-BE-NEXT: lsr x8, x8, #4
+; CHECK-BE-NEXT: mov w11, #1024 // =0x400
+; CHECK-BE-NEXT: mov w12, #1040 // =0x410
+; CHECK-BE-NEXT: ptrue p0.b
+; CHECK-BE-NEXT: mul x9, x8, x9
+; CHECK-BE-NEXT: mul x10, x8, x10
+; CHECK-BE-NEXT: mul x11, x8, x11
+; CHECK-BE-NEXT: ld1b { z0.b }, p0/z, [x0, x9]
+; CHECK-BE-NEXT: mul x8, x8, x12
+; CHECK-BE-NEXT: ld1b { z1.b }, p0/z, [x0, x10]
+; CHECK-BE-NEXT: st1b { z0.b }, p0, [x1, x11]
+; CHECK-BE-NEXT: st1b { z1.b }, p0, [x1, x8]
+; CHECK-BE-NEXT: ret
+;
+; CHECK-LDPALIGNEDONLY-LABEL: nxv16i8_outside_range:
+; CHECK-LDPALIGNEDONLY: // %bb.0:
+; CHECK-LDPALIGNEDONLY-NEXT: ldr z0, [x0, #-65, mul vl]
+; CHECK-LDPALIGNEDONLY-NEXT: ldr z1, [x0, #-64, mul vl]
+; CHECK-LDPALIGNEDONLY-NEXT: str z0, [x1, #64, mul vl]
+; CHECK-LDPALIGNEDONLY-NEXT: str z1, [x1, #65, mul vl]
+; CHECK-LDPALIGNEDONLY-NEXT: ret
+;
+; CHECK-STPALIGNEDONLY-LABEL: nxv16i8_outside_range:
+; CHECK-STPALIGNEDONLY: // %bb.0:
+; CHECK-STPALIGNEDONLY-NEXT: ldr z0, [x0, #-65, mul vl]
+; CHECK-STPALIGNEDONLY-NEXT: ldr z1, [x0, #-64, mul vl]
+; CHECK-STPALIGNEDONLY-NEXT: str z0, [x1, #64, mul vl]
+; CHECK-STPALIGNEDONLY-NEXT: str z1, [x1, #65, mul vl]
+; CHECK-STPALIGNEDONLY-NEXT: ret
+;
+; CHECK-OFF-LABEL: nxv16i8_outside_range:
+; CHECK-OFF: // %bb.0:
+; CHECK-OFF-NEXT: ldr z0, [x0, #-65, mul vl]
+; CHECK-OFF-NEXT: ldr z1, [x0, #-64, mul vl]
+; CHECK-OFF-NEXT: str z0, [x1, #64, mul vl]
+; CHECK-OFF-NEXT: str z1, [x1, #65, mul vl]
+; CHECK-OFF-NEXT: ret
+ %vscale = tail call i64 @llvm.vscale()
+ %ldoff1 = mul i64 %vscale, -1040
+ %ldoff2 = mul i64 %vscale, -1024
+ %stoff1 = mul i64 %vscale, 1024
+ %stoff2 = mul i64 %vscale, 1040
+ %ldptr1 = getelementptr inbounds nuw i8, ptr %ldptr, i64 %ldoff1
+ %ldptr2 = getelementptr inbounds nuw i8, ptr %ldptr, i64 %ldoff2
+ %stptr1 = getelementptr inbounds nuw i8, ptr %stptr, i64 %stoff1
+ %stptr2 = getelementptr inbounds nuw i8, ptr %stptr, i64 %stoff2
+ %ld1 = load <vscale x 16 x i8>, ptr %ldptr1, align 1
+ %ld2 = load <vscale x 16 x i8>, ptr %ldptr2, align 1
+ store <vscale x 16 x i8> %ld1, ptr %stptr1, align 1
+ store <vscale x 16 x i8> %ld2, ptr %stptr2, align 1
+ ret void
+}
+
+define void @nxv16i8_2vl_stride(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv16i8_2vl_stride:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ldr z1, [x0, #2, mul vl]
+; CHECK-NEXT: str z0, [x1]
+; CHECK-NEXT: str z1, [x1, #2, mul vl]
+; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: nxv16i8_2vl_stride:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ptrue p0.b
+; CHECK-BE-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-BE-NEXT: ld1b { z1.b }, p0/z, [x0, #2, mul vl]
+; CHECK-BE-NEXT: st1b { z0.b }, p0, [x1]
+; CHECK-BE-NEXT: st1b { z1.b }, p0, [x1, #2, mul vl]
+; CHECK-BE-NEXT: ret
+;
+; CHECK-LDPALIGNEDONLY-LABEL: nxv16i8_2vl_stride:
+; CHECK-LDPALIGNEDONLY: // %bb.0:
+; CHECK-LDPALIGNEDONLY-NEXT: ldr z0, [x0]
+; CHECK-LDPALIGNEDONLY-NEXT: ldr z1, [x0, #2, mul vl]
+; CHECK-LDPALIGNEDONLY-NEXT: str z0, [x1]
+; CHECK-LDPALIGNEDONLY-NEXT: str z1, [x1, #2, mul vl]
+; CHECK-LDPALIGNEDONLY-NEXT: ret
+;
+; CHECK-STPALIGNEDONLY-LABEL: nxv16i8_2vl_stride:
+; CHECK-STPALIGNEDONLY: // %bb.0:
+; CHECK-STPALIGNEDONLY-NEXT: ldr z0, [x0]
+; CHECK-STPALIGNEDONLY-NEXT: ldr z1, [x0, #2, mul vl]
+; CHECK-STPALIGNEDONLY-NEXT: str z0, [x1]
+; CHECK-STPALIGNEDONLY-NEXT: str z1, [x1, #2, mul vl]
+; CHECK-STPALIGNEDONLY-NEXT: ret
+;
+; CHECK-OFF-LABEL: nxv16i8_2vl_stride:
+; CHECK-OFF: // %bb.0:
+; CHECK-OFF-NEXT: ldr z0, [x0]
+; CHECK-OFF-NEXT: ldr z1, [x0, #2, mul vl]
+; CHECK-OFF-NEXT: str z0, [x1]
+; CHECK-OFF-NEXT: str z1, [x1, #2, mul vl]
+; CHECK-OFF-NEXT: ret
+ %vscale = tail call i64 @llvm.vscale()
+ %vl = shl nuw nsw i64 %vscale, 5
+ %ldptr2 = getelementptr inbounds nuw i8, ptr %ldptr, i64 %vl
+ %stptr2 = getelementptr inbounds nuw i8, ptr %stptr, i64 %vl
+ %ld1 = load <vscale x 16 x i8>, ptr %ldptr, align 1
+ %ld2 = load <vscale x 16 x i8>, ptr %ldptr2, align 1
+ store <vscale x 16 x i8> %ld1, ptr %stptr, align 1
+ store <vscale x 16 x i8> %ld2, ptr %stptr2, align 1
+ ret void
+}
+
+define void @nxv2f64_32b_aligned(ptr %ldptr, ptr %stptr) {
+; CHECK-LABEL: nxv2f64_32b_aligned:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: stp q0, q1, [x1]
+; CHECK-NEXT: ret
+;
+; CHECK-BE-LABEL: nxv2f64_32b_aligned:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ptrue p0.d
+; CHECK-BE-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-BE-NEXT: ld1d { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-BE-NEXT: st1d { z0.d }, p0, [x1]
+; CHECK-BE-NEXT: st1d { z1.d }, p0, [x1, #1, mul vl]
+; CHECK-BE-NEXT: ret
+;
+; CHECK-LDPALIGNEDONLY-LABEL: nxv2f64_32b_aligned:
+; CHECK-LDPALIGNEDONLY: // %bb.0:
+; CHECK-LDPALIGNEDONLY-NEXT: ldp q0, q1, [x0]
+; CHECK-LDPALIGNEDONLY-NEXT: stp q0, q1, [x1]
+; CHECK-LDPALIGNEDONLY-NEXT: ret
+;
+; CHECK-STPALIGNEDONLY-LABEL: nxv2f64_32b_aligned:
+; CHECK-STPALIGNEDONLY: // %bb.0:
+; CHECK-STPALIGNEDONLY-NEXT: ldp q0, q1, [x0]
+; CHECK-STPALIGNEDONLY-NEXT: stp q0, q1, [x1]
+; CHECK-STPALIGNEDONLY-NEXT: ret
+;
+; CHECK-OFF-LABEL: nxv2f64_32b_aligned:
+; CHECK-OFF: // %bb.0:
+; CHECK-OFF-NEXT: ldr z0, [x0]
+; CHECK-OFF-NEXT: ldr z1, [x0, #1, mul vl]
+; CHECK-OFF-NEXT: str z0, [x1]
+; CHECK-OFF-NEXT: str z1, [x1, #1, mul vl]
+; CHECK-OFF-NEXT: ret
+ %vscale = tail call i64 @llvm.vscale()
+ %vl = shl nuw nsw i64 %vscale, 4
+ %ldptr2 = getelementptr inbounds nuw i8, ptr %ldptr, i64 %vl
+ %stptr2 = getelementptr inbounds nuw i8, ptr %stptr, i64 %vl
+ %ld1 = load <vscale x 2 x double>, ptr %ldptr, align 32
+ %ld2 = load <vscale x 2 x double>, ptr %ldptr2, align 32
+ store <vscale x 2 x double> %ld1, ptr %stptr, align 32
+ store <vscale x 2 x double> %ld2, ptr %stptr2, align 32
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/branch-relax-block-size.mir b/llvm/test/CodeGen/AArch64/branch-relax-block-size.mir
index 2684a550d921..6f07d1b2a6ea 100644
--- a/llvm/test/CodeGen/AArch64/branch-relax-block-size.mir
+++ b/llvm/test/CodeGen/AArch64/branch-relax-block-size.mir
@@ -1,5 +1,6 @@
# REQUIRES: asserts
# RUN: llc -mtriple=aarch64--linux-gnu -run-pass=branch-relaxation -debug-only=branch-relaxation %s -o /dev/null 2>&1 | FileCheck %s
+# RUN: llc -mtriple=aarch64--linux-gnu -passes=branch-relaxation -debug-only=branch-relaxation %s -o /dev/null 2>&1 | FileCheck %s
# Ensure meta instructions (e.g. CFI_INSTRUCTION) don't contribute to the code
# size of a basic block.
diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir
index db88bf0044a5..000246ad8299 100644
--- a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir
+++ b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir
@@ -1,6 +1,8 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck %s
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck --check-prefix=INDIRECT %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -passes branch-relaxation -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck --check-prefix=INDIRECT %s
+
--- |
declare i32 @bar()
declare i32 @baz()
diff --git a/llvm/test/CodeGen/AArch64/peephole-orr.mir b/llvm/test/CodeGen/AArch64/peephole-orr.mir
index 3431676438bd..f718328ecf2d 100644
--- a/llvm/test/CodeGen/AArch64/peephole-orr.mir
+++ b/llvm/test/CodeGen/AArch64/peephole-orr.mir
@@ -1,6 +1,49 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
-
+---
+name: copy_fpr128_gpr32
+body: |
+ bb.0:
+ liveins: $q0
+ ; CHECK-LABEL: name: copy_fpr128_gpr32
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub
+ ; CHECK-NEXT: [[FMOVSWr:%[0-9]+]]:gpr32 = FMOVSWr [[COPY1]]
+ %0:fpr128 = COPY $q0
+ %1:gpr32 = COPY %0.ssub:fpr128
+ %2:gpr32 = ORRWrs $wzr, killed %1:gpr32, 0
+...
+---
+name: copy_fpr32_gpr32
+body: |
+ bb.0:
+ liveins: $s0
+ ; CHECK-LABEL: name: copy_fpr32_gpr32
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+ ; CHECK-NEXT: [[FMOVSWr:%[0-9]+]]:gpr32 = FMOVSWr [[COPY]]
+ %0:fpr32 = COPY $s0
+ %1:gpr32 = COPY %0:fpr32
+ %2:gpr32 = ORRWrs $wzr, killed %1:gpr32, 0
+...
+---
+name: copy_zpr_gpr32
+body: |
+ bb.0:
+ liveins: $z0
+ ; CHECK-LABEL: name: copy_zpr_gpr32
+ ; CHECK: liveins: $z0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:zpr = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub
+ ; CHECK-NEXT: [[FMOVSWr:%[0-9]+]]:gpr32 = FMOVSWr [[COPY1]]
+ %0:zpr = COPY $z0
+ %1:gpr32 = COPY %0.ssub:zpr
+ %2:gpr32 = ORRWrs $wzr, killed %1:gpr32, 0
+...
---
name: copy_multiple_uses
tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AArch64/sve-vls-ldst-opt.mir b/llvm/test/CodeGen/AArch64/sve-vls-ldst-opt.mir
new file mode 100644
index 000000000000..49453bc17891
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vls-ldst-opt.mir
@@ -0,0 +1,74 @@
+# RUN: llc -mtriple=aarch64-unknown-linux -mattr=+sve -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 -run-pass=aarch64-ldst-opt -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: pair-sve-fill-spill
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ renamable $z0 = LDR_ZXI renamable $x0, 0 :: (load (<vscale x 1 x s128>))
+ renamable $z1 = LDR_ZXI killed renamable $x0, 1 :: (load (<vscale x 1 x s128>))
+ STR_ZXI killed renamable $z0, renamable $x1, 0 :: (store (<vscale x 1 x s128>))
+ STR_ZXI killed renamable $z1, killed renamable $x1, 1 :: (store (<vscale x 1 x s128>))
+ RET_ReallyLR
+...
+# CHECK-LABEL: name: pair-sve-fill-spill
+# CHECK: $q0, $q1 = LDPQi renamable $x0, 0 :: (load (<vscale x 1 x s128>))
+# CHECK: STPQi killed $q0, killed $q1, renamable $x1, 0 :: (store (<vscale x 1 x s128>))
+---
+name: do-not-pair-sve-with-neon-scaled
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; SVE LDR + Neon LDR
+ renamable $z0 = LDR_ZXI renamable $x0, 0 :: (load (<vscale x 1 x s128>))
+ renamable $q1 = LDRQui renamable $x0, 1 :: (load (s128))
+ ; Neon LDR + SVE LDR
+ renamable $q2 = LDRQui renamable $x0, 3 :: (load (s128))
+ renamable $z3 = LDR_ZXI renamable $x0, 4 :: (load (<vscale x 1 x s128>))
+ ; SVE STR + Neon STR
+ STR_ZXI killed renamable $z0, renamable $x1, 0 :: (store (<vscale x 1 x s128>))
+ STRQui killed renamable $q1, renamable $x1, 1 :: (store (s128))
+ ; Neon STR + SVE STR
+ STRQui killed renamable $q2, renamable $x1, 3 :: (store (s128))
+ STR_ZXI killed renamable $z3, renamable $x1, 4 :: (store (<vscale x 1 x s128>))
+ RET_ReallyLR
+...
+# CHECK-LABEL: name: do-not-pair-sve-with-neon-scaled
+# CHECK: renamable $z0 = LDR_ZXI renamable $x0, 0 :: (load (<vscale x 1 x s128>))
+# CHECK: renamable $q1 = LDRQui renamable $x0, 1 :: (load (s128))
+# CHECK: renamable $q2 = LDRQui renamable $x0, 3 :: (load (s128))
+# CHECK: renamable $z3 = LDR_ZXI renamable $x0, 4 :: (load (<vscale x 1 x s128>))
+# CHECK: STR_ZXI killed renamable $z0, renamable $x1, 0 :: (store (<vscale x 1 x s128>))
+# CHECK: STRQui killed renamable $q1, renamable $x1, 1 :: (store (s128))
+# CHECK: STRQui killed renamable $q2, renamable $x1, 3 :: (store (s128))
+# CHECK: STR_ZXI killed renamable $z3, renamable $x1, 4 :: (store (<vscale x 1 x s128>))
+---
+name: do-not-pair-sve-with-neon-unscaled
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; SVE LDR + Neon LDUR
+ renamable $z0 = LDR_ZXI renamable $x0, 0 :: (load (<vscale x 1 x s128>))
+ renamable $q1 = LDURQi renamable $x0, 16 :: (load (s128))
+ ; Neon LDUR + SVE LDR
+ renamable $q2 = LDURQi renamable $x0, 48 :: (load (s128))
+ renamable $z3 = LDR_ZXI renamable $x0, 4 :: (load (<vscale x 1 x s128>))
+ ; SVE STR + Neon STUR
+ STR_ZXI killed renamable $z0, renamable $x1, 0 :: (store (<vscale x 1 x s128>))
+ STURQi killed renamable $q1, renamable $x1, 16 :: (store (s128))
+ ; Neon STUR + SVE STR
+ STURQi killed renamable $q2, renamable $x1, 48 :: (store (s128))
+ STR_ZXI killed renamable $z3, renamable $x1, 4 :: (store (<vscale x 1 x s128>))
+ RET_ReallyLR
+...
+# CHECK-LABEL: name: do-not-pair-sve-with-neon-unscaled
+# CHECK: renamable $z0 = LDR_ZXI renamable $x0, 0 :: (load (<vscale x 1 x s128>))
+# CHECK: renamable $q1 = LDURQi renamable $x0, 16 :: (load (s128))
+# CHECK: renamable $q2 = LDURQi renamable $x0, 48 :: (load (s128))
+# CHECK: renamable $z3 = LDR_ZXI renamable $x0, 4 :: (load (<vscale x 1 x s128>))
+# CHECK: STR_ZXI killed renamable $z0, renamable $x1, 0 :: (store (<vscale x 1 x s128>))
+# CHECK: STURQi killed renamable $q1, renamable $x1, 16 :: (store (s128))
+# CHECK: STURQi killed renamable $q2, renamable $x1, 48 :: (store (s128))
+# CHECK: STR_ZXI killed renamable $z3, renamable $x1, 4 :: (store (<vscale x 1 x s128>))
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir b/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir
index e4d9fbfb1705..474ba71b0eba 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --amdgpu-s-branch-bits=5 -run-pass branch-relaxation %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --amdgpu-s-branch-bits=5 -passes=branch-relaxation %s -o - | FileCheck %s
---
name: branch_no_terminators
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-non-byte-sizes.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-non-byte-sizes.ll
new file mode 100644
index 000000000000..4095347d7862
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-non-byte-sizes.ll
@@ -0,0 +1,204 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+; Check that types where the store/allocation sizes don't match the type size
+; don't crash.
+
+
+define <7 x i9> @load_elem_i9_access_7xi9() {
+; CHECK-LABEL: @load_elem_i9_access_7xi9(
+; CHECK-NEXT: [[P:%.*]] = alloca <16 x i9>, align 1, addrspace(5)
+; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
+; CHECK-NEXT: [[L:%.*]] = load <7 x i9>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: ret <7 x i9> [[L]]
+;
+ %p = alloca <16 x i9>, align 1, addrspace(5)
+ %g = getelementptr i8, ptr addrspace(5) %p, i64 4
+ %l = load <7 x i9>, ptr addrspace(5) %g, align 1
+ ret <7 x i9> %l
+}
+
+define <8 x i1> @load_elem_i1_access_8xi1() {
+; CHECK-LABEL: @load_elem_i1_access_8xi1(
+; CHECK-NEXT: [[P:%.*]] = alloca <16 x i1>, align 1, addrspace(5)
+; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
+; CHECK-NEXT: [[L:%.*]] = load <8 x i1>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: ret <8 x i1> [[L]]
+;
+ %p = alloca <16 x i1>, align 1, addrspace(5)
+ %g = getelementptr i8, ptr addrspace(5) %p, i64 4
+ %l = load <8 x i1>, ptr addrspace(5) %g, align 1
+ ret <8 x i1> %l
+}
+
+define <3 x i1> @load_elem_i1_access_3xi1() {
+; CHECK-LABEL: @load_elem_i1_access_3xi1(
+; CHECK-NEXT: [[P:%.*]] = alloca <16 x i1>, align 1, addrspace(5)
+; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
+; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: ret <3 x i1> [[L]]
+;
+ %p = alloca <16 x i1>, align 1, addrspace(5)
+ %g = getelementptr i8, ptr addrspace(5) %p, i64 4
+ %l = load <3 x i1>, ptr addrspace(5) %g, align 1
+ ret <3 x i1> %l
+}
+
+define <3 x i1> @load_elem_i8_access_3xi1() {
+; CHECK-LABEL: @load_elem_i8_access_3xi1(
+; CHECK-NEXT: [[P:%.*]] = alloca <8 x i8>, align 1, addrspace(5)
+; CHECK-NEXT: store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) [[P]], align 1
+; CHECK-NEXT: [[G:%.*]] = getelementptr <4 x i8>, ptr addrspace(5) [[P]], i64 1
+; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: ret <3 x i1> [[L]]
+;
+ %p = alloca <8 x i8>, align 1, addrspace(5)
+ store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1
+ %g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1
+ %l = load <3 x i1>, ptr addrspace(5) %g, align 1
+ ret <3 x i1> %l
+}
+
+; This one is actually not problematic.
+define <8 x i1> @load_elem_i8_access_8xi1() {
+; CHECK-LABEL: @load_elem_i8_access_8xi1(
+; CHECK-NEXT: [[P:%.*]] = freeze <8 x i8> poison
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+ %p = alloca <8 x i8>, align 1, addrspace(5)
+ store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1
+ %g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1
+ %l = load <8 x i1>, ptr addrspace(5) %g, align 1
+ ret <8 x i1> %l
+}
+
+define <8 x i1> @storeload_elem_i1_access_8xi1() {
+; CHECK-LABEL: @storeload_elem_i1_access_8xi1(
+; CHECK-NEXT: [[P:%.*]] = alloca <16 x i1>, align 1, addrspace(5)
+; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
+; CHECK-NEXT: store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: [[L:%.*]] = load <8 x i1>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: ret <8 x i1> [[L]]
+;
+ %p = alloca <16 x i1>, align 1, addrspace(5)
+ %g = getelementptr i8, ptr addrspace(5) %p, i64 4
+ store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) %g, align 1
+ %l = load <8 x i1>, ptr addrspace(5) %g, align 1
+ ret <8 x i1> %l
+}
+
+define <3 x i1> @storeload_elem_i1_access_3xi1() {
+; CHECK-LABEL: @storeload_elem_i1_access_3xi1(
+; CHECK-NEXT: [[P:%.*]] = alloca <16 x i1>, align 1, addrspace(5)
+; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
+; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: ret <3 x i1> [[L]]
+;
+ %p = alloca <16 x i1>, align 1, addrspace(5)
+ %g = getelementptr i8, ptr addrspace(5) %p, i64 4
+ store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) %g, align 1
+ %l = load <3 x i1>, ptr addrspace(5) %g, align 1
+ ret <3 x i1> %l
+}
+
+define <3 x i1> @storeload_elem_i8_access_3xi1() {
+; CHECK-LABEL: @storeload_elem_i8_access_3xi1(
+; CHECK-NEXT: [[P:%.*]] = alloca <8 x i8>, align 1, addrspace(5)
+; CHECK-NEXT: store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) [[P]], align 1
+; CHECK-NEXT: [[G:%.*]] = getelementptr <4 x i8>, ptr addrspace(5) [[P]], i64 1
+; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: ret <3 x i1> [[L]]
+;
+ %p = alloca <8 x i8>, align 1, addrspace(5)
+ store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1
+ %g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1
+ store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) %g, align 1
+ %l = load <3 x i1>, ptr addrspace(5) %g, align 1
+ ret <3 x i1> %l
+}
+
+; This one is actually not problematic.
+define <8 x i1> @storeload_elem_i8_access_8xi1() {
+; CHECK-LABEL: @storeload_elem_i8_access_8xi1(
+; CHECK-NEXT: [[P:%.*]] = freeze <8 x i8> poison
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+ %p = alloca <8 x i8>, align 1, addrspace(5)
+ store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1
+ %g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1
+ store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) %g, align 1
+ %l = load <8 x i1>, ptr addrspace(5) %g, align 1
+ ret <8 x i1> %l
+}
+
+define <8 x i1> @array_of_vec_elem_i1_access_8xi1() {
+; CHECK-LABEL: @array_of_vec_elem_i1_access_8xi1(
+; CHECK-NEXT: [[P:%.*]] = alloca [2 x <16 x i1>], align 1, addrspace(5)
+; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
+; CHECK-NEXT: store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: [[L:%.*]] = load <8 x i1>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: ret <8 x i1> [[L]]
+;
+ %p = alloca [2 x <16 x i1>], align 1, addrspace(5)
+ %g = getelementptr i8, ptr addrspace(5) %p, i64 4
+ store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) %g, align 1
+ %l = load <8 x i1>, ptr addrspace(5) %g, align 1
+ ret <8 x i1> %l
+}
+
+define <3 x i1> @array_of_vec_elem_i1_access_3xi1() {
+; CHECK-LABEL: @array_of_vec_elem_i1_access_3xi1(
+; CHECK-NEXT: [[P:%.*]] = alloca [2 x <16 x i1>], align 1, addrspace(5)
+; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
+; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: ret <3 x i1> [[L]]
+;
+ %p = alloca [2 x <16 x i1>], align 1, addrspace(5)
+ %g = getelementptr i8, ptr addrspace(5) %p, i64 4
+ store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) %g, align 1
+ %l = load <3 x i1>, ptr addrspace(5) %g, align 1
+ ret <3 x i1> %l
+}
+
+define <3 x i1> @array_of_vec_elem_i8_access_3xi1() {
+; CHECK-LABEL: @array_of_vec_elem_i8_access_3xi1(
+; CHECK-NEXT: [[P:%.*]] = alloca [2 x <8 x i8>], align 1, addrspace(5)
+; CHECK-NEXT: store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) [[P]], align 1
+; CHECK-NEXT: [[G:%.*]] = getelementptr <4 x i8>, ptr addrspace(5) [[P]], i64 1
+; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1
+; CHECK-NEXT: ret <3 x i1> [[L]]
+;
+ %p = alloca [2 x <8 x i8>], align 1, addrspace(5)
+ store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1
+ %g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1
+ store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) %g, align 1
+ %l = load <3 x i1>, ptr addrspace(5) %g, align 1
+ ret <3 x i1> %l
+}
+
+; This one is actually not problematic.
+define <8 x i1> @array_of_vec_elem_i8_access_8xi1() {
+; CHECK-LABEL: @array_of_vec_elem_i8_access_8xi1(
+; CHECK-NEXT: [[P:%.*]] = freeze <16 x i8> poison
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> [[P]], i8 1, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 2, i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP2]], i8 3, i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[TMP3]], i8 4, i32 3
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP4]], i8 5, i32 4
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 6, i32 5
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[TMP6]], i8 7, i32 6
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> [[TMP7]], i8 8, i32 7
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i8> [[TMP8]], i8 5, i32 4
+; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+ %p = alloca [2 x <8 x i8>], align 1, addrspace(5)
+ store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1
+ %g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1
+ store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) %g, align 1
+ %l = load <8 x i1>, ptr addrspace(5) %g, align 1
+ ret <8 x i1> %l
+}
diff --git a/llvm/test/CodeGen/AMDGPU/swdev380865.ll b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
index 9189cef019cf..4a5dc8f300af 100644
--- a/llvm/test/CodeGen/AMDGPU/swdev380865.ll
+++ b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
@@ -16,15 +16,16 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_mov_b64 s[0:1], 0
; CHECK-NEXT: s_load_dword s2, s[0:1], 0x0
+; CHECK-NEXT: s_mov_b64 s[0:1], 0x100
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: s_mov_b32 s0, 0
-; CHECK-NEXT: s_mov_b32 s5, 0x40280000
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s1, s2
; CHECK-NEXT: s_mov_b32 s2, 0
; CHECK-NEXT: v_mov_b32_e32 v0, s6
; CHECK-NEXT: s_mov_b32 s3, 0x40260000
+; CHECK-NEXT: s_mov_b32 s5, 0x40280000
; CHECK-NEXT: v_mov_b32_e32 v1, s7
; CHECK-NEXT: .LBB0_1: ; %for.cond4.preheader
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -50,7 +51,7 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[4:5]
; CHECK-NEXT: s_cbranch_scc1 .LBB0_1
; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup.loopexit
-; CHECK-NEXT: v_mov_b32_e32 v2, 0
+; CHECK-NEXT: v_mov_b32_e32 v2, 0x100
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; CHECK-NEXT: s_endpgm
@@ -61,7 +62,7 @@ entry:
for.cond4.preheader: ; preds = %for.cond4.preheader, %entry
%idx.07 = phi i32 [ %add13, %for.cond4.preheader ], [ 0, %entry ]
- %arrayidx.promoted = load double, ptr addrspace(1) null, align 8
+ %arrayidx.promoted = load double, ptr addrspace(1) inttoptr (i64 256 to ptr addrspace(1)), align 8
%add9 = fadd contract double %arrayidx.promoted, 0.000000e+00
%add9.1 = fadd contract double %add9, 5.000000e+00
%add9.2 = fadd contract double %add9.1, 6.000000e+00
@@ -70,7 +71,7 @@ for.cond4.preheader: ; preds = %for.cond4.preheader
%add9.5 = fadd contract double %add9.4, 1.000000e+01
%add9.6 = fadd contract double %add9.5, 1.100000e+01
%add9.7 = fadd contract double %add9.6, 1.200000e+01
- store double %add9.7, ptr addrspace(1) null, align 8
+ store double %add9.7, ptr addrspace(1) inttoptr (i64 256 to ptr addrspace(1)), align 8
%add13 = add i32 %idx.07, %0
%cmp = icmp slt i32 %add13, 2560
br i1 %cmp, label %for.cond4.preheader, label %for.cond.cleanup
diff --git a/llvm/test/CodeGen/PowerPC/pr43527.ll b/llvm/test/CodeGen/PowerPC/pr43527.ll
index 379bd6c070c7..adfea51077a0 100644
--- a/llvm/test/CodeGen/PowerPC/pr43527.ll
+++ b/llvm/test/CodeGen/PowerPC/pr43527.ll
@@ -2,7 +2,7 @@
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; We don't want to produce a CTR loop due to the call to lrint in the body.
-define dso_local void @test(i64 %arg, i64 %arg1) {
+define dso_local void @test(i64 %arg, i64 %arg1, ptr %arg2) {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_5
@@ -12,29 +12,33 @@ define dso_local void @test(i64 %arg, i64 %arg1) {
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r28, -32
; CHECK-NEXT: .cfi_offset r29, -24
; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: sub r30, r4, r3
-; CHECK-NEXT: li r29, -4
+; CHECK-NEXT: mr r30, r5
+; CHECK-NEXT: sub r29, r4, r3
+; CHECK-NEXT: addi r28, r5, -4
; CHECK-NEXT: std r0, 80(r1)
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_3: # %bb5
; CHECK-NEXT: #
-; CHECK-NEXT: lfsu f1, 4(r29)
+; CHECK-NEXT: lfsu f1, 4(r28)
; CHECK-NEXT: bl lrint
; CHECK-NEXT: nop
-; CHECK-NEXT: addi r30, r30, -1
-; CHECK-NEXT: cmpldi r30, 0
+; CHECK-NEXT: addi r29, r29, -1
+; CHECK-NEXT: stb r3, 0(r30)
+; CHECK-NEXT: cmpldi r29, 0
; CHECK-NEXT: bc 12, gt, .LBB0_3
; CHECK-NEXT: # %bb.4: # %bb15
-; CHECK-NEXT: stb r3, 0(r3)
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB0_5: # %bb2
@@ -54,12 +58,12 @@ bb4: ; preds = %bb3
bb5: ; preds = %bb5, %bb4
%tmp6 = phi i64 [ %tmp12, %bb5 ], [ 0, %bb4 ]
- %tmp7 = getelementptr inbounds float, ptr null, i64 %tmp6
+ %tmp7 = getelementptr inbounds float, ptr %arg2, i64 %tmp6
%tmp8 = load float, ptr %tmp7, align 4
%tmp9 = fpext float %tmp8 to double
%tmp10 = tail call i64 @llvm.lrint.i64.f64(double %tmp9) #2
%tmp11 = trunc i64 %tmp10 to i8
- store i8 %tmp11, ptr undef, align 1
+ store i8 %tmp11, ptr %arg2, align 1
%tmp12 = add nuw i64 %tmp6, 1
%tmp13 = icmp eq i64 %tmp12, %tmp
br i1 %tmp13, label %bb15, label %bb5
diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll
index fa156454a131..b610f12159ee 100644
--- a/llvm/test/CodeGen/PowerPC/pr48519.ll
+++ b/llvm/test/CodeGen/PowerPC/pr48519.ll
@@ -32,7 +32,7 @@ define void @julia__typed_vcat_20() #0 {
; CHECK-NEXT: # %bb.2: # %bb11
; CHECK-NEXT: bl __truncsfhf2
; CHECK-NEXT: nop
-; CHECK-NEXT: sth r3, 0(r3)
+; CHECK-NEXT: sth r3, 128(0)
;
; CHECK-P9-LABEL: julia__typed_vcat_20:
; CHECK-P9: # %bb.0: # %bb
@@ -54,6 +54,7 @@ define void @julia__typed_vcat_20() #0 {
; CHECK-P9-NEXT: bdnz .LBB0_1
; CHECK-P9-NEXT: # %bb.2: # %bb11
; CHECK-P9-NEXT: xscvdphp f0, f0
+; CHECK-P9-NEXT: li r3, 128
; CHECK-P9-NEXT: stxsihx f0, 0, r3
bb:
%i = load i64, ptr addrspace(11) null, align 8
@@ -67,7 +68,7 @@ bb3: ; preds = %bb3, %bb
%i6 = add nsw i64 %i5, -1
%i7 = add i64 %i6, 0
%i8 = sitofp i64 %i7 to half
- store half %i8, ptr addrspace(13) undef, align 2
+ store half %i8, ptr addrspace(13) inttoptr (i64 128 to ptr addrspace(13)), align 2
%i9 = icmp eq i64 %i4, 0
%i10 = add i64 %i4, 1
br i1 %i9, label %bb11, label %bb3
diff --git a/llvm/test/CodeGen/PowerPC/sms-grp-order.ll b/llvm/test/CodeGen/PowerPC/sms-grp-order.ll
index f72598cb4cbc..eaea47608eb2 100644
--- a/llvm/test/CodeGen/PowerPC/sms-grp-order.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-grp-order.ll
@@ -2,32 +2,32 @@
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
; RUN: -mcpu=pwr9 --ppc-enable-pipeliner | FileCheck %s
-define void @lame_encode_buffer_interleaved() local_unnamed_addr {
+define void @lame_encode_buffer_interleaved(ptr %arg0) local_unnamed_addr {
; CHECK-LABEL: lame_encode_buffer_interleaved:
; CHECK: # %bb.0:
-; CHECK-NEXT: lha 3, 0(3)
-; CHECK-NEXT: li 5, 1
-; CHECK-NEXT: lhz 4, 0(0)
-; CHECK-NEXT: rldic 5, 5, 62, 1
-; CHECK-NEXT: mtctr 5
-; CHECK-NEXT: srawi 3, 3, 1
-; CHECK-NEXT: addze 3, 3
+; CHECK-NEXT: li 4, 1
+; CHECK-NEXT: rldic 4, 4, 62, 1
+; CHECK-NEXT: mtctr 4
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1:
-; CHECK-NEXT: extsh 4, 4
+; CHECK-NEXT: lha 4, 0(3)
+; CHECK-NEXT: lha 5, 0(3)
; CHECK-NEXT: srawi 4, 4, 1
; CHECK-NEXT: addze 4, 4
+; CHECK-NEXT: srawi 5, 5, 1
+; CHECK-NEXT: addze 5, 5
+; CHECK-NEXT: sth 4, 0(3)
+; CHECK-NEXT: sth 5, 0(3)
; CHECK-NEXT: bdnz .LBB0_1
; CHECK-NEXT: # %bb.2:
-; CHECK-NEXT: sth 4, 0(0)
-; CHECK-NEXT: sth 3, 0(3)
; CHECK-NEXT: blr
+ %undef = freeze ptr poison
br label %1
1: ; preds = %1, %0
%2 = phi i64 [ 0, %0 ], [ %13, %1 ]
- %3 = load i16, ptr null, align 2
- %4 = load i16, ptr undef, align 2
+ %3 = load i16, ptr %arg0, align 2
+ %4 = load i16, ptr %undef, align 2
%5 = sext i16 %3 to i32
%6 = sext i16 %4 to i32
%7 = add nsw i32 0, %5
@@ -36,8 +36,8 @@ define void @lame_encode_buffer_interleaved() local_unnamed_addr {
%10 = sdiv i32 %8, 2
%11 = trunc i32 %9 to i16
%12 = trunc i32 %10 to i16
- store i16 %11, ptr null, align 2
- store i16 %12, ptr undef, align 2
+ store i16 %11, ptr %arg0, align 2
+ store i16 %12, ptr %undef, align 2
%13 = add i64 %2, 4
%14 = icmp eq i64 %13, 0
br i1 %14, label %15, label %1
diff --git a/llvm/test/CodeGen/RISCV/emit-x8-as-fp.ll b/llvm/test/CodeGen/RISCV/emit-x8-as-fp.ll
new file mode 100644
index 000000000000..0c9ef2176fd7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/emit-x8-as-fp.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I-DEFAULT %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-DEFAULT %s
+; RUN: llc -mtriple=riscv32 -M emit-x8-as-fp -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I-EMIT-FP %s
+; RUN: llc -mtriple=riscv64 -M emit-x8-as-fp -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I-EMIT-FP %s
+; RUN: llc -mtriple=riscv32 -M numeric -M emit-x8-as-fp -verify-machineinstrs \
+; RUN: < %s | FileCheck -check-prefix=RV32I-NUMERIC %s
+; RUN: llc -mtriple=riscv64 -M numeric -M emit-x8-as-fp -verify-machineinstrs \
+; RUN: < %s | FileCheck -check-prefix=RV64I-NUMERIC %s
+
+define signext i32 @add(i32 %0, i32 %1) #0 {
+; RV32I-DEFAULT-LABEL: add:
+; RV32I-DEFAULT: # %bb.0:
+; RV32I-DEFAULT-NEXT: addi sp, sp, -16
+; RV32I-DEFAULT-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-DEFAULT-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-DEFAULT-NEXT: addi s0, sp, 16
+; RV32I-DEFAULT-NEXT: sw a0, -12(s0)
+; RV32I-DEFAULT-NEXT: sw a1, -16(s0)
+; RV32I-DEFAULT-NEXT: lw a0, -12(s0)
+; RV32I-DEFAULT-NEXT: lw a1, -16(s0)
+; RV32I-DEFAULT-NEXT: add a0, a0, a1
+; RV32I-DEFAULT-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-DEFAULT-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-DEFAULT-NEXT: addi sp, sp, 16
+; RV32I-DEFAULT-NEXT: ret
+;
+; RV64I-DEFAULT-LABEL: add:
+; RV64I-DEFAULT: # %bb.0:
+; RV64I-DEFAULT-NEXT: addi sp, sp, -32
+; RV64I-DEFAULT-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-DEFAULT-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-DEFAULT-NEXT: addi s0, sp, 32
+; RV64I-DEFAULT-NEXT: sw a0, -20(s0)
+; RV64I-DEFAULT-NEXT: sw a1, -24(s0)
+; RV64I-DEFAULT-NEXT: lw a0, -20(s0)
+; RV64I-DEFAULT-NEXT: lw a1, -24(s0)
+; RV64I-DEFAULT-NEXT: addw a0, a0, a1
+; RV64I-DEFAULT-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-DEFAULT-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-DEFAULT-NEXT: addi sp, sp, 32
+; RV64I-DEFAULT-NEXT: ret
+;
+; RV32I-EMIT-FP-LABEL: add:
+; RV32I-EMIT-FP: # %bb.0:
+; RV32I-EMIT-FP-NEXT: addi sp, sp, -16
+; RV32I-EMIT-FP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-EMIT-FP-NEXT: sw fp, 8(sp) # 4-byte Folded Spill
+; RV32I-EMIT-FP-NEXT: addi fp, sp, 16
+; RV32I-EMIT-FP-NEXT: sw a0, -12(fp)
+; RV32I-EMIT-FP-NEXT: sw a1, -16(fp)
+; RV32I-EMIT-FP-NEXT: lw a0, -12(fp)
+; RV32I-EMIT-FP-NEXT: lw a1, -16(fp)
+; RV32I-EMIT-FP-NEXT: add a0, a0, a1
+; RV32I-EMIT-FP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-EMIT-FP-NEXT: lw fp, 8(sp) # 4-byte Folded Reload
+; RV32I-EMIT-FP-NEXT: addi sp, sp, 16
+; RV32I-EMIT-FP-NEXT: ret
+;
+; RV64I-EMIT-FP-LABEL: add:
+; RV64I-EMIT-FP: # %bb.0:
+; RV64I-EMIT-FP-NEXT: addi sp, sp, -32
+; RV64I-EMIT-FP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-EMIT-FP-NEXT: sd fp, 16(sp) # 8-byte Folded Spill
+; RV64I-EMIT-FP-NEXT: addi fp, sp, 32
+; RV64I-EMIT-FP-NEXT: sw a0, -20(fp)
+; RV64I-EMIT-FP-NEXT: sw a1, -24(fp)
+; RV64I-EMIT-FP-NEXT: lw a0, -20(fp)
+; RV64I-EMIT-FP-NEXT: lw a1, -24(fp)
+; RV64I-EMIT-FP-NEXT: addw a0, a0, a1
+; RV64I-EMIT-FP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-EMIT-FP-NEXT: ld fp, 16(sp) # 8-byte Folded Reload
+; RV64I-EMIT-FP-NEXT: addi sp, sp, 32
+; RV64I-EMIT-FP-NEXT: ret
+;
+; RV32I-NUMERIC-LABEL: add:
+; RV32I-NUMERIC: # %bb.0:
+; RV32I-NUMERIC-NEXT: addi x2, x2, -16
+; RV32I-NUMERIC-NEXT: sw x1, 12(x2) # 4-byte Folded Spill
+; RV32I-NUMERIC-NEXT: sw x8, 8(x2) # 4-byte Folded Spill
+; RV32I-NUMERIC-NEXT: addi x8, x2, 16
+; RV32I-NUMERIC-NEXT: sw x10, -12(x8)
+; RV32I-NUMERIC-NEXT: sw x11, -16(x8)
+; RV32I-NUMERIC-NEXT: lw x10, -12(x8)
+; RV32I-NUMERIC-NEXT: lw x11, -16(x8)
+; RV32I-NUMERIC-NEXT: add x10, x10, x11
+; RV32I-NUMERIC-NEXT: lw x1, 12(x2) # 4-byte Folded Reload
+; RV32I-NUMERIC-NEXT: lw x8, 8(x2) # 4-byte Folded Reload
+; RV32I-NUMERIC-NEXT: addi x2, x2, 16
+; RV32I-NUMERIC-NEXT: ret
+;
+; RV64I-NUMERIC-LABEL: add:
+; RV64I-NUMERIC: # %bb.0:
+; RV64I-NUMERIC-NEXT: addi x2, x2, -32
+; RV64I-NUMERIC-NEXT: sd x1, 24(x2) # 8-byte Folded Spill
+; RV64I-NUMERIC-NEXT: sd x8, 16(x2) # 8-byte Folded Spill
+; RV64I-NUMERIC-NEXT: addi x8, x2, 32
+; RV64I-NUMERIC-NEXT: sw x10, -20(x8)
+; RV64I-NUMERIC-NEXT: sw x11, -24(x8)
+; RV64I-NUMERIC-NEXT: lw x10, -20(x8)
+; RV64I-NUMERIC-NEXT: lw x11, -24(x8)
+; RV64I-NUMERIC-NEXT: addw x10, x10, x11
+; RV64I-NUMERIC-NEXT: ld x1, 24(x2) # 8-byte Folded Reload
+; RV64I-NUMERIC-NEXT: ld x8, 16(x2) # 8-byte Folded Reload
+; RV64I-NUMERIC-NEXT: addi x2, x2, 32
+; RV64I-NUMERIC-NEXT: ret
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ store i32 %0, ptr %3, align 4
+ store i32 %1, ptr %4, align 4
+ %5 = load i32, ptr %3, align 4
+ %6 = load i32, ptr %4, align 4
+ %7 = add nsw i32 %5, %6
+ ret i32 %7
+}
+
+attributes #0 = { noinline nounwind optnone "frame-pointer"="all" }
diff --git a/llvm/test/CodeGen/X86/fake-use-remove-loads.mir b/llvm/test/CodeGen/X86/fake-use-remove-loads.mir
index 3f67f03c9a63..aa9839d2700a 100644
--- a/llvm/test/CodeGen/X86/fake-use-remove-loads.mir
+++ b/llvm/test/CodeGen/X86/fake-use-remove-loads.mir
@@ -3,6 +3,8 @@
# remove-loads-into-fake-uses pass, and that if the function does not use
# instruction referencing then no changes are made.
# RUN: llc %s -run-pass remove-loads-into-fake-uses -mtriple=x86_64-unknown-linux -debug-only=remove-loads-into-fake-uses 2>&1 -o - | FileCheck %s
+# RUN: llc %s -passes remove-loads-into-fake-uses -mtriple=x86_64-unknown-linux -debug-only=remove-loads-into-fake-uses 2>&1 -o - | FileCheck %s
+
# REQUIRES: asserts
#
## We verify that:
diff --git a/llvm/test/CodeGen/X86/pr134602.ll b/llvm/test/CodeGen/X86/pr134602.ll
new file mode 100644
index 000000000000..e4376cbeab10
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr134602.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=X64
+
+; FIXME: incorrect vector codegen due to bad handling of splats of binops containing undefs
+define i32 @PR134602(i16 %a0) {
+; X86-LABEL: PR134602:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl $1, %eax
+; X86-NEXT: addl $3, %eax
+; X86-NEXT: cwtl
+; X86-NEXT: retl
+;
+; X64-LABEL: PR134602:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+ %splat= insertelement <4 x i16> zeroinitializer, i16 %a0, i64 0
+ %mul = mul <4 x i16> %splat, <i16 1, i16 1, i16 0, i16 0>
+ %or = or <4 x i16> splat (i16 1), %mul
+ %reduce = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %or)
+ %ret_32 = sext i16 %reduce to i32
+ ret i32 %ret_32
+}
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s
index b289a4c9aa65..c6782a6c9643 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --sort --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s
@@ -56,14 +56,23 @@ v_add_f16 v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
v_add_f16 v5.l, v255.l, v2.l quad_perm:[3,2,1,0]
// GFX11: v_add_f16_e64_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
-v_fmac_f16 v255, v1, v2
-// GFX11: v_fmac_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00]
+v_fmac_f16 v255.h, v1.h, v2.h
+// GFX11: v_fmac_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1,1] ; encoding: [0xff,0x58,0x36,0xd5,0x01,0x05,0x02,0x00]
-v_fmac_f16 v5, v1, v255
-// GFX11: v_fmac_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00]
+v_fmac_f16 v255.l, v1.l, v2.l
+// GFX11: v_fmac_f16_e64 v255.l, v1.l, v2.l ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00]
-v_fmac_f16 v5, v255, v2
-// GFX11: v_fmac_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00]
+v_fmac_f16 v5.h, v1.h, v255.h
+// GFX11: v_fmac_f16_e64 v5.h, v1.h, v255.h op_sel:[1,1,1,1] ; encoding: [0x05,0x58,0x36,0xd5,0x01,0xff,0x03,0x00]
+
+v_fmac_f16 v5.h, v255.h, v2.h
+// GFX11: v_fmac_f16_e64 v5.h, v255.h, v2.h op_sel:[1,1,1,1] ; encoding: [0x05,0x58,0x36,0xd5,0xff,0x05,0x02,0x00]
+
+v_fmac_f16 v5.l, v1.l, v255.l
+// GFX11: v_fmac_f16_e64 v5.l, v1.l, v255.l ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00]
+
+v_fmac_f16 v5.l, v255.l, v2.l
+// GFX11: v_fmac_f16_e64 v5.l, v255.l, v2.l ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00]
v_ldexp_f16 v255.h, v1.h, v2.h
// GFX11: v_ldexp_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1] ; encoding: [0xff,0x58,0x3b,0xd5,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s
index 15d6547a0477..cb05bc403778 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s
@@ -524,47 +524,59 @@ v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3
v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
-v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_fmac_f16_e64_dpp v5, v1, v2 row_mirror
-// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_mirror
+// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
+// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1
-// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1
+// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15
-// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15
+// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1
-// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1
+// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15
-// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15
+// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1
-// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1
+// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15
-// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15
+// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
+v_fmac_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_fmac_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
-v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13]
+v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13]
-v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30]
+v_fmac_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_fmac_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30]
+
+v_fmac_f16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_fmac_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
+
+v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13]
+
+v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc3,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30]
v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s
index 2f52b7f467e0..35bbca0b6d0b 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s
@@ -169,17 +169,29 @@ v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
-v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
+v_fmac_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_fmac_f16_e64_dpp v5.l, |v1.l|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
-v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
+v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.l| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
-v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
+v_fmac_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_fmac_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
+
+v_fmac_f16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_fmac_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc3,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s
index 5debd064812c..4f410f611c8e 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s
@@ -587,50 +587,59 @@ v_fmac_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4
v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2
// GFX11: v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf]
-v_fmac_f16_e64 v5, v1, v2
-// GFX11: v_fmac_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00]
+v_fmac_f16_e64 v5.l, v1.l, v2.l
+// GFX11: v_fmac_f16_e64 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00]
-v_fmac_f16_e64 v5, v255, v255
-// GFX11: v_fmac_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00]
+v_fmac_f16_e64 v5.l, v255.l, v255.l
+// GFX11: v_fmac_f16_e64 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00]
-v_fmac_f16_e64 v5, s1, s2
-// GFX11: v_fmac_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00]
+v_fmac_f16_e64 v5.l, s1, s2
+// GFX11: v_fmac_f16_e64 v5.l, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00]
-v_fmac_f16_e64 v5, s105, s105
-// GFX11: v_fmac_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00]
+v_fmac_f16_e64 v5.l, s105, s105
+// GFX11: v_fmac_f16_e64 v5.l, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00]
-v_fmac_f16_e64 v5, vcc_lo, ttmp15
-// GFX11: v_fmac_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00]
+v_fmac_f16_e64 v5.l, vcc_lo, ttmp15
+// GFX11: v_fmac_f16_e64 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00]
-v_fmac_f16_e64 v5, vcc_hi, 0xfe0b
-// GFX11: v_fmac_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+v_fmac_f16_e64 v5.l, vcc_hi, 0xfe0b
+// GFX11: v_fmac_f16_e64 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_fmac_f16_e64 v5, ttmp15, src_scc
-// GFX11: v_fmac_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00]
+v_fmac_f16_e64 v5.l, ttmp15, src_scc
+// GFX11: v_fmac_f16_e64 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00]
-v_fmac_f16_e64 v5, m0, 0.5
-// GFX11: v_fmac_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00]
+v_fmac_f16_e64 v5.l, m0, 0.5
+// GFX11: v_fmac_f16_e64 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00]
-v_fmac_f16_e64 v5, exec_lo, -1
-// GFX11: v_fmac_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00]
+v_fmac_f16_e64 v5.l, exec_lo, -1
+// GFX11: v_fmac_f16_e64 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00]
-v_fmac_f16_e64 v5, |exec_hi|, null
-// GFX11: v_fmac_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00]
+v_fmac_f16_e64 v5.l, |exec_hi|, null
+// GFX11: v_fmac_f16_e64 v5.l, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00]
-v_fmac_f16_e64 v5, null, exec_lo
-// GFX11: v_fmac_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00]
+v_fmac_f16_e64 v5.l, null, exec_lo
+// GFX11: v_fmac_f16_e64 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00]
-v_fmac_f16_e64 v5, -1, exec_hi
-// GFX11: v_fmac_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00]
+v_fmac_f16_e64 v5.l, -1, exec_hi
+// GFX11: v_fmac_f16_e64 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00]
-v_fmac_f16_e64 v5, 0.5, -m0 mul:2
-// GFX11: v_fmac_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48]
+v_fmac_f16_e64 v5.l, 0.5, -m0 mul:2
+// GFX11: v_fmac_f16_e64 v5.l, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48]
-v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4
-// GFX11: v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30]
+v_fmac_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4
+// GFX11: v_fmac_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30]
-v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2
-// GFX11: v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00]
+v_fmac_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2
+// GFX11: v_fmac_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00]
+
+v_fmac_f16_e64 v5.l, v1.h, v2.l
+// GFX11: v_fmac_f16_e64 v5.l, v1.h, v2.l op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x36,0xd5,0x01,0x05,0x02,0x00]
+
+v_fmac_f16_e64 v5.l, v255.l, v255.h
+// GFX11: v_fmac_f16_e64 v5.l, v255.l, v255.h op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x36,0xd5,0xff,0xff,0x03,0x00]
+
+v_fmac_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| clamp div:2
+// GFX11: v_fmac_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| op_sel:[0,0,1,1] clamp div:2 ; encoding: [0xff,0xc3,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00]
v_fmac_f32_e64 v5, v1, v2
// GFX11: v_fmac_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s
index e1286dadfb6e..296c42f4c2c6 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s
@@ -55,14 +55,23 @@ v_add_f16 v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
v_add_f16 v5.l, v255.l, v2.l quad_perm:[3,2,1,0]
// GFX12: v_add_f16_e64_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
-v_fmac_f16 v255, v1, v2
-// GFX12: v_fmac_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00]
+v_fmac_f16 v255.h, v1.h, v2.h
+// GFX12: v_fmac_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1,1] ; encoding: [0xff,0x58,0x36,0xd5,0x01,0x05,0x02,0x00]
-v_fmac_f16 v5, v1, v255
-// GFX12: v_fmac_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00]
+v_fmac_f16 v255.l, v1.l, v2.l
+// GFX12: v_fmac_f16_e64 v255.l, v1.l, v2.l ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00]
-v_fmac_f16 v5, v255, v2
-// GFX12: v_fmac_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00]
+v_fmac_f16 v5.h, v1.h, v255.h
+// GFX12: v_fmac_f16_e64 v5.h, v1.h, v255.h op_sel:[1,1,1,1] ; encoding: [0x05,0x58,0x36,0xd5,0x01,0xff,0x03,0x00]
+
+v_fmac_f16 v5.h, v255.h, v2.h
+// GFX12: v_fmac_f16_e64 v5.h, v255.h, v2.h op_sel:[1,1,1,1] ; encoding: [0x05,0x58,0x36,0xd5,0xff,0x05,0x02,0x00]
+
+v_fmac_f16 v5.l, v1.l, v255.l
+// GFX12: v_fmac_f16_e64 v5.l, v1.l, v255.l ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00]
+
+v_fmac_f16 v5.l, v255.l, v2.l
+// GFX12: v_fmac_f16_e64 v5.l, v255.l, v2.l ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00]
v_ldexp_f16 v255.h, v1.h, v2.h
// GFX12: v_ldexp_f16_e64 v255.h, v1.h, v2.h op_sel:[1,1,1] ; encoding: [0xff,0x58,0x3b,0xd5,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s
index 7674eafd9c30..f16838919a4a 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2.s
@@ -578,50 +578,59 @@ v_cvt_pkrtz_f16_f32_e64 v5, -src_scc, |vcc_lo|
v_cvt_pkrtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp
// GFX12: v_cvt_pk_rtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
-v_fmac_f16_e64 v5, v1, v2
-// GFX12: v_fmac_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00]
+v_fmac_f16_e64 v5.l, v1.l, v2.l
+// GFX12: v_fmac_f16_e64 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00]
-v_fmac_f16_e64 v5, v255, v255
-// GFX12: v_fmac_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00]
+v_fmac_f16_e64 v5.l, v255.l, v255.l
+// GFX12: v_fmac_f16_e64 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00]
-v_fmac_f16_e64 v5, s1, s2
-// GFX12: v_fmac_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00]
+v_fmac_f16_e64 v5.l, s1, s2
+// GFX12: v_fmac_f16_e64 v5.l, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00]
-v_fmac_f16_e64 v5, s105, s105
-// GFX12: v_fmac_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00]
+v_fmac_f16_e64 v5.l, s105, s105
+// GFX12: v_fmac_f16_e64 v5.l, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00]
-v_fmac_f16_e64 v5, vcc_lo, ttmp15
-// GFX12: v_fmac_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00]
+v_fmac_f16_e64 v5.l, vcc_lo, ttmp15
+// GFX12: v_fmac_f16_e64 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00]
-v_fmac_f16_e64 v5, vcc_hi, 0xfe0b
-// GFX12: v_fmac_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+v_fmac_f16_e64 v5.l, vcc_hi, 0xfe0b
+// GFX12: v_fmac_f16_e64 v5.l, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
-v_fmac_f16_e64 v5, ttmp15, src_scc
-// GFX12: v_fmac_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00]
+v_fmac_f16_e64 v5.l, ttmp15, src_scc
+// GFX12: v_fmac_f16_e64 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00]
-v_fmac_f16_e64 v5, m0, 0.5
-// GFX12: v_fmac_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00]
+v_fmac_f16_e64 v5.l, m0, 0.5
+// GFX12: v_fmac_f16_e64 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00]
-v_fmac_f16_e64 v5, exec_lo, -1
-// GFX12: v_fmac_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00]
+v_fmac_f16_e64 v5.l, exec_lo, -1
+// GFX12: v_fmac_f16_e64 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00]
-v_fmac_f16_e64 v5, |exec_hi|, null
-// GFX12: v_fmac_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00]
+v_fmac_f16_e64 v5.l, |exec_hi|, null
+// GFX12: v_fmac_f16_e64 v5.l, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00]
-v_fmac_f16_e64 v5, null, exec_lo
-// GFX12: v_fmac_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00]
+v_fmac_f16_e64 v5.l, null, exec_lo
+// GFX12: v_fmac_f16_e64 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00]
-v_fmac_f16_e64 v5, -1, exec_hi
-// GFX12: v_fmac_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00]
+v_fmac_f16_e64 v5.l, -1, exec_hi
+// GFX12: v_fmac_f16_e64 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00]
-v_fmac_f16_e64 v5, 0.5, -m0 mul:2
-// GFX12: v_fmac_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48]
+v_fmac_f16_e64 v5.l, 0.5, -m0 mul:2
+// GFX12: v_fmac_f16_e64 v5.l, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48]
-v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4
-// GFX12: v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30]
+v_fmac_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4
+// GFX12: v_fmac_f16_e64 v5.l, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30]
-v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2
-// GFX12: v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00]
+v_fmac_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2
+// GFX12: v_fmac_f16_e64 v255.l, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00]
+
+v_fmac_f16_e64 v5.l, v1.h, v2.l
+// GFX12: v_fmac_f16_e64 v5.l, v1.h, v2.l op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x36,0xd5,0x01,0x05,0x02,0x00]
+
+v_fmac_f16_e64 v5.l, v255.l, v255.h
+// GFX12: v_fmac_f16_e64 v5.l, v255.l, v255.h op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x36,0xd5,0xff,0xff,0x03,0x00]
+
+v_fmac_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| clamp div:2
+// GFX12: v_fmac_f16_e64 v255.h, -|0xfe0b|, -|vcc_hi| op_sel:[0,0,1,1] clamp div:2 ; encoding: [0xff,0xc3,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00]
v_fmac_f32_e64 v5, v1, v2
// GFX12: v_fmac_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s
index e27706ec02ea..301f756e906a 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s
@@ -566,6 +566,48 @@ v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3
v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_mirror
+// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror
+// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1
+// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15
+// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1
+// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15
+// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1
+// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15
+// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_fmac_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+
+v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01]
+
+v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13]
+
+v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc3,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x05,0x30]
+
v_ldexp_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0]
// GFX12: v_ldexp_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s
index 49233697e955..7390720e4dd5 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s
@@ -239,6 +239,21 @@ v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_fmac_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_fmac_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v5.h, v1.h, v2.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_fmac_f16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x58,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_fmac_f16_e64_dpp v5.l, |v1.h|, -v2.l op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x09,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_fmac_f16_e64_dpp v5.l, -v1.l, |v2.h| op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x12,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05]
+
+v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_fmac_f16_e64_dpp v255.h, -|v255.l|, -|v255.l| op_sel:[0,0,1,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc3,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00]
+
v_ldexp_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
// GFX12: v_ldexp_f16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/RISCV/emit-x8-as-fp.txt b/llvm/test/MC/Disassembler/RISCV/emit-x8-as-fp.txt
new file mode 100644
index 000000000000..23d3f72c2ef5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/RISCV/emit-x8-as-fp.txt
@@ -0,0 +1,22 @@
+# RUN: llvm-mc --disassemble -triple=riscv32 --show-encoding < %s \
+# RUN: | FileCheck --check-prefixes=DEFAULT %s
+# RUN: llvm-mc --disassemble -triple=riscv64 --show-encoding < %s \
+# RUN: | FileCheck --check-prefixes=DEFAULT %s
+# RUN: llvm-mc --disassemble -triple=riscv32 -M emit-x8-as-fp \
+# RUN: --show-encoding < %s | FileCheck --check-prefixes=EMIT-FP %s
+# RUN: llvm-mc --disassemble -triple=riscv64 -M emit-x8-as-fp \
+# RUN: --show-encoding < %s | FileCheck --check-prefixes=EMIT-FP %s
+# RUN: llvm-mc --disassemble -triple=riscv32 -M numeric -M emit-x8-as-fp \
+# RUN: --show-encoding < %s | FileCheck --check-prefixes=NUMERIC %s
+# RUN: llvm-mc --disassemble -triple=riscv64 -M numeric -M emit-x8-as-fp \
+# RUN: --show-encoding < %s | FileCheck --check-prefixes=NUMERIC %s
+
+# DEFAULT: sw a0, -12(s0) # encoding: [0x23,0x2a,0xa4,0xfe]
+# EMIT-FP: sw a0, -12(fp) # encoding: [0x23,0x2a,0xa4,0xfe]
+# NUMERIC: sw x10, -12(x8) # encoding: [0x23,0x2a,0xa4,0xfe]
+0x23 0x2a 0xa4 0xfe
+
+# DEFAULT: lw a0, -12(s0) # encoding: [0x03,0x25,0x44,0xff]
+# EMIT-FP: lw a0, -12(fp) # encoding: [0x03,0x25,0x44,0xff]
+# NUMERIC: lw x10, -12(x8) # encoding: [0x03,0x25,0x44,0xff]
+0x03 0x25 0x44 0xff
diff --git a/llvm/test/MC/RISCV/emit-x8-as-fp.s b/llvm/test/MC/RISCV/emit-x8-as-fp.s
new file mode 100644
index 000000000000..ea5143f5066f
--- /dev/null
+++ b/llvm/test/MC/RISCV/emit-x8-as-fp.s
@@ -0,0 +1,42 @@
+# RUN: llvm-mc --triple=riscv32 --show-encoding < %s 2>&1 \
+# RUN: | FileCheck --check-prefix=DEFAULT %s
+# RUN: llvm-mc --triple=riscv64 --show-encoding < %s 2>&1 \
+# RUN: | FileCheck --check-prefix=DEFAULT %s
+# RUN: llvm-mc --triple=riscv32 -M emit-x8-as-fp --show-encoding < %s 2>&1 \
+# RUN: | FileCheck --check-prefix=EMIT-FP %s
+# RUN: llvm-mc --triple=riscv64 -M emit-x8-as-fp --show-encoding < %s 2>&1 \
+# RUN: | FileCheck --check-prefix=EMIT-FP %s
+# RUN: llvm-mc --triple=riscv32 -M numeric -M emit-x8-as-fp --show-encoding \
+# RUN: < %s 2>&1 | FileCheck --check-prefix=NUMERIC %s
+# RUN: llvm-mc --triple=riscv64 -M numeric -M emit-x8-as-fp --show-encoding \
+# RUN: < %s 2>&1 | FileCheck --check-prefix=NUMERIC %s
+
+# DEFAULT: sw a0, -12(s0) # encoding: [0x23,0x2a,0xa4,0xfe]
+# EMIT-FP: sw a0, -12(fp) # encoding: [0x23,0x2a,0xa4,0xfe]
+# NUMERIC: sw x10, -12(x8) # encoding: [0x23,0x2a,0xa4,0xfe]
+sw a0, -12(s0)
+
+# DEFAULT: lw a0, -12(s0) # encoding: [0x03,0x25,0x44,0xff]
+# EMIT-FP: lw a0, -12(fp) # encoding: [0x03,0x25,0x44,0xff]
+# NUMERIC: lw x10, -12(x8) # encoding: [0x03,0x25,0x44,0xff]
+lw a0, -12(s0)
+
+# DEFAULT: sw a0, -12(s0) # encoding: [0x23,0x2a,0xa4,0xfe]
+# EMIT-FP: sw a0, -12(fp) # encoding: [0x23,0x2a,0xa4,0xfe]
+# NUMERIC: sw x10, -12(x8) # encoding: [0x23,0x2a,0xa4,0xfe]
+sw a0, -12(fp)
+
+# DEFAULT: lw a0, -12(s0) # encoding: [0x03,0x25,0x44,0xff]
+# EMIT-FP: lw a0, -12(fp) # encoding: [0x03,0x25,0x44,0xff]
+# NUMERIC: lw x10, -12(x8) # encoding: [0x03,0x25,0x44,0xff]
+lw a0, -12(fp)
+
+# DEFAULT: sw a0, -12(s0) # encoding: [0x23,0x2a,0xa4,0xfe]
+# EMIT-FP: sw a0, -12(fp) # encoding: [0x23,0x2a,0xa4,0xfe]
+# NUMERIC: sw x10, -12(x8) # encoding: [0x23,0x2a,0xa4,0xfe]
+sw a0, -12(x8)
+
+# DEFAULT: lw a0, -12(s0) # encoding: [0x03,0x25,0x44,0xff]
+# EMIT-FP: lw a0, -12(fp) # encoding: [0x03,0x25,0x44,0xff]
+# NUMERIC: lw x10, -12(x8) # encoding: [0x03,0x25,0x44,0xff]
+lw a0, -12(x8)
diff --git a/llvm/test/MC/RISCV/function-call-invalid.s b/llvm/test/MC/RISCV/function-call-invalid.s
index 2b7a85245880..17d02015a694 100644
--- a/llvm/test/MC/RISCV/function-call-invalid.s
+++ b/llvm/test/MC/RISCV/function-call-invalid.s
@@ -10,3 +10,5 @@ call %lo(1234) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name
call %hi(foo) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name
call %lo(foo) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name
call foo, bar # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name
+call foo@pls # CHECK: :[[@LINE]]:10: error: @ (except the deprecated/ignored @plt) is disallowed
+call foo@3 # CHECK: :[[@LINE]]:10: error: @ (except the deprecated/ignored @plt) is disallowed
diff --git a/llvm/test/MC/RISCV/tail-call-invalid.s b/llvm/test/MC/RISCV/tail-call-invalid.s
index 270d84df58ac..14ff996b2e4b 100644
--- a/llvm/test/MC/RISCV/tail-call-invalid.s
+++ b/llvm/test/MC/RISCV/tail-call-invalid.s
@@ -10,3 +10,4 @@ tail %hi(1234) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name
tail %lo(1234) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name
tail %hi(foo) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name
tail %lo(foo) # CHECK: :[[@LINE]]:6: error: operand must be a bare symbol name
+tail foo@pls # CHECK: :[[@LINE]]:10: error: @ (except the deprecated/ignored @plt) is disallowed
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-inline-threshold.ll b/llvm/test/Other/new-pm-lto-prelink-samplepgo-inline-threshold.ll
index 9baedcb02ca0..67af5f18e057 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-inline-threshold.ll
+++ b/llvm/test/Other/new-pm-lto-prelink-samplepgo-inline-threshold.ll
@@ -1,4 +1,4 @@
-; Tests that hot callsite threshold is set to 0 artifically for thinlto-prelink pipeline.
+; Tests that hot callsite threshold is set to 0 artifically for thinlto-prelink and lto-pre-link pipeline.
;
; Function `sum` is annotated with inline cost -1 and function `sum1` is
; annotated with inline cost 0, by function attribute `function-inline-cost`.
@@ -12,6 +12,9 @@
; RUN: opt < %s -pass-remarks=inline -pass-remarks-missed=inline -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/new-pm-thinlto-prelink-samplepgo-inline-threshold.prof -S 2>&1 | FileCheck %s -check-prefix=REMARK
+; RUN: opt < %s -pass-remarks=inline -pass-remarks-missed=inline -passes='lto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/new-pm-thinlto-prelink-samplepgo-inline-threshold.prof -S | FileCheck %s
+
+; RUN: opt < %s -pass-remarks=inline -pass-remarks-missed=inline -passes='lto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/new-pm-thinlto-prelink-samplepgo-inline-threshold.prof -S 2>&1 | FileCheck %s -check-prefix=REMARK
; Original C++ test case
;
; #include <stdio.h>
diff --git a/llvm/test/Transforms/GVN/pr65447.ll b/llvm/test/Transforms/GVN/pr65447.ll
index 1b951e907e82..1fa3811a3a81 100644
--- a/llvm/test/Transforms/GVN/pr65447.ll
+++ b/llvm/test/Transforms/GVN/pr65447.ll
@@ -2,29 +2,98 @@
; RUN: opt -S -passes=gvn < %s | FileCheck %s
; Make sure deduplicated phi nodes are removed from the VN map.
-define i64 @f() {
-; CHECK-LABEL: define i64 @f() {
+define i64 @f2(ptr %arg) {
+; CHECK-LABEL: define i64 @f2(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT: BB:
+; CHECK-NEXT: store i1 false, ptr [[ARG]], align 1
+; CHECK-NEXT: br label [[BB2D:%.*]]
+; CHECK: BB2a:
+; CHECK-NEXT: br label [[BB2B:%.*]]
+; CHECK: BB2b:
+; CHECK-NEXT: br label [[BB2C:%.*]]
+; CHECK: BB2c:
+; CHECK-NEXT: [[AZ2:%.*]] = phi i1 [ true, [[BB2B]] ], [ [[AZ:%.*]], [[BB2D]] ]
+; CHECK-NEXT: [[DOTPHI_TRANS_INSERT:%.*]] = sext i1 [[AZ2]] to i64
+; CHECK-NEXT: [[GEP2_PHI_TRANS_INSERT:%.*]] = getelementptr i1, ptr [[ARG]], i64 [[DOTPHI_TRANS_INSERT]]
+; CHECK-NEXT: [[L93_PRE:%.*]] = load i1, ptr [[GEP2_PHI_TRANS_INSERT]], align 1
+; CHECK-NEXT: br label [[BB2D]]
+; CHECK: BB2d:
+; CHECK-NEXT: [[AZ]] = phi i1 [ [[AZ2]], [[BB2C]] ], [ false, [[BB:%.*]] ]
+; CHECK-NEXT: [[L93:%.*]] = phi i1 [ [[L93_PRE]], [[BB2C]] ], [ false, [[BB]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = sext i1 [[AZ]] to i64
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i1, ptr [[ARG]], i64 [[TMP0]]
+; CHECK-NEXT: store i1 [[AZ]], ptr [[ARG]], align 2
+; CHECK-NEXT: br i1 [[L93]], label [[BB2C]], label [[BB1E:%.*]]
+; CHECK: BB1e:
+; CHECK-NEXT: br i1 [[AZ]], label [[BB2F:%.*]], label [[BB4:%.*]]
+; CHECK: BB2f:
+; CHECK-NEXT: store i1 true, ptr [[ARG]], align 2
+; CHECK-NEXT: br label [[BB2B]]
+; CHECK: BB4:
+; CHECK-NEXT: br label [[BB4]]
+;
+BB:
+ store i1 false, ptr %arg, align 1
+ br label %BB2d
+
+BB2a: ; No predecessors!
+ br label %BB2b
+
+BB2b: ; preds = %BB2f, %BB2a
+ br label %BB2c
+
+BB2c: ; preds = %BB2d, %BB2b
+ %0 = phi i1 [ true, %BB2b ], [ %1, %BB2d ]
+ br label %BB2d
+
+BB2d: ; preds = %BB2c, %BB
+ %1 = phi i1 [ %0, %BB2c ], [ false, %BB ]
+ %2 = sext i1 %1 to i64
+ %gep2 = getelementptr i1, ptr %arg, i64 %2
+ %L93 = load i1, ptr %gep2, align 1
+ %Az = load i1, ptr %arg, align 2
+ store i1 %1, ptr %arg, align 2
+ br i1 %L93, label %BB2c, label %BB1e
+
+BB1e: ; preds = %BB2d
+ br i1 %Az, label %BB2f, label %BB4
+
+BB2f: ; preds = %BB1e
+ store i1 true, ptr %arg, align 2
+ br label %BB2b
+
+BB4: ; preds = %BB1e, %BB4
+ br label %BB4
+
+; uselistorder directives
+ uselistorder label %BB4, { 1, 0 }
+}
+
+; Make sure deduplicated phi nodes are removed from the VN map. Make
+; sure there is no assert on attempt to use ConstantData use lists.
+define i64 @f_null() {
+; CHECK-LABEL: define i64 @f_null() {
; CHECK-NEXT: BB:
; CHECK-NEXT: store i1 false, ptr null, align 1
; CHECK-NEXT: br label [[BB2D:%.*]]
; CHECK: BB2a:
; CHECK-NEXT: br label [[BB2B:%.*]]
; CHECK: BB2b:
-; CHECK-NEXT: [[L93_PRE_PRE:%.*]] = load i1, ptr inttoptr (i64 -1 to ptr), align 1
; CHECK-NEXT: br label [[BB2C:%.*]]
; CHECK: BB2c:
-; CHECK-NEXT: [[L93_PRE:%.*]] = phi i1 [ [[L93_PRE_PRE]], [[BB2B]] ], [ true, [[BB2D]] ]
; CHECK-NEXT: [[AZ2:%.*]] = phi i1 [ true, [[BB2B]] ], [ [[AZ:%.*]], [[BB2D]] ]
; CHECK-NEXT: [[DOTPHI_TRANS_INSERT:%.*]] = sext i1 [[AZ2]] to i64
; CHECK-NEXT: [[GEP2_PHI_TRANS_INSERT:%.*]] = getelementptr i1, ptr null, i64 [[DOTPHI_TRANS_INSERT]]
+; CHECK-NEXT: [[L93_PRE:%.*]] = load i1, ptr [[GEP2_PHI_TRANS_INSERT]], align 1
; CHECK-NEXT: br label [[BB2D]]
; CHECK: BB2d:
-; CHECK-NEXT: [[L93_PRE5:%.*]] = phi i1 [ [[L93_PRE]], [[BB2C]] ], [ false, [[BB:%.*]] ]
-; CHECK-NEXT: [[AZ]] = phi i1 [ [[AZ2]], [[BB2C]] ], [ false, [[BB]] ]
+; CHECK-NEXT: [[AZ]] = phi i1 [ [[AZ2]], [[BB2C]] ], [ false, [[BB:%.*]] ]
+; CHECK-NEXT: [[L93:%.*]] = phi i1 [ [[L93_PRE]], [[BB2C]] ], [ false, [[BB]] ]
; CHECK-NEXT: [[TMP0:%.*]] = sext i1 [[AZ]] to i64
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i1, ptr null, i64 [[TMP0]]
; CHECK-NEXT: store i1 [[AZ]], ptr null, align 2
-; CHECK-NEXT: br i1 [[L93_PRE5]], label [[BB2C]], label [[BB1E:%.*]]
+; CHECK-NEXT: br i1 [[L93]], label [[BB2C]], label [[BB1E:%.*]]
; CHECK: BB1e:
; CHECK-NEXT: br i1 [[AZ]], label [[BB2F:%.*]], label [[BB4:%.*]]
; CHECK: BB2f:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
index 7fb0fbdda0b5..f71aaa289b89 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
@@ -42,8 +42,7 @@ define <vscale x 8 x i16> @srshl_abs_positive_merge(<vscale x 8 x i16> %a, <vsca
define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg2) #0 {
; CHECK-LABEL: @srshl_abs_all_active_pred(
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2))
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
index b8ea4de3d238..1c5f7464d858 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
@@ -5,8 +5,7 @@ target triple = "aarch64-unknown-linux-gnu"
define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(
; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -17,8 +16,7 @@ define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(<vscale x 8 x bfloat> %a
define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(
; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -29,8 +27,7 @@ define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vsca
define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f32(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -41,8 +38,7 @@ define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x
define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f64(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -53,8 +49,7 @@ define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x
define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f16(
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -65,8 +60,7 @@ define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale
define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f64(
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -77,8 +71,7 @@ define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale
define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f16(
; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -89,8 +82,7 @@ define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscal
define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f32(
; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -101,8 +93,7 @@ define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscal
define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtlt_f32_f16(
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -113,8 +104,7 @@ define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscal
define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 2 x double> @test_fcvtlt_f64_f32(
; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -125,8 +115,7 @@ define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vsc
define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(
; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[A]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -137,8 +126,7 @@ define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vs
define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 8 x half> @test_fcvtnt_f16_f32(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -149,8 +137,7 @@ define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale
define <vscale x 4 x float> @test_fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtnt_f32_f64(
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -161,8 +148,7 @@ define <vscale x 4 x float> @test_fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscal
define <vscale x 4 x float> @test_fcvtx_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtx_f32_f64(
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -173,8 +159,7 @@ define <vscale x 4 x float> @test_fcvtx_f32_f64(<vscale x 4 x float> %a, <vscale
define <vscale x 4 x float> @test_fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtxnt_f32_f64(
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> [[A]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -185,8 +170,7 @@ define <vscale x 4 x float> @test_fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vsca
define <vscale x 8 x i16> @test_fcvtzs(<vscale x 8 x i16> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: define <vscale x 8 x i16> @test_fcvtzs(
; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
; CHECK-NEXT: ret <vscale x 8 x i16> [[OUT]]
;
%pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
@@ -197,8 +181,7 @@ define <vscale x 8 x i16> @test_fcvtzs(<vscale x 8 x i16> %a, <vscale x 8 x half
define <vscale x 4 x i32> @test_fcvtzs_i32_f16(<vscale x 4 x i32> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzs_i32_f16(
; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -209,8 +192,7 @@ define <vscale x 4 x i32> @test_fcvtzs_i32_f16(<vscale x 4 x i32> %a, <vscale x
define <vscale x 4 x i32> @test_fcvtzs_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzs_i32_f64(
; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -221,8 +203,7 @@ define <vscale x 4 x i32> @test_fcvtzs_i32_f64(<vscale x 4 x i32> %a, <vscale x
define <vscale x 2 x i64> @test_fcvtzs_i64_f16(<vscale x 2 x i64> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzs_i64_f16(
; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
; CHECK-NEXT: ret <vscale x 2 x i64> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -233,8 +214,7 @@ define <vscale x 2 x i64> @test_fcvtzs_i64_f16(<vscale x 2 x i64> %a, <vscale x
define <vscale x 2 x i64> @test_fcvtzs_i64_f32(<vscale x 2 x i64> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzs_i64_f32(
; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 2 x i64> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -245,8 +225,7 @@ define <vscale x 2 x i64> @test_fcvtzs_i64_f32(<vscale x 2 x i64> %a, <vscale x
define <vscale x 8 x i16> @test_fcvtzu(<vscale x 8 x i16> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: define <vscale x 8 x i16> @test_fcvtzu(
; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
; CHECK-NEXT: ret <vscale x 8 x i16> [[OUT]]
;
%pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
@@ -257,8 +236,7 @@ define <vscale x 8 x i16> @test_fcvtzu(<vscale x 8 x i16> %a, <vscale x 8 x half
define <vscale x 4 x i32> @test_fcvtzu_i32_f16(<vscale x 4 x i32> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzu_i32_f16(
; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -269,8 +247,7 @@ define <vscale x 4 x i32> @test_fcvtzu_i32_f16(<vscale x 4 x i32> %a, <vscale x
define <vscale x 4 x i32> @test_fcvtzu_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzu_i32_f64(
; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -281,8 +258,7 @@ define <vscale x 4 x i32> @test_fcvtzu_i32_f64(<vscale x 4 x i32> %a, <vscale x
define <vscale x 2 x i64> @test_fcvtzu_i64_f16(<vscale x 2 x i64> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzu_i64_f16(
; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
; CHECK-NEXT: ret <vscale x 2 x i64> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -293,8 +269,7 @@ define <vscale x 2 x i64> @test_fcvtzu_i64_f16(<vscale x 2 x i64> %a, <vscale x
define <vscale x 2 x i64> @test_fcvtzu_i64_f32(<vscale x 2 x i64> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzu_i64_f32(
; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
; CHECK-NEXT: ret <vscale x 2 x i64> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -305,8 +280,7 @@ define <vscale x 2 x i64> @test_fcvtzu_i64_f32(<vscale x 2 x i64> %a, <vscale x
define <vscale x 8 x half> @test_scvtf(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[B]])
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
;
%pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
@@ -317,8 +291,7 @@ define <vscale x 8 x half> @test_scvtf(<vscale x 8 x half> %a, <vscale x 8 x i16
define <vscale x 8 x half> @test_scvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf_f16_i32(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[B]])
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -329,8 +302,7 @@ define <vscale x 8 x half> @test_scvtf_f16_i32(<vscale x 8 x half> %a, <vscale x
define <vscale x 8 x half> @test_scvtf_f16_i64(<vscale x 8 x half> %a,<vscale x 2 x i64> %b) {
; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf_f16_i64(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[B]])
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -341,8 +313,7 @@ define <vscale x 8 x half> @test_scvtf_f16_i64(<vscale x 8 x half> %a,<vscale x
define <vscale x 4 x float> @test_scvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: define <vscale x 4 x float> @test_scvtf_f32_i64(
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[B]])
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -353,8 +324,7 @@ define <vscale x 4 x float> @test_scvtf_f32_i64(<vscale x 4 x float> %a, <vscale
define <vscale x 2 x double> @test_scvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: define <vscale x 2 x double> @test_scvtf_f64_i32(
; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x i32> [[B]])
; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -365,8 +335,7 @@ define <vscale x 2 x double> @test_scvtf_f64_i32(<vscale x 2 x double> %a, <vs
define <vscale x 8 x half> @test_ucvtf(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[B]])
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
;
%pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
@@ -377,8 +346,7 @@ define <vscale x 8 x half> @test_ucvtf(<vscale x 8 x half> %a, <vscale x 8 x i16
define <vscale x 8 x half> @test_ucvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf_f16_i32(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[B]])
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
;
%pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -389,8 +357,7 @@ define <vscale x 8 x half> @test_ucvtf_f16_i32(<vscale x 8 x half> %a, <vscale x
define <vscale x 8 x half> @test_ucvtf_f16_i64(<vscale x 8 x half> %a,<vscale x 2 x i64> %b) {
; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf_f16_i64(
; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[B]])
; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -401,8 +368,7 @@ define <vscale x 8 x half> @test_ucvtf_f16_i64(<vscale x 8 x half> %a,<vscale x
define <vscale x 4 x float> @test_ucvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: define <vscale x 4 x float> @test_ucvtf_f32_i64(
; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[B]])
; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -413,8 +379,7 @@ define <vscale x 4 x float> @test_ucvtf_f32_i64(<vscale x 4 x float> %a, <vscale
define <vscale x 2 x double> @test_ucvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: define <vscale x 2 x double> @test_ucvtf_f64_i32(
; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x i32> [[B]])
; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
;
%pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
index c67662f87250..d8d674029853 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
@@ -5,7 +5,7 @@ target triple = "aarch64-unknown-linux-gnu"
define <vscale x 4 x i32> @combine_ld1(ptr %ptr) #0 {
; CHECK-LABEL: @combine_ld1(
-; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, ptr [[PTR:%.*]], align 16, !annotation !0
+; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, ptr [[PTR:%.*]], align 16, !annotation [[META0:![0-9]+]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -15,7 +15,7 @@ define <vscale x 4 x i32> @combine_ld1(ptr %ptr) #0 {
define <vscale x 4 x i32> @combine_ld1_casted_predicate(ptr %ptr) #0 {
; CHECK-LABEL: @combine_ld1_casted_predicate(
-; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, ptr [[PTR:%.*]], align 16, !annotation !0
+; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, ptr [[PTR:%.*]], align 16, !annotation [[META0]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
@@ -28,7 +28,7 @@ define <vscale x 4 x i32> @combine_ld1_casted_predicate(ptr %ptr) #0 {
define <vscale x 4 x i32> @combine_ld1_masked(ptr %ptr) #0 {
; CHECK-LABEL: @combine_ld1_masked(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
-; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[PTR:%.*]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer), !annotation !0
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[PTR:%.*]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer), !annotation [[META0]]
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
@@ -38,10 +38,9 @@ define <vscale x 4 x i32> @combine_ld1_masked(ptr %ptr) #0 {
define <vscale x 8 x i16> @combine_ld1_masked_casted_predicate(ptr %ptr) #0 {
; CHECK-LABEL: @combine_ld1_masked_casted_predicate(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]])
-; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[PTR:%.*]], i32 1, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> zeroinitializer), !annotation !0
+; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[PTR:%.*]], i32 1, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> zeroinitializer), !annotation [[META0]]
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -53,7 +52,7 @@ define <vscale x 8 x i16> @combine_ld1_masked_casted_predicate(ptr %ptr) #0 {
define void @combine_st1(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
; CHECK-LABEL: @combine_st1(
-; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], align 16, !annotation !0
+; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], align 16, !annotation [[META0]]
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -63,7 +62,7 @@ define void @combine_st1(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
define void @combine_st1_casted_predicate(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
; CHECK-LABEL: @combine_st1_casted_predicate(
-; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], align 16, !annotation !0
+; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], align 16, !annotation [[META0]]
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
@@ -76,7 +75,7 @@ define void @combine_st1_casted_predicate(<vscale x 4 x i32> %vec, ptr %ptr) #0
define void @combine_st1_masked(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
; CHECK-LABEL: @combine_st1_masked(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 4 x i1> [[TMP1]]), !annotation !0
+; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 4 x i1> [[TMP1]]), !annotation [[META0]]
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
@@ -86,10 +85,9 @@ define void @combine_st1_masked(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
define void @combine_st1_masked_casted_predicate(<vscale x 8 x i16> %vec, ptr %ptr) #0 {
; CHECK-LABEL: @combine_st1_masked_casted_predicate(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]])
-; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[VEC:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 8 x i1> [[TMP3]]), !annotation !0
+; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[VEC:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 8 x i1> [[TMP3]]), !annotation [[META0]]
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
index 6a2c0f8689ca..93de2e731606 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll
@@ -22,8 +22,7 @@ define <vscale x 16 x i1> @dupq_b_0() #0 {
define <vscale x 16 x i1> @dupq_b_d() #0 {
; CHECK-LABEL: define <vscale x 16 x i1> @dupq_b_d(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> splat (i1 true))
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
@@ -38,8 +37,7 @@ define <vscale x 16 x i1> @dupq_b_d() #0 {
define <vscale x 16 x i1> @dupq_b_w() #0 {
; CHECK-LABEL: define <vscale x 16 x i1> @dupq_b_w(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> splat (i1 true))
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
@@ -54,8 +52,7 @@ define <vscale x 16 x i1> @dupq_b_w() #0 {
define <vscale x 16 x i1> @dupq_b_h() #0 {
; CHECK-LABEL: define <vscale x 16 x i1> @dupq_b_h(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> splat (i1 true))
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
@@ -70,8 +67,7 @@ define <vscale x 16 x i1> @dupq_b_h() #0 {
define <vscale x 16 x i1> @dupq_b_b() #0 {
; CHECK-LABEL: define <vscale x 16 x i1> @dupq_b_b(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
+; CHECK-NEXT: ret <vscale x 16 x i1> splat (i1 true)
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison,
@@ -101,9 +97,8 @@ define <vscale x 8 x i1> @dupq_h_0() #0 {
define <vscale x 8 x i1> @dupq_h_d() #0 {
; CHECK-LABEL: define <vscale x 8 x i1> @dupq_h_d(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP1]])
; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP3]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
@@ -118,9 +113,8 @@ define <vscale x 8 x i1> @dupq_h_d() #0 {
define <vscale x 8 x i1> @dupq_h_w() #0 {
; CHECK-LABEL: define <vscale x 8 x i1> @dupq_h_w(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> splat (i1 true))
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP1]])
; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP3]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
@@ -135,8 +129,7 @@ define <vscale x 8 x i1> @dupq_h_w() #0 {
define <vscale x 8 x i1> @dupq_h_h() #0 {
; CHECK-LABEL: define <vscale x 8 x i1> @dupq_h_h(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: ret <vscale x 8 x i1> [[TMP1]]
+; CHECK-NEXT: ret <vscale x 8 x i1> splat (i1 true)
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison,
@@ -166,9 +159,8 @@ define <vscale x 4 x i1> @dupq_w_0() #0 {
define <vscale x 4 x i1> @dupq_w_d() #0 {
; CHECK-LABEL: define <vscale x 4 x i1> @dupq_w_d(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
-; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP2]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> splat (i1 true))
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP1]])
; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP3]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -183,8 +175,7 @@ define <vscale x 4 x i1> @dupq_w_d() #0 {
define <vscale x 4 x i1> @dupq_w_w() #0 {
; CHECK-LABEL: define <vscale x 4 x i1> @dupq_w_w(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP1]]
+; CHECK-NEXT: ret <vscale x 4 x i1> splat (i1 true)
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison,
@@ -214,8 +205,7 @@ define <vscale x 2 x i1> @dupq_d_0() #0 {
define <vscale x 2 x i1> @dupq_d_d() #0 {
; CHECK-LABEL: define <vscale x 2 x i1> @dupq_d_d(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP1]]
+; CHECK-NEXT: ret <vscale x 2 x i1> splat (i1 true)
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison,
@@ -231,10 +221,9 @@ define <vscale x 2 x i1> @dupq_d_d() #0 {
define <vscale x 2 x i1> @dupq_neg1() #0 {
; CHECK-LABEL: define <vscale x 2 x i1> @dupq_neg1(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> <i64 1, i64 0>, i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP4]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -249,10 +238,9 @@ define <vscale x 2 x i1> @dupq_neg1() #0 {
define <vscale x 4 x i1> @dupq_neg2() #0 {
; CHECK-LABEL: define <vscale x 4 x i1> @dupq_neg2(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 1>, i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0)
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP4]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -267,10 +255,9 @@ define <vscale x 4 x i1> @dupq_neg2() #0 {
define <vscale x 4 x i1> @dupq_neg3() #0 {
; CHECK-LABEL: define <vscale x 4 x i1> @dupq_neg3(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>, i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0)
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP4]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -285,10 +272,9 @@ define <vscale x 4 x i1> @dupq_neg3() #0 {
define <vscale x 4 x i1> @dupq_neg4() #0 {
; CHECK-LABEL: define <vscale x 4 x i1> @dupq_neg4(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 0>, i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0)
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP4]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -303,10 +289,9 @@ define <vscale x 4 x i1> @dupq_neg4() #0 {
define <vscale x 4 x i1> @dupq_neg5() #0 {
; CHECK-LABEL: define <vscale x 4 x i1> @dupq_neg5(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>, i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0)
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP4]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -321,12 +306,11 @@ define <vscale x 4 x i1> @dupq_neg5() #0 {
define <vscale x 4 x i1> @dupq_neg6(i1 %a) #0 {
; CHECK-LABEL: define <vscale x 4 x i1> @dupq_neg6(
; CHECK-SAME: i1 [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[A]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> <i32 1, i32 1, i32 1, i32 poison>, i32 [[TMP2]], i64 3
; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> [[TMP3]], i64 0)
; CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP4]], i64 0)
-; CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[TMP5]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP5]], <vscale x 2 x i64> zeroinitializer)
; CHECK-NEXT: ret <vscale x 4 x i1> [[TMP6]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -342,10 +326,9 @@ define <vscale x 4 x i1> @dupq_neg6(i1 %a) #0 {
define <vscale x 2 x i1> @dupq_neg7() #0 {
; CHECK-LABEL: define <vscale x 2 x i1> @dupq_neg7(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> splat (i64 1), i64 2)
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP4]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -360,10 +343,9 @@ define <vscale x 2 x i1> @dupq_neg7() #0 {
define <vscale x 2 x i1> @dupq_neg8() #0 {
; CHECK-LABEL: define <vscale x 2 x i1> @dupq_neg8(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> splat (i64 1), i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 1)
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP4]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -378,10 +360,9 @@ define <vscale x 2 x i1> @dupq_neg8() #0 {
define <vscale x 2 x i1> @dupq_neg9(<vscale x 2 x i64> %x) #0 {
; CHECK-LABEL: define <vscale x 2 x i1> @dupq_neg9(
; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[X]], <2 x i64> splat (i64 1), i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP4]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -396,10 +377,9 @@ define <vscale x 2 x i1> @dupq_neg9(<vscale x 2 x i64> %x) #0 {
define <vscale x 2 x i1> @dupq_neg10() #0 {
; CHECK-LABEL: define <vscale x 2 x i1> @dupq_neg10(
; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> splat (i64 1), i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> splat (i64 1))
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> splat (i64 1))
; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP4]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -448,10 +428,9 @@ define <vscale x 2 x i1> @dupq_neg12() #0 {
define <vscale x 2 x i1> @dupq_neg13(<vscale x 2 x i64> %x) #0 {
; CHECK-LABEL: define <vscale x 2 x i1> @dupq_neg13(
; CHECK-SAME: <vscale x 2 x i64> [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> splat (i64 1), i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[X]])
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[X]])
; CHECK-NEXT: ret <vscale x 2 x i1> [[TMP4]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -465,10 +444,9 @@ define <vscale x 2 x i1> @dupq_neg13(<vscale x 2 x i64> %x) #0 {
define <vscale x 16 x i1> @dupq_b_idx(i64 %idx) #0 {
; CHECK-LABEL: define <vscale x 16 x i1> @dupq_b_idx(
; CHECK-SAME: i64 [[IDX:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> zeroinitializer, i64 0)
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP2]], i64 [[IDX]])
-; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[TMP3]], <vscale x 2 x i64> zeroinitializer)
; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll
index f6f60d6d64e7..690bcd3dc033 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll
@@ -8,8 +8,7 @@ target triple = "aarch64-unknown-linux-gnu"
define <vscale x 2 x double> @replace_fadd_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK: Function Attrs: strictfp
; CHECK-LABEL: @replace_fadd_intrinsic_double_strictfp(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #1
@@ -22,8 +21,7 @@ define <vscale x 2 x double> @replace_fadd_intrinsic_double_strictfp(<vscale x 2
define <vscale x 2 x double> @call_replace_fadd_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK: Function Attrs: strictfp
; CHECK-LABEL: @call_replace_fadd_intrinsic_double_strictfp(
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]]
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
;
%1 = call <vscale x 2 x double> @replace_fadd_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1
@@ -35,8 +33,7 @@ define <vscale x 2 x double> @call_replace_fadd_intrinsic_double_strictfp(<vscal
define <vscale x 2 x double> @replace_fmul_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK: Function Attrs: strictfp
; CHECK-LABEL: @replace_fmul_intrinsic_double_strictfp(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]]
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #1
@@ -49,8 +46,7 @@ define <vscale x 2 x double> @replace_fmul_intrinsic_double_strictfp(<vscale x 2
define <vscale x 2 x double> @call_replace_fmul_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK: Function Attrs: strictfp
; CHECK-LABEL: @call_replace_fmul_intrinsic_double_strictfp(
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]]
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
;
%1 = call <vscale x 2 x double> @replace_fmul_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1
@@ -62,8 +58,7 @@ define <vscale x 2 x double> @call_replace_fmul_intrinsic_double_strictfp(<vscal
define <vscale x 2 x double> @replace_fsub_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK: Function Attrs: strictfp
; CHECK-LABEL: @replace_fsub_intrinsic_double_strictfp(
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]]
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #1
@@ -76,8 +71,7 @@ define <vscale x 2 x double> @replace_fsub_intrinsic_double_strictfp(<vscale x 2
define <vscale x 2 x double> @call_replace_fsub_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK: Function Attrs: strictfp
; CHECK-LABEL: @call_replace_fsub_intrinsic_double_strictfp(
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) #[[ATTR2]]
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR2]]
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
;
%1 = call <vscale x 2 x double> @replace_fsub_intrinsic_double_strictfp(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll
index 60b2efe27168..bbf4b3c65c30 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll
@@ -16,9 +16,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1>, <v
define <vscale x 8 x half> @replace_fabd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x half> @replace_fabd_intrinsic_half
; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -29,9 +28,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <
define <vscale x 4 x float> @replace_fabd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @replace_fabd_intrinsic_float
; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -42,9 +40,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>,
define <vscale x 2 x double> @replace_fabd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @replace_fabd_intrinsic_double
; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -117,9 +114,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1>, <v
define <vscale x 8 x half> @replace_fdiv_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x half> @replace_fdiv_intrinsic_half
; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -130,9 +126,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <
define <vscale x 4 x float> @replace_fdiv_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @replace_fdiv_intrinsic_float
; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -143,9 +138,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>,
define <vscale x 2 x double> @replace_fdiv_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @replace_fdiv_intrinsic_double
; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -168,9 +162,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1>, <v
define <vscale x 8 x half> @replace_fmax_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x half> @replace_fmax_intrinsic_half
; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmax.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmax.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -181,9 +174,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <
define <vscale x 4 x float> @replace_fmax_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @replace_fmax_intrinsic_float
; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -194,9 +186,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>,
define <vscale x 2 x double> @replace_fmax_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @replace_fmax_intrinsic_double
; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -219,9 +210,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1>,
define <vscale x 8 x half> @replace_fmaxnm_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x half> @replace_fmaxnm_intrinsic_half
; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -232,9 +222,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>,
define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float
; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -245,9 +234,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1>
define <vscale x 2 x double> @replace_fmaxnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @replace_fmaxnm_intrinsic_double
; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -270,9 +258,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1>, <v
define <vscale x 8 x half> @replace_fmin_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x half> @replace_fmin_intrinsic_half
; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmin.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmin.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -283,9 +270,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <
define <vscale x 4 x float> @replace_fmin_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @replace_fmin_intrinsic_float
; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -296,9 +282,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1>,
define <vscale x 2 x double> @replace_fmin_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @replace_fmin_intrinsic_double
; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -321,9 +306,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1>,
define <vscale x 8 x half> @replace_fminnm_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x half> @replace_fminnm_intrinsic_half
; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fminnm.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fminnm.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -334,9 +318,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>,
define <vscale x 4 x float> @replace_fminnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @replace_fminnm_intrinsic_float
; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -347,9 +330,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>
define <vscale x 2 x double> @replace_fminnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @replace_fminnm_intrinsic_double
; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -372,9 +354,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1>, <v
define <vscale x 8 x half> @replace_fmla_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
; CHECK-LABEL: define <vscale x 8 x half> @replace_fmla_intrinsic_half
; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]])
-; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
@@ -385,9 +366,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1>, <
define <vscale x 4 x float> @replace_fmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @replace_fmla_intrinsic_float
; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
@@ -398,9 +378,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1>,
define <vscale x 2 x double> @replace_fmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @replace_fmla_intrinsic_double
; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
-; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
@@ -423,9 +402,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1>, <v
define <vscale x 8 x half> @replace_fmls_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
; CHECK-LABEL: define <vscale x 8 x half> @replace_fmls_intrinsic_half
; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]])
-; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
@@ -436,9 +414,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1>, <
define <vscale x 4 x float> @replace_fmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @replace_fmls_intrinsic_float
; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
@@ -449,9 +426,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1>,
define <vscale x 2 x double> @replace_fmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @replace_fmls_intrinsic_double
; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
-; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
@@ -524,9 +500,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1>, <
define <vscale x 8 x half> @replace_fmulx_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x half> @replace_fmulx_intrinsic_half
; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmulx.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmulx.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -537,9 +512,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>,
define <vscale x 4 x float> @replace_fmulx_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @replace_fmulx_intrinsic_float
; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -550,9 +524,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>,
define <vscale x 2 x double> @replace_fmulx_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @replace_fmulx_intrinsic_double
; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -575,9 +548,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1>, <
define <vscale x 8 x half> @replace_fnmla_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
; CHECK-LABEL: define <vscale x 8 x half> @replace_fnmla_intrinsic_half
; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmla.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]])
-; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmla.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
@@ -588,9 +560,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1>,
define <vscale x 4 x float> @replace_fnmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmla_intrinsic_float
; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
@@ -601,9 +572,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1>,
define <vscale x 2 x double> @replace_fnmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @replace_fnmla_intrinsic_double
; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
-; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
@@ -626,9 +596,8 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1>, <
define <vscale x 8 x half> @replace_fnmls_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
; CHECK-LABEL: define <vscale x 8 x half> @replace_fnmls_intrinsic_half
; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmls.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]])
-; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmls.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
@@ -639,9 +608,8 @@ declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1>,
define <vscale x 4 x float> @replace_fnmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmls_intrinsic_float
; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
@@ -652,9 +620,8 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1>,
define <vscale x 2 x double> @replace_fnmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @replace_fnmls_intrinsic_double
; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.u.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
-; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
@@ -729,9 +696,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vs
define <vscale x 16 x i8> @replace_add_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_add_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -742,9 +708,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vsc
define <vscale x 8 x i16> @replace_add_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_add_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -755,9 +720,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vsc
define <vscale x 4 x i32> @replace_add_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_add_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -768,9 +732,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1>, <vsc
define <vscale x 2 x i64> @replace_add_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_add_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -793,9 +756,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1>, <vs
define <vscale x 16 x i8> @replace_mla_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_mla_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], <vscale x 16 x i8> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]], <vscale x 16 x i8> [[C]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]], <vscale x 16 x i8> [[C]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
@@ -806,9 +768,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1>, <vsc
define <vscale x 8 x i16> @replace_mla_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_mla_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i16> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]], <vscale x 8 x i16> [[C]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]], <vscale x 8 x i16> [[C]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
@@ -819,9 +780,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1>, <vsc
define <vscale x 4 x i32> @replace_mla_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_mla_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
@@ -832,9 +792,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1>, <vsc
define <vscale x 2 x i64> @replace_mla_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_mla_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]], <vscale x 2 x i64> [[C]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]], <vscale x 2 x i64> [[C]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
@@ -857,9 +816,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1>, <vs
define <vscale x 16 x i8> @replace_mls_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_mls_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], <vscale x 16 x i8> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]], <vscale x 16 x i8> [[C]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]], <vscale x 16 x i8> [[C]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
@@ -870,9 +828,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1>, <vsc
define <vscale x 8 x i16> @replace_mls_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_mls_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i16> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]], <vscale x 8 x i16> [[C]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]], <vscale x 8 x i16> [[C]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
@@ -883,9 +840,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1>, <vsc
define <vscale x 4 x i32> @replace_mls_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_mls_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
@@ -896,9 +852,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1>, <vsc
define <vscale x 2 x i64> @replace_mls_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_mls_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]], <vscale x 2 x i64> [[C]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]], <vscale x 2 x i64> [[C]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
@@ -921,9 +876,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vs
define <vscale x 16 x i8> @replace_mul_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_mul_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -934,9 +888,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vsc
define <vscale x 8 x i16> @replace_mul_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_mul_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -947,9 +900,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vsc
define <vscale x 4 x i32> @replace_mul_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_mul_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -960,9 +912,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vsc
define <vscale x 2 x i64> @replace_mul_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_mul_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -985,9 +936,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <v
define <vscale x 16 x i8> @replace_sabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_sabd_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -998,9 +948,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vs
define <vscale x 8 x i16> @replace_sabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_sabd_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1011,9 +960,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vs
define <vscale x 4 x i32> @replace_sabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_sabd_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1024,9 +972,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vs
define <vscale x 2 x i64> @replace_sabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_sabd_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1049,9 +996,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1>, <v
define <vscale x 16 x i8> @replace_smax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_smax_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1062,9 +1008,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1>, <vs
define <vscale x 8 x i16> @replace_smax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_smax_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1075,9 +1020,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vs
define <vscale x 4 x i32> @replace_smax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_smax_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1088,9 +1032,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1>, <vs
define <vscale x 2 x i64> @replace_smax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_smax_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1113,9 +1056,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1>, <v
define <vscale x 16 x i8> @replace_smin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_smin_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1126,9 +1068,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1>, <vs
define <vscale x 8 x i16> @replace_smin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_smin_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1139,9 +1080,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vs
define <vscale x 4 x i32> @replace_smin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_smin_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1152,9 +1092,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1>, <vs
define <vscale x 2 x i64> @replace_smin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_smin_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1177,9 +1116,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <
define <vscale x 16 x i8> @replace_smulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_smulh_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1190,9 +1128,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <v
define <vscale x 8 x i16> @replace_smulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_smulh_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1203,9 +1140,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @replace_smulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_smulh_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1216,9 +1152,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <v
define <vscale x 2 x i64> @replace_smulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_smulh_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1241,9 +1176,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vs
define <vscale x 16 x i8> @replace_sub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_sub_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1254,9 +1188,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1>, <vsc
define <vscale x 8 x i16> @replace_sub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_sub_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1267,9 +1200,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vsc
define <vscale x 4 x i32> @replace_sub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_sub_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1280,9 +1212,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1>, <vsc
define <vscale x 2 x i64> @replace_sub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_sub_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1305,9 +1236,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <v
define <vscale x 16 x i8> @replace_uabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_uabd_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1318,9 +1248,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vs
define <vscale x 8 x i16> @replace_uabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_uabd_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1331,9 +1260,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vs
define <vscale x 4 x i32> @replace_uabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_uabd_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1344,9 +1272,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vs
define <vscale x 2 x i64> @replace_uabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_uabd_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1369,9 +1296,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1>, <v
define <vscale x 16 x i8> @replace_umax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_umax_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1382,9 +1308,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1>, <vs
define <vscale x 8 x i16> @replace_umax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_umax_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1395,9 +1320,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vs
define <vscale x 4 x i32> @replace_umax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_umax_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1408,9 +1332,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vs
define <vscale x 2 x i64> @replace_umax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_umax_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1433,9 +1356,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1>, <v
define <vscale x 16 x i8> @replace_umin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_umin_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1446,9 +1368,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1>, <vs
define <vscale x 8 x i16> @replace_umin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_umin_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1459,9 +1380,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vs
define <vscale x 4 x i32> @replace_umin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_umin_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1472,9 +1392,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vs
define <vscale x 2 x i64> @replace_umin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_umin_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1497,9 +1416,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <
define <vscale x 16 x i8> @replace_umulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_umulh_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1510,9 +1428,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <v
define <vscale x 8 x i16> @replace_umulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_umulh_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1523,9 +1440,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @replace_umulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_umulh_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1536,9 +1452,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <v
define <vscale x 2 x i64> @replace_umulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_umulh_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1563,9 +1478,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vs
define <vscale x 16 x i8> @replace_asr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_asr_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1576,9 +1490,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vsc
define <vscale x 8 x i16> @replace_asr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_asr_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1589,9 +1502,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vsc
define <vscale x 4 x i32> @replace_asr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_asr_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1602,9 +1514,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vsc
define <vscale x 2 x i64> @replace_asr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_asr_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1627,9 +1538,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vs
define <vscale x 16 x i8> @replace_lsl_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsl_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1640,9 +1550,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vsc
define <vscale x 8 x i16> @replace_lsl_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsl_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1653,9 +1562,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vsc
define <vscale x 4 x i32> @replace_lsl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsl_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1666,9 +1574,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vsc
define <vscale x 2 x i64> @replace_lsl_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsl_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1691,9 +1598,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vs
define <vscale x 16 x i8> @replace_lsr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsr_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1704,9 +1610,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vsc
define <vscale x 8 x i16> @replace_lsr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsr_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1717,9 +1622,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vsc
define <vscale x 4 x i32> @replace_lsr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsr_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1730,9 +1634,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vsc
define <vscale x 2 x i64> @replace_lsr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsr_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1757,9 +1660,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vs
define <vscale x 16 x i8> @replace_and_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_and_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1770,9 +1672,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vsc
define <vscale x 8 x i16> @replace_and_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_and_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1783,9 +1684,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vsc
define <vscale x 4 x i32> @replace_and_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_and_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1796,9 +1696,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vsc
define <vscale x 2 x i64> @replace_and_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_and_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1821,9 +1720,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vs
define <vscale x 16 x i8> @replace_bic_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_bic_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1834,9 +1732,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vsc
define <vscale x 8 x i16> @replace_bic_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_bic_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1847,9 +1744,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vsc
define <vscale x 4 x i32> @replace_bic_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_bic_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1860,9 +1756,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vsc
define <vscale x 2 x i64> @replace_bic_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_bic_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1885,9 +1780,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vs
define <vscale x 16 x i8> @replace_eor_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_eor_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1898,9 +1792,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vsc
define <vscale x 8 x i16> @replace_eor_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_eor_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1911,9 +1804,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vsc
define <vscale x 4 x i32> @replace_eor_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_eor_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1924,9 +1816,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1>, <vsc
define <vscale x 2 x i64> @replace_eor_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_eor_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -1949,9 +1840,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vs
define <vscale x 16 x i8> @replace_orr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_orr_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -1962,9 +1852,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1>, <vsc
define <vscale x 8 x i16> @replace_orr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_orr_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -1975,9 +1864,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vsc
define <vscale x 4 x i32> @replace_orr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_orr_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -1988,9 +1876,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1>, <vsc
define <vscale x 2 x i64> @replace_orr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_orr_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -2015,9 +1902,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <
define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -2028,9 +1914,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <v
define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -2041,9 +1926,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -2054,9 +1938,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <v
define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
@@ -2079,9 +1962,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <
define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8
; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
;
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -2092,9 +1974,8 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <v
define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16
; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
;
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
@@ -2105,9 +1986,8 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
@@ -2118,9 +1998,8 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <v
define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64
; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
;
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
index a22454b586c2..e00ad68bdfac 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll
@@ -39,8 +39,7 @@ define i1 @ptest_any1(<vscale x 2 x i1> %a) #0 {
; No transform because the ptest is using differently sized operands.
define i1 @ptest_any2(<vscale x 4 x i1> %a) #0 {
; CHECK-LABEL: @ptest_any2(
-; CHECK-NEXT: [[MASK:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[MASK]])
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> splat (i1 true))
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[A:%.*]])
; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i1> [[TMP2]])
; CHECK-NEXT: ret i1 [[OUT]]
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll
index 155102db52b5..ff728dc47c66 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll
@@ -6,8 +6,7 @@ target triple = "aarch64-unknown-linux-gnu"
; Test that rdffr is substituted with predicated form which enables ptest optimization later.
define <vscale x 16 x i1> @predicate_rdffr() #0 {
; CHECK-LABEL: @predicate_rdffr(
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true))
; CHECK-NEXT: ret <vscale x 16 x i1> [[OUT]]
;
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
diff --git a/llvm/test/Transforms/InstCombine/array.ll b/llvm/test/Transforms/InstCombine/array.ll
index 3edb47dda62c..a09b66273368 100644
--- a/llvm/test/Transforms/InstCombine/array.ll
+++ b/llvm/test/Transforms/InstCombine/array.ll
@@ -109,6 +109,58 @@ entry:
ret void
}
+; FIXME: Should be transformed as OR+GEP -> GEP+GEP (similar to gep_inbounds_add_nuw below).
+define ptr @gep_inbounds_nuwaddlike(ptr %ptr, i64 %a, i64 %b) {
+; CHECK-LABEL: define ptr @gep_inbounds_nuwaddlike(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[ADD:%.*]] = or disjoint i64 [[A]], [[B]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[PTR]], i64 [[ADD]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+ %add = or disjoint i64 %a, %b
+ %gep = getelementptr inbounds nuw i32, ptr %ptr, i64 %add
+ ret ptr %gep
+}
+
+; FIXME: Preserve "inbounds nuw".
+define ptr @gep_inbounds_add_nuw(ptr %ptr, i64 %a, i64 %b) {
+; CHECK-LABEL: define ptr @gep_inbounds_add_nuw(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+ %add = add nuw i64 %a, %b
+ %gep = getelementptr inbounds nuw i32, ptr %ptr, i64 %add
+ ret ptr %gep
+}
+
+; FIXME: Preserve "nusw nuw".
+define ptr @gep_inbounds_add_nusw_nuw(ptr %ptr, i64 %a, i64 %b) {
+; CHECK-LABEL: define ptr @gep_inbounds_add_nusw_nuw(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+ %add = add nuw i64 %a, %b
+ %gep = getelementptr nusw nuw i32, ptr %ptr, i64 %add
+ ret ptr %gep
+}
+
+; FIXME: Preserve "nuw".
+define ptr @gep_add_nuw(ptr %ptr, i64 %a, i64 %b) {
+; CHECK-LABEL: define ptr @gep_add_nuw(
+; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[A]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[TMP1]], i64 [[B]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+ %add = add nuw i64 %a, %b
+ %gep = getelementptr nuw i32, ptr %ptr, i64 %add
+ ret ptr %gep
+}
+
define ptr @gep_inbounds_add_nsw_nonneg(ptr %ptr, i64 %a, i64 %b) {
; CHECK-LABEL: define ptr @gep_inbounds_add_nsw_nonneg(
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
@@ -219,6 +271,27 @@ define ptr @gep_inbounds_sext_add_nonneg(ptr %ptr, i32 %a) {
ret ptr %gep
}
+; FIXME: Could be optimized similar to gep_inbounds_sext_add_nonneg above
+; (difference is that we are using disjoint OR which is canonical form
+; of ADD with disjoint operands).
+define ptr @gep_inbounds_sext_addlike_nonneg(ptr %ptr, i32 %a) {
+; CHECK-LABEL: define ptr @gep_inbounds_sext_addlike_nonneg(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]]) {
+; CHECK-NEXT: [[A_NNEG:%.*]] = icmp sgt i32 [[A]], -1
+; CHECK-NEXT: call void @llvm.assume(i1 [[A_NNEG]])
+; CHECK-NEXT: [[ADD:%.*]] = or disjoint i32 [[A]], 10
+; CHECK-NEXT: [[IDX:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[PTR]], i64 [[IDX]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+ %a.nneg = icmp sgt i32 %a, -1
+ call void @llvm.assume(i1 %a.nneg)
+ %add = or disjoint i32 %a, 10
+ %idx = sext i32 %add to i64
+ %gep = getelementptr inbounds i32, ptr %ptr, i64 %idx
+ ret ptr %gep
+}
+
define ptr @gep_inbounds_sext_add_not_nonneg_1(ptr %ptr, i32 %a) {
; CHECK-LABEL: define ptr @gep_inbounds_sext_add_not_nonneg_1(
; CHECK-SAME: ptr [[PTR:%.*]], i32 [[A:%.*]]) {
diff --git a/llvm/test/Transforms/InstCombine/fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fabs-as-int.ll
index 9d28cae8f04d..f0e83ca6302f 100644
--- a/llvm/test/Transforms/InstCombine/fabs-as-int.ll
+++ b/llvm/test/Transforms/InstCombine/fabs-as-int.ll
@@ -289,8 +289,8 @@ define i128 @fabs_as_int_ppc_fp128_f64_mask(ppc_fp128 %x) {
define i128 @fabs_as_int_ppc_fp128_f128_mask(ppc_fp128 %x) {
; CHECK-LABEL: define i128 @fabs_as_int_ppc_fp128_f128_mask
; CHECK-SAME: (ppc_fp128 [[X:%.*]]) {
-; CHECK-NEXT: [[BC:%.*]] = bitcast ppc_fp128 [[X]] to i128
-; CHECK-NEXT: [[AND:%.*]] = and i128 [[BC]], 170141183460469231731687303715884105727
+; CHECK-NEXT: [[TMP1:%.*]] = call ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 [[X]])
+; CHECK-NEXT: [[AND:%.*]] = bitcast ppc_fp128 [[TMP1]] to i128
; CHECK-NEXT: ret i128 [[AND]]
;
%bc = bitcast ppc_fp128 %x to i128
diff --git a/llvm/test/Transforms/InstCombine/fneg-as-int.ll b/llvm/test/Transforms/InstCombine/fneg-as-int.ll
index f8d88b4f238f..590aca687e5b 100644
--- a/llvm/test/Transforms/InstCombine/fneg-as-int.ll
+++ b/llvm/test/Transforms/InstCombine/fneg-as-int.ll
@@ -291,8 +291,8 @@ define i128 @fneg_as_int_ppc_fp128_f64_mask(ppc_fp128 %x) {
define i128 @fneg_as_int_ppc_fp128_f128_mask(ppc_fp128 %x) {
; CHECK-LABEL: define i128 @fneg_as_int_ppc_fp128_f128_mask
; CHECK-SAME: (ppc_fp128 [[X:%.*]]) {
-; CHECK-NEXT: [[BC:%.*]] = bitcast ppc_fp128 [[X]] to i128
-; CHECK-NEXT: [[XOR:%.*]] = xor i128 [[BC]], -170141183460469231731687303715884105728
+; CHECK-NEXT: [[TMP1:%.*]] = fneg ppc_fp128 [[X]]
+; CHECK-NEXT: [[XOR:%.*]] = bitcast ppc_fp128 [[TMP1]] to i128
; CHECK-NEXT: ret i128 [[XOR]]
;
%bc = bitcast ppc_fp128 %x to i128
diff --git a/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll
index 8b245bdd7929..a0894c3febc9 100644
--- a/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll
+++ b/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll
@@ -317,8 +317,9 @@ define i128 @fneg_fabs_as_int_ppc_fp128_f64_mask(ppc_fp128 %x) {
define i128 @fneg_fabs_as_int_ppc_fp128_f128_mask(ppc_fp128 %x) {
; CHECK-LABEL: define i128 @fneg_fabs_as_int_ppc_fp128_f128_mask
; CHECK-SAME: (ppc_fp128 [[X:%.*]]) {
-; CHECK-NEXT: [[BC:%.*]] = bitcast ppc_fp128 [[X]] to i128
-; CHECK-NEXT: [[OR:%.*]] = or i128 [[BC]], -170141183460469231731687303715884105728
+; CHECK-NEXT: [[TMP1:%.*]] = call ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 [[X]])
+; CHECK-NEXT: [[TMP2:%.*]] = fneg ppc_fp128 [[TMP1]]
+; CHECK-NEXT: [[OR:%.*]] = bitcast ppc_fp128 [[TMP2]] to i128
; CHECK-NEXT: ret i128 [[OR]]
;
%bc = bitcast ppc_fp128 %x to i128
diff --git a/llvm/test/Transforms/InstCombine/sincospi.ll b/llvm/test/Transforms/InstCombine/sincospi.ll
index b76ae2017114..14da03dff6f4 100644
--- a/llvm/test/Transforms/InstCombine/sincospi.ll
+++ b/llvm/test/Transforms/InstCombine/sincospi.ll
@@ -90,18 +90,14 @@ define float @test_instbased_f32_other_user(ptr %ptr) {
define float @test_constant_f32() {
; CHECK-FLOAT-IN-VEC-LABEL: @test_constant_f32(
-; CHECK-FLOAT-IN-VEC-NEXT: [[SINCOSPI:%.*]] = call <2 x float> @__sincospif_stret(float 1.000000e+00)
-; CHECK-FLOAT-IN-VEC-NEXT: [[SINPI:%.*]] = extractelement <2 x float> [[SINCOSPI]], i64 0
-; CHECK-FLOAT-IN-VEC-NEXT: [[COSPI:%.*]] = extractelement <2 x float> [[SINCOSPI]], i64 1
-; CHECK-FLOAT-IN-VEC-NEXT: [[COS:%.*]] = call float @__cospif(float 1.000000e+00) #[[ATTR0]]
+; CHECK-FLOAT-IN-VEC-NEXT: [[SINPI:%.*]] = call float @__sinpif(float 1.000000e+00) #[[ATTR0]]
+; CHECK-FLOAT-IN-VEC-NEXT: [[COSPI:%.*]] = call float @__cospif(float 1.000000e+00) #[[ATTR0]]
; CHECK-FLOAT-IN-VEC-NEXT: [[RES:%.*]] = fadd float [[SINPI]], [[COSPI]]
; CHECK-FLOAT-IN-VEC-NEXT: ret float [[RES]]
;
; CHECK-LABEL: @test_constant_f32(
-; CHECK-NEXT: [[SINCOSPI:%.*]] = call { float, float } @__sincospif_stret(float 1.000000e+00)
-; CHECK-NEXT: [[SINPI:%.*]] = extractvalue { float, float } [[SINCOSPI]], 0
-; CHECK-NEXT: [[COSPI:%.*]] = extractvalue { float, float } [[SINCOSPI]], 1
-; CHECK-NEXT: [[COS:%.*]] = call float @__cospif(float 1.000000e+00) #[[ATTR0]]
+; CHECK-NEXT: [[SINPI:%.*]] = call float @__sinpif(float 1.000000e+00) #[[ATTR0]]
+; CHECK-NEXT: [[COSPI:%.*]] = call float @__cospif(float 1.000000e+00) #[[ATTR0]]
; CHECK-NEXT: [[RES:%.*]] = fadd float [[SINPI]], [[COSPI]]
; CHECK-NEXT: ret float [[RES]]
;
@@ -172,18 +168,14 @@ define double @test_instbased_f64() {
define double @test_constant_f64() {
; CHECK-FLOAT-IN-VEC-LABEL: @test_constant_f64(
-; CHECK-FLOAT-IN-VEC-NEXT: [[SINCOSPI:%.*]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
-; CHECK-FLOAT-IN-VEC-NEXT: [[SINPI:%.*]] = extractvalue { double, double } [[SINCOSPI]], 0
-; CHECK-FLOAT-IN-VEC-NEXT: [[COSPI:%.*]] = extractvalue { double, double } [[SINCOSPI]], 1
-; CHECK-FLOAT-IN-VEC-NEXT: [[COS:%.*]] = call double @__cospi(double 1.000000e+00) #[[ATTR0]]
+; CHECK-FLOAT-IN-VEC-NEXT: [[SINPI:%.*]] = call double @__sinpi(double 1.000000e+00) #[[ATTR0]]
+; CHECK-FLOAT-IN-VEC-NEXT: [[COSPI:%.*]] = call double @__cospi(double 1.000000e+00) #[[ATTR0]]
; CHECK-FLOAT-IN-VEC-NEXT: [[RES:%.*]] = fadd double [[SINPI]], [[COSPI]]
; CHECK-FLOAT-IN-VEC-NEXT: ret double [[RES]]
;
; CHECK-LABEL: @test_constant_f64(
-; CHECK-NEXT: [[SINCOSPI:%.*]] = call { double, double } @__sincospi_stret(double 1.000000e+00)
-; CHECK-NEXT: [[SINPI:%.*]] = extractvalue { double, double } [[SINCOSPI]], 0
-; CHECK-NEXT: [[COSPI:%.*]] = extractvalue { double, double } [[SINCOSPI]], 1
-; CHECK-NEXT: [[COS:%.*]] = call double @__cospi(double 1.000000e+00) #[[ATTR0]]
+; CHECK-NEXT: [[SINPI:%.*]] = call double @__sinpi(double 1.000000e+00) #[[ATTR0]]
+; CHECK-NEXT: [[COSPI:%.*]] = call double @__cospi(double 1.000000e+00) #[[ATTR0]]
; CHECK-NEXT: [[RES:%.*]] = fadd double [[SINPI]], [[COSPI]]
; CHECK-NEXT: ret double [[RES]]
;
diff --git a/llvm/test/Transforms/LICM/pr50367.ll b/llvm/test/Transforms/LICM/pr50367.ll
index a7cf21deff62..7fd176b6c6bb 100644
--- a/llvm/test/Transforms/LICM/pr50367.ll
+++ b/llvm/test/Transforms/LICM/pr50367.ll
@@ -2,7 +2,7 @@
; RUN: opt -S -passes='loop-mssa(licm)' < %s | FileCheck %s
@e = external dso_local global ptr, align 8
-define void @main(i1 %arg) {
+define void @main(i1 %arg, ptr %arg1) {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP1:%.*]]
@@ -11,8 +11,47 @@ define void @main(i1 %arg) {
; CHECK: loop2:
; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]]
; CHECK: loop2.latch:
+; CHECK-NEXT: store i32 0, ptr [[ARG1:%.*]], align 4
; CHECK-NEXT: br label [[LOOP2]]
; CHECK: loop.latch:
+; CHECK-NEXT: store ptr null, ptr @e, align 8, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[PTR:%.*]] = load ptr, ptr @e, align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT: store i32 0, ptr [[PTR]], align 4, !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT: br label [[LOOP1]]
+;
+entry:
+ br label %loop1
+
+loop1:
+ br label %loop2
+
+loop2:
+ br i1 %arg, label %loop2.latch, label %loop.latch
+
+loop2.latch:
+ store i32 0, ptr %arg1, align 4
+ br label %loop2
+
+loop.latch:
+ store ptr null, ptr @e, align 8, !tbaa !0
+ %ptr = load ptr, ptr @e, align 8, !tbaa !0
+ store i32 0, ptr %ptr, align 4, !tbaa !4
+ br label %loop1
+}
+
+define void @store_null(i1 %arg) {
+; CHECK-LABEL: @store_null(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK: loop1:
+; CHECK-NEXT: br label [[LOOP2:%.*]]
+; CHECK: loop2:
+; CHECK-NEXT: br i1 [[ARG:%.*]], label [[LOOP2_LATCH:%.*]], label [[LOOP_LATCH:%.*]]
+; CHECK: loop2.latch:
+; CHECK-NEXT: store i32 0, ptr null, align 4
+; CHECK-NEXT: br label [[LOOP2]]
+; CHECK: loop.latch:
+; CHECK-NEXT: store i32 0, ptr null, align 4, !tbaa [[TBAA4]]
; CHECK-NEXT: br label [[LOOP1]]
;
entry:
diff --git a/llvm/test/Transforms/LICM/pr59324.ll b/llvm/test/Transforms/LICM/pr59324.ll
index b0ad60e65069..ec33a0f8ded0 100644
--- a/llvm/test/Transforms/LICM/pr59324.ll
+++ b/llvm/test/Transforms/LICM/pr59324.ll
@@ -6,7 +6,10 @@ define void @test(ptr %a) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[V:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT: store ptr null, ptr null, align 8
+; CHECK-NEXT: [[P:%.*]] = load ptr, ptr null, align 8
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: store i32 [[V]], ptr [[A:%.*]], align 4
; CHECK-NEXT: br label [[LOOP]]
;
entry:
@@ -19,3 +22,25 @@ loop:
store i32 %v, ptr %a
br label %loop
}
+
+define void @test_inttoptr(ptr %a) {
+; CHECK-LABEL: @test_inttoptr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: store ptr null, ptr inttoptr (i64 128 to ptr), align 8
+; CHECK-NEXT: [[P:%.*]] = load ptr, ptr inttoptr (i64 128 to ptr), align 8
+; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: store i32 [[V]], ptr [[A:%.*]], align 4
+; CHECK-NEXT: br label [[LOOP]]
+;
+entry:
+ br label %loop
+
+loop:
+ store ptr null, ptr inttoptr (i64 128 to ptr)
+ %p = load ptr, ptr inttoptr (i64 128 to ptr)
+ %v = load i32, ptr %p
+ store i32 %v, ptr %a
+ br label %loop
+}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll
index f01aaa04606d..a28673cf8e55 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll
@@ -429,7 +429,249 @@ exit:
ret void
}
+define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) {
+; RV64-LABEL: define void @vector_reverse_irregular_type(
+; RV64-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] {
+; RV64-NEXT: [[ENTRY:.*]]:
+; RV64-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; RV64: [[VECTOR_PH]]:
+; RV64-NEXT: br label %[[VECTOR_BODY:.*]]
+; RV64: [[VECTOR_BODY]]:
+; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
+; RV64-NEXT: [[DEC_IV:%.*]] = add i64 [[OFFSET_IDX]], 0
+; RV64-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1
+; RV64-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2
+; RV64-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3
+; RV64-NEXT: [[IV_NEXT:%.*]] = add nsw i64 [[DEC_IV]], -1
+; RV64-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1
+; RV64-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1
+; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1
+; RV64-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT]]
+; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP5]]
+; RV64-NEXT: [[TMP10:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP6]]
+; RV64-NEXT: [[TMP11:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP7]]
+; RV64-NEXT: [[TMP0:%.*]] = load i7, ptr [[ARRAYIDX_B]], align 1
+; RV64-NEXT: [[TMP13:%.*]] = load i7, ptr [[TMP9]], align 1
+; RV64-NEXT: [[TMP14:%.*]] = load i7, ptr [[TMP10]], align 1
+; RV64-NEXT: [[TMP15:%.*]] = load i7, ptr [[TMP11]], align 1
+; RV64-NEXT: [[TMP16:%.*]] = insertelement <4 x i7> poison, i7 [[TMP0]], i32 0
+; RV64-NEXT: [[TMP17:%.*]] = insertelement <4 x i7> [[TMP16]], i7 [[TMP13]], i32 1
+; RV64-NEXT: [[TMP18:%.*]] = insertelement <4 x i7> [[TMP17]], i7 [[TMP14]], i32 2
+; RV64-NEXT: [[TMP19:%.*]] = insertelement <4 x i7> [[TMP18]], i7 [[TMP15]], i32 3
+; RV64-NEXT: [[TMP20:%.*]] = add <4 x i7> [[TMP19]], splat (i7 1)
+; RV64-NEXT: [[TMP21:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT]]
+; RV64-NEXT: [[TMP22:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP5]]
+; RV64-NEXT: [[TMP23:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP6]]
+; RV64-NEXT: [[TMP24:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP7]]
+; RV64-NEXT: [[TMP25:%.*]] = extractelement <4 x i7> [[TMP20]], i32 0
+; RV64-NEXT: store i7 [[TMP25]], ptr [[TMP21]], align 1
+; RV64-NEXT: [[TMP26:%.*]] = extractelement <4 x i7> [[TMP20]], i32 1
+; RV64-NEXT: store i7 [[TMP26]], ptr [[TMP22]], align 1
+; RV64-NEXT: [[TMP27:%.*]] = extractelement <4 x i7> [[TMP20]], i32 2
+; RV64-NEXT: store i7 [[TMP27]], ptr [[TMP23]], align 1
+; RV64-NEXT: [[TMP28:%.*]] = extractelement <4 x i7> [[TMP20]], i32 3
+; RV64-NEXT: store i7 [[TMP28]], ptr [[TMP24]], align 1
+; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; RV64-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020
+; RV64-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; RV64: [[MIDDLE_BLOCK]]:
+; RV64-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; RV64: [[SCALAR_PH]]:
+; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ]
+; RV64-NEXT: br label %[[FOR_BODY:.*]]
+; RV64: [[FOR_BODY]]:
+; RV64-NEXT: [[DEC_IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[FOR_BODY]] ]
+; RV64-NEXT: [[IV_NEXT1]] = add nsw i64 [[DEC_IV1]], -1
+; RV64-NEXT: [[ARRAYIDX_B1:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT1]]
+; RV64-NEXT: [[TMP30:%.*]] = load i7, ptr [[ARRAYIDX_B1]], align 1
+; RV64-NEXT: [[ADD:%.*]] = add i7 [[TMP30]], 1
+; RV64-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT1]]
+; RV64-NEXT: store i7 [[ADD]], ptr [[ARRAYIDX_A]], align 1
+; RV64-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV1]], 1
+; RV64-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+; RV64: [[EXIT]]:
+; RV64-NEXT: ret void
+;
+; RV32-LABEL: define void @vector_reverse_irregular_type(
+; RV32-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] {
+; RV32-NEXT: [[ENTRY:.*]]:
+; RV32-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; RV32: [[VECTOR_PH]]:
+; RV32-NEXT: br label %[[VECTOR_BODY:.*]]
+; RV32: [[VECTOR_BODY]]:
+; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
+; RV32-NEXT: [[DEC_IV:%.*]] = add i64 [[OFFSET_IDX]], 0
+; RV32-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1
+; RV32-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2
+; RV32-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3
+; RV32-NEXT: [[IV_NEXT:%.*]] = add nsw i64 [[DEC_IV]], -1
+; RV32-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1
+; RV32-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1
+; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1
+; RV32-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT]]
+; RV32-NEXT: [[TMP9:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP5]]
+; RV32-NEXT: [[TMP10:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP6]]
+; RV32-NEXT: [[TMP11:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP7]]
+; RV32-NEXT: [[TMP0:%.*]] = load i7, ptr [[ARRAYIDX_B]], align 1
+; RV32-NEXT: [[TMP13:%.*]] = load i7, ptr [[TMP9]], align 1
+; RV32-NEXT: [[TMP14:%.*]] = load i7, ptr [[TMP10]], align 1
+; RV32-NEXT: [[TMP15:%.*]] = load i7, ptr [[TMP11]], align 1
+; RV32-NEXT: [[TMP16:%.*]] = insertelement <4 x i7> poison, i7 [[TMP0]], i32 0
+; RV32-NEXT: [[TMP17:%.*]] = insertelement <4 x i7> [[TMP16]], i7 [[TMP13]], i32 1
+; RV32-NEXT: [[TMP18:%.*]] = insertelement <4 x i7> [[TMP17]], i7 [[TMP14]], i32 2
+; RV32-NEXT: [[TMP19:%.*]] = insertelement <4 x i7> [[TMP18]], i7 [[TMP15]], i32 3
+; RV32-NEXT: [[TMP20:%.*]] = add <4 x i7> [[TMP19]], splat (i7 1)
+; RV32-NEXT: [[TMP21:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT]]
+; RV32-NEXT: [[TMP22:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP5]]
+; RV32-NEXT: [[TMP23:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP6]]
+; RV32-NEXT: [[TMP24:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP7]]
+; RV32-NEXT: [[TMP25:%.*]] = extractelement <4 x i7> [[TMP20]], i32 0
+; RV32-NEXT: store i7 [[TMP25]], ptr [[TMP21]], align 1
+; RV32-NEXT: [[TMP26:%.*]] = extractelement <4 x i7> [[TMP20]], i32 1
+; RV32-NEXT: store i7 [[TMP26]], ptr [[TMP22]], align 1
+; RV32-NEXT: [[TMP27:%.*]] = extractelement <4 x i7> [[TMP20]], i32 2
+; RV32-NEXT: store i7 [[TMP27]], ptr [[TMP23]], align 1
+; RV32-NEXT: [[TMP28:%.*]] = extractelement <4 x i7> [[TMP20]], i32 3
+; RV32-NEXT: store i7 [[TMP28]], ptr [[TMP24]], align 1
+; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; RV32-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020
+; RV32-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; RV32: [[MIDDLE_BLOCK]]:
+; RV32-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; RV32: [[SCALAR_PH]]:
+; RV32-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ]
+; RV32-NEXT: br label %[[FOR_BODY:.*]]
+; RV32: [[FOR_BODY]]:
+; RV32-NEXT: [[DEC_IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[FOR_BODY]] ]
+; RV32-NEXT: [[IV_NEXT1]] = add nsw i64 [[DEC_IV1]], -1
+; RV32-NEXT: [[ARRAYIDX_B1:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT1]]
+; RV32-NEXT: [[TMP30:%.*]] = load i7, ptr [[ARRAYIDX_B1]], align 1
+; RV32-NEXT: [[ADD:%.*]] = add i7 [[TMP30]], 1
+; RV32-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT1]]
+; RV32-NEXT: store i7 [[ADD]], ptr [[ARRAYIDX_A]], align 1
+; RV32-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV1]], 1
+; RV32-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+; RV32: [[EXIT]]:
+; RV32-NEXT: ret void
+;
+; RV64-UF2-LABEL: define void @vector_reverse_irregular_type(
+; RV64-UF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] {
+; RV64-UF2-NEXT: [[ENTRY:.*]]:
+; RV64-UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; RV64-UF2: [[VECTOR_PH]]:
+; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; RV64-UF2: [[VECTOR_BODY]]:
+; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
+; RV64-UF2-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 0
+; RV64-UF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
+; RV64-UF2-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -2
+; RV64-UF2-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], -3
+; RV64-UF2-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], -4
+; RV64-UF2-NEXT: [[TMP42:%.*]] = add i64 [[OFFSET_IDX]], -5
+; RV64-UF2-NEXT: [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], -6
+; RV64-UF2-NEXT: [[TMP50:%.*]] = add i64 [[OFFSET_IDX]], -7
+; RV64-UF2-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP16]], -1
+; RV64-UF2-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP0]], -1
+; RV64-UF2-NEXT: [[TMP51:%.*]] = add nsw i64 [[TMP17]], -1
+; RV64-UF2-NEXT: [[TMP11:%.*]] = add nsw i64 [[TMP24]], -1
+; RV64-UF2-NEXT: [[TMP59:%.*]] = add nsw i64 [[TMP25]], -1
+; RV64-UF2-NEXT: [[TMP13:%.*]] = add nsw i64 [[TMP42]], -1
+; RV64-UF2-NEXT: [[TMP14:%.*]] = add nsw i64 [[TMP43]], -1
+; RV64-UF2-NEXT: [[TMP15:%.*]] = add nsw i64 [[TMP50]], -1
+; RV64-UF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP1]]
+; RV64-UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP2]]
+; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP51]]
+; RV64-UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP11]]
+; RV64-UF2-NEXT: [[TMP20:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP59]]
+; RV64-UF2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP13]]
+; RV64-UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP14]]
+; RV64-UF2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP15]]
+; RV64-UF2-NEXT: [[TMP5:%.*]] = load i7, ptr [[TMP3]], align 1
+; RV64-UF2-NEXT: [[TMP6:%.*]] = load i7, ptr [[TMP4]], align 1
+; RV64-UF2-NEXT: [[TMP26:%.*]] = load i7, ptr [[TMP18]], align 1
+; RV64-UF2-NEXT: [[TMP27:%.*]] = load i7, ptr [[TMP19]], align 1
+; RV64-UF2-NEXT: [[TMP28:%.*]] = insertelement <4 x i7> poison, i7 [[TMP5]], i32 0
+; RV64-UF2-NEXT: [[TMP29:%.*]] = insertelement <4 x i7> [[TMP28]], i7 [[TMP6]], i32 1
+; RV64-UF2-NEXT: [[TMP30:%.*]] = insertelement <4 x i7> [[TMP29]], i7 [[TMP26]], i32 2
+; RV64-UF2-NEXT: [[TMP31:%.*]] = insertelement <4 x i7> [[TMP30]], i7 [[TMP27]], i32 3
+; RV64-UF2-NEXT: [[TMP32:%.*]] = load i7, ptr [[TMP20]], align 1
+; RV64-UF2-NEXT: [[TMP33:%.*]] = load i7, ptr [[TMP21]], align 1
+; RV64-UF2-NEXT: [[TMP34:%.*]] = load i7, ptr [[TMP22]], align 1
+; RV64-UF2-NEXT: [[TMP35:%.*]] = load i7, ptr [[TMP23]], align 1
+; RV64-UF2-NEXT: [[TMP36:%.*]] = insertelement <4 x i7> poison, i7 [[TMP32]], i32 0
+; RV64-UF2-NEXT: [[TMP37:%.*]] = insertelement <4 x i7> [[TMP36]], i7 [[TMP33]], i32 1
+; RV64-UF2-NEXT: [[TMP38:%.*]] = insertelement <4 x i7> [[TMP37]], i7 [[TMP34]], i32 2
+; RV64-UF2-NEXT: [[TMP39:%.*]] = insertelement <4 x i7> [[TMP38]], i7 [[TMP35]], i32 3
+; RV64-UF2-NEXT: [[TMP40:%.*]] = add <4 x i7> [[TMP31]], splat (i7 1)
+; RV64-UF2-NEXT: [[TMP41:%.*]] = add <4 x i7> [[TMP39]], splat (i7 1)
+; RV64-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP1]]
+; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP2]]
+; RV64-UF2-NEXT: [[TMP44:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP51]]
+; RV64-UF2-NEXT: [[TMP45:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP11]]
+; RV64-UF2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP59]]
+; RV64-UF2-NEXT: [[TMP47:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP13]]
+; RV64-UF2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP14]]
+; RV64-UF2-NEXT: [[TMP49:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP15]]
+; RV64-UF2-NEXT: [[TMP7:%.*]] = extractelement <4 x i7> [[TMP40]], i32 0
+; RV64-UF2-NEXT: store i7 [[TMP7]], ptr [[TMP9]], align 1
+; RV64-UF2-NEXT: [[TMP8:%.*]] = extractelement <4 x i7> [[TMP40]], i32 1
+; RV64-UF2-NEXT: store i7 [[TMP8]], ptr [[TMP10]], align 1
+; RV64-UF2-NEXT: [[TMP52:%.*]] = extractelement <4 x i7> [[TMP40]], i32 2
+; RV64-UF2-NEXT: store i7 [[TMP52]], ptr [[TMP44]], align 1
+; RV64-UF2-NEXT: [[TMP53:%.*]] = extractelement <4 x i7> [[TMP40]], i32 3
+; RV64-UF2-NEXT: store i7 [[TMP53]], ptr [[TMP45]], align 1
+; RV64-UF2-NEXT: [[TMP54:%.*]] = extractelement <4 x i7> [[TMP41]], i32 0
+; RV64-UF2-NEXT: store i7 [[TMP54]], ptr [[TMP46]], align 1
+; RV64-UF2-NEXT: [[TMP55:%.*]] = extractelement <4 x i7> [[TMP41]], i32 1
+; RV64-UF2-NEXT: store i7 [[TMP55]], ptr [[TMP47]], align 1
+; RV64-UF2-NEXT: [[TMP56:%.*]] = extractelement <4 x i7> [[TMP41]], i32 2
+; RV64-UF2-NEXT: store i7 [[TMP56]], ptr [[TMP48]], align 1
+; RV64-UF2-NEXT: [[TMP57:%.*]] = extractelement <4 x i7> [[TMP41]], i32 3
+; RV64-UF2-NEXT: store i7 [[TMP57]], ptr [[TMP49]], align 1
+; RV64-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; RV64-UF2-NEXT: [[TMP58:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1016
+; RV64-UF2-NEXT: br i1 [[TMP58]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; RV64-UF2: [[MIDDLE_BLOCK]]:
+; RV64-UF2-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; RV64-UF2: [[SCALAR_PH]]:
+; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 7, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ]
+; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]]
+; RV64-UF2: [[FOR_BODY]]:
+; RV64-UF2-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; RV64-UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1
+; RV64-UF2-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT]]
+; RV64-UF2-NEXT: [[TMP12:%.*]] = load i7, ptr [[ARRAYIDX_B]], align 1
+; RV64-UF2-NEXT: [[ADD:%.*]] = add i7 [[TMP12]], 1
+; RV64-UF2-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT]]
+; RV64-UF2-NEXT: store i7 [[ADD]], ptr [[ARRAYIDX_A]], align 1
+; RV64-UF2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1
+; RV64-UF2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+; RV64-UF2: [[EXIT]]:
+; RV64-UF2-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %dec.iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.body ]
+ %iv.next = add nsw i64 %dec.iv, -1
+ %arrayidx.b = getelementptr inbounds i7, ptr %B, i64 %iv.next
+ %0 = load i7, ptr %arrayidx.b, align 1
+ %add = add i7 %0, 1
+ %arrayidx.a = getelementptr inbounds i7, ptr %A, i64 %iv.next
+ store i7 %add, ptr %arrayidx.a, align 1
+ %cmp = icmp ugt i64 %dec.iv, 1
+ br i1 %cmp, label %for.body, label %exit, !llvm.loop !4
+
+exit:
+ ret void
+}
+
!0 = distinct !{!0, !1, !2, !3}
!1 = !{!"llvm.loop.vectorize.width", i32 4}
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!3 = !{!"llvm.loop.vectorize.enable", i1 true}
+!4 = distinct !{!4, !1, !3}
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
index bc0ccfb45c05..02a876a3fda6 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
@@ -7,112 +7,86 @@ define void @test(ptr %p, i40 %a) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i40> poison, i40 [[A]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i40> [[BROADCAST_SPLATINSERT1]], <16 x i40> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i40> [[BROADCAST_SPLAT2]], splat (i40 24)
-; CHECK-NEXT: [[TMP2:%.*]] = ashr <16 x i40> [[TMP1]], splat (i40 28)
-; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i40> [[TMP2]] to <16 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <16 x i1> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <16 x i1> zeroinitializer, [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], splat (i1 true)
-; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <16 x i1> [[TMP7]], zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP8]], i32 0
-; CHECK-NEXT: store i1 [[TMP10]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; CHECK: pred.store.if1:
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP8]], i32 1
-; CHECK-NEXT: store i1 [[TMP9]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP8]], i32 2
-; CHECK-NEXT: store i1 [[TMP12]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP8]], i32 3
-; CHECK-NEXT: store i1 [[TMP14]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP8]], i32 4
-; CHECK-NEXT: store i1 [[TMP16]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
; CHECK: pred.store.if9:
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP8]], i32 5
-; CHECK-NEXT: store i1 [[TMP18]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.continue10:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
; CHECK: pred.store.if11:
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP8]], i32 6
-; CHECK-NEXT: store i1 [[TMP20]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
; CHECK: pred.store.continue12:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
; CHECK: pred.store.if13:
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP8]], i32 7
-; CHECK-NEXT: store i1 [[TMP22]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
; CHECK: pred.store.continue14:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
; CHECK: pred.store.if15:
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP8]], i32 8
-; CHECK-NEXT: store i1 [[TMP24]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
; CHECK: pred.store.continue16:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
; CHECK: pred.store.if17:
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP8]], i32 9
-; CHECK-NEXT: store i1 [[TMP26]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
; CHECK: pred.store.continue18:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
; CHECK: pred.store.if19:
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP8]], i32 10
-; CHECK-NEXT: store i1 [[TMP28]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
; CHECK: pred.store.continue20:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
; CHECK: pred.store.if21:
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP8]], i32 11
-; CHECK-NEXT: store i1 [[TMP30]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
; CHECK: pred.store.continue22:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
; CHECK: pred.store.if23:
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP8]], i32 12
-; CHECK-NEXT: store i1 [[TMP32]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
; CHECK: pred.store.continue24:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
; CHECK: pred.store.if25:
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP8]], i32 13
-; CHECK-NEXT: store i1 [[TMP34]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
; CHECK: pred.store.continue26:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
; CHECK: pred.store.if27:
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP8]], i32 14
-; CHECK-NEXT: store i1 [[TMP36]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
; CHECK: pred.store.continue28:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
; CHECK: pred.store.if29:
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP8]], i32 15
-; CHECK-NEXT: store i1 [[TMP38]], ptr [[P]], align 1
+; CHECK-NEXT: store i1 false, ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
; CHECK: pred.store.continue30:
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
index 83e2f84814ad..7d9ed7d6215c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
@@ -58,3 +58,212 @@ bb2:
bb3:
ret void
}
+
+; Test case for https://github.com/llvm/llvm-project/issues/131359.
+define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) {
+; CHECK-LABEL: @redundant_or_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 true, <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 2)
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
+; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK: pred.store.if:
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP7]]
+; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
+; CHECK: pred.store.continue:
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
+; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK: pred.store.if3:
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP10]]
+; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.continue4:
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
+; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK: pred.store.if5:
+; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP13]]
+; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
+; CHECK: pred.store.continue6:
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
+; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK: pred.store.if7:
+; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 3
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]]
+; CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
+; CHECK: pred.store.continue8:
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
+; CHECK: loop.header:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[THEN_1:%.*]]
+; CHECK: then.1:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], true
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_0]], i1 false
+; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]]
+; CHECK: then.2:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
+; CHECK-NEXT: br label [[LOOP_LATCH]]
+; CHECK: loop.latch:
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ br i1 %c.0, label %loop.latch, label %then.1
+
+then.1:
+ %cmp = icmp eq i32 %iv, 2
+ %or = or i1 %cmp, true
+ %cond = select i1 %or, i1 %c.1, i1 false
+ br i1 %cond, label %then.2, label %loop.latch
+
+then.2:
+ %gep = getelementptr inbounds i32, ptr %dst, i32 %iv
+ store i32 0, ptr %gep, align 4
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add nuw nsw i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 3
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) {
+; CHECK-LABEL: @redundant_or_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 true, <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 2)
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK: pred.store.if:
+; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP6]]
+; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
+; CHECK: pred.store.continue:
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
+; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK: pred.store.if3:
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP9]]
+; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK: pred.store.continue4:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
+; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK: pred.store.if5:
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP12]]
+; CHECK-NEXT: store i32 0, ptr [[TMP13]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
+; CHECK: pred.store.continue6:
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK: pred.store.if7:
+; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[INDEX]], 3
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP15]]
+; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
+; CHECK: pred.store.continue8:
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
+; CHECK: loop.header:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]]
+; CHECK: then.1:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2
+; CHECK-NEXT: [[OR:%.*]] = or i1 true, [[CMP]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false
+; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]]
+; CHECK: then.2:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4
+; CHECK-NEXT: br label [[LOOP_LATCH]]
+; CHECK: loop.latch:
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ br i1 %c.0, label %loop.latch, label %then.1
+
+then.1:
+ %cmp = icmp eq i32 %iv, 2
+ %or = or i1 true, %cmp
+ %cond = select i1 %or, i1 %c.1, i1 false
+ br i1 %cond, label %then.2, label %loop.latch
+
+then.2:
+ %gep = getelementptr inbounds i32, ptr %dst, i32 %iv
+ store i32 0, ptr %gep, align 4
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add nuw nsw i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 3
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/ObjCARC/contract.ll b/llvm/test/Transforms/ObjCARC/contract.ll
index 70bd57a0c719..24f9a712ccd0 100644
--- a/llvm/test/Transforms/ObjCARC/contract.ll
+++ b/llvm/test/Transforms/ObjCARC/contract.ll
@@ -234,6 +234,22 @@ define void @test14(ptr %a, ptr %b) {
ret void
}
+define void @test15(ptr %x) {
+; CHECK-LABEL: define void @test15(
+; CHECK-SAME: ptr [[X:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds ptr, ptr [[X]], i32 0
+; CHECK-NEXT: [[V0:%.*]] = call ptr @llvm.objc.retain(ptr [[Y]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: call void @use_pointer(ptr [[V0]])
+; CHECK-NEXT: call void @use_pointer(ptr [[V0]])
+; CHECK-NEXT: ret void
+;
+ %y = getelementptr inbounds ptr, ptr %x, i32 0
+ %v0 = call ptr @llvm.objc.retain(ptr %y) nounwind
+ call void @use_pointer(ptr %x)
+ call void @use_pointer(ptr %y)
+ ret void
+}
+
declare void @llvm.objc.clang.arc.use(...) nounwind
declare void @llvm.objc.clang.arc.noop.use(...) nounwind
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index ffbacc1a8903..95bf296af9b0 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -1028,15 +1028,14 @@ define i32 @nodefaultwithholes(i32 %c) {
; CHECK-LABEL: @nodefaultwithholes(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[C:%.*]], 6
-; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_HOLE_CHECK:%.*]], label [[SW_DEFAULT:%.*]]
-; CHECK: sw.default:
-; CHECK-NEXT: call void @exit(i32 1)
-; CHECK-NEXT: unreachable
-; CHECK: switch.hole_check:
; CHECK-NEXT: [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[C]] to i8
; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 47, [[SWITCH_MASKINDEX]]
; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
-; CHECK-NEXT: br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[SW_DEFAULT]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_LOBIT]], i1 false
+; CHECK-NEXT: br i1 [[OR_COND]], label [[SWITCH_LOOKUP:%.*]], label [[SW_DEFAULT:%.*]]
+; CHECK: sw.default:
+; CHECK-NEXT: call void @exit(i32 1)
+; CHECK-NEXT: unreachable
; CHECK: switch.lookup:
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [6 x i32], ptr @switch.table.nodefaultwithholes, i32 0, i32 [[C]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll
index 4ebf09ae3b12..fd6b21a7f9e6 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll
@@ -143,17 +143,16 @@ define i32 @reachable_default_holes_0to31(i32 %x, i32 %y) {
; CHECK-LABEL: @reachable_default_holes_0to31(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X:%.*]], 32
-; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_HOLE_CHECK:%.*]], label [[RETURN:%.*]]
-; CHECK: switch.hole_check:
; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i32 -277094665, [[X]]
; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i32 [[SWITCH_SHIFTED]] to i1
-; CHECK-NEXT: br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_LOBIT]], i1 false
+; CHECK-NEXT: br i1 [[OR_COND]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]]
; CHECK: switch.lookup:
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [32 x i32], ptr @switch.table.reachable_default_holes_0to31, i32 0, i32 [[X]]
; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
-; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ [[Y:%.*]], [[SWITCH_HOLE_CHECK]] ], [ [[Y]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ], [ [[Y:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll
index 37f48a9a7e03..7b88ec338cf5 100644
--- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll
+++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll
@@ -88,10 +88,9 @@ define void @one_pred_trunc_cond(i8 %v0, i8 %v1) {
; CHECK-LABEL: @one_pred_trunc_cond(
; CHECK-NEXT: pred:
; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[V0:%.*]], 0
-; CHECK-NEXT: br i1 [[C0]], label [[DISPATCH:%.*]], label [[FINAL_RIGHT:%.*]]
-; CHECK: dispatch:
; CHECK-NEXT: [[C1:%.*]] = trunc i8 [[V1:%.*]] to i1
-; CHECK-NEXT: br i1 [[C1]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
+; CHECK-NEXT: br i1 [[OR_COND]], label [[FINAL_LEFT:%.*]], label [[FINAL_RIGHT:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: final_left:
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 09356191345f..7a5fd83cd958 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -8347,4 +8347,13 @@ TEST(APFloatTest, AddOrSubtractSignificand) {
Helper::runTest(true, false, 3, 0x10001, false, 7, 0x100, false, 6, 0x1e00,
lfLessThanHalf);
}
+
+TEST(APFloatTest, hasSignBitInMSB) {
+ EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::IEEEsingle()));
+ EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::x87DoubleExtended()));
+ EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::PPCDoubleDouble()));
+ EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::IEEEquad()));
+ EXPECT_FALSE(APFloat::hasSignBitInMSB(APFloat::Float8E8M0FNU()));
+}
+
} // namespace
diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 24822c847046..c954163cdeb3 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -110,6 +110,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/StringMatcher.h"
diff --git a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
index 35a9abdc37c8..8da6fbef0672 100644
--- a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
+++ b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
@@ -15,15 +15,14 @@
#define LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Main.h"
#include <algorithm>
#include <cassert>
#include <functional>
#include <map>
namespace llvm {
-extern cl::opt<bool> EmitLongStrLiterals;
inline void printChar(raw_ostream &OS, char C) {
unsigned char UC(C);
diff --git a/llvm/utils/TableGen/Basic/TableGen.cpp b/llvm/utils/TableGen/Basic/TableGen.cpp
index 80ac93f2b54f..edb779150069 100644
--- a/llvm/utils/TableGen/Basic/TableGen.cpp
+++ b/llvm/utils/TableGen/Basic/TableGen.cpp
@@ -26,15 +26,6 @@
using namespace llvm;
-namespace llvm {
-cl::opt<bool> EmitLongStrLiterals(
- "long-string-literals",
- cl::desc("when emitting large string tables, prefer string literals over "
- "comma-separated char literals. This can be a readability and "
- "compile-time performance win, but upsets some compilers"),
- cl::Hidden, cl::init(true));
-} // end namespace llvm
-
static cl::OptionCategory PrintEnumsCat("Options for -print-enums");
static cl::opt<std::string> Class("class",
cl::desc("Print Enum list for this class"),
diff --git a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp
index 63ee0deb8711..64f03dae83e7 100644
--- a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp
+++ b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp
@@ -9,6 +9,7 @@
#include "Basic/SequenceToOffsetTable.h"
#include "Common/CodeGenDAGPatterns.h" // For SDNodeInfo.
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/TableGen/TableGenBackend.h"
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn
index 9d42409f1973..d40ce6424fe8 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/test/BUILD.gn
@@ -35,6 +35,7 @@ write_lit_config("lit_site_cfg") {
rebase_path("//clang-tools-extra/clangd/test"),
"CURRENT_TOOLS_DIR=",
+ "CLANGD_BUILD_DEXP=1",
"CLANGD_ENABLE_REMOTE=0",
"CLANGD_TIDY_CHECKS=1",
"LLVM_HOST_TRIPLE=$llvm_current_triple",
diff --git a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn
index d90df7bc0e57..b40fdf154b01 100644
--- a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn
@@ -10,6 +10,7 @@ static_library("TableGen") {
"Record.cpp",
"SetTheory.cpp",
"StringMatcher.cpp",
+ "StringToOffsetTable.cpp",
"TGLexer.cpp",
"TGParser.cpp",
"TGTimer.cpp",
diff --git a/mlir/docs/DefiningDialects/Operations.md b/mlir/docs/DefiningDialects/Operations.md
index fafda816a388..88d58e0a1efb 100644
--- a/mlir/docs/DefiningDialects/Operations.md
+++ b/mlir/docs/DefiningDialects/Operations.md
@@ -1756,6 +1756,23 @@ that it has a value within the valid range of the enum. If their
wrapper attribute instead of using a bare signless integer attribute
for storage.
+### Enum properties
+
+Enums can be wrapped in properties so that they can be stored inline.
+This causes a value of the enum's C++ class to become a member of the operation's
+property struct and for the operation's verifier to check that the enum's value
+is a valid value for the enum.
+
+The basic wrapper is `EnumProp`, which simply takes an `EnumInfo`.
+
+A less ambiguous syntax, namely putting a mnemonic and `<>`s surrounding
+the enum is generated with `NamedEnumProp`, which takes a `*EnumInfo`
+and a mnemonic string, which becomes part of the property's syntax.
+
+Both of these `EnumProp` types have a `*EnumPropWithAttrForm`, which allows for
+transparently upgrading from `EnumAttr`s and optionally retaining those
+attributes in the generic form.
+
## Debugging Tips
### Run `mlir-tblgen` to see the generated content
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td
index a9de78780645..34a30a00790e 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td
@@ -485,17 +485,16 @@ def DISubprogramFlags : I32BitEnumAttr<
// IntegerOverflowFlags
//===----------------------------------------------------------------------===//
-def IOFnone : I32BitEnumAttrCaseNone<"none">;
-def IOFnsw : I32BitEnumAttrCaseBit<"nsw", 0>;
-def IOFnuw : I32BitEnumAttrCaseBit<"nuw", 1>;
+def IOFnone : I32BitEnumCaseNone<"none">;
+def IOFnsw : I32BitEnumCaseBit<"nsw", 0>;
+def IOFnuw : I32BitEnumCaseBit<"nuw", 1>;
-def IntegerOverflowFlags : I32BitEnumAttr<
+def IntegerOverflowFlags : I32BitEnum<
"IntegerOverflowFlags",
"LLVM integer overflow flags",
[IOFnone, IOFnsw, IOFnuw]> {
let separator = ", ";
let cppNamespace = "::mlir::LLVM";
- let genSpecializedAttr = 0;
let printBitEnumPrimaryGroups = 1;
}
@@ -504,6 +503,11 @@ def LLVM_IntegerOverflowFlagsAttr :
let assemblyFormat = "`<` $value `>`";
}
+def LLVM_IntegerOverflowFlagsProp :
+ NamedEnumPropWithAttrForm<IntegerOverflowFlags, "overflow", LLVM_IntegerOverflowFlagsAttr> {
+ let defaultValue = enum.cppType # "::" # "none";
+}
+
//===----------------------------------------------------------------------===//
// FastmathFlags
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 7ffa880dc8da..e89e78aec714 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -60,7 +60,7 @@ class LLVM_IntArithmeticOpWithOverflowFlag<string mnemonic, string instName,
list<Trait> traits = []> :
LLVM_ArithmeticOpBase<AnySignlessInteger, mnemonic, instName,
!listconcat([DeclareOpInterfaceMethods<IntegerOverflowFlagsInterface>], traits)> {
- dag iofArg = (ins EnumProp<"IntegerOverflowFlags", "", "IntegerOverflowFlags::none">:$overflowFlags);
+ dag iofArg = (ins LLVM_IntegerOverflowFlagsProp:$overflowFlags);
let arguments = !con(commonArgs, iofArg);
string mlirBuilder = [{
@@ -69,7 +69,7 @@ class LLVM_IntArithmeticOpWithOverflowFlag<string mnemonic, string instName,
$res = op;
}];
let assemblyFormat = [{
- $lhs `,` $rhs `` custom<OverflowFlags>($overflowFlags) attr-dict `:` type($res)
+ $lhs `,` $rhs ($overflowFlags^)? attr-dict `:` type($res)
}];
string llvmBuilder =
"$res = builder.Create" # instName #
@@ -563,10 +563,10 @@ class LLVM_CastOpWithOverflowFlag<string mnemonic, string instName, Type type,
Type resultType, list<Trait> traits = []> :
LLVM_Op<mnemonic, !listconcat([Pure], [DeclareOpInterfaceMethods<IntegerOverflowFlagsInterface>], traits)>,
LLVM_Builder<"$res = builder.Create" # instName # "($arg, $_resultType, /*Name=*/\"\", op.hasNoUnsignedWrap(), op.hasNoSignedWrap());"> {
- let arguments = (ins type:$arg, EnumProp<"IntegerOverflowFlags", "", "IntegerOverflowFlags::none">:$overflowFlags);
+ let arguments = (ins type:$arg, LLVM_IntegerOverflowFlagsProp:$overflowFlags);
let results = (outs resultType:$res);
let builders = [LLVM_OneResultOpBuilder];
- let assemblyFormat = "$arg `` custom<OverflowFlags>($overflowFlags) attr-dict `:` type($arg) `to` type($res)";
+ let assemblyFormat = "$arg ($overflowFlags^)? attr-dict `:` type($arg) `to` type($res)";
string llvmInstName = instName;
string mlirBuilder = [{
auto op = $_builder.create<$_qualCppClassName>(
diff --git a/mlir/include/mlir/IR/EnumAttr.td b/mlir/include/mlir/IR/EnumAttr.td
index 931126a155fb..3f7f747ac20d 100644
--- a/mlir/include/mlir/IR/EnumAttr.td
+++ b/mlir/include/mlir/IR/EnumAttr.td
@@ -10,6 +10,7 @@
#define ENUMATTR_TD
include "mlir/IR/AttrTypeBase.td"
+include "mlir/IR/Properties.td"
//===----------------------------------------------------------------------===//
// Enum attribute kinds
@@ -552,6 +553,141 @@ class EnumAttr<Dialect dialect, EnumInfo enumInfo, string name = "",
let assemblyFormat = "$value";
}
+// A property wrapping by a C++ enum. This class will automatically create bytecode
+// serialization logic for the given enum, as well as arranging for parser and
+// printer calls.
+class EnumProp<EnumInfo enumInfo> : Property<enumInfo.cppType, enumInfo.summary> {
+ EnumInfo enum = enumInfo;
+
+ let description = enum.description;
+ let predicate = !if(
+ !isa<BitEnumBase>(enum),
+ CPred<"(static_cast<" # enum.underlyingType # ">($_self) & ~" # !cast<BitEnumBase>(enum).validBits # ") == 0">,
+ Or<!foreach(case, enum.enumerants, CPred<"$_self == " # enum.cppType # "::" # case.symbol>)>);
+
+ let convertFromAttribute = [{
+ auto intAttr = ::mlir::dyn_cast_if_present<::mlir::IntegerAttr>($_attr);
+ if (!intAttr) {
+ return $_diag() << "expected IntegerAttr storage for }] #
+ enum.cppType # [{";
+ }
+ $_storage = static_cast<}] # enum.cppType # [{>(intAttr.getValue().getZExtValue());
+ return ::mlir::success();
+ }];
+
+ let convertToAttribute = [{
+ return ::mlir::IntegerAttr::get(::mlir::IntegerType::get($_ctxt, }] # enum.bitwidth
+ # [{), static_cast<}] # enum.underlyingType #[{>($_storage));
+ }];
+
+ let writeToMlirBytecode = [{
+ $_writer.writeVarInt(static_cast<uint64_t>($_storage));
+ }];
+
+ let readFromMlirBytecode = [{
+ uint64_t rawValue;
+ if (::mlir::failed($_reader.readVarInt(rawValue)))
+ return ::mlir::failure();
+ if (rawValue > std::numeric_limits<}] # enum.underlyingType # [{>::max())
+ return ::mlir::failure();
+ $_storage = static_cast<}] # enum.cppType # [{>(rawValue);
+ }];
+
+ let optionalParser = [{
+ auto value = ::mlir::FieldParser<std::optional<}] # enum.cppType # [{>>::parse($_parser);
+ if (::mlir::failed(value))
+ return ::mlir::failure();
+ if (!(value->has_value()))
+ return std::nullopt;
+ $_storage = std::move(**value);
+ }];
+}
+
+// Enum property that can have been (or, if `storeInCustomAttribute` is true, will also
+// be stored as) an attribute, in addition to being stored as an integer attribute.
+class EnumPropWithAttrForm<EnumInfo enumInfo, Attr attributeForm>
+ : EnumProp<enumInfo> {
+ Attr attrForm = attributeForm;
+ bit storeInCustomAttribute = 0;
+
+ let convertFromAttribute = [{
+ auto customAttr = ::mlir::dyn_cast_if_present<}]
+ # attrForm.storageType # [{>($_attr);
+ if (customAttr) {
+ $_storage = customAttr.getValue();
+ return ::mlir::success();
+ }
+ auto intAttr = ::mlir::dyn_cast_if_present<::mlir::IntegerAttr>($_attr);
+ if (!intAttr) {
+ return $_diag() << "expected }] # attrForm.storageType
+ # [{ or IntegerAttr storage for }] # enum.cppType # [{";
+ }
+ $_storage = static_cast<}] # enum.cppType # [{>(intAttr.getValue().getZExtValue());
+ return ::mlir::success();
+ }];
+
+ let convertToAttribute = !if(storeInCustomAttribute, [{
+ return }] # attrForm.storageType # [{::get($_ctxt, $_storage);
+ }], [{
+ return ::mlir::IntegerAttr::get(::mlir::IntegerType::get($_ctxt, }] # enumInfo.bitwidth
+ # [{), static_cast<}] # enum.underlyingType #[{>($_storage));
+ }]);
+}
+
+class _namedEnumPropFields<string cppType, string mnemonic> {
+ code parser = [{
+ if ($_parser.parseKeyword("}] # mnemonic # [{")
+ || $_parser.parseLess()) {
+ return ::mlir::failure();
+ }
+ auto parseRes = ::mlir::FieldParser<}] # cppType # [{>::parse($_parser);
+ if (::mlir::failed(parseRes) ||
+ ::mlir::failed($_parser.parseGreater())) {
+ return ::mlir::failure();
+ }
+ $_storage = *parseRes;
+ }];
+
+ code optionalParser = [{
+ if ($_parser.parseOptionalKeyword("}] # mnemonic # [{")) {
+ return std::nullopt;
+ }
+ if ($_parser.parseLess()) {
+ return ::mlir::failure();
+ }
+ auto parseRes = ::mlir::FieldParser<}] # cppType # [{>::parse($_parser);
+ if (::mlir::failed(parseRes) ||
+ ::mlir::failed($_parser.parseGreater())) {
+ return ::mlir::failure();
+ }
+ $_storage = *parseRes;
+ }];
+
+ code printer = [{
+ $_printer << "}] # mnemonic # [{<" << $_storage << ">";
+ }];
+}
+
+// An EnumProp which, when printed, is surrounded by mnemonic<>.
+// For example, if the enum can be a, b, or c, and the mnemonic is foo,
+// the format of this property will be "foo<a>", "foo<b>", or "foo<c>".
+class NamedEnumProp<EnumInfo enumInfo, string name>
+ : EnumProp<enumInfo> {
+ string mnemonic = name;
+ let parser = _namedEnumPropFields<enum.cppType, mnemonic>.parser;
+ let optionalParser = _namedEnumPropFields<enum.cppType, mnemonic>.optionalParser;
+ let printer = _namedEnumPropFields<enum.cppType, mnemonic>.printer;
+}
+
+// A `NamedEnumProp` with an attribute form as in `EnumPropWithAttrForm`.
+class NamedEnumPropWithAttrForm<EnumInfo enumInfo, string name, Attr attributeForm>
+ : EnumPropWithAttrForm<enumInfo, attributeForm> {
+ string mnemonic = name;
+ let parser = _namedEnumPropFields<enum.cppType, mnemonic>.parser;
+ let optionalParser = _namedEnumPropFields<enum.cppType, mnemonic>.optionalParser;
+ let printer = _namedEnumPropFields<enumInfo.cppType, mnemonic>.printer;
+}
+
class _symbolToValue<EnumInfo enumInfo, string case> {
defvar cases =
!filter(iter, enumInfo.enumerants, !eq(iter.str, case));
diff --git a/mlir/include/mlir/IR/Properties.td b/mlir/include/mlir/IR/Properties.td
index 8bd834379040..739df03c7ef2 100644
--- a/mlir/include/mlir/IR/Properties.td
+++ b/mlir/include/mlir/IR/Properties.td
@@ -239,25 +239,6 @@ def I64Prop : IntProp<"int64_t">;
def I32Property : IntProp<"int32_t">, Deprecated<"moved to shorter name I32Prop">;
def I64Property : IntProp<"int64_t">, Deprecated<"moved to shorter name I64Prop">;
-class EnumProp<string storageTypeParam, string desc = "", string default = ""> :
- Property<storageTypeParam, desc> {
- // TODO: implement predicate for enum validity.
- let writeToMlirBytecode = [{
- $_writer.writeVarInt(static_cast<uint64_t>($_storage));
- }];
- let readFromMlirBytecode = [{
- uint64_t val;
- if (failed($_reader.readVarInt(val)))
- return ::mlir::failure();
- $_storage = static_cast<}] # storageTypeParam # [{>(val);
- }];
- let defaultValue = default;
-}
-
-class EnumProperty<string storageTypeParam, string desc = "", string default = ""> :
- EnumProp<storageTypeParam, desc, default>,
- Deprecated<"moved to shorter name EnumProp">;
-
// Note: only a class so we can deprecate the old name
class _cls_StringProp : Property<"std::string", "string"> {
let interfaceType = "::llvm::StringRef";
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 4891dab3aa1d..c6c695b442b4 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -136,9 +136,13 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
matchAndRewrite(gpu::ShuffleOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
Location loc = op->getLoc();
+ Value initShflValue = adaptor.getValue();
+ Type shflType = initShflValue.getType();
// TODO: Add support for non 32-bit shuffle values.
- if (adaptor.getValue().getType().getIntOrFloatBitWidth() != 32)
- return failure();
+ if (!shflType.isIntOrFloat() || shflType.getIntOrFloatBitWidth() != 32)
+ return rewriter.notifyMatchFailure(
+ op, "only 32-bit int/float types are supported");
+
const unsigned indexBitwidth = getTypeConverter()->getIndexTypeBitwidth();
Value srcLaneId = getLaneId(rewriter, loc, indexBitwidth);
@@ -175,16 +179,14 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
Value two = rewriter.create<LLVM::ConstantOp>(loc, int32Type, 2);
Value dwordAlignedDstLane =
rewriter.create<LLVM::ShlOp>(loc, int32Type, selectDstLane, two);
- Value initShflValue = adaptor.getValue();
- if (adaptor.getValue().getType().isF32()) {
+ if (shflType.isF32()) {
initShflValue =
rewriter.create<LLVM::BitcastOp>(loc, int32Type, initShflValue);
}
Value shflValue = rewriter.create<ROCDL::DsBpermuteOp>(
loc, int32Type, dwordAlignedDstLane, initShflValue);
- if (adaptor.getValue().getType().isF32()) {
- shflValue = rewriter.create<LLVM::BitcastOp>(
- loc, adaptor.getValue().getType(), shflValue);
+ if (shflType.isF32()) {
+ shflValue = rewriter.create<LLVM::BitcastOp>(loc, shflType, shflValue);
}
rewriter.replaceOp(op, {shflValue, isActiveSrcLane});
return success();
diff --git a/mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp
index 4bd4da25f6e5..9f2900214e8b 100644
--- a/mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp
@@ -40,8 +40,9 @@ struct GpuShuffleRewriter : public OpRewritePattern<gpu::ShuffleOp> {
auto i64 = rewriter.getI64Type();
// If the type of the value is either i32 or f32, the op is already valid.
- if (valueType.getIntOrFloatBitWidth() == 32)
- return failure();
+ if (!valueType.isIntOrFloat() || valueType.getIntOrFloatBitWidth() != 64)
+ return rewriter.notifyMatchFailure(
+ op, "only 64-bit int/float types are supported");
Value lo, hi;
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index 28d2019b225c..d1d00ca9681e 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -50,71 +50,6 @@ using mlir::LLVM::tailcallkind::getMaxEnumValForTailCallKind;
#include "mlir/Dialect/LLVMIR/LLVMOpsDialect.cpp.inc"
//===----------------------------------------------------------------------===//
-// Property Helpers
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// IntegerOverflowFlags
-//===----------------------------------------------------------------------===//
-
-namespace mlir {
-static Attribute convertToAttribute(MLIRContext *ctx,
- IntegerOverflowFlags flags) {
- return IntegerOverflowFlagsAttr::get(ctx, flags);
-}
-
-static LogicalResult
-convertFromAttribute(IntegerOverflowFlags &flags, Attribute attr,
- function_ref<InFlightDiagnostic()> emitError) {
- auto flagsAttr = dyn_cast<IntegerOverflowFlagsAttr>(attr);
- if (!flagsAttr) {
- return emitError() << "expected 'overflowFlags' attribute to be an "
- "IntegerOverflowFlagsAttr, but got "
- << attr;
- }
- flags = flagsAttr.getValue();
- return success();
-}
-} // namespace mlir
-
-static ParseResult parseOverflowFlags(AsmParser &p,
- IntegerOverflowFlags &flags) {
- if (failed(p.parseOptionalKeyword("overflow"))) {
- flags = IntegerOverflowFlags::none;
- return success();
- }
- if (p.parseLess())
- return failure();
- do {
- StringRef kw;
- SMLoc loc = p.getCurrentLocation();
- if (p.parseKeyword(&kw))
- return failure();
- std::optional<IntegerOverflowFlags> flag =
- symbolizeIntegerOverflowFlags(kw);
- if (!flag)
- return p.emitError(loc,
- "invalid overflow flag: expected nsw, nuw, or none");
- flags = flags | *flag;
- } while (succeeded(p.parseOptionalComma()));
- return p.parseGreater();
-}
-
-static void printOverflowFlags(AsmPrinter &p, Operation *op,
- IntegerOverflowFlags flags) {
- if (flags == IntegerOverflowFlags::none)
- return;
- p << " overflow<";
- SmallVector<StringRef, 2> strs;
- if (bitEnumContainsAny(flags, IntegerOverflowFlags::nsw))
- strs.push_back("nsw");
- if (bitEnumContainsAny(flags, IntegerOverflowFlags::nuw))
- strs.push_back("nuw");
- llvm::interleaveComma(strs, p);
- p << ">";
-}
-
-//===----------------------------------------------------------------------===//
// Attribute Helpers
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
index c4ef7d0bb9ff..47368532df16 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
@@ -731,9 +731,56 @@ struct ConcatSliceOptimization : public OpRewritePattern<tosa::SliceOp> {
}
};
+// Update size operand of tosa.slice if size has dynamic dims but corresponding
+// output dim is static
+struct SliceDynamicSizeCanonicalization
+ : public OpRewritePattern<tosa::SliceOp> {
+ using OpRewritePattern<tosa::SliceOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(tosa::SliceOp sliceOp,
+ PatternRewriter &rewriter) const override {
+ ShapedType resultType = cast<ShapedType>(sliceOp.getType());
+
+ ElementsAttr sizeElems;
+ if (!matchPattern(sliceOp.getSize(), m_Constant(&sizeElems))) {
+ return rewriter.notifyMatchFailure(
+ sliceOp, "size of slice must be a static ranked shape");
+ }
+
+ llvm::SmallVector<int64_t> sliceSizes =
+ llvm::to_vector(sizeElems.getValues<int64_t>());
+
+ bool replaceSliceSize{false};
+ // if size op has -1 indicating dynamic shape but corresponding dim on the
+ // output is statically known, update size to match with known output dim
+ // shape
+ for (const auto &[index, size] : llvm::enumerate(sliceSizes)) {
+ if (size == -1 && !resultType.isDynamicDim(index)) {
+ sliceSizes[index] = resultType.getDimSize(index);
+ replaceSliceSize = true;
+ }
+ }
+
+ if (!replaceSliceSize) {
+ return rewriter.notifyMatchFailure(
+ sliceOp, "no dimension of size of slice is dynamic that resolves "
+ "to static output shape");
+ }
+
+ auto size_op = getTosaConstShape(rewriter, sliceOp.getLoc(), sliceSizes);
+ auto newSliceOp = rewriter.create<tosa::SliceOp>(
+ sliceOp.getLoc(), sliceOp.getType(), sliceOp.getInput1(),
+ sliceOp.getStart(), size_op);
+
+ rewriter.replaceOp(sliceOp, newSliceOp.getResult());
+ return success();
+ }
+};
+
void SliceOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
- results.add<ConcatSliceOptimization>(context);
+ results.add<ConcatSliceOptimization, SliceDynamicSizeCanonicalization>(
+ context);
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-unsupported.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-unsupported.mlir
new file mode 100644
index 000000000000..90f2e5f047cd
--- /dev/null
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-unsupported.mlir
@@ -0,0 +1,13 @@
+// RUN: mlir-opt %s -convert-gpu-to-rocdl -verify-diagnostics
+
+gpu.module @test_module {
+ // ROCDL lowering only suport shuffles for 32bit ints/floats, but they
+ // shouldn't crash on unsupported types.
+ func.func @gpu_shuffle_unsupported(%arg0 : vector<4xf16>) -> vector<4xf16> {
+ %offset = arith.constant 4 : i32
+ %width = arith.constant 64 : i32
+ // expected-error @+1 {{failed to legalize operation 'gpu.shuffle'}}
+ %shfl, %pred = gpu.shuffle xor %arg0, %offset, %width : vector<4xf16>
+ return %shfl : vector<4xf16>
+ }
+}
diff --git a/mlir/test/Dialect/GPU/shuffle-rewrite.mlir b/mlir/test/Dialect/GPU/shuffle-rewrite.mlir
index 461825820153..c0ccae05a057 100644
--- a/mlir/test/Dialect/GPU/shuffle-rewrite.mlir
+++ b/mlir/test/Dialect/GPU/shuffle-rewrite.mlir
@@ -49,3 +49,14 @@ module {
return
}
}
+
+// -----
+
+// CHECK-LABEL: @gpu_shuffle_unsupported
+func.func @gpu_shuffle_unsupported(%arg0 : vector<4xf16>) -> vector<4xf16> {
+ %offset = arith.constant 4 : i32
+ %width = arith.constant 64 : i32
+ // CHECK: gpu.shuffle xor %{{.*}}, %{{.*}}, %{{.*}} : vector<4xf16>
+ %shfl, %pred = gpu.shuffle xor %arg0, %offset, %width : vector<4xf16>
+ return %shfl : vector<4xf16>
+}
diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir
index b366b4f1e4fd..d153474593d8 100644
--- a/mlir/test/Dialect/Tosa/canonicalize.mlir
+++ b/mlir/test/Dialect/Tosa/canonicalize.mlir
@@ -1212,3 +1212,18 @@ func.func @do_not_fold_intdiv_division_by_0() -> tensor<1x24x2xi32> {
%16 = tosa.intdiv %4, %1 : (tensor<1x24x2xi32>, tensor<1x24x2xi32>) -> tensor<1x24x2xi32>
return %16 : tensor<1x24x2xi32>
}
+
+
+// -----
+// CHECK-LABEL: func.func @slice_dynamic_size_static_output_canonicalize(
+// CHECK-SAME: %[[ARG0:.*]]: tensor<2x60x59x?xf32>) -> tensor<2x60x58x?xf32> {
+// CHECK: %[[START:.*]] = tosa.const_shape {values = dense<0> : tensor<4xindex>} : () -> !tosa.shape<4>
+// CHECK: %[[SIZE:.*]] = tosa.const_shape {values = dense<[2, 60, 58, -1]> : tensor<4xindex>} : () -> !tosa.shape<4>
+// CHECK: %[[SLICE:.*]] = tosa.slice %[[ARG0]], %[[START]], %[[SIZE]] : (tensor<2x60x59x?xf32>, !tosa.shape<4>, !tosa.shape<4>) -> tensor<2x60x58x?xf32>
+// CHECK: return %[[SLICE]]
+func.func @slice_dynamic_size_static_output_canonicalize(%arg0: tensor<2x60x59x?xf32>) -> tensor<2x60x58x?xf32> {
+ %0 = tosa.const_shape {values = dense<0> : tensor<4xindex>} : () -> !tosa.shape<4>
+ %1 = tosa.const_shape {values = dense<[-1, 60, 58, -1]> : tensor<4xindex>} : () -> !tosa.shape<4>
+ %2 = tosa.slice %arg0, %0, %1 : (tensor<2x60x59x?xf32>, !tosa.shape<4>, !tosa.shape<4>) -> tensor<2x60x58x?xf32>
+ return %2 : tensor<2x60x58x?xf32>
+ }
diff --git a/mlir/test/IR/enum-attr-invalid.mlir b/mlir/test/IR/enum-attr-invalid.mlir
index 923736f28dad..2f240a56c987 100644
--- a/mlir/test/IR/enum-attr-invalid.mlir
+++ b/mlir/test/IR/enum-attr-invalid.mlir
@@ -28,3 +28,78 @@ func.func @test_parse_invalid_attr() -> () {
// expected-error@+1 {{failed to parse TestEnumAttr parameter 'value'}}
test.op_with_enum 1 : index
}
+
+// -----
+
+func.func @test_non_keyword_prop_enum() -> () {
+ // expected-error@+2 {{expected keyword for a test enum}}
+ // expected-error@+1 {{invalid value for property value, expected a test enum}}
+ test.op_with_enum_prop 0
+ return
+}
+
+// -----
+
+func.func @test_wrong_keyword_prop_enum() -> () {
+ // expected-error@+2 {{expected one of [first, second, third] for a test enum, got: fourth}}
+ // expected-error@+1 {{invalid value for property value, expected a test enum}}
+ test.op_with_enum_prop fourth
+}
+
+// -----
+
+func.func @test_bad_integer() -> () {
+ // expected-error@+1 {{op property 'value' failed to satisfy constraint: a test enum}}
+ "test.op_with_enum_prop"() <{value = 4 : i32}> {} : () -> ()
+}
+
+// -----
+
+func.func @test_bit_enum_prop_not_keyword() -> () {
+ // expected-error@+2 {{expected keyword for a test bit enum}}
+ // expected-error@+1 {{invalid value for property value1, expected a test bit enum}}
+ test.op_with_bit_enum_prop 0
+ return
+}
+
+// -----
+
+func.func @test_bit_enum_prop_wrong_keyword() -> () {
+ // expected-error@+2 {{expected one of [read, write, execute] for a test bit enum, got: chroot}}
+ // expected-error@+1 {{invalid value for property value1, expected a test bit enum}}
+ test.op_with_bit_enum_prop read, chroot : ()
+ return
+}
+
+// -----
+
+func.func @test_bit_enum_prop_bad_value() -> () {
+ // expected-error@+1 {{op property 'value2' failed to satisfy constraint: a test bit enum}}
+ "test.op_with_bit_enum_prop"() <{value1 = 7 : i32, value2 = 8 : i32}> {} : () -> ()
+ return
+}
+
+// -----
+
+func.func @test_bit_enum_prop_named_wrong_keyword() -> () {
+ // expected-error@+2 {{expected 'bit_enum'}}
+ // expected-error@+1 {{invalid value for property value1, expected a test bit enum}}
+ test.op_with_bit_enum_prop_named foo<read, execute>
+ return
+}
+
+// -----
+
+func.func @test_bit_enum_prop_named_not_open() -> () {
+ // expected-error@+2 {{expected '<'}}
+ // expected-error@+1 {{invalid value for property value1, expected a test bit enum}}
+ test.op_with_bit_enum_prop_named bit_enum read, execute>
+}
+
+// -----
+
+func.func @test_bit_enum_prop_named_not_closed() -> () {
+ // expected-error@+2 {{expected '>'}}
+ // expected-error@+1 {{invalid value for property value1, expected a test bit enum}}
+ test.op_with_bit_enum_prop_named bit_enum<read, execute +
+}
diff --git a/mlir/test/IR/enum-attr-roundtrip.mlir b/mlir/test/IR/enum-attr-roundtrip.mlir
index 36e605bdbff4..f1f09f977b7d 100644
--- a/mlir/test/IR/enum-attr-roundtrip.mlir
+++ b/mlir/test/IR/enum-attr-roundtrip.mlir
@@ -35,3 +35,48 @@ func.func @test_match_op_with_bit_enum() -> () {
test.op_with_bit_enum <execute, write> tag 0 : i32
return
}
+
+// CHECK-LABEL: @test_enum_prop
+func.func @test_enum_prop() -> () {
+ // CHECK: test.op_with_enum_prop first
+ test.op_with_enum_prop first
+
+ // CHECK: test.op_with_enum_prop first
+ "test.op_with_enum_prop"() <{value = 0 : i32}> {} : () -> ()
+
+ // CHECK: test.op_with_enum_prop_attr_form <{value = 0 : i32}>
+ test.op_with_enum_prop_attr_form <{value = 0 : i32}>
+ // CHECK: test.op_with_enum_prop_attr_form <{value = 1 : i32}>
+ test.op_with_enum_prop_attr_form <{value = #test<enum second>}>
+
+ // CHECK: test.op_with_enum_prop_attr_form_always <{value = #test<enum first>}>
+ test.op_with_enum_prop_attr_form_always <{value = #test<enum first>}>
+ // CHECK: test.op_with_enum_prop_attr_form_always <{value = #test<enum second>}
+ test.op_with_enum_prop_attr_form_always <{value = #test<enum second>}>
+
+ return
+}
+
+// CHECK-LABEL @test_bit_enum_prop()
+func.func @test_bit_enum_prop() -> () {
+ // CHECK: test.op_with_bit_enum_prop read : ()
+ test.op_with_bit_enum_prop read read : ()
+
+ // CHECK: test.op_with_bit_enum_prop read, write write, execute
+ test.op_with_bit_enum_prop read, write write, execute : ()
+
+ // CHECK: test.op_with_bit_enum_prop read, execute write
+ "test.op_with_bit_enum_prop"() <{value1 = 5 : i32, value2 = 2 : i32}> {} : () -> ()
+
+ // CHECK: test.op_with_bit_enum_prop read, write, execute
+ test.op_with_bit_enum_prop read, write, execute : ()
+
+ // CHECK: test.op_with_bit_enum_prop_named bit_enum<read>{{$}}
+ test.op_with_bit_enum_prop_named bit_enum<read> bit_enum<read>
+ // CHECK: test.op_with_bit_enum_prop_named bit_enum<read, write> bit_enum<write, execute>
+ test.op_with_bit_enum_prop_named bit_enum<read, write> bit_enum<write, execute>
+ // CHECK: test.op_with_bit_enum_prop_named bit_enum<read, write, execute>
+ test.op_with_bit_enum_prop_named bit_enum<read, write, execute>
+
+ return
+}
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 31be00ace138..85a49e05d4c7 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -424,6 +424,52 @@ def : Pat<(OpWithEnum ConstantEnumCase<TestEnumAttr, "first">:$value,
ConstantAttr<I32Attr, "1">)>;
//===----------------------------------------------------------------------===//
+// Test Enum Properties
+//===----------------------------------------------------------------------===//
+
+// Define the enum property.
+def TestEnumProp : EnumProp<TestEnum>;
+// Define an op that contains the enum property.
+def OpWithEnumProp : TEST_Op<"op_with_enum_prop"> {
+ let arguments = (ins TestEnumProp:$value);
+ let assemblyFormat = "$value attr-dict";
+}
+
+def TestEnumPropAttrForm : EnumPropWithAttrForm<TestEnum, TestEnumAttr>;
+def OpWithEnumPropAttrForm : TEST_Op<"op_with_enum_prop_attr_form"> {
+ let arguments = (ins TestEnumPropAttrForm:$value);
+ let assemblyFormat = "prop-dict attr-dict";
+}
+
+def TestEnumPropAttrFormAlways : EnumPropWithAttrForm<TestEnum, TestEnumAttr> {
+ let storeInCustomAttribute = 1;
+}
+def OpWithEnumPropAttrFormAlways : TEST_Op<"op_with_enum_prop_attr_form_always"> {
+ let arguments = (ins TestEnumPropAttrFormAlways:$value);
+ let assemblyFormat = "prop-dict attr-dict";
+}
+
+def TestBitEnumProp : EnumProp<TestBitEnum> {
+ let defaultValue = TestBitEnum.cppType # "::Read";
+}
+def OpWithTestBitEnum : TEST_Op<"op_with_bit_enum_prop"> {
+ let arguments = (ins
+ TestBitEnumProp:$value1,
+ TestBitEnumProp:$value2);
+ let assemblyFormat = "$value1 ($value2^)? attr-dict `:` `(``)`";
+}
+
+def TestBitEnumPropNamed : NamedEnumProp<TestBitEnum, "bit_enum"> {
+ let defaultValue = TestBitEnum.cppType # "::Read";
+}
+def OpWithBitEnumPropNamed : TEST_Op<"op_with_bit_enum_prop_named"> {
+ let arguments = (ins
+ TestBitEnumPropNamed:$value1,
+ TestBitEnumPropNamed:$value2);
+ let assemblyFormat = "$value1 ($value2^)? attr-dict";
+}
+
+//===----------------------------------------------------------------------===//
// Test Bit Enum Attributes
//===----------------------------------------------------------------------===//