summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-exchange-fence.ll64
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir29
-rw-r--r--llvm/test/CodeGen/AArch64/O0-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/AArch64/O3-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/AArch64/abds.ll22
-rw-r--r--llvm/test/CodeGen/AArch64/abdu.ll26
-rw-r--r--llvm/test/CodeGen/AArch64/emutls_alias.ll17
-rw-r--r--llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll6
-rw-r--r--llvm/test/CodeGen/AArch64/ptrauth-basic-pic.ll82
-rw-r--r--llvm/test/CodeGen/AArch64/ptrauth-elf-globals-pic.ll23
-rw-r--r--llvm/test/CodeGen/AArch64/ptrauth-extern-weak.ll36
-rw-r--r--llvm/test/CodeGen/AArch64/ptrauth-got-abuse.ll44
-rw-r--r--llvm/test/CodeGen/AArch64/ptrauth-init-fini.ll104
-rw-r--r--llvm/test/CodeGen/AArch64/ptrauth-tagged-globals-pic.ll66
-rw-r--r--llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sms-order-physreg-deps.mir452
-rw-r--r--llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll152
-rw-r--r--llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll28
-rw-r--r--llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll28
-rw-r--r--llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll4
-rw-r--r--llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfadd.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmax.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmaxnm.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmin.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfminnm.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmla.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmla_lane.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls_lane.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmul.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmul_lane.ll2
-rw-r--r--llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfsub.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll37
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll7
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir298
-rw-r--r--llvm/test/CodeGen/AMDGPU/addrspacecast.ll128
-rw-r--r--llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll36
-rw-r--r--llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll10
-rw-r--r--llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir1541
-rw-r--r--llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir1277
-rw-r--r--llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll73
-rw-r--r--llvm/test/CodeGen/AMDGPU/llc-pipeline.ll5
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll56
-rw-r--r--llvm/test/CodeGen/AMDGPU/merge-sbuffer-load.mir725
-rw-r--r--llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll15
-rw-r--r--llvm/test/CodeGen/ARM/O3-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/ARM/setjmp-bti-basic.ll76
-rw-r--r--llvm/test/CodeGen/BPF/objdump_atomics.ll2
-rw-r--r--llvm/test/CodeGen/BPF/objdump_cond_op.ll2
-rw-r--r--llvm/test/CodeGen/BPF/objdump_imm_hex.ll4
-rw-r--r--llvm/test/CodeGen/BPF/objdump_static_var.ll4
-rw-r--r--llvm/test/CodeGen/Generic/expand-vp-fp-intrinsics.ll176
-rw-r--r--llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll118
-rw-r--r--llvm/test/CodeGen/Generic/expand-vp-load-store.ll205
-rw-r--r--llvm/test/CodeGen/Generic/expand-vp.ll567
-rw-r--r--llvm/test/CodeGen/LoongArch/O0-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/LoongArch/opt-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/M68k/pipeline.ll1
-rw-r--r--llvm/test/CodeGen/Mips/llvm-ir/and-srl.ll28
-rw-r--r--llvm/test/CodeGen/NVPTX/fence-proxy-tensormap.ll36
-rw-r--r--llvm/test/CodeGen/NVPTX/load-store-sm-70.ll1188
-rw-r--r--llvm/test/CodeGen/NVPTX/load-store.ll823
-rw-r--r--llvm/test/CodeGen/PowerPC/O0-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/PowerPC/O3-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/PowerPC/aix-base-pointer.ll5
-rw-r--r--llvm/test/CodeGen/PowerPC/builtins-bcd-assist.ll111
-rw-r--r--llvm/test/CodeGen/PowerPC/builtins-ppc-bcd-assist.ll79
-rw-r--r--llvm/test/CodeGen/PowerPC/common-chain.ll313
-rw-r--r--llvm/test/CodeGen/RISCV/O0-pipeline.ll2
-rw-r--r--llvm/test/CodeGen/RISCV/O3-pipeline.ll2
-rw-r--r--llvm/test/CodeGen/RISCV/jumptable-swguarded.ll1
-rw-r--r--llvm/test/CodeGen/RISCV/lpad.ll101
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vcompress.ll137
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vp-select.ll19
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vrgather.ll786
-rw-r--r--llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll5
-rw-r--r--llvm/test/CodeGen/WebAssembly/offset.ll20
-rw-r--r--llvm/test/CodeGen/X86/O0-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/X86/apx/and.ll64
-rw-r--r--llvm/test/CodeGen/X86/apx/cmov.ll22
-rw-r--r--llvm/test/CodeGen/X86/apx/mul-i1024.ll916
-rw-r--r--llvm/test/CodeGen/X86/apx/or.ll64
-rw-r--r--llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll4
-rw-r--r--llvm/test/CodeGen/X86/apx/push2-pop2.ll24
-rw-r--r--llvm/test/CodeGen/X86/apx/pushp-popp.ll4
-rw-r--r--llvm/test/CodeGen/X86/apx/shift-eflags.ll28
-rw-r--r--llvm/test/CodeGen/X86/apx/sub.ll96
-rw-r--r--llvm/test/CodeGen/X86/apx/xor.ll80
-rw-r--r--llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll1003
-rw-r--r--llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll1618
-rw-r--r--llvm/test/CodeGen/X86/avx512-intel-ocl.ll8
-rw-r--r--llvm/test/CodeGen/X86/clobber_base_ptr.ll118
-rw-r--r--llvm/test/CodeGen/X86/clobber_frame_ptr.ll159
-rw-r--r--llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll53
-rw-r--r--llvm/test/CodeGen/X86/cmp.ll52
-rw-r--r--llvm/test/CodeGen/X86/combine-srem.ll40
-rw-r--r--llvm/test/CodeGen/X86/i386-baseptr.ll4
-rw-r--r--llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll4
-rw-r--r--llvm/test/CodeGen/X86/opt-pipeline.ll1
-rw-r--r--llvm/test/CodeGen/X86/popcnt.ll216
-rw-r--r--llvm/test/CodeGen/X86/select_const_i128.ll4
-rw-r--r--llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll6
-rw-r--r--llvm/test/CodeGen/X86/x86-32-intrcc.ll4
-rw-r--r--llvm/test/CodeGen/X86/x86-64-baseptr.ll12
-rw-r--r--llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll8
109 files changed, 11554 insertions, 3302 deletions
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-exchange-fence.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-exchange-fence.ll
new file mode 100644
index 000000000000..2adbc709d238
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-exchange-fence.ll
@@ -0,0 +1,64 @@
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse -O0 | FileCheck %s
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse -O1 | FileCheck %s
+
+; When their destination register is WZR/ZZR, SWP operations are not regarded as
+; a read for the purpose of a DMB.LD in the AArch64 memory model.
+; This test ensures that the AArch64DeadRegisterDefinitions pass does not
+; replace the desitnation register of SWP instructions with the zero register
+; when the read value is unused.
+
+define dso_local i32 @atomic_exchange_monotonic(ptr %ptr, ptr %ptr2, i32 %value) {
+; CHECK-LABEL: atomic_exchange_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swp
+; CHECK-NOT: wzr
+; CHECK-NEXT: dmb ishld
+; CHECK-NEXT: ldr w0, [x1]
+; CHECK-NEXT: ret
+ %r0 = atomicrmw xchg ptr %ptr, i32 %value monotonic
+ fence acquire
+ %r1 = load atomic i32, ptr %ptr2 monotonic, align 4
+ ret i32 %r1
+}
+
+define dso_local i32 @atomic_exchange_acquire(ptr %ptr, ptr %ptr2, i32 %value) {
+; CHECK-LABEL: atomic_exchange_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpa
+; CHECK-NOT: wzr
+; CHECK-NEXT: dmb ishld
+; CHECK-NEXT: ldr w0, [x1]
+; CHECK-NEXT: ret
+ %r0 = atomicrmw xchg ptr %ptr, i32 %value acquire
+ fence acquire
+ %r1 = load atomic i32, ptr %ptr2 monotonic, align 4
+ ret i32 %r1
+}
+
+define dso_local i32 @atomic_exchange_release(ptr %ptr, ptr %ptr2, i32 %value) {
+; CHECK-LABEL: atomic_exchange_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpl
+; CHECK-NOT: wzr
+; CHECK-NEXT: dmb ishld
+; CHECK-NEXT: ldr w0, [x1]
+; CHECK-NEXT: ret
+ %r0 = atomicrmw xchg ptr %ptr, i32 %value release
+ fence acquire
+ %r1 = load atomic i32, ptr %ptr2 monotonic, align 4
+ ret i32 %r1
+}
+
+define dso_local i32 @atomic_exchange_acquire_release(ptr %ptr, ptr %ptr2, i32 %value) {
+; CHECK-LABEL: atomic_exchange_acquire_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: swpal
+; CHECK-NOT: wzr
+; CHECK-NEXT: dmb ishld
+; CHECK-NEXT: ldr w0, [x1]
+; CHECK-NEXT: ret
+ %r0 = atomicrmw xchg ptr %ptr, i32 %value acq_rel
+ fence acquire
+ %r1 = load atomic i32, ptr %ptr2 monotonic, align 4
+ ret i32 %r1
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir
index 9d12c3c32c7f..71094825e42f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir
@@ -367,3 +367,32 @@ body: |
%shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec(<4 x s32>), %undef, shufflemask(0, 0, 0, 0)
$q0 = COPY %shuf(<4 x s32>)
RET_ReallyLR implicit $q0
+
+...
+---
+name: build_vector_rhs
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $w0, $w1, $w2, $w3, $w4
+ ; The G_SHUFFLE_VECTOR is fed by a G_BUILD_VECTOR, and the 0th input
+ ; operand is not a constant. We should get a G_DUP.
+ ;
+ ; CHECK-LABEL: name: build_vector
+ ; CHECK: liveins: $w0, $w1, $w2, $w3, $w4
+ ; CHECK: %lane_1:_(s32) = COPY $w1
+ ; CHECK: %shuf:_(<4 x s32>) = G_DUP %lane_1(s32)
+ ; CHECK: $q0 = COPY %shuf(<4 x s32>)
+ ; CHECK: RET_ReallyLR implicit $q0
+ %lane_0:_(s32) = COPY $w0
+ %lane_1:_(s32) = COPY $w1
+ %b:_(s32) = COPY $w2
+ %c:_(s32) = COPY $w3
+ %d:_(s32) = COPY $w4
+ %buildvec0:_(<4 x s32>) = G_BUILD_VECTOR %lane_0(s32), %b(s32), %c(s32), %d(s32)
+ %buildvec1:_(<4 x s32>) = G_BUILD_VECTOR %lane_1(s32), %b(s32), %c(s32), %d(s32)
+ %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec0(<4 x s32>), %buildvec1, shufflemask(4, 4, 4, 4)
+ $q0 = COPY %shuf(<4 x s32>)
+ RET_ReallyLR implicit $q0
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index bfcb9a710f70..ba611493e1a7 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -22,7 +22,6 @@
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Remove unreachable blocks from the CFG
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 017349aa32af..845634e8e983 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -60,7 +60,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll
index d4ad33f963ba..215907c66a6e 100644
--- a/llvm/test/CodeGen/AArch64/abds.ll
+++ b/llvm/test/CodeGen/AArch64/abds.ll
@@ -571,6 +571,28 @@ define i32 @abd_sub_i32(i32 %a, i32 %b) nounwind {
ret i32 %abs
}
+define i64 @vector_legalized(i16 %a, i16 %b) {
+; CHECK-LABEL: vector_legalized:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: sub w8, w8, w1, sxth
+; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: cneg w8, w8, mi
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: add x0, x9, x8
+; CHECK-NEXT: ret
+ %ea = sext i16 %a to i32
+ %eb = sext i16 %b to i32
+ %s = sub i32 %ea, %eb
+ %ab = call i32 @llvm.abs.i32(i32 %s, i1 false)
+ %e = zext i32 %ab to i64
+ %red = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> zeroinitializer)
+ %z = add i64 %red, %e
+ ret i64 %z
+}
+
declare i8 @llvm.abs.i8(i8, i1)
declare i16 @llvm.abs.i16(i16, i1)
diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll
index 983db629e449..f70f095d7dab 100644
--- a/llvm/test/CodeGen/AArch64/abdu.ll
+++ b/llvm/test/CodeGen/AArch64/abdu.ll
@@ -409,6 +409,32 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
ret i128 %sel
}
+;
+; negative tests
+;
+
+define i64 @vector_legalized(i16 %a, i16 %b) {
+; CHECK-LABEL: vector_legalized:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: sub w8, w8, w1, uxth
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: addp d0, v0.2d
+; CHECK-NEXT: cneg w8, w8, mi
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: add x0, x9, x8
+; CHECK-NEXT: ret
+ %ea = zext i16 %a to i32
+ %eb = zext i16 %b to i32
+ %s = sub i32 %ea, %eb
+ %ab = call i32 @llvm.abs.i32(i32 %s, i1 false)
+ %e = zext i32 %ab to i64
+ %red = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> zeroinitializer)
+ %z = add i64 %red, %e
+ ret i64 %z
+}
+
declare i8 @llvm.abs.i8(i8, i1)
declare i16 @llvm.abs.i16(i16, i1)
declare i32 @llvm.abs.i32(i32, i1)
diff --git a/llvm/test/CodeGen/AArch64/emutls_alias.ll b/llvm/test/CodeGen/AArch64/emutls_alias.ll
new file mode 100644
index 000000000000..4a157d8d03e7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/emutls_alias.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-ohos \
+; RUN: | FileCheck -check-prefix=EMUTLS_CHECK %s
+
+%struct.__res_state = type { [5 x i8] }
+
+@foo = dso_local thread_local global %struct.__res_state { [5 x i8] c"\01\02\03\04\05" }, align 1
+
+@bar = hidden thread_local(initialexec) alias %struct.__res_state, ptr @foo
+
+define dso_local i32 @main() {
+ %1 = alloca i32, align 4
+ store i32 0, ptr %1, align 4
+ store i8 0, ptr @bar, align 1
+ ; EMUTLS_CHECK: adrp x0, __emutls_v.foo
+ ; EMUTLS_CHECK-NEXT: add x0, x0, :lo12:__emutls_v.foo
+ ret i32 0
+}
diff --git a/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll b/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll
index 728cffeba02a..b2ebf1fc0411 100644
--- a/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll
+++ b/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll
@@ -10,7 +10,7 @@
!llvm.module.flags = !{!0, !1}
!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 268435458}
-!1 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 85}
+!1 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 341}
; ASM: .section .note.gnu.property,"a",@note
; ASM-NEXT: .p2align 3, 0x0
@@ -22,12 +22,12 @@
; ASM-NEXT: .word 3221225473
; ASM-NEXT: .word 16
; ASM-NEXT: .xword 268435458
-; ASM-NEXT: .xword 85
+; ASM-NEXT: .xword 341
; OBJ: Displaying notes found in: .note.gnu.property
; OBJ-NEXT: Owner Data size Description
; OBJ-NEXT: GNU 0x00000018 NT_GNU_PROPERTY_TYPE_0 (property note)
-; OBJ-NEXT: AArch64 PAuth ABI core info: platform 0x10000002 (llvm_linux), version 0x55 (PointerAuthIntrinsics, !PointerAuthCalls, PointerAuthReturns, !PointerAuthAuthTraps, PointerAuthVTPtrAddressDiscrimination, !PointerAuthVTPtrTypeDiscrimination, PointerAuthInitFini)
+; OBJ-NEXT: AArch64 PAuth ABI core info: platform 0x10000002 (llvm_linux), version 0x155 (PointerAuthIntrinsics, !PointerAuthCalls, PointerAuthReturns, !PointerAuthAuthTraps, PointerAuthVTPtrAddressDiscrimination, !PointerAuthVTPtrTypeDiscrimination, PointerAuthInitFini, !PointerAuthInitFiniAddressDiscrimination, PointerAuthELFGOT)
; ERR: either both or no 'aarch64-elf-pauthabi-platform' and 'aarch64-elf-pauthabi-version' module flags must be present
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-basic-pic.ll b/llvm/test/CodeGen/AArch64/ptrauth-basic-pic.ll
new file mode 100644
index 000000000000..de6901f10761
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-basic-pic.ll
@@ -0,0 +1,82 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -global-isel=0 -fast-isel=0 -verify-machineinstrs \
+; RUN: -relocation-model=pic -mattr=+pauth %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -global-isel=0 -fast-isel=1 -verify-machineinstrs \
+; RUN: -relocation-model=pic -mattr=+pauth %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -global-isel=1 -global-isel-abort=1 -verify-machineinstrs \
+; RUN: -relocation-model=pic -mattr=+pauth %s -o - | FileCheck %s
+
+;; Note: for FastISel, we fall back to SelectionDAG
+
+@var = global i32 0
+
+define i32 @get_globalvar() {
+; CHECK-LABEL: get_globalvar:
+; CHECK: adrp x[[GOT:[0-9]+]], :got_auth:var
+; CHECK-NEXT: add x[[GOT]], x[[GOT]], :got_auth_lo12:var
+; CHECK-NEXT: ldr x[[SYM:[0-9]+]], [x[[GOT]]]
+; CHECK-NEXT: autda x[[SYM]], x[[GOT]]
+; CHECK-NEXT: ldr w0, [x[[SYM]]]
+
+ %val = load i32, ptr @var
+ ret i32 %val
+}
+
+define ptr @get_globalvaraddr() {
+; CHECK-LABEL: get_globalvaraddr:
+; CHECK: adrp x[[GOT:[0-9]+]], :got_auth:var
+; CHECK-NEXT: add x[[GOT]], x[[GOT]], :got_auth_lo12:var
+; CHECK-NEXT: ldr x0, [x[[GOT]]]
+; CHECK-NEXT: autda x0, x[[GOT]]
+
+ %val = load i32, ptr @var
+ ret ptr @var
+}
+
+declare i32 @foo()
+
+define ptr @resign_globalfunc() {
+; CHECK-LABEL: resign_globalfunc:
+; CHECK: adrp x17, :got_auth:foo
+; CHECK-NEXT: add x17, x17, :got_auth_lo12:foo
+; CHECK-NEXT: ldr x16, [x17]
+; CHECK-NEXT: autia x16, x17
+; CHECK-NEXT: mov x17, #42
+; CHECK-NEXT: pacia x16, x17
+; CHECK-NEXT: mov x0, x16
+; CHECK-NEXT: ret
+
+ ret ptr ptrauth (ptr @foo, i32 0, i64 42)
+}
+
+define ptr @resign_globalvar() {
+; CHECK-LABEL: resign_globalvar:
+; CHECK: adrp x17, :got_auth:var
+; CHECK-NEXT: add x17, x17, :got_auth_lo12:var
+; CHECK-NEXT: ldr x16, [x17]
+; CHECK-NEXT: autda x16, x17
+; CHECK-NEXT: mov x17, #43
+; CHECK-NEXT: pacdb x16, x17
+; CHECK-NEXT: mov x0, x16
+; CHECK-NEXT: ret
+
+ ret ptr ptrauth (ptr @var, i32 3, i64 43)
+}
+
+define ptr @resign_globalvar_offset() {
+; CHECK-LABEL: resign_globalvar_offset:
+; CHECK: adrp x17, :got_auth:var
+; CHECK-NEXT: add x17, x17, :got_auth_lo12:var
+; CHECK-NEXT: ldr x16, [x17]
+; CHECK-NEXT: autda x16, x17
+; CHECK-NEXT: add x16, x16, #16
+; CHECK-NEXT: mov x17, #44
+; CHECK-NEXT: pacda x16, x17
+; CHECK-NEXT: mov x0, x16
+; CHECK-NEXT: ret
+
+ ret ptr ptrauth (ptr getelementptr (i8, ptr @var, i64 16), i32 2, i64 44)
+}
+
+!llvm.module.flags = !{!0, !1}
+!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 268435458}
+!1 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 256}
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-elf-globals-pic.ll b/llvm/test/CodeGen/AArch64/ptrauth-elf-globals-pic.ll
new file mode 100644
index 000000000000..2b7d8637b432
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-elf-globals-pic.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=arm64 -global-isel=0 -fast-isel=0 -relocation-model=pic -o - %s -mcpu=cyclone -mattr=+pauth | FileCheck %s
+; RUN: llc -mtriple=arm64 -global-isel=0 -fast-isel=1 -relocation-model=pic -o - %s -mcpu=cyclone -mattr=+pauth | FileCheck %s
+; RUN: llc -mtriple=arm64 -global-isel=1 -global-isel-abort=1 -relocation-model=pic -o - %s -mcpu=cyclone -mattr=+pauth | FileCheck %s
+
+;; Note: for FastISel, we fall back to SelectionDAG
+
+@var8 = external global i8, align 1
+
+define i8 @test_i8(i8 %new) {
+ %val = load i8, ptr @var8, align 1
+ store i8 %new, ptr @var8
+ ret i8 %val
+
+; CHECK: adrp x[[HIREG:[0-9]+]], :got_auth:var8
+; CHECK-NEXT: add x[[HIREG]], x[[HIREG]], :got_auth_lo12:var8
+; CHECK-NEXT: ldr x[[VAR_ADDR:[0-9]+]], [x[[HIREG]]]
+; CHECK-NEXT: autda x[[VAR_ADDR]], x[[HIREG]]
+; CHECK-NEXT: ldrb {{w[0-9]+}}, [x[[VAR_ADDR]]]
+}
+
+!llvm.module.flags = !{!0, !1}
+!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 268435458}
+!1 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 256}
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-extern-weak.ll b/llvm/test/CodeGen/AArch64/ptrauth-extern-weak.ll
new file mode 100644
index 000000000000..88b611141c04
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-extern-weak.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel=0 -fast-isel=0 -relocation-model=pic -mattr=+pauth -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel=0 -fast-isel=1 -relocation-model=pic -mattr=+pauth -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel=1 -global-isel-abort=1 -relocation-model=pic -mattr=+pauth -o - %s | FileCheck %s
+
+;; Note: for FastISel, we fall back to SelectionDAG
+
+declare extern_weak dso_local i32 @var()
+
+define ptr @foo() {
+; The usual ADRP/ADD pair can't be used for a weak reference because it must
+; evaluate to 0 if the symbol is undefined. We use a GOT entry for PIC
+; otherwise a litpool entry.
+ ret ptr @var
+
+; CHECK: adrp x[[ADDRHI:[0-9]+]], :got_auth:var
+; CHECK-NEXT: add x[[ADDRHI]], x[[ADDRHI]], :got_auth_lo12:var
+; CHECK-NEXT: ldr x0, [x[[ADDRHI]]]
+; CHECK-NEXT: autia x0, x[[ADDRHI]]
+}
+
+@arr_var = extern_weak global [10 x i32]
+
+define ptr @bar() {
+ %addr = getelementptr [10 x i32], ptr @arr_var, i32 0, i32 5
+
+; CHECK: adrp x[[ADDRHI:[0-9]+]], :got_auth:arr_var
+; CHECK-NEXT: add x[[ADDRHI]], x[[ADDRHI]], :got_auth_lo12:arr_var
+; CHECK-NEXT: ldr [[BASE:x[0-9]+]], [x[[ADDRHI]]]
+; CHECK-NEXT: autda [[BASE]], x[[ADDRHI]]
+; CHECK-NEXT: add x0, [[BASE]], #20
+ ret ptr %addr
+}
+
+!llvm.module.flags = !{!0, !1}
+!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 268435458}
+!1 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 256}
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-got-abuse.ll b/llvm/test/CodeGen/AArch64/ptrauth-got-abuse.ll
new file mode 100644
index 000000000000..c1580534b62c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-got-abuse.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -asm-verbose=false -global-isel=0 -fast-isel=0 -relocation-model=pic -mattr=+pauth -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -asm-verbose=false -global-isel=0 -fast-isel=1 -relocation-model=pic -mattr=+pauth -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -asm-verbose=false -global-isel=1 -global-isel-abort=1 -relocation-model=pic -mattr=+pauth -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -asm-verbose=false -global-isel=0 -fast-isel=0 -relocation-model=pic -filetype=obj -mattr=+pauth -o /dev/null %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -asm-verbose=false -global-isel=0 -fast-isel=1 -relocation-model=pic -filetype=obj -mattr=+pauth -o /dev/null %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -asm-verbose=false -global-isel=1 -global-isel-abort=1 -relocation-model=pic -filetype=obj -mattr=+pauth -o /dev/null %s
+
+;; Note: for FastISel, we fall back to SelectionDAG
+
+declare void @consume(i32)
+declare void @func()
+
+define void @aliasee_func() {
+ ret void
+}
+@alias_func = alias void (), ptr @aliasee_func
+
+@aliasee_global = global i32 42
+@alias_global = alias i32, ptr @aliasee_global
+
+define void @foo() nounwind {
+; CHECK-LABEL: foo:
+entry:
+ call void @consume(i32 ptrtoint (ptr @func to i32))
+; CHECK: adrp x[[ADDRHI:[0-9]+]], :got_auth:func
+; CHECK-NEXT: add x[[ADDRHI]], x[[ADDRHI]], :got_auth_lo12:func
+; CHECK-NEXT: ldr x[[SYM:[0-9]+]], [x[[ADDRHI]]]
+; CHECK-NEXT: autia x[[SYM:[0-9]+]], x[[ADDRHI]]
+ call void @consume(i32 ptrtoint (ptr @alias_func to i32))
+; CHECK: adrp x[[ADDRHI:[0-9]+]], :got_auth:alias_func
+; CHECK-NEXT: add x[[ADDRHI]], x[[ADDRHI]], :got_auth_lo12:alias_func
+; CHECK-NEXT: ldr x[[SYM:[0-9]+]], [x[[ADDRHI]]]
+; CHECK-NEXT: autia x[[SYM:[0-9]+]], x[[ADDRHI]]
+ call void @consume(i32 ptrtoint (ptr @alias_global to i32))
+; CHECK: adrp x[[ADDRHI:[0-9]+]], :got_auth:alias_global
+; CHECK-NEXT: add x[[ADDRHI]], x[[ADDRHI]], :got_auth_lo12:alias_global
+; CHECK-NEXT: ldr x[[SYM:[0-9]+]], [x[[ADDRHI]]]
+; CHECK-NEXT: autda x[[SYM:[0-9]+]], x[[ADDRHI]]
+ ret void
+}
+
+!llvm.module.flags = !{!0, !1}
+!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 268435458}
+!1 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 256}
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-init-fini.ll b/llvm/test/CodeGen/AArch64/ptrauth-init-fini.ll
new file mode 100644
index 000000000000..186a31c63ba1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-init-fini.ll
@@ -0,0 +1,104 @@
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;--- nodisc.ll
+
+; RUN: llc -mtriple aarch64-elf -mattr=+pauth -filetype=asm -o - nodisc.ll | \
+; RUN: FileCheck %s --check-prefix=ASM
+; RUN: llc -mtriple aarch64-elf -mattr=+pauth -filetype=obj -o - nodisc.ll | \
+; RUN: llvm-readelf -r -x .init_array -x .fini_array - | FileCheck %s --check-prefix=OBJ
+
+; ASM: .section .init_array,"aw",@init_array
+; ASM-NEXT: .p2align 3, 0x0
+; ASM-NEXT: .xword foo@AUTH(ia,55764)
+; ASM-NEXT: .section .fini_array,"aw",@fini_array
+; ASM-NEXT: .p2align 3, 0x0
+; ASM-NEXT: .xword bar@AUTH(ia,55764)
+
+; OBJ: Relocation section '.rela.init_array' at offset 0x[[#]] contains 1 entries:
+; OBJ-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend
+; OBJ-NEXT: 0000000000000000 0000000700000244 R_AARCH64_AUTH_ABS64 0000000000000000 foo + 0
+; OBJ: Relocation section '.rela.fini_array' at offset 0x[[#]] contains 1 entries:
+; OBJ-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend
+; OBJ-NEXT: 0000000000000000 0000000800000244 R_AARCH64_AUTH_ABS64 0000000000000004 bar + 0
+; OBJ: Hex dump of section '.init_array':
+; OBJ-NEXT: 0x00000000 00000000 d4d90000
+; OBJ: Hex dump of section '.fini_array':
+; OBJ-NEXT: 0x00000000 00000000 d4d90000
+;; ^^^^ 0xD9D4: constant discriminator = 55764
+;; ^^ 0x80: bits 61..60 key = IA; bit 63 addr disc = false
+
+@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr ptrauth (ptr @foo, i32 0, i64 55764), ptr null }]
+@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr ptrauth (ptr @bar, i32 0, i64 55764), ptr null }]
+
+define void @foo() {
+ ret void
+}
+
+define void @bar() {
+ ret void
+}
+
+;--- disc.ll
+
+; RUN: llc -mtriple aarch64-elf -mattr=+pauth -filetype=asm -o - disc.ll | \
+; RUN: FileCheck %s --check-prefix=ASM-DISC
+; RUN: llc -mtriple aarch64-elf -mattr=+pauth -filetype=obj -o - disc.ll | \
+; RUN: llvm-readelf -r -x .init_array -x .fini_array - | FileCheck %s --check-prefix=OBJ-DISC
+
+; ASM-DISC: .section .init_array,"aw",@init_array
+; ASM-DISC-NEXT: .p2align 3, 0x0
+; ASM-DISC-NEXT: .xword foo@AUTH(ia,55764,addr)
+; ASM-DISC-NEXT: .section .fini_array,"aw",@fini_array
+; ASM-DISC-NEXT: .p2align 3, 0x0
+; ASM-DISC-NEXT: .xword bar@AUTH(ia,55764,addr)
+
+; OBJ-DISC: Relocation section '.rela.init_array' at offset 0x[[#]] contains 1 entries:
+; OBJ-DISC-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend
+; OBJ-DISC-NEXT: 0000000000000000 0000000700000244 R_AARCH64_AUTH_ABS64 0000000000000000 foo + 0
+; OBJ-DISC: Relocation section '.rela.fini_array' at offset 0x[[#]] contains 1 entries:
+; OBJ-DISC-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend
+; OBJ-DISC-NEXT: 0000000000000000 0000000800000244 R_AARCH64_AUTH_ABS64 0000000000000004 bar + 0
+; OBJ-DISC: Hex dump of section '.init_array':
+; OBJ-DISC-NEXT: 0x00000000 00000000 d4d90080
+; OBJ-DISC: Hex dump of section '.fini_array':
+; OBJ-DISC-NEXT: 0x00000000 00000000 d4d90080
+;; ^^^^ 0xD9D4: constant discriminator = 55764
+;; ^^ 0x80: bits 61..60 key = IA; bit 63 addr disc = true
+
+@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr ptrauth (ptr @foo, i32 0, i64 55764, ptr inttoptr (i64 1 to ptr)), ptr null }]
+@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr ptrauth (ptr @bar, i32 0, i64 55764, ptr inttoptr (i64 1 to ptr)), ptr null }]
+
+define void @foo() {
+ ret void
+}
+
+define void @bar() {
+ ret void
+}
+
+;--- err1.ll
+
+; RUN: not --crash llc -mtriple aarch64-elf -mattr=+pauth -filetype=asm -o - err1.ll 2>&1 | \
+; RUN: FileCheck %s --check-prefix=ERR1
+
+; ERR1: LLVM ERROR: unexpected address discrimination value for ctors/dtors entry, only 'ptr inttoptr (i64 1 to ptr)' is allowed
+
+@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr ptrauth (ptr @foo, i32 0, i64 55764, ptr inttoptr (i64 2 to ptr)), ptr null }]
+
+define void @foo() {
+ ret void
+}
+
+;--- err2.ll
+
+; RUN: not --crash llc -mtriple aarch64-elf -mattr=+pauth -filetype=asm -o - err2.ll 2>&1 | \
+; RUN: FileCheck %s --check-prefix=ERR2
+
+; ERR2: LLVM ERROR: unexpected address discrimination value for ctors/dtors entry, only 'ptr inttoptr (i64 1 to ptr)' is allowed
+
+@g = external global ptr
+@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr ptrauth (ptr @bar, i32 0, i64 55764, ptr @g), ptr null }]
+
+define void @bar() {
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-tagged-globals-pic.ll b/llvm/test/CodeGen/AArch64/ptrauth-tagged-globals-pic.ll
new file mode 100644
index 000000000000..c9a6722505bd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ptrauth-tagged-globals-pic.ll
@@ -0,0 +1,66 @@
+; RUN: llc --relocation-model=pic -mattr=+pauth < %s | FileCheck %s --check-prefixes=CHECK,GISEL
+
+; RUN: llc -global-isel=0 -fast-isel=0 -O0 --relocation-model=pic < %s -mattr=+pauth | FileCheck %s --check-prefixes=CHECK,DAGISEL
+; RUN: llc -global-isel=0 -fast-isel=1 -O0 --relocation-model=pic < %s -mattr=+pauth | FileCheck %s --check-prefixes=CHECK,DAGISEL
+; RUN: llc -global-isel=1 -global-isel-abort=1 -O0 --relocation-model=pic < %s -mattr=+pauth | FileCheck %s --check-prefixes=CHECK,GISEL
+
+;; Note: for FastISel, we fall back to SelectionDAG
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-android"
+
+@global = external global i32
+declare void @func()
+
+define ptr @global_addr() #0 {
+ ; CHECK-LABEL: global_addr:
+ ; CHECK: adrp [[REG:x[0-9]+]], :got_auth:global
+ ; CHECK-NEXT: add [[REG]], [[REG]], :got_auth_lo12:global
+ ; CHECK-NEXT: ldr x0, [[[REG]]]
+ ; CHECK-NEXT: autda x0, [[REG]]
+ ; CHECK-NEXT: ret
+
+ ret ptr @global
+}
+
+define i32 @global_load() #0 {
+ ; CHECK-LABEL: global_load:
+ ; CHECK: adrp [[REG0:x[0-9]+]], :got_auth:global
+ ; CHECK-NEXT: add [[REG0]], [[REG0]], :got_auth_lo12:global
+ ; CHECK-NEXT: ldr [[REG1:x[0-9]+]], [[[REG0]]]
+ ; CHECK-NEXT: autda [[REG1]], [[REG0]]
+ ; CHECK-NEXT: ldr w0, [[[REG1]]]
+ ; CHECK-NEXT: ret
+ %load = load i32, ptr @global
+ ret i32 %load
+}
+
+define void @global_store() #0 {
+ ; CHECK-LABEL: global_store:
+ ; CHECK: adrp [[REG0:x[0-9]+]], :got_auth:global
+ ; CHECK-NEXT: add [[REG0]], [[REG0]], :got_auth_lo12:global
+ ; CHECK-NEXT: ldr [[REG1:x[0-9]+]], [[[REG0]]]
+ ; CHECK-NEXT: autda [[REG1]], [[REG0]]
+ ; GISEL-NEXT: str wzr, [[[REG1]]]
+ ; DAGISEL-NEXT: mov w8, wzr
+ ; DAGISEL-NEXT: str w8, [[[REG1]]]
+ ; CHECK-NEXT: ret
+ store i32 0, ptr @global
+ ret void
+}
+
+define ptr @func_addr() #0 {
+ ; CHECK-LABEL: func_addr:
+ ; CHECK: adrp [[REG:x[0-9]+]], :got_auth:func
+ ; CHECK-NEXT: add [[REG]], [[REG]], :got_auth_lo12:func
+ ; CHECK-NEXT: ldr x0, [[[REG]]]
+ ; CHECK-NEXT: autia x0, [[REG]]
+ ; CHECK-NEXT: ret
+ ret ptr @func
+}
+
+attributes #0 = { "target-features"="+tagged-globals" }
+
+!llvm.module.flags = !{!0, !1}
+!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 268435458}
+!1 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 256}
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
index 6e2c48f88e02..9d865b1e7447 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+b16b16 -force-streaming -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve-b16b16 -force-streaming -verify-machineinstrs < %s | FileCheck %s
; SMAX (Single, x2)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
index d37984596f84..575bcbc919b8 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+b16b16 -force-streaming -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+sve-b16b16 -force-streaming -verify-machineinstrs < %s | FileCheck %s
; SMIN (Single, x2)
diff --git a/llvm/test/CodeGen/AArch64/sms-order-physreg-deps.mir b/llvm/test/CodeGen/AArch64/sms-order-physreg-deps.mir
new file mode 100644
index 000000000000..4d8067e16b96
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-order-physreg-deps.mir
@@ -0,0 +1,452 @@
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -mcpu=a64fx -aarch64-enable-pipeliner -pipeliner-max-mii=100 -pipeliner-enable-copytophi=0 -debug-only=pipeliner -run-pass=pipeliner -treat-scalable-fixed-error-as-warning 2>&1 | FileCheck %s
+
+# REQUIRES: asserts
+
+# Verify that the order of the instructions is correct if they are scheduled in
+# the same cycle and they have physical register dependencies.
+
+# CHECK: Schedule Found? 1
+# CHECK: cycle {{[0-9]+}} (0) {{.*}} SUBS{{.*}} implicit-def $nzcv
+# CHECK-NOT: cycle {{[0-9]+}} (0) {{.*}} implicit-def {{.*}} $nzcv
+
+--- |
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+
+ declare void @free(ptr allocptr nocapture noundef) local_unnamed_addr #0
+
+ define dso_local noundef i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #1 {
+ entry:
+ %ret.i.i55 = alloca ptr, align 8
+ %ret.i.i = alloca ptr, align 8
+ %0 = load ptr, ptr %ret.i.i, align 8
+ br label %vector.ph
+
+ vector.ph: ; preds = %for.inc20.i, %entry
+ %lsr.iv1 = phi i64 [ %lsr.iv.next2, %for.inc20.i ], [ 0, %entry ]
+ %indvars.iv45.i = phi i64 [ 0, %entry ], [ %indvars.iv.next46.i, %for.inc20.i ]
+ %broadcast.splatinsert = insertelement <vscale x 4 x i64> poison, i64 %indvars.iv45.i, i64 0
+ %broadcast.splat = shufflevector <vscale x 4 x i64> %broadcast.splatinsert, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ br label %vector.body
+
+ vector.body: ; preds = %vector.body, %vector.ph
+ %lsr.iv3 = phi i64 [ %lsr.iv.next4, %vector.body ], [ %lsr.iv1, %vector.ph ]
+ %lsr.iv = phi i64 [ %lsr.iv.next, %vector.body ], [ 2800, %vector.ph ]
+ %vec.ind = phi <vscale x 4 x i64> [ zeroinitializer, %vector.ph ], [ %vec.ind.next.6, %vector.body ]
+ %1 = mul nuw nsw <vscale x 4 x i64> %vec.ind, %broadcast.splat
+ %2 = trunc <vscale x 4 x i64> %1 to <vscale x 4 x i32>
+ %3 = urem <vscale x 4 x i32> %2, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %4 = add nuw nsw <vscale x 4 x i32> %3, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %5 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep16 = getelementptr i8, ptr %0, i64 %5
+ %6 = add nuw nsw <vscale x 4 x i64> %vec.ind, %broadcast.splat
+ %7 = trunc <vscale x 4 x i64> %6 to <vscale x 4 x i32>
+ %8 = urem <vscale x 4 x i32> %7, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %9 = icmp eq <vscale x 4 x i32> %8, zeroinitializer
+ %10 = urem <vscale x 4 x i32> %7, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %11 = icmp eq <vscale x 4 x i32> %10, zeroinitializer
+ %12 = or <vscale x 4 x i1> %9, %11
+ %13 = urem <vscale x 4 x i32> %7, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %14 = icmp eq <vscale x 4 x i32> %13, zeroinitializer
+ %15 = or <vscale x 4 x i1> %14, %12
+ %16 = select <vscale x 4 x i1> %15, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %4
+ store <vscale x 4 x i32> %16, ptr %scevgep16, align 4
+ %vec.ind.next = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 4, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %17 = mul nuw nsw <vscale x 4 x i64> %vec.ind.next, %broadcast.splat
+ %18 = trunc <vscale x 4 x i64> %17 to <vscale x 4 x i32>
+ %19 = urem <vscale x 4 x i32> %18, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %20 = add nuw nsw <vscale x 4 x i32> %19, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %21 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep14 = getelementptr i8, ptr %0, i64 %21
+ %scevgep15 = getelementptr i8, ptr %scevgep14, i64 16
+ %22 = add nuw nsw <vscale x 4 x i64> %vec.ind.next, %broadcast.splat
+ %23 = trunc <vscale x 4 x i64> %22 to <vscale x 4 x i32>
+ %24 = urem <vscale x 4 x i32> %23, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %25 = icmp eq <vscale x 4 x i32> %24, zeroinitializer
+ %26 = urem <vscale x 4 x i32> %23, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %27 = icmp eq <vscale x 4 x i32> %26, zeroinitializer
+ %28 = or <vscale x 4 x i1> %25, %27
+ %29 = urem <vscale x 4 x i32> %23, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %30 = icmp eq <vscale x 4 x i32> %29, zeroinitializer
+ %31 = or <vscale x 4 x i1> %30, %28
+ %32 = select <vscale x 4 x i1> %31, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %20
+ store <vscale x 4 x i32> %32, ptr %scevgep15, align 4
+ %vec.ind.next.1 = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 8, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %33 = mul nuw nsw <vscale x 4 x i64> %vec.ind.next.1, %broadcast.splat
+ %34 = trunc <vscale x 4 x i64> %33 to <vscale x 4 x i32>
+ %35 = urem <vscale x 4 x i32> %34, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %36 = add nuw nsw <vscale x 4 x i32> %35, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %37 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep12 = getelementptr i8, ptr %0, i64 %37
+ %scevgep13 = getelementptr i8, ptr %scevgep12, i64 32
+ %38 = add nuw nsw <vscale x 4 x i64> %vec.ind.next.1, %broadcast.splat
+ %39 = trunc <vscale x 4 x i64> %38 to <vscale x 4 x i32>
+ %40 = urem <vscale x 4 x i32> %39, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %41 = icmp eq <vscale x 4 x i32> %40, zeroinitializer
+ %42 = urem <vscale x 4 x i32> %39, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %43 = icmp eq <vscale x 4 x i32> %42, zeroinitializer
+ %44 = or <vscale x 4 x i1> %41, %43
+ %45 = urem <vscale x 4 x i32> %39, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %46 = icmp eq <vscale x 4 x i32> %45, zeroinitializer
+ %47 = or <vscale x 4 x i1> %46, %44
+ %48 = select <vscale x 4 x i1> %47, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %36
+ store <vscale x 4 x i32> %48, ptr %scevgep13, align 4
+ %vec.ind.next.2 = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 12, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %49 = mul nuw nsw <vscale x 4 x i64> %vec.ind.next.2, %broadcast.splat
+ %50 = trunc <vscale x 4 x i64> %49 to <vscale x 4 x i32>
+ %51 = urem <vscale x 4 x i32> %50, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %52 = add nuw nsw <vscale x 4 x i32> %51, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %53 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep10 = getelementptr i8, ptr %0, i64 %53
+ %scevgep11 = getelementptr i8, ptr %scevgep10, i64 48
+ %54 = add nuw nsw <vscale x 4 x i64> %vec.ind.next.2, %broadcast.splat
+ %55 = trunc <vscale x 4 x i64> %54 to <vscale x 4 x i32>
+ %56 = urem <vscale x 4 x i32> %55, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %57 = icmp eq <vscale x 4 x i32> %56, zeroinitializer
+ %58 = urem <vscale x 4 x i32> %55, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %59 = icmp eq <vscale x 4 x i32> %58, zeroinitializer
+ %60 = or <vscale x 4 x i1> %57, %59
+ %61 = urem <vscale x 4 x i32> %55, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %62 = icmp eq <vscale x 4 x i32> %61, zeroinitializer
+ %63 = or <vscale x 4 x i1> %62, %60
+ %64 = select <vscale x 4 x i1> %63, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %52
+ store <vscale x 4 x i32> %64, ptr %scevgep11, align 4
+ %vec.ind.next.3 = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 16, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %65 = mul nuw nsw <vscale x 4 x i64> %vec.ind.next.3, %broadcast.splat
+ %66 = trunc <vscale x 4 x i64> %65 to <vscale x 4 x i32>
+ %67 = urem <vscale x 4 x i32> %66, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %68 = add nuw nsw <vscale x 4 x i32> %67, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %69 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep8 = getelementptr i8, ptr %0, i64 %69
+ %scevgep9 = getelementptr i8, ptr %scevgep8, i64 64
+ %70 = add nuw nsw <vscale x 4 x i64> %vec.ind.next.3, %broadcast.splat
+ %71 = trunc <vscale x 4 x i64> %70 to <vscale x 4 x i32>
+ %72 = urem <vscale x 4 x i32> %71, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %73 = icmp eq <vscale x 4 x i32> %72, zeroinitializer
+ %74 = urem <vscale x 4 x i32> %71, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %75 = icmp eq <vscale x 4 x i32> %74, zeroinitializer
+ %76 = or <vscale x 4 x i1> %73, %75
+ %77 = urem <vscale x 4 x i32> %71, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %78 = icmp eq <vscale x 4 x i32> %77, zeroinitializer
+ %79 = or <vscale x 4 x i1> %78, %76
+ %80 = select <vscale x 4 x i1> %79, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %68
+ store <vscale x 4 x i32> %80, ptr %scevgep9, align 4
+ %vec.ind.next.4 = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 20, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %81 = mul nuw nsw <vscale x 4 x i64> %vec.ind.next.4, %broadcast.splat
+ %82 = trunc <vscale x 4 x i64> %81 to <vscale x 4 x i32>
+ %83 = urem <vscale x 4 x i32> %82, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %84 = add nuw nsw <vscale x 4 x i32> %83, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %85 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep6 = getelementptr i8, ptr %0, i64 %85
+ %scevgep7 = getelementptr i8, ptr %scevgep6, i64 80
+ %86 = add nuw nsw <vscale x 4 x i64> %vec.ind.next.4, %broadcast.splat
+ %87 = trunc <vscale x 4 x i64> %86 to <vscale x 4 x i32>
+ %88 = urem <vscale x 4 x i32> %87, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %89 = icmp eq <vscale x 4 x i32> %88, zeroinitializer
+ %90 = urem <vscale x 4 x i32> %87, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %91 = icmp eq <vscale x 4 x i32> %90, zeroinitializer
+ %92 = or <vscale x 4 x i1> %89, %91
+ %93 = urem <vscale x 4 x i32> %87, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %94 = icmp eq <vscale x 4 x i32> %93, zeroinitializer
+ %95 = or <vscale x 4 x i1> %94, %92
+ %96 = select <vscale x 4 x i1> %95, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %84
+ store <vscale x 4 x i32> %96, ptr %scevgep7, align 4
+ %vec.ind.next.5 = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 24, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %97 = mul nuw nsw <vscale x 4 x i64> %vec.ind.next.5, %broadcast.splat
+ %98 = trunc <vscale x 4 x i64> %97 to <vscale x 4 x i32>
+ %99 = urem <vscale x 4 x i32> %98, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %100 = add nuw nsw <vscale x 4 x i32> %99, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %101 = shl nuw nsw i64 %lsr.iv3, 2
+ %scevgep = getelementptr i8, ptr %0, i64 %101
+ %scevgep5 = getelementptr i8, ptr %scevgep, i64 96
+ %102 = add nuw nsw <vscale x 4 x i64> %vec.ind.next.5, %broadcast.splat
+ %103 = trunc <vscale x 4 x i64> %102 to <vscale x 4 x i32>
+ %104 = urem <vscale x 4 x i32> %103, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 13, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %105 = icmp eq <vscale x 4 x i32> %104, zeroinitializer
+ %106 = urem <vscale x 4 x i32> %103, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %107 = icmp eq <vscale x 4 x i32> %106, zeroinitializer
+ %108 = or <vscale x 4 x i1> %105, %107
+ %109 = urem <vscale x 4 x i32> %103, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 11, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+ %110 = icmp eq <vscale x 4 x i32> %109, zeroinitializer
+ %111 = or <vscale x 4 x i1> %110, %108
+ %112 = select <vscale x 4 x i1> %111, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 999, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> %100
+ store <vscale x 4 x i32> %112, ptr %scevgep5, align 4
+ %vec.ind.next.6 = add <vscale x 4 x i64> %vec.ind, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 28, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+ %lsr.iv.next = add nsw i64 %lsr.iv, -28
+ %lsr.iv.next4 = add nuw nsw i64 %lsr.iv3, 28
+ %113 = icmp eq i64 %lsr.iv.next, 0
+ br i1 %113, label %for.inc20.i, label %vector.body
+
+ for.inc20.i: ; preds = %vector.body
+ %indvars.iv.next46.i = add nuw nsw i64 %indvars.iv45.i, 1
+ %lsr.iv.next2 = add nuw nsw i64 %lsr.iv1, 2800
+ %exitcond48.not.i = icmp eq i64 %indvars.iv.next46.i, 2800
+ br i1 %exitcond48.not.i, label %init_array.exit, label %vector.ph
+
+ init_array.exit: ; preds = %for.inc20.i
+ call void @free(ptr noundef nonnull %0)
+ ret i32 0
+ }
+
+ attributes #0 = { mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="a64fx" "target-features"="+aes,+complxnum,+crc,+fp-armv8,+fullfp16,+lse,+neon,+outline-atomics,+perfmon,+ras,+rdm,+sha2,+sve,+v8.1a,+v8.2a,+v8a,-fmv" }
+ attributes #1 = { nounwind uwtable vscale_range(1,1) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="a64fx" "target-features"="+aes,+complxnum,+crc,+fp-armv8,+fullfp16,+lse,+neon,+outline-atomics,+perfmon,+ras,+rdm,+sha2,+sve,+v8.1a,+v8.2a,+v8a,-fmv" }
+
+...
+---
+name: main
+tracksRegLiveness: true
+stack:
+ - { id: 0, name: ret.i.i55, size: 8, alignment: 8, local-offset: -8 }
+ - { id: 1, name: ret.i.i, size: 8, alignment: 8, local-offset: -16 }
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ %18:gpr64all = COPY $xzr
+ %17:gpr64all = COPY %18
+ %19:gpr64common = LDRXui %stack.1.ret.i.i, 0 :: (dereferenceable load (s64) from %ir.ret.i.i)
+ %0:gpr64common = COPY %19
+ %21:zpr = DUP_ZI_D 0, 0, implicit $vg
+ %23:gpr32 = MOVi32imm 2800
+ %24:ppr_3b = PTRUE_D 31, implicit $vg
+ %28:ppr_3b = PTRUE_S 31, implicit $vg
+ %29:gpr32common = MOVi32imm 613566757
+ %30:zpr = DUP_ZR_S %29
+ %36:zpr = DUP_ZI_S 7, 0, implicit $vg
+ %43:gpr32common = MOVi32imm -991146299
+ %44:zpr = DUP_ZR_S %43
+ %46:gpr32common = MOVi32imm 330382099
+ %47:zpr = DUP_ZR_S %46
+ %49:gpr32common = MOVi32imm -1227133513
+ %50:zpr = DUP_ZR_S %49
+ %52:gpr32common = MOVi32imm 613566756
+ %53:zpr = DUP_ZR_S %52
+ %56:gpr32common = MOVi32imm -1171354717
+ %57:zpr = DUP_ZR_S %56
+ %59:gpr32common = MOVi32imm 390451572
+ %60:zpr = DUP_ZR_S %59
+ %63:gpr32common = MOVi32imm 999
+ %64:zpr = DUP_ZR_S %63
+ %79:gpr64common = MOVi64imm 4
+ %104:gpr64common = MOVi64imm 8
+ %129:gpr64common = MOVi64imm 12
+ %154:gpr64common = MOVi64imm 16
+ %179:gpr64common = MOVi64imm 20
+ %204:gpr64common = MOVi64imm 24
+
+ bb.1.vector.ph:
+ %1:gpr64sp = PHI %17, %bb.0, %14, %bb.3
+ %2:gpr64sp = PHI %17, %bb.0, %13, %bb.3
+ %4:zpr = DUP_ZR_D %2
+ %22:zpr = COPY %21
+ %20:gpr64all = SUBREG_TO_REG 0, %23, %subreg.sub_32
+
+ bb.2.vector.body:
+ successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+
+ %5:gpr64common = PHI %1, %bb.1, %12, %bb.2
+ %6:gpr64sp = PHI %20, %bb.1, %11, %bb.2
+ %7:zpr = PHI %21, %bb.1, %9, %bb.2
+ %8:zpr = PHI %22, %bb.1, %10, %bb.2
+ %25:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %7, %4
+ %26:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %8, %4
+ %27:zpr = UZP1_ZZZ_S killed %25, killed %26
+ %31:zpr = UMULH_ZPZZ_S_UNDEF %28, %27, %30
+ %32:zpr = SUB_ZZZ_S %27, %31
+ %33:zpr = LSR_ZZI_S killed %32, 1
+ %34:zpr = ADD_ZZZ_S killed %33, %31
+ %35:zpr = LSR_ZZI_S killed %34, 2
+ %37:zpr = MLS_ZPZZZ_S_UNDEF %28, %27, killed %35, %36
+ %38:zpr = nuw nsw ADD_ZI_S %37, 1, 0
+ %39:gpr64common = ADDXrs %19, %5, 2
+ %40:zpr = nuw nsw ADD_ZZZ_D %7, %4
+ %41:zpr = nuw nsw ADD_ZZZ_D %8, %4
+ %42:zpr = UZP1_ZZZ_S killed %40, killed %41
+ %45:zpr = MUL_ZPZZ_S_UNDEF %28, %42, %44
+ %48:ppr = CMPHS_PPzZZ_S %28, %47, killed %45, implicit-def dead $nzcv
+ %51:zpr = MUL_ZPZZ_S_UNDEF %28, %42, %50
+ %54:ppr = CMPHS_PPzZZ_S %28, %53, killed %51, implicit-def dead $nzcv
+ %55:ppr = SEL_PPPP %48, %48, killed %54
+ %58:zpr = MUL_ZPZZ_S_UNDEF %28, %42, %57
+ %61:ppr = CMPHS_PPzZZ_S %28, %60, killed %58, implicit-def dead $nzcv
+ %62:ppr = SEL_PPPP %61, %61, killed %55
+ %65:zpr = SEL_ZPZZ_S killed %62, %64, killed %38
+ ST1W killed %65, %28, %0, %5 :: (store (<vscale x 1 x s128>) into %ir.scevgep16, align 4)
+ %67:zpr = ADD_ZI_D %8, 4, 0
+ %68:zpr = ADD_ZI_D %7, 4, 0
+ %69:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %68, %4
+ %70:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %67, %4
+ %71:zpr = UZP1_ZZZ_S killed %69, killed %70
+ %72:zpr = UMULH_ZPZZ_S_UNDEF %28, %71, %30
+ %73:zpr = SUB_ZZZ_S %71, %72
+ %74:zpr = LSR_ZZI_S killed %73, 1
+ %75:zpr = ADD_ZZZ_S killed %74, %72
+ %76:zpr = LSR_ZZI_S killed %75, 2
+ %77:zpr = MLS_ZPZZZ_S_UNDEF %28, %71, killed %76, %36
+ %78:zpr = nuw nsw ADD_ZI_S %77, 1, 0
+ %80:zpr = nuw nsw ADD_ZZZ_D %68, %4
+ %81:zpr = nuw nsw ADD_ZZZ_D %67, %4
+ %82:zpr = UZP1_ZZZ_S killed %80, killed %81
+ %83:zpr = MUL_ZPZZ_S_UNDEF %28, %82, %44
+ %84:ppr = CMPHS_PPzZZ_S %28, %47, killed %83, implicit-def dead $nzcv
+ %85:zpr = MUL_ZPZZ_S_UNDEF %28, %82, %50
+ %86:ppr = CMPHS_PPzZZ_S %28, %53, killed %85, implicit-def dead $nzcv
+ %87:ppr = SEL_PPPP %84, %84, killed %86
+ %88:zpr = MUL_ZPZZ_S_UNDEF %28, %82, %57
+ %89:ppr = CMPHS_PPzZZ_S %28, %60, killed %88, implicit-def dead $nzcv
+ %90:ppr = SEL_PPPP %89, %89, killed %87
+ %91:zpr = SEL_ZPZZ_S killed %90, %64, killed %78
+ ST1W killed %91, %28, %39, %79 :: (store (<vscale x 1 x s128>) into %ir.scevgep15, align 4)
+ %92:zpr = ADD_ZI_D %8, 8, 0
+ %93:zpr = ADD_ZI_D %7, 8, 0
+ %94:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %93, %4
+ %95:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %92, %4
+ %96:zpr = UZP1_ZZZ_S killed %94, killed %95
+ %97:zpr = UMULH_ZPZZ_S_UNDEF %28, %96, %30
+ %98:zpr = SUB_ZZZ_S %96, %97
+ %99:zpr = LSR_ZZI_S killed %98, 1
+ %100:zpr = ADD_ZZZ_S killed %99, %97
+ %101:zpr = LSR_ZZI_S killed %100, 2
+ %102:zpr = MLS_ZPZZZ_S_UNDEF %28, %96, killed %101, %36
+ %103:zpr = nuw nsw ADD_ZI_S %102, 1, 0
+ %105:zpr = nuw nsw ADD_ZZZ_D %93, %4
+ %106:zpr = nuw nsw ADD_ZZZ_D %92, %4
+ %107:zpr = UZP1_ZZZ_S killed %105, killed %106
+ %108:zpr = MUL_ZPZZ_S_UNDEF %28, %107, %44
+ %109:ppr = CMPHS_PPzZZ_S %28, %47, killed %108, implicit-def dead $nzcv
+ %110:zpr = MUL_ZPZZ_S_UNDEF %28, %107, %50
+ %111:ppr = CMPHS_PPzZZ_S %28, %53, killed %110, implicit-def dead $nzcv
+ %112:ppr = SEL_PPPP %109, %109, killed %111
+ %113:zpr = MUL_ZPZZ_S_UNDEF %28, %107, %57
+ %114:ppr = CMPHS_PPzZZ_S %28, %60, killed %113, implicit-def dead $nzcv
+ %115:ppr = SEL_PPPP %114, %114, killed %112
+ %116:zpr = SEL_ZPZZ_S killed %115, %64, killed %103
+ ST1W killed %116, %28, %39, %104 :: (store (<vscale x 1 x s128>) into %ir.scevgep13, align 4)
+ %117:zpr = ADD_ZI_D %8, 12, 0
+ %118:zpr = ADD_ZI_D %7, 12, 0
+ %119:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %118, %4
+ %120:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %117, %4
+ %121:zpr = UZP1_ZZZ_S killed %119, killed %120
+ %122:zpr = UMULH_ZPZZ_S_UNDEF %28, %121, %30
+ %123:zpr = SUB_ZZZ_S %121, %122
+ %124:zpr = LSR_ZZI_S killed %123, 1
+ %125:zpr = ADD_ZZZ_S killed %124, %122
+ %126:zpr = LSR_ZZI_S killed %125, 2
+ %127:zpr = MLS_ZPZZZ_S_UNDEF %28, %121, killed %126, %36
+ %128:zpr = nuw nsw ADD_ZI_S %127, 1, 0
+ %130:zpr = nuw nsw ADD_ZZZ_D %118, %4
+ %131:zpr = nuw nsw ADD_ZZZ_D %117, %4
+ %132:zpr = UZP1_ZZZ_S killed %130, killed %131
+ %133:zpr = MUL_ZPZZ_S_UNDEF %28, %132, %44
+ %134:ppr = CMPHS_PPzZZ_S %28, %47, killed %133, implicit-def dead $nzcv
+ %135:zpr = MUL_ZPZZ_S_UNDEF %28, %132, %50
+ %136:ppr = CMPHS_PPzZZ_S %28, %53, killed %135, implicit-def dead $nzcv
+ %137:ppr = SEL_PPPP %134, %134, killed %136
+ %138:zpr = MUL_ZPZZ_S_UNDEF %28, %132, %57
+ %139:ppr = CMPHS_PPzZZ_S %28, %60, killed %138, implicit-def dead $nzcv
+ %140:ppr = SEL_PPPP %139, %139, killed %137
+ %141:zpr = SEL_ZPZZ_S killed %140, %64, killed %128
+ ST1W killed %141, %28, %39, %129 :: (store (<vscale x 1 x s128>) into %ir.scevgep11, align 4)
+ %142:zpr = ADD_ZI_D %8, 16, 0
+ %143:zpr = ADD_ZI_D %7, 16, 0
+ %144:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %143, %4
+ %145:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %142, %4
+ %146:zpr = UZP1_ZZZ_S killed %144, killed %145
+ %147:zpr = UMULH_ZPZZ_S_UNDEF %28, %146, %30
+ %148:zpr = SUB_ZZZ_S %146, %147
+ %149:zpr = LSR_ZZI_S killed %148, 1
+ %150:zpr = ADD_ZZZ_S killed %149, %147
+ %151:zpr = LSR_ZZI_S killed %150, 2
+ %152:zpr = MLS_ZPZZZ_S_UNDEF %28, %146, killed %151, %36
+ %153:zpr = nuw nsw ADD_ZI_S %152, 1, 0
+ %155:zpr = nuw nsw ADD_ZZZ_D %143, %4
+ %156:zpr = nuw nsw ADD_ZZZ_D %142, %4
+ %157:zpr = UZP1_ZZZ_S killed %155, killed %156
+ %158:zpr = MUL_ZPZZ_S_UNDEF %28, %157, %44
+ %159:ppr = CMPHS_PPzZZ_S %28, %47, killed %158, implicit-def dead $nzcv
+ %160:zpr = MUL_ZPZZ_S_UNDEF %28, %157, %50
+ %161:ppr = CMPHS_PPzZZ_S %28, %53, killed %160, implicit-def dead $nzcv
+ %162:ppr = SEL_PPPP %159, %159, killed %161
+ %163:zpr = MUL_ZPZZ_S_UNDEF %28, %157, %57
+ %164:ppr = CMPHS_PPzZZ_S %28, %60, killed %163, implicit-def dead $nzcv
+ %165:ppr = SEL_PPPP %164, %164, killed %162
+ %166:zpr = SEL_ZPZZ_S killed %165, %64, killed %153
+ ST1W killed %166, %28, %39, %154 :: (store (<vscale x 1 x s128>) into %ir.scevgep9, align 4)
+ %167:zpr = ADD_ZI_D %8, 20, 0
+ %168:zpr = ADD_ZI_D %7, 20, 0
+ %169:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %168, %4
+ %170:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %167, %4
+ %171:zpr = UZP1_ZZZ_S killed %169, killed %170
+ %172:zpr = UMULH_ZPZZ_S_UNDEF %28, %171, %30
+ %173:zpr = SUB_ZZZ_S %171, %172
+ %174:zpr = LSR_ZZI_S killed %173, 1
+ %175:zpr = ADD_ZZZ_S killed %174, %172
+ %176:zpr = LSR_ZZI_S killed %175, 2
+ %177:zpr = MLS_ZPZZZ_S_UNDEF %28, %171, killed %176, %36
+ %178:zpr = nuw nsw ADD_ZI_S %177, 1, 0
+ %180:zpr = nuw nsw ADD_ZZZ_D %168, %4
+ %181:zpr = nuw nsw ADD_ZZZ_D %167, %4
+ %182:zpr = UZP1_ZZZ_S killed %180, killed %181
+ %183:zpr = MUL_ZPZZ_S_UNDEF %28, %182, %44
+ %184:ppr = CMPHS_PPzZZ_S %28, %47, killed %183, implicit-def dead $nzcv
+ %185:zpr = MUL_ZPZZ_S_UNDEF %28, %182, %50
+ %186:ppr = CMPHS_PPzZZ_S %28, %53, killed %185, implicit-def dead $nzcv
+ %187:ppr = SEL_PPPP %184, %184, killed %186
+ %188:zpr = MUL_ZPZZ_S_UNDEF %28, %182, %57
+ %189:ppr = CMPHS_PPzZZ_S %28, %60, killed %188, implicit-def dead $nzcv
+ %190:ppr = SEL_PPPP %189, %189, killed %187
+ %191:zpr = SEL_ZPZZ_S killed %190, %64, killed %178
+ ST1W killed %191, %28, %39, %179 :: (store (<vscale x 1 x s128>) into %ir.scevgep7, align 4)
+ %192:zpr = ADD_ZI_D %8, 24, 0
+ %193:zpr = ADD_ZI_D %7, 24, 0
+ %194:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %193, %4
+ %195:zpr = nuw nsw MUL_ZPZZ_D_UNDEF %24, %192, %4
+ %196:zpr = UZP1_ZZZ_S killed %194, killed %195
+ %197:zpr = UMULH_ZPZZ_S_UNDEF %28, %196, %30
+ %198:zpr = SUB_ZZZ_S %196, %197
+ %199:zpr = LSR_ZZI_S killed %198, 1
+ %200:zpr = ADD_ZZZ_S killed %199, %197
+ %201:zpr = LSR_ZZI_S killed %200, 2
+ %202:zpr = MLS_ZPZZZ_S_UNDEF %28, %196, killed %201, %36
+ %203:zpr = nuw nsw ADD_ZI_S %202, 1, 0
+ %205:zpr = nuw nsw ADD_ZZZ_D %193, %4
+ %206:zpr = nuw nsw ADD_ZZZ_D %192, %4
+ %207:zpr = UZP1_ZZZ_S killed %205, killed %206
+ %208:zpr = MUL_ZPZZ_S_UNDEF %28, %207, %44
+ %209:ppr = CMPHS_PPzZZ_S %28, %47, killed %208, implicit-def dead $nzcv
+ %210:zpr = MUL_ZPZZ_S_UNDEF %28, %207, %50
+ %211:ppr = CMPHS_PPzZZ_S %28, %53, killed %210, implicit-def dead $nzcv
+ %212:ppr = SEL_PPPP %209, %209, killed %211
+ %213:zpr = MUL_ZPZZ_S_UNDEF %28, %207, %57
+ %214:ppr = CMPHS_PPzZZ_S %28, %60, killed %213, implicit-def dead $nzcv
+ %215:ppr = SEL_PPPP %214, %214, killed %212
+ %216:zpr = SEL_ZPZZ_S killed %215, %64, killed %203
+ ST1W killed %216, %28, %39, %204 :: (store (<vscale x 1 x s128>) into %ir.scevgep5, align 4)
+ %9:zpr = ADD_ZI_D %7, 28, 0
+ %10:zpr = ADD_ZI_D %8, 28, 0
+ %217:gpr64 = nsw SUBSXri %6, 28, 0, implicit-def $nzcv
+ %11:gpr64all = COPY %217
+ %218:gpr64sp = nuw nsw ADDXri %5, 28, 0
+ %12:gpr64all = COPY %218
+ Bcc 1, %bb.2, implicit $nzcv
+ B %bb.3
+
+ bb.3.for.inc20.i:
+ successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+
+ %219:gpr64sp = nuw nsw ADDXri %2, 1, 0
+ %13:gpr64all = COPY %219
+ %220:gpr64sp = nuw nsw ADDXri %1, 2800, 0
+ %14:gpr64all = COPY %220
+ dead $xzr = SUBSXri %219, 2800, 0, implicit-def $nzcv
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.4
+
+ bb.4.init_array.exit:
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+ $x0 = COPY %0
+ BL @free, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
+ ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+ %222:gpr32all = COPY $wzr
+ $w0 = COPY %222
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
index c0c0ae5c9d1f..03d40fc61f0c 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
@@ -206,7 +206,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
; CHECK-NEXT: movi v1.4s, #128, lsl #24
; CHECK-NEXT: usra v3.4s, v2.4s, #1
; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
-; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
diff --git a/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
new file mode 100644
index 000000000000..0b6bf3892a0c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
@@ -0,0 +1,152 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-stack-hazard-remark-size=64 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-stack-hazard-size=1024 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-PADDING
+
+; Don't emit remarks for non-streaming functions.
+define float @csr_x20_stackargs_notsc(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) {
+; CHECK-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_notsc':
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_notsc':
+entry:
+ tail call void asm sideeffect "", "~{x20}"() #1
+ ret float %i
+}
+
+; Don't emit remarks for functions that only access GPR stack objects.
+define i64 @stackargs_gpr(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h, i64 %i) #2 {
+; CHECK-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_gpr':
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_gpr':
+entry:
+ ret i64 %i
+}
+
+; Don't emit remarks for functions that only access FPR stack objects.
+define double @stackargs_fpr(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) #2 {
+; CHECK-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_fpr':
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_fpr':
+entry:
+ ret double %i
+}
+
+; As this case is handled by addition of stack hazard padding, only emit remarks when this is not switched on.
+define i32 @csr_d8_alloci64(i64 %d) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_d8_alloci64': FPR stack object at [SP-16] is too close to GPR stack object at [SP-8]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_d8_alloci64':
+entry:
+ %a = alloca i64
+ tail call void asm sideeffect "", "~{d8}"() #1
+ store i64 %d, ptr %a
+ ret i32 0
+}
+
+; As this case is handled by addition of stack hazard padding, only emit remarks when this is not switched on.
+define i32 @csr_d8_allocnxv4i32(i64 %d) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32': FPR stack object at [SP-16] is too close to GPR stack object at [SP-8]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32':
+entry:
+ %a = alloca <vscale x 4 x i32>
+ tail call void asm sideeffect "", "~{d8}"() #1
+ store <vscale x 4 x i32> zeroinitializer, ptr %a
+ ret i32 0
+}
+
+define float @csr_x20_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs': GPR stack object at [SP-16] is too close to FPR stack object at [SP+0]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs': GPR stack object at [SP-16] is too close to FPR stack object at [SP+0]
+entry:
+ tail call void asm sideeffect "", "~{x20}"() #1
+ ret float %i
+}
+
+; In this case, addition of stack hazard padding triggers x29 (fp) spill, so we hazard occurs between FPR argument and GPR spill.
+define float @csr_d8_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) #2 {
+; CHECK-NOT: remark: <unknown>:0:0: stack hazard in 'csr_d8_stackargs':
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'csr_d8_stackargs': GPR stack object at [SP-8] is too close to FPR stack object at [SP+0]
+entry:
+ tail call void asm sideeffect "", "~{d8}"() #1
+ ret float %i
+}
+
+; SVE calling conventions
+; Predicate register spills end up in FP region, currently.
+
+define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-48-258 * vscale] is too close to FPR stack object at [SP-48-256 * vscale]
+; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_call':
+entry:
+ tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
+ %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37)
+ ret i32 -396142473
+}
+
+define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-48-258 * vscale] is too close to FPR stack object at [SP-48-256 * vscale]
+; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call':
+entry:
+ tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
+ %0 = alloca [37 x i8], align 16
+ %call = call ptr @memset(ptr noundef nonnull %0, i32 noundef 45, i32 noundef 37)
+ ret i32 -396142473
+}
+declare ptr @memset(ptr, i32, i32)
+
+%struct.mixed_struct = type { i32, float }
+
+define i32 @mixed_stack_object(i32 %a, float %b) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'mixed_stack_object': Mixed stack object at [SP-8] accessed by both GP and FP instructions
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'mixed_stack_object': Mixed stack object at [SP-8] accessed by both GP and FP instructions
+entry:
+ %s = alloca %struct.mixed_struct
+ %s.i = getelementptr %struct.mixed_struct, ptr %s, i32 0, i32 0
+ %s.f = getelementptr %struct.mixed_struct, ptr %s, i32 0, i32 1
+ store i32 %a, ptr %s.i
+ store float %b, ptr %s.f
+ ret i32 %a
+}
+
+define i32 @mixed_stack_objects(i32 %a, float %b) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-16] is too close to Mixed stack object at [SP-8]
+; CHECK: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-16] accessed by both GP and FP instructions
+; CHECK: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-8] accessed by both GP and FP instructions
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-16] is too close to Mixed stack object at [SP-8]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-16] accessed by both GP and FP instructions
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-8] accessed by both GP and FP instructions
+entry:
+ %s0 = alloca %struct.mixed_struct
+ %s0.i = getelementptr %struct.mixed_struct, ptr %s0, i32 0, i32 0
+ %s0.f = getelementptr %struct.mixed_struct, ptr %s0, i32 0, i32 1
+ store i32 %a, ptr %s0.i
+ store float %b, ptr %s0.f
+
+ %s1 = alloca %struct.mixed_struct
+ %s1.i = getelementptr %struct.mixed_struct, ptr %s1, i32 0, i32 0
+ %s1.f = getelementptr %struct.mixed_struct, ptr %s1, i32 0, i32 1
+ store i32 %a, ptr %s1.i
+ store float %b, ptr %s1.f
+
+ ret i32 %a
+}
+
+; VLA-area stack objects are not separated.
+define i32 @csr_d8_allocnxv4i32i32f64_vlai32f64(double %d, i32 %i) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32i32f64_vlai32f64': GPR stack object at [SP-48-16 * vscale] is too close to FPR stack object at [SP-48-16 * vscale]
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32i32f64_vlai32f64': FPR stack object at [SP-32] is too close to GPR stack object at [SP-24]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32i32f64_vlai32f64': GPR stack object at [SP-2096-16 * vscale] is too close to FPR stack object at [SP-2096-16 * vscale]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32i32f64_vlai32f64':
+entry:
+ %a = alloca <vscale x 4 x i32>
+ %0 = zext i32 %i to i64
+ %vla0 = alloca i32, i64 %0
+ %vla1 = alloca double, i64 %0
+ %c = alloca double
+ tail call void asm sideeffect "", "~{d8}"() #1
+ store <vscale x 4 x i32> zeroinitializer, ptr %a
+ store i32 zeroinitializer, ptr %vla0
+ store double %d, ptr %vla1
+ store double %d, ptr %c
+ ret i32 0
+}
+
+attributes #2 = { "aarch64_pstate_sm_compatible" }
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
index 2541910e080e..adbdee0eb084 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
@@ -1505,9 +1505,9 @@ define <vscale x 2 x i64> @sub_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: sub_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: subr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -1520,9 +1520,9 @@ define <vscale x 4 x i32> @sub_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: sub_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: subr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -1535,9 +1535,9 @@ define <vscale x 8 x i16> @sub_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: sub_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: subr z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -1550,9 +1550,9 @@ define <vscale x 16 x i8> @sub_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: sub_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: subr z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -2517,10 +2517,10 @@ define <vscale x 4 x float> @fsub_nxv4f32_y(<vscale x 4 x float> %x, <vscale x 4
; CHECK-LABEL: fsub_nxv4f32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fsub z0.s, z0.s, z1.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: fsubr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -2533,10 +2533,10 @@ define <vscale x 8 x half> @fsub_nxv8f16_y(<vscale x 8 x half> %x, <vscale x 8 x
; CHECK-LABEL: fsub_nxv8f16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fsub z0.h, z0.h, z1.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: fsubr z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -2549,10 +2549,10 @@ define <vscale x 2 x double> @fsub_nxv2f64_y(<vscale x 2 x double> %x, <vscale x
; CHECK-LABEL: fsub_nxv2f64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fsub z0.d, z0.d, z1.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: fsubr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
index bafd5abcc7b2..6607f9c3b368 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
@@ -932,9 +932,9 @@ define <vscale x 2 x i64> @sub_nxv2i64_y(<vscale x 2 x i64> %x, <vscale x 2 x i6
; CHECK-LABEL: sub_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: subr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 2 x i64> %n, zeroinitializer
@@ -947,9 +947,9 @@ define <vscale x 4 x i32> @sub_nxv4i32_y(<vscale x 4 x i32> %x, <vscale x 4 x i3
; CHECK-LABEL: sub_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: subr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 4 x i32> %n, zeroinitializer
@@ -962,9 +962,9 @@ define <vscale x 8 x i16> @sub_nxv8i16_y(<vscale x 8 x i16> %x, <vscale x 8 x i1
; CHECK-LABEL: sub_nxv8i16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: subr z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 8 x i16> %n, zeroinitializer
@@ -977,9 +977,9 @@ define <vscale x 16 x i8> @sub_nxv16i8_y(<vscale x 16 x i8> %x, <vscale x 16 x i
; CHECK-LABEL: sub_nxv16i8_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: subr z1.b, p0/m, z1.b, z0.b
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt <vscale x 16 x i8> %n, zeroinitializer
@@ -1588,10 +1588,10 @@ define <vscale x 4 x float> @fsub_nxv4f32_y(<vscale x 4 x float> %x, <vscale x 4
; CHECK-LABEL: fsub_nxv4f32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fsub z0.s, z0.s, z1.s
; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: fsubr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 4 x float> %n, zeroinitializer
@@ -1604,10 +1604,10 @@ define <vscale x 8 x half> @fsub_nxv8f16_y(<vscale x 8 x half> %x, <vscale x 8 x
; CHECK-LABEL: fsub_nxv8f16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fsub z0.h, z0.h, z1.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: fsubr z1.h, p0/m, z1.h, z0.h
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 8 x half> %n, zeroinitializer
@@ -1620,10 +1620,10 @@ define <vscale x 2 x double> @fsub_nxv2f64_y(<vscale x 2 x double> %x, <vscale x
; CHECK-LABEL: fsub_nxv2f64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fsub z0.d, z0.d, z1.d
; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: fsubr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt <vscale x 2 x double> %n, zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index 431c9dc76508..ec94198a08ca 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -150,8 +150,8 @@ entry:
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40-16 x vscale], Type: Variable, Align: 8, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: VariableSized, Align: 1, Size: 0
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: VariableSized, Align: 1, Size: 0
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-16 x vscale], Type: VariableSized, Align: 1, Size: 0
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-16 x vscale], Type: VariableSized, Align: 1, Size: 0
define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_vla:
diff --git a/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll b/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
index 365fd5345484..d5fda04b9773 100644
--- a/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 < %s | FileCheck %s
; Replace pattern min(max(v1,v2),v3) by clamp
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfadd.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfadd.ll
index 221bb3b6045f..7b921d71cbfb 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfadd.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfadd.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
; RUN: | FileCheck %s
define <vscale x 8 x bfloat> @bfadd_pred(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b){
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
index 7934f831a7e6..baadd08e392d 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sme2 -mattr=+b16b16 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sme2 -mattr=+sve-b16b16 -verify-machineinstrs < %s | FileCheck %s
define <vscale x 8 x bfloat> @bfclamp(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c){
; CHECK-LABEL: bfclamp:
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmax.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmax.ll
index 24c4fedb3426..55ef452b6030 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmax.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
; RUN: | FileCheck %s
define <vscale x 8 x bfloat> @bfmax_pred(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b){
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmaxnm.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmaxnm.ll
index 25fe9cf7243a..9b0f7e039f2e 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmaxnm.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmaxnm.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
; RUN: | FileCheck %s
define <vscale x 8 x bfloat> @bfmaxnm_pred(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b){
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmin.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmin.ll
index d5b0b8be8b85..8c586fd47f5a 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmin.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmin.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
; RUN: | FileCheck %s
define <vscale x 8 x bfloat> @bfmin_pred(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b){
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfminnm.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfminnm.ll
index c019dc7cbe29..90132224e022 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfminnm.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfminnm.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
; RUN: | FileCheck %s
define <vscale x 8 x bfloat> @bfminnm_pred(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b){
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmla.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmla.ll
index 02b1db13ea34..eb7e99f332da 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmla.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmla.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 -verify-machineinstrs < %s | FileCheck %s
define <vscale x 8 x bfloat> @bfmla_m(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c){
; CHECK-LABEL: bfmla_m:
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmla_lane.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmla_lane.ll
index d0e3a82df3ff..ece96b38d786 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmla_lane.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmla_lane.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 -verify-machineinstrs < %s | FileCheck %s
define <vscale x 8 x bfloat> @bfmla_lane_idx1(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c){
; CHECK-LABEL: bfmla_lane_idx1:
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll
index 987fe1fb5822..8ff1afcc9b4a 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 -verify-machineinstrs < %s | FileCheck %s
define <vscale x 8 x bfloat> @bfmls_m(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c){
; CHECK-LABEL: bfmls_m:
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls_lane.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls_lane.ll
index 16b4538ffab9..81406bf08b93 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls_lane.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls_lane.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 -verify-machineinstrs < %s | FileCheck %s
define <vscale x 8 x bfloat> @bfmls_lane_idx1(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c){
; CHECK-LABEL: bfmls_lane_idx1:
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmul.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmul.ll
index a04c5a52139c..8b6a087578ed 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmul.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmul.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
; RUN: | FileCheck %s
define <vscale x 8 x bfloat> @bfmul_pred(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b){
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmul_lane.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmul_lane.ll
index 2962d59e707c..28ae9b0d19e1 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmul_lane.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmul_lane.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 -verify-machineinstrs < %s | FileCheck %s
define <vscale x 8 x bfloat> @bfmul_lane_idx1(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
; CHECK-LABEL: bfmul_lane_idx1:
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfsub.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfsub.ll
index 752b5ae9df63..1b1304312ceb 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfsub.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfsub.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -mattr=+sve-b16b16 -mattr=+use-experimental-zeroing-pseudos -verify-machineinstrs < %s \
; RUN: | FileCheck %s
define <vscale x 8 x bfloat> @bfsub_pred(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b){
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
index b9846a6a555d..b2b433167fe4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
@@ -441,6 +441,43 @@ define amdgpu_vs <4 x float> @test_v4f16_v4f32_add_ext_fma_mul_rhs(<4 x float> %
ret <4 x float> %d
}
+define amdgpu_ps float @test_matching_source_from_unmerge(ptr addrspace(3) %aptr, float %b) {
+; GFX9-DENORM-LABEL: test_matching_source_from_unmerge:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: ds_read_b64 v[2:3], v0
+; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v2, v3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX9-DENORM-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_matching_source_from_unmerge:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: ds_read_b64 v[2:3], v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_fma_mix_f32 v0, v2, v3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX10-CONTRACT-LABEL: test_matching_source_from_unmerge:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: ds_read_b64 v[2:3], v0
+; GFX10-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v0, v2, v3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-CONTRACT-NEXT: ; return to shader part epilog
+;
+; GFX10-DENORM-LABEL: test_matching_source_from_unmerge:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: ds_read_b64 v[2:3], v0
+; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v2, v3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
+; GFX10-DENORM-NEXT: ; return to shader part epilog
+.entry:
+ %a = load <4 x half>, ptr addrspace(3) %aptr, align 16
+ %a_f32 = fpext <4 x half> %a to <4 x float>
+ %.a3_f32 = extractelement <4 x float> %a_f32, i64 3
+ %.a1_f32 = extractelement <4 x float> %a_f32, i64 1
+ %res = call float @llvm.fmuladd.f32(float %.a1_f32, float %.a3_f32, float %b)
+ ret float %res
+}
+
declare float @llvm.fmuladd.f32(float, float, float) #0
declare half @llvm.fmuladd.f16(half, half, half) #0
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
index 9b9249b62b0b..66b88236bbb4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll
@@ -7,10 +7,11 @@ target triple = "amdgcn-amd-amdhsa"
; Make sure flat_scratch_init is set
; GCN-LABEL: {{^}}stack_object_addrspacecast_in_kernel_no_calls:
-; RW-FLAT: s_add_u32 flat_scratch_lo, s4, s7
-; RW-FLAT: s_addc_u32 flat_scratch_hi, s5, 0
+; RW-FLAT: s_add_u32 s0, s0, s7
+; RW-FLAT: s_addc_u32 s1, s1, 0
; RO-FLAT-NOT: flat_scratch
-; GCN: flat_store_dword
+; RW-FLAT: buffer_store_dword
+; RO-FLAT: scratch_store_dword
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir
index 48de4838b78f..30c374ddee57 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir
@@ -1,6 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=amdgpu-regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=amdgpu-regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck --check-prefix=WAVE32 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck --check-prefix=WAVE32 %s
---
name: copy_s32_vgpr_to_vgpr
@@ -201,3 +203,299 @@ body: |
%2:vcc(s1) = COPY %1
S_ENDPGM 0, implicit %2
...
+
+---
+name: wave64_copy_sgpr_64_to_s1
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr4_sgpr5
+ ; CHECK-LABEL: name: wave64_copy_sgpr_64_to_s1
+ ; CHECK: liveins: $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
+ %0:_(s1) = COPY $sgpr4_sgpr5
+ %1:_(s32) = G_ZEXT %0:_(s1)
+...
+
+---
+name: wave32_copy_sgpr_32_to_s1
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; WAVE32-LABEL: name: wave32_copy_sgpr_32_to_s1
+ ; WAVE32: liveins: $sgpr0
+ ; WAVE32-NEXT: {{ $}}
+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr0
+ ; WAVE32-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; WAVE32-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
+ %0:_(s1) = COPY $sgpr0
+ %1:_(s32) = G_ZEXT %0:_(s1)
+...
+
+---
+name: wave64_copy2_sgpr_64_to_s1
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr4_sgpr5, $sgpr6_sgpr7
+ ; CHECK-LABEL: name: wave64_copy2_sgpr_64_to_s1
+ ; CHECK: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]]
+ ; CHECK-NEXT: [[CONST3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[CONST4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[CONST3]], [[CONST4]]
+ %0:_(s1) = COPY $sgpr4_sgpr5
+ %1:_(s1) = COPY $sgpr6_sgpr7
+ %2:_(s32) = G_ZEXT %0:_(s1)
+ %3:_(s32) = G_ZEXT %1:_(s1)
+...
+
+---
+name: wave32_copy2_sgpr_32_to_s1
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; WAVE32-LABEL: name: wave32_copy2_sgpr_32_to_s1
+ ; WAVE32: liveins: $sgpr0, $sgpr1
+ ; WAVE32-NEXT: {{ $}}
+ ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY $sgpr0
+ ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY $sgpr1
+ ; WAVE32-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; WAVE32-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; WAVE32-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]]
+ ; WAVE32-NEXT: [[CONST3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; WAVE32-NEXT: [[CONST4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; WAVE32-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[CONST3]], [[CONST4]]
+ %0:_(s1) = COPY $sgpr0
+ %1:_(s1) = COPY $sgpr1
+ %2:_(s32) = G_ZEXT %0:_(s1)
+ %3:_(s32) = G_ZEXT %1:_(s1)
+...
+
+---
+name: copy_sgpr_64_to_s1_vgpr
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr4_sgpr5
+ ; CHECK-LABEL: name: copy_sgpr_64_to_s1_vgpr
+ ; CHECK: liveins: $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
+ ;
+ ; WAVE32-LABEL: name: copy_sgpr_64_to_s1_vgpr
+ ; WAVE32: liveins: $sgpr4_sgpr5
+ ; WAVE32-NEXT: {{ $}}
+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr4_sgpr5
+ ; WAVE32-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
+ %0:vgpr(s1) = COPY $sgpr4_sgpr5
+ %1:_(s32) = G_ZEXT %0:vgpr(s1)
+...
+
+---
+name: copy_sgpr_32_to_s1_vgpr
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; CHECK-LABEL: name: copy_sgpr_32_to_s1_vgpr
+ ; CHECK: liveins: $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr0
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
+ ;
+ ; WAVE32-LABEL: name: copy_sgpr_32_to_s1_vgpr
+ ; WAVE32: liveins: $sgpr0
+ ; WAVE32-NEXT: {{ $}}
+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr0
+ ; WAVE32-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
+ %0:vgpr(s1) = COPY $sgpr0
+ %1:_(s32) = G_ZEXT %0:vgpr(s1)
+...
+
+---
+name: wave64_copy_sgpr_64_to_s1_vcc
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr4_sgpr5
+ ; CHECK-LABEL: name: wave64_copy_sgpr_64_to_s1_vcc
+ ; CHECK: liveins: $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
+ %0:vcc(s1) = COPY $sgpr4_sgpr5
+ %1:_(s32) = G_ZEXT %0:vcc(s1)
+...
+
+---
+name: wave32_copy_sgpr_32_to_s1_vcc
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; WAVE32-LABEL: name: wave32_copy_sgpr_32_to_s1_vcc
+ ; WAVE32: liveins: $sgpr0
+ ; WAVE32-NEXT: {{ $}}
+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr0
+ ; WAVE32-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; WAVE32-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; WAVE32-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
+ %0:vcc(s1) = COPY $sgpr0
+ %1:_(s32) = G_ZEXT %0:vcc(s1)
+...
+
+---
+name: copy_virt_reg_to_s1
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: copy_virt_reg_to_s1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
+ ;
+ ; WAVE32-LABEL: name: copy_virt_reg_to_s1
+ ; WAVE32: liveins: $vgpr0
+ ; WAVE32-NEXT: {{ $}}
+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+ ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s1) = G_TRUNC %0
+ %2:_(s1) = COPY %1
+...
+
+---
+name: copy_virt_reg_to_s1_vgpr
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: copy_virt_reg_to_s1_vgpr
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s1) = COPY [[COPY2]](s1)
+ ;
+ ; WAVE32-LABEL: name: copy_virt_reg_to_s1_vgpr
+ ; WAVE32: liveins: $vgpr0
+ ; WAVE32-NEXT: {{ $}}
+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+ ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
+ ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vgpr(s1) = COPY [[COPY2]](s1)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s1) = G_TRUNC %0
+ %2:vgpr(s1) = COPY %1
+ %3:_(s1) = COPY %2
+...
+
+
+---
+name: copy_virt_reg_to_s1_vcc
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: copy_virt_reg_to_s1_vcc
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[COPY2]](s1)
+ ;
+ ; WAVE32-LABEL: name: copy_virt_reg_to_s1_vcc
+ ; WAVE32: liveins: $vgpr0
+ ; WAVE32-NEXT: {{ $}}
+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+ ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+ ; WAVE32-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[COPY2]](s1)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s1) = G_TRUNC %0
+ %2:vcc(s1) = COPY %1
+ %3:_(s1) = COPY %2
+...
+
+---
+name: copy_s1_to_sgpr_64
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: copy_s1_to_sgpr_64
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[TRUNC]](s1)
+ ;
+ ; WAVE32-LABEL: name: copy_s1_to_sgpr_64
+ ; WAVE32: liveins: $vgpr0
+ ; WAVE32-NEXT: {{ $}}
+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+ ; WAVE32-NEXT: $sgpr4_sgpr5 = COPY [[TRUNC]](s1)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s1) = G_TRUNC %0
+ $sgpr4_sgpr5 = COPY %1
+...
+
+---
+name: copy_s1_to_sgpr_32
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: copy_s1_to_sgpr_32
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[TRUNC]](s1)
+ ;
+ ; WAVE32-LABEL: name: copy_s1_to_sgpr_32
+ ; WAVE32: liveins: $vgpr0
+ ; WAVE32-NEXT: {{ $}}
+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; WAVE32-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+ ; WAVE32-NEXT: $sgpr0 = COPY [[TRUNC]](s1)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s1) = G_TRUNC %0
+ $sgpr0 = COPY %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index 1315d576a83e..4b1484e9bd95 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -5,22 +5,11 @@ target triple = "amdgcn-amd-amdhsa"
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
-; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
-; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
-; CI-DAG: s_cmp_lg_u32 [[PTR]], -1
-; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
-; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
-
-; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base
-
+; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[6:7], 0x0{{$}}
+; GFX9-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x0{{$}}
+; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
-
-; GFX9: s_cmp_lg_u32 [[PTR]], -1
-; GFX9-DAG: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
-; GFX9-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0
-
-; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
+; HSA-DAG: ds_write_b32 [[PTR]], [[K]]
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
@@ -39,22 +28,8 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr
; Test handling inside a non-kernel
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast_func:
-; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[6:7], 0x10{{$}}
-; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
-; CI-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
-; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
-; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0
-
-; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_shared_base
-
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-
-; GFX9-DAG: v_mov_b32_e32 v[[VREG_HIBASE:[0-9]+]], s[[HIBASE]]
-; GFX9-DAG: v_cmp_ne_u32_e32 vcc, -1, v0
-; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, v0, vcc
-; GFX9-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, v[[VREG_HIBASE]], vcc
-
-; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
+; HSA-DAG: ds_write_b32 v0, [[K]]
define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
%stof = addrspacecast ptr addrspace(3) %ptr to ptr
store volatile i32 7, ptr %stof
@@ -63,23 +38,16 @@ define void @use_group_to_flat_addrspacecast_func(ptr addrspace(3) %ptr) #0 {
; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
-; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
-; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
-
-; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; CI-DAG: s_cmp_lg_u32 [[PTR]], -1
-; CI-DAG: s_cselect_b32 s[[HI:[0-9]+]], [[APERTURE]], 0
-; CI-DAG: s_cselect_b32 s[[LO:[0-9]+]], [[PTR]], 0
-
-; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
-; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HIBASE:[0-9]+]]], src_private_base
-
-; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; GFX9: s_cmp_lg_u32 [[PTR]], -1
-; GFX9: s_cselect_b32 s[[LO:[0-9]+]], s[[HIBASE]], 0
-; GFX9: s_cselect_b32 s[[HI:[0-9]+]], [[PTR]], 0
-
-; HSA: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
+; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[6:7], 0x0{{$}}
+; GFX9-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x0{{$}}
+; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], [[APERTURE]]
+; HSA-DAG: s_mov_b64 s[{{[0-9]+}}:[[RSRCHI:[0-9]+]]], s[2:3]
+; HSA-DAG: s_mov_b64 s[[[BASELO:[0-9]+]]:[[BASEHI:[0-9]+]]], s[0:1]
+; SI-DAG: s_add_u32 s[[BASELO]], s[[BASELO]], s9
+; GFX9-DAG: s_add_u32 s[[BASELO]], s[[BASELO]], s7
+; HSA-DAG: s_addc_u32 s[[BASEHI]], s[[BASEHI]], 0
+; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
+; HSA: buffer_store_dword [[K]], [[PTR]], s[[[BASELO]]:[[RSRCHI]]], 0 offen
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_dispatch_ptr 0
@@ -97,10 +65,12 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %p
; HSA-LABEL: {{^}}use_global_to_flat_addrspacecast:
; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
-; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
-; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
+; CI-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
+; CI-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; HSA: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
+; CI: flat_store_dword v[[[VPTRLO]]:[[VPTRHI]]], [[K]]
+; GFX9-DAG: v_mov_b32_e32 [[ADDR:v[0-9]+]], 0
+; GFX9: global_store_dword [[ADDR]], [[K]], s[[[PTRLO]]:[[PTRHI]]]
; HSA: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #0 {
@@ -112,9 +82,7 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %pt
; no-op
; HSA-LABEL: {{^}}use_constant_to_flat_addrspacecast:
; HSA: s_load_dwordx2 s[[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]]
-; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]]
-; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
-; HSA: flat_load_dword v{{[0-9]+}}, v[[[VPTRLO]]:[[VPTRHI]]]
+; HSA-DAG: s_load_dword s0, s[[[PTRLO]]:[[PTRHI]]], 0x0
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #0 {
%stof = addrspacecast ptr addrspace(4) %ptr to ptr
%ld = load volatile i32, ptr %stof
@@ -215,14 +183,9 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #0 {
}
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
-; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
-; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
-
-; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
-
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
+; HSA: ds_write_b32 v[[LO]], v[[K]]
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
%cast = addrspacecast ptr addrspace(3) null to ptr
store volatile i32 7, ptr %cast
@@ -240,10 +203,9 @@ define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
}
; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast:
-; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
+; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], -1
+; HSA: ds_write_b32 v[[LO]], v[[K]]
define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
%cast = addrspacecast ptr addrspace(3) inttoptr (i32 -1 to ptr addrspace(3)) to ptr
store volatile i32 7, ptr %cast
@@ -262,14 +224,13 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
; FIXME: Shouldn't need to enable queue ptr
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
-; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
-; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
-
-; GFX9-DAG: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_private_base
-
-; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
+; HSA-DAG: s_mov_b64 s[{{[0-9]+}}:[[RSRCHI:[0-9]+]]], s[2:3]
+; HSA-DAG: s_mov_b64 s[[[BASELO:[0-9]+]]:[[BASEHI:[0-9]+]]], s[0:1]
+; CI-DAG: s_add_u32 s[[BASELO]], s[[BASELO]], s7
+; GFX9-DAG: s_add_u32 s[[BASELO]], s[[BASELO]], s5
+; HSA-DAG: s_addc_u32 s[[BASEHI]], s[[BASEHI]], 0
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
+; HSA: buffer_store_dword v[[K]], off, s[[[BASELO]]:[[RSRCHI]]], 0
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
%cast = addrspacecast ptr addrspace(5) null to ptr
store volatile i32 7, ptr %cast
@@ -286,13 +247,16 @@ define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
ret void
}
-
; HSA-LABEL: {{^}}cast_neg1_private_to_flat_addrspacecast:
-; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
+; HSA-DAG: s_mov_b64 s[{{[0-9]+}}:[[RSRCHI:[0-9]+]]], s[2:3]
+; HSA-DAG: s_mov_b64 s[[[BASELO:[0-9]+]]:[[BASEHI:[0-9]+]]], s[0:1]
+; CI-DAG: s_add_u32 s[[BASELO]], s[[BASELO]], s7
+; GFX9-DAG: s_add_u32 s[[BASELO]], s[[BASELO]], s5
+; HSA-DAG: s_addc_u32 s[[BASEHI]], s[[BASEHI]], 0
+; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; HSA: {{flat|global}}_store_dword v[[[LO]]:[[HI]]], v[[K]]
+; HSA: buffer_store_dword v[[K]], [[PTR]], s[[[BASELO]]:[[RSRCHI]]], 0 offen
; CI: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
@@ -342,16 +306,18 @@ end:
; Check for prologue initializing special SGPRs pointing to scratch.
; HSA-LABEL: {{^}}store_flat_scratch:
-; CI-DAG: s_mov_b32 flat_scratch_lo, s9
; CI-DAG: s_add_i32 [[ADD:s[0-9]+]], s8, s11
; CI-DAG: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
-
-; GFX9: s_add_u32 flat_scratch_lo, s6, s9
-; GFX9: s_addc_u32 flat_scratch_hi, s7, 0
-
-; HSA: {{flat|global}}_store_dword
-; HSA: s_barrier
-; HSA: {{flat|global}}_load_dword
+; HSA: buffer_store_dword
+; HSA: s_barrier
+; HSA: buffer_load_dword [[K:v[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen glc
+; HSA-DAG: s_load_dwordx2
+; CI-DAG: s_mov_b32 flat_scratch_lo, s9
+; CI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s4
+; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s5
+; GFX9-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], 0
+; CI: flat_store_dword v[[[LO]]:[[HI]]], [[K]]
+; GFX9: global_store_dword [[PTR]], [[K]]
define amdgpu_kernel void @store_flat_scratch(ptr addrspace(1) noalias %out, i32) #0 {
%alloca = alloca i32, i32 9, align 4, addrspace(5)
%x = call i32 @llvm.amdgcn.workitem.id.x() #2
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index 43cdf85ed381..879bceaef97c 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -425,8 +425,7 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
-; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
-; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(3) [[PTR]], align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(3) %ptr to ptr
@@ -443,8 +442,7 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %p
;
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
; ATTRIBUTOR_HSA-SAME: (ptr addrspace(5) [[PTR:%.*]]) #[[ATTR12]] {
-; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
-; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(5) [[PTR]], align 4
; ATTRIBUTOR_HSA-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
@@ -478,11 +476,16 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #1 {
; No-op addrspacecast should not use queue ptr
define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #1 {
-; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
-; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
-; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
-; HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
-; HSA-NEXT: ret void
+; AKF_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
+; AKF_HSA-NEXT: store volatile i32 0, ptr [[STOF]], align 4
+; AKF_HSA-NEXT: ret void
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) [[PTR]], align 4
+; ATTRIBUTOR_HSA-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(1) %ptr to ptr
store volatile i32 0, ptr %stof
@@ -490,11 +493,16 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %pt
}
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #1 {
-; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
-; HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
-; HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
-; HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4
-; HSA-NEXT: ret void
+; AKF_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
+; AKF_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
+; AKF_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4
+; AKF_HSA-NEXT: ret void
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
+; ATTRIBUTOR_HSA-NEXT: [[LD:%.*]] = load volatile i32, ptr addrspace(4) [[PTR]], align 4
+; ATTRIBUTOR_HSA-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(4) %ptr to ptr
%ld = load volatile i32, ptr %stof
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
index 8ef2d89e76d4..032ec65fa851 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
@@ -38,15 +38,9 @@ define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
}
; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
-; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[4:5], 0x0
-; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
-; CIVI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], 0
+; GCN-DAG: ds_write_b32 v[[LO]], v[[LO]] offset:16
-; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
-; GFX9-DAG: v_mov_b32_e32 v[[VGPR_HI:[0-9]+]], s[[HI]]
-; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[VGPR_HI]]]
-
-; CIVI: {{flat|global}}_store_dword v[[[LO]]:[[HI]]]
define hidden void @use_queue_ptr_addrspacecast() #1 {
%asc = addrspacecast ptr addrspace(3) inttoptr (i32 16 to ptr addrspace(3)) to ptr
store volatile i32 0, ptr %asc
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
index 2cd7b8a6424b..6ec296144bf1 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
@@ -23,10 +23,35 @@ body: |
bb.0:
; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0
; MUBUFW64: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0
+ ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.0, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__inline_imm__fi_offset0_live_vcc
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0_live_vcc
+ ; MUBUFW64: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec
; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
;
- ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0_live_vcc
; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec
; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
@@ -49,35 +74,87 @@ body: |
bb.0:
; GFX7-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm
; GFX7: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 16, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm
+ ; GFX8: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 16, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm
+ ; GFX900: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 16, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm
+ ; GFX90A: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 16, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm
+ ; GFX10: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX10-NEXT: $vgpr1 = V_ADD_U32_e32 16, killed $vgpr1, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm
+ ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 16, implicit-def $scc
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.1, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm_live_vcc
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 16, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX7-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm_live_vcc
+ ; GFX7: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX7-NEXT: $vgpr1, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 16, killed $vgpr1, 0, implicit $exec
; GFX7-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec
; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
;
- ; GFX8-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm
+ ; GFX8-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm_live_vcc
; GFX8: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX8-NEXT: $vgpr1, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 16, killed $vgpr1, 0, implicit $exec
; GFX8-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec
; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
;
- ; GFX900-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm
+ ; GFX900-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm_live_vcc
; GFX900: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 16, killed $vgpr1, implicit $exec
; GFX900-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec
; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
;
- ; GFX90A-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm
+ ; GFX90A-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm_live_vcc
; GFX90A: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 16, killed $vgpr1, implicit $exec
; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
;
- ; GFX10-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm
+ ; GFX10-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm_live_vcc
; GFX10: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX10-NEXT: $vgpr1 = V_ADD_U32_e32 16, killed $vgpr1, implicit $exec
; GFX10-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
;
- ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm___fi_offset_inline_imm_live_vcc
; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 16, implicit-def $scc
; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec
@@ -100,10 +177,35 @@ body: |
bb.0:
; MUBUFW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0
; MUBUFW64: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0
+ ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 68, %stack.0, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__literal__fi_offset0_live_vcc
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0_live_vcc
+ ; MUBUFW64: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def $vcc, implicit $exec
; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
;
- ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0_live_vcc
; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def $vcc, implicit $exec
; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
@@ -126,35 +228,87 @@ body: |
bb.0:
; GFX7-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm
; GFX7: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 32, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm
+ ; GFX8: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 32, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm
+ ; GFX900: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 32, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm
+ ; GFX90A: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 32, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm
+ ; GFX10: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX10-NEXT: $vgpr1 = V_ADD_U32_e32 32, killed $vgpr1, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm
+ ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 68, %stack.1, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm_live_vcc
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; GFX7-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm_live_vcc
+ ; GFX7: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX7-NEXT: $vgpr1, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 32, killed $vgpr1, 0, implicit $exec
; GFX7-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def $vcc, implicit $exec
; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
;
- ; GFX8-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm
+ ; GFX8-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm_live_vcc
; GFX8: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX8-NEXT: $vgpr1, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 32, killed $vgpr1, 0, implicit $exec
; GFX8-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def $vcc, implicit $exec
; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
;
- ; GFX900-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm
+ ; GFX900-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm_live_vcc
; GFX900: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 32, killed $vgpr1, implicit $exec
; GFX900-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def $vcc, implicit $exec
; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
;
- ; GFX90A-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm
+ ; GFX90A-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm_live_vcc
; GFX90A: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 32, killed $vgpr1, implicit $exec
; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
;
- ; GFX10-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm
+ ; GFX10-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm_live_vcc
; GFX10: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX10-NEXT: $vgpr1 = V_ADD_U32_e32 32, killed $vgpr1, implicit $exec
; GFX10-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def $vcc, implicit $exec
; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
;
- ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm_live_vcc
; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc
; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 68, killed $vgpr1, implicit-def $vcc, implicit $exec
@@ -180,17 +334,17 @@ body: |
; MUBUFW64: liveins: $vgpr1
; MUBUFW64-NEXT: {{ $}}
; MUBUFW64-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
- ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def $vcc, implicit $exec
- ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
;
; FLATSCRW64-LABEL: name: v_add_co_u32_e32__vgpr__fi_offset0
; FLATSCRW64: liveins: $vgpr1
; FLATSCRW64-NEXT: {{ $}}
; FLATSCRW64-NEXT: $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec
- ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def $vcc, implicit $exec
- ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
- renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, %stack.0, implicit-def $vcc, implicit $exec
- SI_RETURN implicit $vgpr0, implicit $vcc
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, %stack.0, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
...
@@ -210,16 +364,16 @@ body: |
; MUBUFW64: liveins: $vgpr1
; MUBUFW64-NEXT: {{ $}}
; MUBUFW64-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
- ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr2, $vgpr1, implicit-def $vcc, implicit $exec
- ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr2, $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
;
; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset0__vgpr
; FLATSCRW64: liveins: $vgpr1
; FLATSCRW64-NEXT: {{ $}}
- ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr32, $vgpr1, implicit-def $vcc, implicit $exec
- ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
- renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, $vgpr1, implicit-def $vcc, implicit $exec
- SI_RETURN implicit $vgpr0, implicit $vcc
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr32, $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, $vgpr1, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
...
@@ -240,53 +394,53 @@ body: |
; GFX7: liveins: $vgpr1
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
- ; GFX7-NEXT: $sgpr4 = S_MOV_B32 128
- ; GFX7-NEXT: $vgpr2, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr2, 0, implicit $exec
- ; GFX7-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def $vcc, implicit $exec
- ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX7-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX7-NEXT: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
;
; GFX8-LABEL: name: v_add_co_u32_e32__vgpr__fi_literal_offset
; GFX8: liveins: $vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
- ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
- ; GFX8-NEXT: $vgpr2, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr2, 0, implicit $exec
- ; GFX8-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def $vcc, implicit $exec
- ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX8-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX8-NEXT: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
;
; GFX900-LABEL: name: v_add_co_u32_e32__vgpr__fi_literal_offset
; GFX900: liveins: $vgpr1
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX900-NEXT: $vgpr2 = V_ADD_U32_e32 128, killed $vgpr2, implicit $exec
- ; GFX900-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def $vcc, implicit $exec
- ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX900-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
;
; GFX90A-LABEL: name: v_add_co_u32_e32__vgpr__fi_literal_offset
; GFX90A: liveins: $vgpr1
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX90A-NEXT: $vgpr2 = V_ADD_U32_e32 128, killed $vgpr2, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def $vcc, implicit $exec
- ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
;
; GFX10-LABEL: name: v_add_co_u32_e32__vgpr__fi_literal_offset
; GFX10: liveins: $vgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX10-NEXT: $vgpr2 = V_ADD_U32_e32 128, killed $vgpr2, implicit $exec
- ; GFX10-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def $vcc, implicit $exec
- ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX10-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
;
; FLATSCRW64-LABEL: name: v_add_co_u32_e32__vgpr__fi_literal_offset
; FLATSCRW64: liveins: $vgpr1
; FLATSCRW64-NEXT: {{ $}}
; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
; FLATSCRW64-NEXT: $vgpr2 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
- ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def $vcc, implicit $exec
- ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
- renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, %stack.1, implicit-def $vcc, implicit $exec
- SI_RETURN implicit $vgpr0, implicit $vcc
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, %stack.1, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
...
@@ -307,52 +461,52 @@ body: |
; GFX7: liveins: $vgpr1
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
- ; GFX7-NEXT: $sgpr4 = S_MOV_B32 128
- ; GFX7-NEXT: $vgpr2, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr2, 0, implicit $exec
- ; GFX7-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr2, $vgpr1, implicit-def $vcc, implicit $exec
- ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX7-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX7-NEXT: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr2, $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
;
; GFX8-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr
; GFX8: liveins: $vgpr1
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
- ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
- ; GFX8-NEXT: $vgpr2, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr2, 0, implicit $exec
- ; GFX8-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr2, $vgpr1, implicit-def $vcc, implicit $exec
- ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX8-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX8-NEXT: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr2, $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
;
; GFX900-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr
; GFX900: liveins: $vgpr1
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX900-NEXT: $vgpr2 = V_ADD_U32_e32 128, killed $vgpr2, implicit $exec
- ; GFX900-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr2, $vgpr1, implicit-def $vcc, implicit $exec
- ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX900-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr2, $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
;
; GFX90A-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr
; GFX90A: liveins: $vgpr1
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX90A-NEXT: $vgpr2 = V_ADD_U32_e32 128, killed $vgpr2, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr2, $vgpr1, implicit-def $vcc, implicit $exec
- ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr2, $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
;
; GFX10-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr
; GFX10: liveins: $vgpr1
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX10-NEXT: $vgpr2 = V_ADD_U32_e32 128, killed $vgpr2, implicit $exec
- ; GFX10-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr2, $vgpr1, implicit-def $vcc, implicit $exec
- ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX10-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr2, $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
;
; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr
; FLATSCRW64: liveins: $vgpr1
; FLATSCRW64-NEXT: {{ $}}
; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
- ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $sgpr4, $vgpr1, implicit-def $vcc, implicit $exec
- ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
- renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.1, $vgpr1, implicit-def $vcc, implicit $exec
- SI_RETURN implicit $vgpr0, implicit $vcc
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $sgpr4, $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.1, $vgpr1, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
...
@@ -373,53 +527,53 @@ body: |
; GFX7: liveins: $sgpr8
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
- ; GFX7-NEXT: $sgpr4 = S_MOV_B32 128
- ; GFX7-NEXT: $vgpr1, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
- ; GFX7-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def $vcc, implicit $exec
- ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX7-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
;
; GFX8-LABEL: name: v_add_co_u32_e32__sgpr__fi_literal_offset
; GFX8: liveins: $sgpr8
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
- ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
- ; GFX8-NEXT: $vgpr1, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
- ; GFX8-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def $vcc, implicit $exec
- ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX8-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
;
; GFX900-LABEL: name: v_add_co_u32_e32__sgpr__fi_literal_offset
; GFX900: liveins: $sgpr8
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
- ; GFX900-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def $vcc, implicit $exec
- ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX900-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
;
; GFX90A-LABEL: name: v_add_co_u32_e32__sgpr__fi_literal_offset
; GFX90A: liveins: $sgpr8
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def $vcc, implicit $exec
- ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
;
; GFX10-LABEL: name: v_add_co_u32_e32__sgpr__fi_literal_offset
; GFX10: liveins: $sgpr8
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
; GFX10-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
- ; GFX10-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def $vcc, implicit $exec
- ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ; GFX10-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
;
; FLATSCRW64-LABEL: name: v_add_co_u32_e32__sgpr__fi_literal_offset
; FLATSCRW64: liveins: $sgpr8
; FLATSCRW64-NEXT: {{ $}}
; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
- ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def $vcc, implicit $exec
- ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
- renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, %stack.1, implicit-def $vcc, implicit $exec
- SI_RETURN implicit $vgpr0, implicit $vcc
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, %stack.1, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
...
@@ -448,6 +602,54 @@ body: |
...
---
+name: v_add_co_u32_e64__inline_imm__fi_offset0__clamp
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__clamp
+ ; MUBUFW64: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; MUBUFW64-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__clamp
+ ; FLATSCRW64: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, $sgpr32, 1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, %stack.0, 1, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 4, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp
+ ; MUBUFW64: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; MUBUFW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp
+ ; FLATSCRW64: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12, $sgpr32, 1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12, %stack.0, 1, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $vcc
+
+...
+
+---
name: v_add_co_u32_e64__fi_literal_offset__sgpr
tracksRegLiveness: true
stack:
@@ -527,3 +729,1186 @@ body: |
SI_RETURN implicit $vgpr0, implicit $vcc
...
+
+---
+name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 128, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr8
+ ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp
+ ; GFX7: liveins: $sgpr8
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX7-NEXT: $sgpr4 = S_MOV_B32 128
+ ; GFX7-NEXT: $vgpr1, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $sgpr8, 1, implicit $exec
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp
+ ; GFX8: liveins: $sgpr8
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
+ ; GFX8-NEXT: $vgpr1, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $sgpr8, 1, implicit $exec
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp
+ ; GFX900: liveins: $sgpr8
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $sgpr8, 1, implicit $exec
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp
+ ; GFX90A: liveins: $sgpr8
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $sgpr8, 1, implicit $exec
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp
+ ; GFX10: liveins: $sgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX10-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $sgpr8, 1, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp
+ ; GFX940: liveins: $sgpr8
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
+ ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $sgpr8, 1, implicit $exec
+ ; GFX940-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp
+ ; GFX11: liveins: $sgpr8
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr4, $sgpr8, 1, implicit $exec
+ ; GFX11-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr_clamp
+ ; GFX12: liveins: $sgpr8
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr4, $sgpr8, 1, implicit $exec
+ ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 %stack.1, $sgpr8, 1, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $vcc
+
+...
+
+---
+name: v_add_co_u32_e64__fi_literal_offset__vgpr
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 128, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr8
+
+ ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr
+ ; GFX7: liveins: $vgpr8
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX7-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 0, implicit $exec
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr
+ ; GFX8: liveins: $vgpr8
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 0, implicit $exec
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr
+ ; GFX900: liveins: $vgpr8
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 0, implicit $exec
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr
+ ; GFX90A: liveins: $vgpr8
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 0, implicit $exec
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr
+ ; GFX10: liveins: $vgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX10-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 0, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr
+ ; FLATSCRW64: liveins: $vgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr4, $vgpr8, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 %stack.1, $vgpr8, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 128, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr8
+
+ ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp
+ ; GFX7: liveins: $vgpr8
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX7-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 1, implicit $exec
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp
+ ; GFX8: liveins: $vgpr8
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 1, implicit $exec
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp
+ ; GFX900: liveins: $vgpr8
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 1, implicit $exec
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp
+ ; GFX90A: liveins: $vgpr8
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 1, implicit $exec
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp
+ ; GFX10: liveins: $vgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX10-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 1, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__clamp
+ ; FLATSCRW64: liveins: $vgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr4, $vgpr8, 1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 %stack.1, $vgpr8, 1, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 128, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr8
+
+ ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc
+ ; GFX7: liveins: $vgpr8
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX7-NEXT: $sgpr4 = S_MOV_B32 128
+ ; GFX7-NEXT: $vgpr1, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 0, implicit $exec
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc
+ ; GFX8: liveins: $vgpr8
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
+ ; GFX8-NEXT: $vgpr1, dead $sgpr4_sgpr5 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 0, implicit $exec
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc
+ ; GFX900: liveins: $vgpr8
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 0, implicit $exec
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc
+ ; GFX90A: liveins: $vgpr8
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 0, implicit $exec
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc
+ ; GFX10: liveins: $vgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX10-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $vgpr8, 0, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc
+ ; FLATSCRW64: liveins: $vgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr4, $vgpr8, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 %stack.1, $vgpr8, 0, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $vcc
+
+...
+
+---
+name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel
+ ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel
+ ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.0, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel__live_vcc
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel__live_vcc
+ ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel__live_vcc
+ ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.0, implicit-def $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $vcc
+
+...
+
+---
+name: v_add_co_u32_e32__inline_imm__fi_offset_literal__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 72, alignment: 16 }
+ - { id: 1, size: 40, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset_literal__kernel
+ ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset_literal__kernel
+ ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.1, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 72, alignment: 16 }
+ - { id: 1, size: 40, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; GFX7-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
+ ; GFX7: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX7-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
+ ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX8-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
+ ; GFX900: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
+ ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $vgpr1, 1, implicit $exec
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, 72, 1, implicit $exec
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX940-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
+ ; GFX940: $sgpr4 = S_MOV_B32 72
+ ; GFX940-NEXT: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, killed $sgpr4, 1, implicit $exec
+ ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX11-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
+ ; GFX11: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, 72, 1, implicit $exec
+ ; GFX11-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX12-LABEL: name: v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp
+ ; GFX12: renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, 72, 1, implicit $exec
+ ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0, dead $vcc = V_ADD_CO_U32_e64 12, %stack.1, 1, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 128, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8
+
+ ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX7: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX7-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $sgpr8, 0, implicit $exec
+ ; GFX7-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX7-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX8: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $sgpr8, 0, implicit $exec
+ ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX900: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $sgpr8, 0, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX90A: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $sgpr8, 0, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX10: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX10-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $sgpr8, 0, implicit $exec
+ ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX10-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX940: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX940-NEXT: {{ $}}
+ ; GFX940-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX940-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; GFX940-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
+ ; GFX940-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, $sgpr8, 0, implicit $exec
+ ; GFX940-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX940-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX940-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX11: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr4, $sgpr8, 0, implicit $exec
+ ; GFX11-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX11-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX12: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr4, $sgpr8, 0, implicit $exec
+ ; GFX12-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 %stack.1, $sgpr8, 0, implicit $exec
+
+ S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 128, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8
+
+ ; GFX7-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX7: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX7-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX7-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX7-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX8: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX900: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX90A: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
+ ; GFX10: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX10-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX10-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
+ ; FLATSCRW64: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+ ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+ ; FLATSCRW64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, %stack.1, implicit-def dead $vcc, implicit $exec
+
+ S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 128, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8
+
+ ; GFX7-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
+ ; GFX7: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX7-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX7-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX7-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX7-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX7-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX7-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX8-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
+ ; GFX8: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX8-NEXT: $vcc_lo = S_MOV_B32 128
+ ; GFX8-NEXT: $vgpr1, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr1, 0, implicit $exec
+ ; GFX8-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX8-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX900-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
+ ; GFX900: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX900-NEXT: {{ $}}
+ ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX900-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX900-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX90A-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
+ ; GFX90A: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
+ ; GFX10: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; GFX10-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; GFX10-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; GFX10-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; GFX10-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
+ ; FLATSCRW64: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+ ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+ ; FLATSCRW64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr8, %stack.1, implicit-def dead $vcc, implicit $exec
+
+ S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__kernel_fi_offset0__other_vgpr_live_after
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 16, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr1
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__kernel_fi_offset0__other_vgpr_live_after
+ ; MUBUFW64: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__kernel_fi_offset0__other_vgpr_live_after
+ ; FLATSCRW64: liveins: $vgpr1
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr1, killed $vgpr2, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ renamable $vgpr0 = V_ADD_CO_U32_e32 renamable $vgpr1, %stack.0, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
+
+---
+name: v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 16, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr1
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after
+ ; MUBUFW64: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $vgpr1, 0, 0, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after
+ ; FLATSCRW64: liveins: $vgpr1
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $vgpr1, 0, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 renamable $vgpr1, %stack.0, 0, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
+
+---
+name: v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 16, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr1
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0
+ ; MUBUFW64: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 0, $vgpr1, 0, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0
+ ; FLATSCRW64: liveins: $vgpr1
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 0, $vgpr1, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 %stack.0, renamable $vgpr1, 0, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
+
+---
+name: v_add_co_u32_e32__identity_vgpr__fi_offset0__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset0__kernel
+ ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset0__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, %stack.0, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel
+ ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, $vgpr0, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, $vgpr0, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, $vgpr0, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel
+ ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr0, 0, 0, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr0, 0, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 $vgpr0, %stack.0, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel
+ ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 0, $vgpr0, 0, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 0, $vgpr0, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 %stack.0, $vgpr0, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_kill
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_kill
+ ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_kill
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_live_vcc
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_live_vcc
+ ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_live_vcc
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
+ renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, $vgpr0, implicit-def $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $vcc
+
+...
+
+---
+name: v_add_co_u32_e32__identity_vgpr__fi_offset32__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset32__kernel
+ ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $vgpr1 = V_MOV_B32_e32 32, implicit $exec
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset32__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 32, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, %stack.1, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+
+---
+name: v_add_co_u32_e32__identity_vgpr__fi_offset72__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 72, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset72__kernel
+ ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__identity_vgpr__fi_offset72__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, killed $vgpr1, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, %stack.1, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__fi_offset72__identity_vgpr__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 72, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset72__identity_vgpr__kernel
+ ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 72, $vgpr0, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset72__identity_vgpr__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 72, $vgpr0, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.1, $vgpr0, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e32__fi_offset32__identity_vgpr__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_offset32__identity_vgpr__kernel
+ ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 32, $vgpr0, implicit-def dead $vcc, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_offset32__identity_vgpr__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 32, $vgpr0, implicit-def dead $vcc, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.1, $vgpr0, implicit-def dead $vcc, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUFW64-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel
+ ; MUBUFW64: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: {{ $}}
+ ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW64-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr0, 32, 0, implicit $exec
+ ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $vgpr0, 32, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0, renamable dead $vcc = V_ADD_CO_U32_e64 $vgpr0, %stack.1, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
index 2d62d4238daa..af6823c1ab64 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
@@ -467,3 +467,1280 @@ body: |
SI_RETURN implicit $vgpr0
...
+
+---
+name: v_add_u32_e64__vgpr__fi_literal_offset
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 128, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr8
+ ; MUBUF-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset
+ ; MUBUF: liveins: $vgpr8
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; MUBUF-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset
+ ; MUBUFW32: liveins: $vgpr8
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+ ; MUBUFW32-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset
+ ; FLATSCRW64: liveins: $vgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $sgpr4, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset
+ ; FLATSCRW32: liveins: $vgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $sgpr4, 0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, %stack.1, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e64__vgpr__fi_literal_offset__clamp
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 128, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr8
+ ; MUBUF-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset__clamp
+ ; MUBUF: liveins: $vgpr8
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; MUBUF-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 1, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset__clamp
+ ; MUBUFW32: liveins: $vgpr8
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+ ; MUBUFW32-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $vgpr1, 1, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset__clamp
+ ; FLATSCRW64: liveins: $vgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $sgpr4, 1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__vgpr__fi_literal_offset__clamp
+ ; FLATSCRW32: liveins: $vgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, killed $sgpr4, 1, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e64 $vgpr8, %stack.1, 1, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e64__fi_literal_offset__vgpr__clamp
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 128, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $vgpr8
+ ; MUBUF-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp
+ ; MUBUF: liveins: $vgpr8
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+ ; MUBUF-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, $vgpr8, 1, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp
+ ; MUBUFW32: liveins: $vgpr8
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $vgpr1 = V_LSHRREV_B32_e64 5, $sgpr32, implicit $exec
+ ; MUBUFW32-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, $vgpr8, 1, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp
+ ; FLATSCRW64: liveins: $vgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr4, $vgpr8, 1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp
+ ; FLATSCRW32: liveins: $vgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr4, $vgpr8, 1, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e64 %stack.1, $vgpr8, 1, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 128, alignment: 16 }
+ - { id: 1, size: 4, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr8
+ ; MUBUF-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel
+ ; MUBUF: liveins: $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $vgpr1, $vgpr8, 1, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel
+ ; MUBUFW32: liveins: $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, $vgpr8, 1, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel
+ ; FLATSCRW64: liveins: $vgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $sgpr4 = S_MOV_B32 128
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 killed $sgpr4, $vgpr8, 1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel
+ ; FLATSCRW32: liveins: $vgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 128, $vgpr8, 1, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e64 %stack.1, $vgpr8, 1, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e32__inline_imm__fi_offset0__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUF-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0__kernel
+ ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0__kernel
+ ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0__kernel
+ ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__inline_imm__fi_offset0__kernel
+ ; FLATSCRW32: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e32 12, %stack.0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e64__inline_imm__fi_offset0__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUF-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0__kernel
+ ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12, 0, 0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0__kernel
+ ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12, 0, 0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0__kernel
+ ; FLATSCRW64: renamable $vgpr0 = V_ADD_U32_e64 12, 0, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__inline_imm__fi_offset0__kernel
+ ; FLATSCRW32: renamable $vgpr0 = V_ADD_U32_e64 12, 0, 0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e64 12, %stack.0, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+
+
+---
+name: v_add_u32_e32__inline_imm__fi_literal__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 80, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUF-LABEL: name: v_add_u32_e32__inline_imm__fi_literal__kernel
+ ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 32, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__inline_imm__fi_literal__kernel
+ ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 32, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__inline_imm__fi_literal__kernel
+ ; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 32, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__inline_imm__fi_literal__kernel
+ ; FLATSCRW32: $vgpr1 = V_MOV_B32_e32 32, implicit $exec
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 12, killed $vgpr1, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e32 12, %stack.1, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e64__inline_imm__fi_literal__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 80, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUF-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel
+ ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12, 32, 0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel
+ ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12, 32, 0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel
+ ; FLATSCRW64: renamable $vgpr0 = V_ADD_U32_e64 12, 32, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel
+ ; FLATSCRW32: renamable $vgpr0 = V_ADD_U32_e64 12, 32, 0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e64 12, %stack.1, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e64__fi_literal__inline_imm__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 80, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUF-LABEL: name: v_add_u32_e64__fi_literal__inline_imm__kernel
+ ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 32, 12, 0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_literal__inline_imm__kernel
+ ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 32, 12, 0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_literal__inline_imm__kernel
+ ; FLATSCRW64: renamable $vgpr0 = V_ADD_U32_e64 32, 12, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_literal__inline_imm__kernel
+ ; FLATSCRW32: renamable $vgpr0 = V_ADD_U32_e64 32, 12, 0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e64 %stack.1, 12, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 80, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ ; MUBUF-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp
+ ; MUBUF: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12, 32, 1, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp
+ ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 12, 32, 1, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp
+ ; FLATSCRW64: renamable $vgpr0 = V_ADD_U32_e64 12, 32, 1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__inline_imm__fi_literal__kernel__clamp
+ ; FLATSCRW32: renamable $vgpr0 = V_ADD_U32_e64 12, 32, 1, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e64 12, %stack.1, 1, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: killed_reg_regression
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 80, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUF-LABEL: name: killed_reg_regression
+ ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr1 = V_ADD_U32_e32 0, $vgpr0, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
+ ; MUBUF-NEXT: SCRATCH_STORE_DWORD killed renamable $vgpr2, killed renamable $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32), addrspace 5)
+ ; MUBUF-NEXT: renamable $vgpr0 = V_SUB_U32_e32 0, killed $vgpr0, implicit $exec
+ ; MUBUF-NEXT: dead renamable $vgpr0 = SCRATCH_LOAD_DWORD killed renamable $vgpr0, 124, 0, implicit $exec, implicit $flat_scr :: (volatile load (s32), addrspace 5)
+ ; MUBUF-NEXT: S_ENDPGM 0
+ ;
+ ; MUBUFW32-LABEL: name: killed_reg_regression
+ ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr1 = V_ADD_U32_e32 0, $vgpr0, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
+ ; MUBUFW32-NEXT: SCRATCH_STORE_DWORD killed renamable $vgpr2, killed renamable $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32), addrspace 5)
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_SUB_U32_e32 0, killed $vgpr0, implicit $exec
+ ; MUBUFW32-NEXT: dead renamable $vgpr0 = SCRATCH_LOAD_DWORD killed renamable $vgpr0, 124, 0, implicit $exec, implicit $flat_scr :: (volatile load (s32), addrspace 5)
+ ; MUBUFW32-NEXT: S_ENDPGM 0
+ ;
+ ; FLATSCRW64-LABEL: name: killed_reg_regression
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr1 = V_ADD_U32_e32 0, $vgpr0, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
+ ; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD killed renamable $vgpr2, killed renamable $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32), addrspace 5)
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_SUB_U32_e32 0, killed $vgpr0, implicit $exec
+ ; FLATSCRW64-NEXT: dead renamable $vgpr0 = SCRATCH_LOAD_DWORD killed renamable $vgpr0, 124, 0, implicit $exec, implicit $flat_scr :: (volatile load (s32), addrspace 5)
+ ; FLATSCRW64-NEXT: S_ENDPGM 0
+ ;
+ ; FLATSCRW32-LABEL: name: killed_reg_regression
+ ; FLATSCRW32: liveins: $vgpr0
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+ ; FLATSCRW32-NEXT: renamable $vgpr1 = V_ADD_U32_e32 0, $vgpr0, implicit $exec
+ ; FLATSCRW32-NEXT: renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
+ ; FLATSCRW32-NEXT: SCRATCH_STORE_DWORD killed renamable $vgpr2, killed renamable $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32), addrspace 5)
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_SUB_U32_e32 0, killed $vgpr0, implicit $exec
+ ; FLATSCRW32-NEXT: dead renamable $vgpr0 = SCRATCH_LOAD_DWORD killed renamable $vgpr0, 124, 0, implicit $exec, implicit $flat_scr :: (volatile load (s32), addrspace 5)
+ ; FLATSCRW32-NEXT: S_ENDPGM 0
+ renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec
+ renamable $vgpr1 = V_ADD_U32_e32 %stack.0, $vgpr0, implicit $exec
+ renamable $vgpr2 = V_MOV_B32_e32 15, implicit $exec
+ SCRATCH_STORE_DWORD killed renamable $vgpr2, killed renamable $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32), addrspace 5)
+ renamable $vgpr0 = V_SUB_U32_e32 %stack.0, killed $vgpr0, implicit $exec
+ dead renamable $vgpr0 = SCRATCH_LOAD_DWORD killed renamable $vgpr0, 124, 0, implicit $exec, implicit $flat_scr :: (volatile load (s32), addrspace 5)
+ S_ENDPGM 0
+
+...
+
+---
+name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 16, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr1
+ ; MUBUF-LABEL: name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after
+ ; MUBUF: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after
+ ; MUBUFW32: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after
+ ; FLATSCRW64: liveins: $vgpr1
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after
+ ; FLATSCRW32: liveins: $vgpr1
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr1, killed $vgpr2, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ renamable $vgpr0 = V_ADD_U32_e32 renamable $vgpr1, %stack.0, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
+
+---
+name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 16, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr1
+ ; MUBUF-LABEL: name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0
+ ; MUBUF: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 0, $vgpr1, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0
+ ; MUBUFW32: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 0, $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0
+ ; FLATSCRW64: liveins: $vgpr1
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 0, $vgpr1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0
+ ; FLATSCRW32: liveins: $vgpr1
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 0, $vgpr1, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ renamable $vgpr0 = V_ADD_U32_e32 %stack.0, renamable $vgpr1, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
+
+---
+name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 16, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $sgpr8
+ ; MUBUF-LABEL: name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after
+ ; MUBUF: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after
+ ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after
+ ; FLATSCRW64: liveins: $sgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset0__sgpr_live_after
+ ; FLATSCRW32: liveins: $sgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
+ renamable $vgpr0 = V_ADD_U32_e32 renamable $sgpr8, %stack.0, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $sgpr8
+
+...
+
+---
+name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 16, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr1
+ ; MUBUF-LABEL: name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after
+ ; MUBUF: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr1, 0, 0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after
+ ; MUBUFW32: liveins: $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr1, 0, 0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after
+ ; FLATSCRW64: liveins: $vgpr1
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr1, 0, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after
+ ; FLATSCRW32: liveins: $vgpr1
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr1, 0, 0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
+ renamable $vgpr0 = V_ADD_U32_e64 renamable $vgpr1, %stack.0, 0, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $vgpr1
+
+...
+
+---
+name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 72, alignment: 16 }
+ - { id: 1, size: 32, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $sgpr8
+ ; MUBUF-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
+ ; MUBUF: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
+ ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
+ ; FLATSCRW64: liveins: $sgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
+ ; FLATSCRW32: liveins: $sgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
+ renamable $vgpr0 = V_ADD_U32_e32 renamable $sgpr8, %stack.1, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $sgpr8
+
+...
+
+---
+name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 72, alignment: 16 }
+ - { id: 1, size: 32, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $sgpr8
+ ; MUBUF-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after
+ ; MUBUF: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after
+ ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, 72, 0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after
+ ; FLATSCRW64: liveins: $sgpr8
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__kernel_fi_offset72__sgpr_live_after
+ ; FLATSCRW32: liveins: $sgpr8
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $sgpr8, 72, 0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
+ renamable $vgpr0 = V_ADD_U32_e64 renamable $sgpr8, %stack.1, 0, implicit $exec
+ SI_RETURN implicit $vgpr0, implicit $sgpr8
+
+...
+
+---
+name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUF-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel
+ ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel
+ ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, killed $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, killed $vgpr1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset0__kernel
+ ; FLATSCRW32: liveins: $vgpr0
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, killed $vgpr1, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, %stack.0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel
+ ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 0, $vgpr0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel
+ ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 0, $vgpr0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 0, $vgpr0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel
+ ; FLATSCRW32: liveins: $vgpr0
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 0, $vgpr0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e32 %stack.0, $vgpr0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUF-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel
+ ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 0, 0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel
+ ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 0, 0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 0, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset0__kernel
+ ; FLATSCRW32: liveins: $vgpr0
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 0, 0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, %stack.0, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUF-LABEL: name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel
+ ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 0, $vgpr0, 0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel
+ ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 0, $vgpr0, 0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 0, $vgpr0, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__fi_offset0__identity_vgpr__kernel
+ ; FLATSCRW32: liveins: $vgpr0
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 0, $vgpr0, 0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e64 %stack.0, $vgpr0, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill
+ ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 0, killed $vgpr0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill
+ ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 0, killed $vgpr0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 0, killed $vgpr0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill
+ ; FLATSCRW32: liveins: $vgpr0
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 0, killed $vgpr0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e32 %stack.0, killed $vgpr0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUF-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel
+ ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 32, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel
+ ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 32, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, killed $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 32, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, killed $vgpr1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset32__kernel
+ ; FLATSCRW32: liveins: $vgpr0
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 32, implicit $exec
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, killed $vgpr1, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, %stack.1, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+
+---
+name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 72, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUF-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel
+ ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel
+ ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, killed $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, killed $vgpr1, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__identity_vgpr__fi_offset72__kernel
+ ; FLATSCRW32: liveins: $vgpr0
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, killed $vgpr1, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, %stack.1, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 72, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel
+ ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel
+ ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset72__identity_vgpr__kernel
+ ; FLATSCRW32: liveins: $vgpr0
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, $vgpr0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e32 %stack.1, $vgpr0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUF-LABEL: name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel
+ ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel
+ ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e32__fi_offset32__identity_vgpr__kernel
+ ; FLATSCRW32: liveins: $vgpr0
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 32, $vgpr0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e32 %stack.1, $vgpr0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
+
+---
+name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 32, alignment: 16 }
+ - { id: 1, size: 64, alignment: 4 }
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ isEntryFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; MUBUF-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel
+ ; MUBUF: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: {{ $}}
+ ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 32, 0, implicit $exec
+ ; MUBUF-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; MUBUFW32-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel
+ ; MUBUFW32: liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: {{ $}}
+ ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 32, 0, implicit $exec
+ ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW64-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel
+ ; FLATSCRW64: liveins: $vgpr0
+ ; FLATSCRW64-NEXT: {{ $}}
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 32, 0, implicit $exec
+ ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
+ ;
+ ; FLATSCRW32-LABEL: name: v_add_u32_e64__identity_vgpr__fi_offset32__kernel
+ ; FLATSCRW32: liveins: $vgpr0
+ ; FLATSCRW32-NEXT: {{ $}}
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, 32, 0, implicit $exec
+ ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0
+ renamable $vgpr0 = V_ADD_U32_e64 $vgpr0, %stack.1, 0, implicit $exec
+ SI_RETURN implicit $vgpr0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
new file mode 100644
index 000000000000..f419d89a7f0a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
+
+@g_fn = addrspace(1) global ptr null
+
+;.
+; CHECK: @g_fn = addrspace(1) global ptr null
+;.
+define void @set_fn(ptr %fn) {
+; CHECK-LABEL: define {{[^@]+}}@set_fn
+; CHECK-SAME: (ptr [[FN:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store ptr [[FN]], ptr addrspace(1) @g_fn, align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ store ptr %fn, ptr addrspace(1) @g_fn
+ ret void
+}
+
+define void @get_fn(ptr %fn) {
+; CHECK-LABEL: define {{[^@]+}}@get_fn
+; CHECK-SAME: (ptr [[FN:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(1) @g_fn, align 8
+; CHECK-NEXT: store ptr [[LOAD]], ptr [[FN]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %load = load ptr, ptr addrspace(1) @g_fn
+ store ptr %load, ptr %fn
+ ret void
+}
+
+define void @foo() {
+; CHECK-LABEL: define {{[^@]+}}@foo
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[FN:%.*]] = alloca ptr, align 8, addrspace(5)
+; CHECK-NEXT: store ptr null, ptr addrspace(5) [[FN]], align 8
+; CHECK-NEXT: [[FN_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FN]] to ptr
+; CHECK-NEXT: call void @get_fn(ptr [[FN_CAST]])
+; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FN]], align 8
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne ptr [[LOAD]], null
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[LOAD_1:%.*]] = load ptr, ptr addrspace(5) [[FN]], align 8
+; CHECK-NEXT: call void [[LOAD_1]]()
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %fn = alloca ptr, addrspace(5)
+ store ptr null, ptr addrspace(5) %fn
+ %fn.cast = addrspacecast ptr addrspace(5) %fn to ptr
+ call void @get_fn(ptr %fn.cast)
+ %load = load ptr, ptr addrspace(5) %fn
+ %tobool = icmp ne ptr %load, null
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+ %load.1 = load ptr, ptr addrspace(5) %fn
+ call void %load.1()
+ br label %if.end
+
+if.end:
+ ret void
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 15f23eda241b..ee7f9375bf5d 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -43,7 +43,6 @@
; GCN-O0-NEXT: FunctionPass Manager
; GCN-O0-NEXT: Expand Atomic instructions
; GCN-O0-NEXT: Remove unreachable blocks from the CFG
-; GCN-O0-NEXT: Expand vector predication intrinsics
; GCN-O0-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O0-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O0-NEXT: Expand reduction intrinsics
@@ -222,7 +221,6 @@
; GCN-O1-NEXT: Constant Hoisting
; GCN-O1-NEXT: Replace intrinsics with calls to vector library
; GCN-O1-NEXT: Partially inline calls to library functions
-; GCN-O1-NEXT: Expand vector predication intrinsics
; GCN-O1-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O1-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O1-NEXT: Expand reduction intrinsics
@@ -508,7 +506,6 @@
; GCN-O1-OPTS-NEXT: Constant Hoisting
; GCN-O1-OPTS-NEXT: Replace intrinsics with calls to vector library
; GCN-O1-OPTS-NEXT: Partially inline calls to library functions
-; GCN-O1-OPTS-NEXT: Expand vector predication intrinsics
; GCN-O1-OPTS-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O1-OPTS-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O1-OPTS-NEXT: Expand reduction intrinsics
@@ -813,7 +810,6 @@
; GCN-O2-NEXT: Constant Hoisting
; GCN-O2-NEXT: Replace intrinsics with calls to vector library
; GCN-O2-NEXT: Partially inline calls to library functions
-; GCN-O2-NEXT: Expand vector predication intrinsics
; GCN-O2-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O2-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O2-NEXT: Expand reduction intrinsics
@@ -1126,7 +1122,6 @@
; GCN-O3-NEXT: Constant Hoisting
; GCN-O3-NEXT: Replace intrinsics with calls to vector library
; GCN-O3-NEXT: Partially inline calls to library functions
-; GCN-O3-NEXT: Expand vector predication intrinsics
; GCN-O3-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; GCN-O3-NEXT: Scalarize Masked Memory Intrinsics
; GCN-O3-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
index 074489b9ff50..d085b3c768a8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
@@ -523,14 +523,23 @@ define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
; GFX67-NEXT: exp mrt0 v0, v1, v0, v0 done vm
; GFX67-NEXT: s_endpgm
;
-; GFX8910-LABEL: s_buffer_load_imm_mergex2:
-; GFX8910: ; %bb.0: ; %main_body
-; GFX8910-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4
-; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8910-NEXT: v_mov_b32_e32 v0, s0
-; GFX8910-NEXT: v_mov_b32_e32 v1, s1
-; GFX8910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
-; GFX8910-NEXT: s_endpgm
+; GFX8-LABEL: s_buffer_load_imm_mergex2:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_buffer_load_dwordx2 s[0:1], s[0:3], 0x4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: exp mrt0 v0, v1, v0, v0 done vm
+; GFX8-NEXT: s_endpgm
+;
+; GFX910-LABEL: s_buffer_load_imm_mergex2:
+; GFX910: ; %bb.0: ; %main_body
+; GFX910-NEXT: s_buffer_load_dwordx2 s[4:5], s[0:3], 0x4
+; GFX910-NEXT: s_waitcnt lgkmcnt(0)
+; GFX910-NEXT: v_mov_b32_e32 v0, s4
+; GFX910-NEXT: v_mov_b32_e32 v1, s5
+; GFX910-NEXT: exp mrt0 v0, v1, v0, v0 done vm
+; GFX910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_load_imm_mergex2:
; GFX11: ; %bb.0: ; %main_body
@@ -570,16 +579,27 @@ define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
; GFX67-NEXT: exp mrt0 v0, v1, v2, v3 done vm
; GFX67-NEXT: s_endpgm
;
-; GFX8910-LABEL: s_buffer_load_imm_mergex4:
-; GFX8910: ; %bb.0: ; %main_body
-; GFX8910-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8
-; GFX8910-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8910-NEXT: v_mov_b32_e32 v0, s0
-; GFX8910-NEXT: v_mov_b32_e32 v1, s1
-; GFX8910-NEXT: v_mov_b32_e32 v2, s2
-; GFX8910-NEXT: v_mov_b32_e32 v3, s3
-; GFX8910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
-; GFX8910-NEXT: s_endpgm
+; GFX8-LABEL: s_buffer_load_imm_mergex4:
+; GFX8: ; %bb.0: ; %main_body
+; GFX8-NEXT: s_buffer_load_dwordx4 s[0:3], s[0:3], 0x8
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: v_mov_b32_e32 v2, s2
+; GFX8-NEXT: v_mov_b32_e32 v3, s3
+; GFX8-NEXT: exp mrt0 v0, v1, v2, v3 done vm
+; GFX8-NEXT: s_endpgm
+;
+; GFX910-LABEL: s_buffer_load_imm_mergex4:
+; GFX910: ; %bb.0: ; %main_body
+; GFX910-NEXT: s_buffer_load_dwordx4 s[4:7], s[0:3], 0x8
+; GFX910-NEXT: s_waitcnt lgkmcnt(0)
+; GFX910-NEXT: v_mov_b32_e32 v0, s4
+; GFX910-NEXT: v_mov_b32_e32 v1, s5
+; GFX910-NEXT: v_mov_b32_e32 v2, s6
+; GFX910-NEXT: v_mov_b32_e32 v3, s7
+; GFX910-NEXT: exp mrt0 v0, v1, v2, v3 done vm
+; GFX910-NEXT: s_endpgm
;
; GFX11-LABEL: s_buffer_load_imm_mergex4:
; GFX11: ; %bb.0: ; %main_body
diff --git a/llvm/test/CodeGen/AMDGPU/merge-sbuffer-load.mir b/llvm/test/CodeGen/AMDGPU/merge-sbuffer-load.mir
index 1b2f672fd57b..02c1a328f482 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-sbuffer-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-sbuffer-load.mir
@@ -1,14 +1,31 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s -check-prefixes=CHECK,GFX10
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s -check-prefixes=CHECK,GFX12
-# CHECK-LABEL: name: merge_s_buffer_load_x2
-# CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), align 4)
+---
name: merge_s_buffer_load_x2
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-LABEL: name: merge_s_buffer_load_x2
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %3:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 4)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY %3.sub0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed %3.sub1
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x2
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 4)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_BUFFER_LOAD_DWORDX2_IMM]].sub0
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX2_IMM]].sub1
+ ; GFX12-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32))
@@ -17,15 +34,19 @@ body: |
...
---
-# CHECK-LABEL: name: merge_s_buffer_load_x1_x2
-# CHECK: S_BUFFER_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32))
-# CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 4, 0 :: (dereferenceable invariant load (s64))
name: merge_s_buffer_load_x1_x2
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-LABEL: name: merge_s_buffer_load_x1_x2
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 4, 0 :: (dereferenceable invariant load (s64))
+ ; CHECK-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
%2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s64))
@@ -34,16 +55,28 @@ body: |
...
---
-# CHECK-LABEL: name: merge_s_buffer_load_x2_x1
-# GFX10: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64))
-# GFX10: S_BUFFER_LOAD_DWORD_IMM %0, 8, 0 :: (dereferenceable invariant load (s32))
-# GFX12: S_BUFFER_LOAD_DWORDX3_IMM %0, 0, 0 :: (dereferenceable invariant load (s96), align 8)
name: merge_s_buffer_load_x2_x1
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-LABEL: name: merge_s_buffer_load_x2_x1
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s64))
+ ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 8, 0 :: (dereferenceable invariant load (s32))
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x2_x1
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX3_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s96), align 8)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX3_IMM]].sub0_sub1
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX3_IMM]].sub2
+ ; GFX12-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64))
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32))
@@ -52,14 +85,37 @@ body: |
...
---
-# CHECK-LABEL: name: merge_s_buffer_load_x4
-# CHECK: S_BUFFER_LOAD_DWORDX4_IMM %0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
name: merge_s_buffer_load_x4
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-LABEL: name: merge_s_buffer_load_x4
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %7:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s128), align 4)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY %7.sub0_sub1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY killed %7.sub2_sub3
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY1]].sub0
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub1
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY2]].sub0
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub1
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x4
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128), align 4)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1_sub2
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub3
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub2
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
+ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
+ ; GFX12-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32))
@@ -70,15 +126,19 @@ body: |
...
---
-# CHECK-LABEL: name: merge_s_buffer_load_x1_x3
-# CHECK: S_BUFFER_LOAD_DWORD_IMM %0, 0, 0 :: (dereferenceable invariant load (s32))
-# CHECK: S_BUFFER_LOAD_DWORDX3_IMM %0, 4, 0 :: (dereferenceable invariant load (s96), align 16)
name: merge_s_buffer_load_x1_x3
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-LABEL: name: merge_s_buffer_load_x1_x3
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX3_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM [[COPY]], 4, 0 :: (dereferenceable invariant load (s96), align 16)
+ ; CHECK-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
%2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s96))
@@ -87,14 +147,20 @@ body: |
...
---
-# CHECK-LABEL: name: merge_s_buffer_load_x3_x1
-# CHECK: S_BUFFER_LOAD_DWORDX4_IMM %0, 0, 0 :: (dereferenceable invariant load (s128))
name: merge_s_buffer_load_x3_x1
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-LABEL: name: merge_s_buffer_load_x3_x1
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128))
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1_sub2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub3
+ ; CHECK-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s96))
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32))
@@ -103,14 +169,53 @@ body: |
...
---
-# CHECK-LABEL: name: merge_s_buffer_load_x8
-# CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 4)
name: merge_s_buffer_load_x8
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-LABEL: name: merge_s_buffer_load_x8
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %15:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %15.sub0_sub1_sub2_sub3
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %15.sub4_sub5_sub6_sub7
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY1]].sub2_sub3
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY4]].sub0
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY4]].sub1
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY2]].sub2_sub3
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY9]].sub0
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY9]].sub1
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY10]].sub0
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY10]].sub1
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x8
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub3
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]].sub0_sub1
+ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub2
+ ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY5]].sub0
+ ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY5]].sub1
+ ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sgpr_96 = COPY [[COPY2]].sub0_sub1_sub2
+ ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub3
+ ; GFX12-NEXT: [[COPY11:%[0-9]+]]:sreg_64_xexec = COPY [[COPY9]].sub0_sub1
+ ; GFX12-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY9]].sub2
+ ; GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY11]].sub0
+ ; GFX12-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY11]].sub1
+ ; GFX12-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32))
@@ -125,14 +230,53 @@ body: |
...
---
-# CHECK-LABEL: name: merge_s_buffer_load_x8_reordered
-# CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 4)
name: merge_s_buffer_load_x8_reordered
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-LABEL: name: merge_s_buffer_load_x8_reordered
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %15:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %15.sub4_sub5_sub6_sub7
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %15.sub0_sub1_sub2_sub3
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY1]].sub2_sub3
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub1
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub0
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY2]].sub2_sub3
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY7]].sub1
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY7]].sub0
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY4]].sub1
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY4]].sub0
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY8]].sub1
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY8]].sub0
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x8_reordered
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub3
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]].sub0_sub1
+ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub2
+ ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY5]].sub1
+ ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY5]].sub0
+ ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sgpr_96 = COPY [[COPY2]].sub0_sub1_sub2
+ ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub3
+ ; GFX12-NEXT: [[COPY11:%[0-9]+]]:sreg_64_xexec = COPY [[COPY9]].sub0_sub1
+ ; GFX12-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY9]].sub2
+ ; GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY11]].sub1
+ ; GFX12-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY11]].sub0
+ ; GFX12-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32))
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32))
@@ -147,14 +291,37 @@ body: |
...
---
-# CHECK-LABEL: name: merge_s_buffer_load_x8_out_of_x2
-# CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 8)
name: merge_s_buffer_load_x8_out_of_x2
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x2
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub4_sub5_sub6_sub7
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub0_sub1_sub2_sub3
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x2
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
+ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
+ ; GFX12-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64))
%2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64))
@@ -165,14 +332,29 @@ body: |
...
---
-# CHECK-LABEL: name: merge_s_buffer_load_x8_out_of_x4
-# CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 16)
name: merge_s_buffer_load_x8_out_of_x4
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+ ; GFX12-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
%2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))
@@ -181,14 +363,37 @@ body: |
...
---
-# CHECK-LABEL: name: merge_s_buffer_load_x8_mixed
-# CHECK: S_BUFFER_LOAD_DWORDX8_IMM %0, 0, 0 :: (dereferenceable invariant load (s256), align 16)
name: merge_s_buffer_load_x8_mixed
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-LABEL: name: merge_s_buffer_load_x8_mixed
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub0_sub1_sub2_sub3
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub4_sub5_sub6_sub7
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x8_mixed
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
+ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
+ ; GFX12-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
@@ -199,14 +404,39 @@ body: |
...
---
-# CHECK-LABEL: name: merge_s_buffer_load_sgpr_imm
-# CHECK: S_BUFFER_LOAD_DWORDX4_SGPR_IMM %0, %1, 0, 0 :: (dereferenceable invariant load (s128), align 4)
name: merge_s_buffer_load_sgpr_imm
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+ ; GFX10-LABEL: name: merge_s_buffer_load_sgpr_imm
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX10-NEXT: early-clobber %8:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 4)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY %8.sub0_sub1
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY killed %8.sub2_sub3
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY2]].sub0
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub1
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_sgpr_imm
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 4)
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1_sub2
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub3
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub2
+ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY4]].sub0
+ ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY4]].sub1
+ ; GFX12-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sreg_32 = COPY $sgpr4
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
@@ -218,15 +448,21 @@ body: |
...
---
-# CHECK-LABEL: name: no_merge_for_different_soffsets
-# CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM %0, %1, 4, 0 :: (dereferenceable invariant load (s32))
-# CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM %0, %2, 8, 0 :: (dereferenceable invariant load (s32))
name: no_merge_for_different_soffsets
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
+ ; CHECK-LABEL: name: no_merge_for_different_soffsets
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY1]], 4, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY2]], 8, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sreg_32 = COPY $sgpr4
%2:sreg_32 = COPY $sgpr5
@@ -237,15 +473,20 @@ body: |
...
---
-# CHECK-LABEL: name: no_merge_for_non_adjacent_offsets
-# CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM %0, %1, 4, 0 :: (dereferenceable invariant load (s32))
-# CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM %0, %1, 12, 0 :: (dereferenceable invariant load (s32))
name: no_merge_for_non_adjacent_offsets
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+ ; CHECK-LABEL: name: no_merge_for_non_adjacent_offsets
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY1]], 4, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY1]], 12, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: S_ENDPGM 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sreg_32 = COPY $sgpr4
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 4, 0 :: (dereferenceable invariant load (s32))
@@ -253,4 +494,420 @@ body: |
S_ENDPGM 0
...
+
+# The constrained multi-dword buffer load merge tests.
+
+---
+name: merge_s_buffer_load_x1_x2ec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; CHECK-LABEL: name: merge_s_buffer_load_x1_x2ec
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec [[COPY]], 4, 0 :: (dereferenceable invariant load (s64))
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
+ early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s64))
+
+ S_ENDPGM 0
+...
+---
+
+name: merge_s_buffer_load_x2ec_x1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GFX10-LABEL: name: merge_s_buffer_load_x2ec_x1
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s64))
+ ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 8, 0 :: (dereferenceable invariant load (s32))
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x2ec_x1
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX3_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s96), align 8)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX3_IMM]].sub0_sub1
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX3_IMM]].sub2
+ ; GFX12-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64))
+ %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32))
+
+ S_ENDPGM 0
+...
+---
+
+name: merge_s_buffer_load_x1_x3ec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; CHECK-LABEL: name: merge_s_buffer_load_x1_x3ec
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; CHECK-NEXT: early-clobber %2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM_ec [[COPY]], 4, 0 :: (dereferenceable invariant load (s96), align 16)
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
+ early-clobber %2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM_ec %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s96))
+
+ S_ENDPGM 0
+...
+---
+
+name: merge_s_buffer_load_x3ec_x1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; CHECK-LABEL: name: merge_s_buffer_load_x3ec_x1
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128))
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1_sub2
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub3
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ early-clobber %1:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s96))
+ %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32))
+
+ S_ENDPGM 0
+...
+---
+
+name: merge_s_buffer_load_x8_out_of_x2ec_reordered
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_reordered
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub4_sub5_sub6_sub7
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub0_sub1_sub2_sub3
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_reordered
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
+ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
+ ; GFX12-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64))
+ early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64))
+ early-clobber %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64))
+ early-clobber %4:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64))
+
+ S_ENDPGM 0
+...
---
+
+name: merge_s_buffer_load_x8_out_of_x2ec_x2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_x2
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub4_sub5_sub6_sub7
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub0_sub1_sub2_sub3
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_x2
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
+ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
+ ; GFX12-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64))
+ early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64))
+ %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64))
+ %4:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64))
+
+ S_ENDPGM 0
+...
+---
+
+name: merge_s_buffer_load_x8_out_of_x4ec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+ ; GFX12-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ early-clobber %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
+ early-clobber %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))
+
+ S_ENDPGM 0
+...
+---
+
+name: merge_s_buffer_load_x8_out_of_x4ec_x4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec_x4
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec_x4
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+ ; GFX12-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ early-clobber %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
+ %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))
+
+ S_ENDPGM 0
+...
+---
+
+name: merge_s_buffer_load_x8_out_of_x4_x4ec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4_x4ec
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4_x4ec
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+ ; GFX12-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
+ early-clobber %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))
+
+ S_ENDPGM 0
+...
+---
+
+name: merge_s_buffer_load_x8_mixed_including_ec_opcodes
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; GFX10-LABEL: name: merge_s_buffer_load_x8_mixed_including_ec_opcodes
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub0_sub1_sub2_sub3
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub4_sub5_sub6_sub7
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_x8_mixed_including_ec_opcodes
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3
+ ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
+ ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
+ ; GFX12-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ early-clobber %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
+ %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
+ early-clobber %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64))
+ %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32))
+
+ S_ENDPGM 0
+...
+---
+
+name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+
+ ; GFX10-LABEL: name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec
+ ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX10-NEXT: early-clobber %4:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 8)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY %4.sub0_sub1
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY killed %4.sub2_sub3
+ ; GFX10-NEXT: S_ENDPGM 0
+ ;
+ ; GFX12-LABEL: name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec
+ ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 8)
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub2_sub3
+ ; GFX12-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sreg_32 = COPY $sgpr4
+ early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s64))
+ early-clobber %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec %0:sgpr_128, %1:sreg_32, 8, 0 :: (dereferenceable invariant load (s64))
+
+ S_ENDPGM 0
+...
+
+# No constrained opcode required when the MEM operand has met the required alignment.
+
+---
+
+name: merge_s_buffer_load_x2_x2_no_constrained_opc_needed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; CHECK-LABEL: name: merge_s_buffer_load_x2_x2_no_constrained_opc_needed
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128))
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub2_sub3
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64), align 16)
+ %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64))
+
+ S_ENDPGM 0
+...
+---
+
+name: merge_s_buffer_load_x4_x4_no_constrained_opc_needed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; CHECK-LABEL: name: merge_s_buffer_load_x4_x4_no_constrained_opc_needed
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256))
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128), align 32)
+ %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))
+
+ S_ENDPGM 0
+...
+---
+
+name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec_no_constrained_opc_needed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+
+ ; CHECK-LABEL: name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec_no_constrained_opc_needed
+ ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128))
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub2_sub3
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sreg_32 = COPY $sgpr4
+ %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s64), align 16)
+ %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM %0:sgpr_128, %1:sreg_32, 8, 0 :: (dereferenceable invariant load (s64))
+
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index e86ee1adef3d..3a6b0485d241 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -34,9 +34,8 @@ define amdgpu_kernel void @test_simple_indirect_call() {
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
-; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8
-; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
; ATTRIBUTOR_GCN-NEXT: ret void
;
@@ -75,12 +74,16 @@ define amdgpu_kernel void @test_simple_indirect_call() {
ret void
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
+; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+;.
+; ATTRIBUTOR_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+;.
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index aa92c7ab4fb9..e74e4f2ad389 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -38,7 +38,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/ARM/setjmp-bti-basic.ll b/llvm/test/CodeGen/ARM/setjmp-bti-basic.ll
index 3b01e3e9327e..7fe7015a482a 100644
--- a/llvm/test/CodeGen/ARM/setjmp-bti-basic.ll
+++ b/llvm/test/CodeGen/ARM/setjmp-bti-basic.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi < %s | FileCheck %s --check-prefix=BTI
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+no-bti-at-return-twice < %s | \
; RUN: FileCheck %s --check-prefix=NOBTI
@@ -20,11 +21,43 @@
define i32 @foo(i32 %x) "branch-target-enforcement" {
; BTI-LABEL: foo:
-; BTI: bl setjmp
-; BTI-NEXT: bti
+; BTI: @ %bb.0: @ %entry
+; BTI-NEXT: bti
+; BTI-NEXT: .save {r4, lr}
+; BTI-NEXT: push {r4, lr}
+; BTI-NEXT: mov r4, r0
+; BTI-NEXT: movw r0, :lower16:buf
+; BTI-NEXT: movt r0, :upper16:buf
+; BTI-NEXT: bl setjmp
+; BTI-NEXT: bti
+; BTI-NEXT: cmp r0, #0
+; BTI-NEXT: itt ne
+; BTI-NEXT: movne r0, #0
+; BTI-NEXT: popne {r4, pc}
+; BTI-NEXT: .LBB0_1: @ %if.else
+; BTI-NEXT: mov r0, r4
+; BTI-NEXT: bl bar
+; BTI-NEXT: mov r0, r4
+; BTI-NEXT: pop {r4, pc}
+;
; NOBTI-LABEL: foo:
-; NOBTI: bl setjmp
-; NOBTI-NOT: bti
+; NOBTI: @ %bb.0: @ %entry
+; NOBTI-NEXT: bti
+; NOBTI-NEXT: .save {r4, lr}
+; NOBTI-NEXT: push {r4, lr}
+; NOBTI-NEXT: mov r4, r0
+; NOBTI-NEXT: movw r0, :lower16:buf
+; NOBTI-NEXT: movt r0, :upper16:buf
+; NOBTI-NEXT: bl setjmp
+; NOBTI-NEXT: cmp r0, #0
+; NOBTI-NEXT: itt ne
+; NOBTI-NEXT: movne r0, #0
+; NOBTI-NEXT: popne {r4, pc}
+; NOBTI-NEXT: .LBB0_1: @ %if.else
+; NOBTI-NEXT: mov r0, r4
+; NOBTI-NEXT: bl bar
+; NOBTI-NEXT: mov r0, r4
+; NOBTI-NEXT: pop {r4, pc}
entry:
%call = call i32 @setjmp(ptr @buf) #0
@@ -40,6 +73,41 @@ if.end: ; preds = %entry, %if.else
ret i32 %x.addr.0
}
+;; Check that the BL to setjmp correctly clobbers LR
+
+define i32 @baz() "branch-target-enforcement" {
+; BTI-LABEL: baz:
+; BTI: @ %bb.0: @ %entry
+; BTI-NEXT: bti
+; BTI-NEXT: .save {r7, lr}
+; BTI-NEXT: push {r7, lr}
+; BTI-NEXT: .pad #160
+; BTI-NEXT: sub sp, #160
+; BTI-NEXT: mov r0, sp
+; BTI-NEXT: bl setjmp
+; BTI-NEXT: bti
+; BTI-NEXT: movs r0, #0
+; BTI-NEXT: add sp, #160
+; BTI-NEXT: pop {r7, pc}
+;
+; NOBTI-LABEL: baz:
+; NOBTI: @ %bb.0: @ %entry
+; NOBTI-NEXT: bti
+; NOBTI-NEXT: .save {r7, lr}
+; NOBTI-NEXT: push {r7, lr}
+; NOBTI-NEXT: .pad #160
+; NOBTI-NEXT: sub sp, #160
+; NOBTI-NEXT: mov r0, sp
+; NOBTI-NEXT: bl setjmp
+; NOBTI-NEXT: movs r0, #0
+; NOBTI-NEXT: add sp, #160
+; NOBTI-NEXT: pop {r7, pc}
+entry:
+ %outgoing_jb = alloca [20 x i64], align 8
+ %call = call i32 @setjmp(ptr %outgoing_jb) returns_twice
+ ret i32 0
+}
+
declare void @bar(i32)
declare i32 @setjmp(ptr) #0
diff --git a/llvm/test/CodeGen/BPF/objdump_atomics.ll b/llvm/test/CodeGen/BPF/objdump_atomics.ll
index 3ec364f7368b..c4cb16b2c364 100644
--- a/llvm/test/CodeGen/BPF/objdump_atomics.ll
+++ b/llvm/test/CodeGen/BPF/objdump_atomics.ll
@@ -2,7 +2,7 @@
; CHECK-LABEL: test_load_add_32
; CHECK: c3 21
-; CHECK: r2 = atomic_fetch_add((u32 *)(r1 + 0), r2)
+; CHECK: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
define void @test_load_add_32(ptr %p, i32 zeroext %v) {
entry:
atomicrmw add ptr %p, i32 %v seq_cst
diff --git a/llvm/test/CodeGen/BPF/objdump_cond_op.ll b/llvm/test/CodeGen/BPF/objdump_cond_op.ll
index 3b2e6c1922fc..c64a0f2f2938 100644
--- a/llvm/test/CodeGen/BPF/objdump_cond_op.ll
+++ b/llvm/test/CodeGen/BPF/objdump_cond_op.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=bpfel -filetype=obj -o - %s | llvm-objdump --no-print-imm-hex -d - | FileCheck %s
+; RUN: llc -mtriple=bpfel -filetype=obj -o - %s | llvm-objdump --no-print-imm-hex --mcpu=v1 -d - | FileCheck %s
; Source Code:
; int gbl;
diff --git a/llvm/test/CodeGen/BPF/objdump_imm_hex.ll b/llvm/test/CodeGen/BPF/objdump_imm_hex.ll
index 1760bb6b6c52..38b93e8a39b5 100644
--- a/llvm/test/CodeGen/BPF/objdump_imm_hex.ll
+++ b/llvm/test/CodeGen/BPF/objdump_imm_hex.ll
@@ -53,8 +53,8 @@ define i32 @test(i64, i64) local_unnamed_addr #0 {
%14 = phi i32 [ %12, %10 ], [ %7, %4 ]
%15 = phi i32 [ 2, %10 ], [ 1, %4 ]
store i32 %14, ptr @gbl, align 4
-; CHECK-DEC: 63 12 00 00 00 00 00 00 *(u32 *)(r2 + 0) = r1
-; CHECK-HEX: 63 12 00 00 00 00 00 00 *(u32 *)(r2 + 0x0) = r1
+; CHECK-DEC: 63 12 00 00 00 00 00 00 *(u32 *)(r2 + 0) = w1
+; CHECK-HEX: 63 12 00 00 00 00 00 00 *(u32 *)(r2 + 0x0) = w1
br label %16
; <label>:16: ; preds = %13, %8
diff --git a/llvm/test/CodeGen/BPF/objdump_static_var.ll b/llvm/test/CodeGen/BPF/objdump_static_var.ll
index a91074ebddd4..b743d82fe5e3 100644
--- a/llvm/test/CodeGen/BPF/objdump_static_var.ll
+++ b/llvm/test/CodeGen/BPF/objdump_static_var.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=bpfel -filetype=obj -o - %s | llvm-objdump --no-print-imm-hex -d - | FileCheck --check-prefix=CHECK %s
-; RUN: llc -mtriple=bpfeb -filetype=obj -o - %s | llvm-objdump --no-print-imm-hex -d - | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=bpfel -filetype=obj -o - %s | llvm-objdump --no-print-imm-hex --mcpu=v1 -d - | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=bpfeb -filetype=obj -o - %s | llvm-objdump --no-print-imm-hex --mcpu=v1 -d - | FileCheck --check-prefix=CHECK %s
; src:
; static volatile long a = 2;
diff --git a/llvm/test/CodeGen/Generic/expand-vp-fp-intrinsics.ll b/llvm/test/CodeGen/Generic/expand-vp-fp-intrinsics.ll
deleted file mode 100644
index bc89ddea6b85..000000000000
--- a/llvm/test/CodeGen/Generic/expand-vp-fp-intrinsics.ll
+++ /dev/null
@@ -1,176 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -expandvp -S < %s | FileCheck %s
-
-define void @vp_fadd_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
-; CHECK-LABEL: define void @vp_fadd_v4f32(
-; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[RES1:%.*]] = fadd <4 x float> [[A0]], [[A1]]
-; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16
-; CHECK-NEXT: ret void
-;
- %res = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
- store <4 x float> %res, ptr %out
- ret void
-}
-declare <4 x float> @llvm.vp.fadd.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
-
-define void @vp_fsub_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
-; CHECK-LABEL: define void @vp_fsub_v4f32(
-; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[RES1:%.*]] = fsub <4 x float> [[A0]], [[A1]]
-; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16
-; CHECK-NEXT: ret void
-;
- %res = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
- store <4 x float> %res, ptr %out
- ret void
-}
-declare <4 x float> @llvm.vp.fsub.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
-
-define void @vp_fmul_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
-; CHECK-LABEL: define void @vp_fmul_v4f32(
-; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[RES1:%.*]] = fmul <4 x float> [[A0]], [[A1]]
-; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16
-; CHECK-NEXT: ret void
-;
- %res = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
- store <4 x float> %res, ptr %out
- ret void
-}
-declare <4 x float> @llvm.vp.fmul.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
-
-define void @vp_fdiv_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
-; CHECK-LABEL: define void @vp_fdiv_v4f32(
-; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[RES1:%.*]] = fdiv <4 x float> [[A0]], [[A1]]
-; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16
-; CHECK-NEXT: ret void
-;
- %res = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
- store <4 x float> %res, ptr %out
- ret void
-}
-declare <4 x float> @llvm.vp.fdiv.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
-
-define void @vp_frem_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
-; CHECK-LABEL: define void @vp_frem_v4f32(
-; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[RES1:%.*]] = frem <4 x float> [[A0]], [[A1]]
-; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16
-; CHECK-NEXT: ret void
-;
- %res = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
- store <4 x float> %res, ptr %out
- ret void
-}
-declare <4 x float> @llvm.vp.frem.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
-
-define void @vp_fabs_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
-; CHECK-LABEL: define void @vp_fabs_v4f32(
-; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A0]])
-; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16
-; CHECK-NEXT: ret void
-;
- %res = call <4 x float> @llvm.vp.fabs.v4f32(<4 x float> %a0, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
- store <4 x float> %res, ptr %out
- ret void
-}
-declare <4 x float> @llvm.vp.fabs.v4f32(<4 x float>, <4 x i1>, i32)
-
-define void @vp_sqrt_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
-; CHECK-LABEL: define void @vp_sqrt_v4f32(
-; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[A0]])
-; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16
-; CHECK-NEXT: ret void
-;
- %res = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %a0, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
- store <4 x float> %res, ptr %out
- ret void
-}
-declare <4 x float> @llvm.vp.sqrt.v4f32(<4 x float>, <4 x i1>, i32)
-
-define void @vp_fneg_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind {
-; CHECK-LABEL: define void @vp_fneg_v4f32(
-; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[RES1:%.*]] = fneg <4 x float> [[A0]]
-; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16
-; CHECK-NEXT: ret void
-;
- %res = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a0, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp)
- store <4 x float> %res, ptr %out
- ret void
-}
-declare <4 x float> @llvm.vp.fneg.v4f32(<4 x float>, <4 x i1>, i32)
-
-define void @vp_fma_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i4 %a5) nounwind {
-; CHECK-LABEL: define void @vp_fma_v4f32(
-; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i4 [[A5:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A1]])
-; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16
-; CHECK-NEXT: ret void
-;
- %res = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 4)
- store <4 x float> %res, ptr %out
- ret void
-}
-declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)
-
-define void @vp_fmuladd_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i4 %a5) nounwind {
-; CHECK-LABEL: define void @vp_fmuladd_v4f32(
-; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i4 [[A5:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A1]])
-; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16
-; CHECK-NEXT: ret void
-;
- %res = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 4)
- store <4 x float> %res, ptr %out
- ret void
-}
-declare <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)
-
-declare <4 x float> @llvm.vp.maxnum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
-define <4 x float> @vfmax_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: define <4 x float> @vfmax_vv_v4f32(
-; CHECK-SAME: <4 x float> [[VA:%.*]], <4 x float> [[VB:%.*]], <4 x i1> [[M:%.*]], i32 zeroext [[EVL:%.*]]) {
-; CHECK-NEXT: [[V1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VA]], <4 x float> [[VB]])
-; CHECK-NEXT: ret <4 x float> [[V1]]
-;
- %v = call <4 x float> @llvm.vp.maxnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
- ret <4 x float> %v
-}
-
-declare <8 x float> @llvm.vp.maxnum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
-define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: define <8 x float> @vfmax_vv_v8f32(
-; CHECK-SAME: <8 x float> [[VA:%.*]], <8 x float> [[VB:%.*]], <8 x i1> [[M:%.*]], i32 zeroext [[EVL:%.*]]) {
-; CHECK-NEXT: [[V1:%.*]] = call <8 x float> @llvm.maxnum.v8f32(<8 x float> [[VA]], <8 x float> [[VB]])
-; CHECK-NEXT: ret <8 x float> [[V1]]
-;
- %v = call <8 x float> @llvm.vp.maxnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
- ret <8 x float> %v
-}
-
-declare <4 x float> @llvm.vp.minnum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
-define <4 x float> @vfmin_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: define <4 x float> @vfmin_vv_v4f32(
-; CHECK-SAME: <4 x float> [[VA:%.*]], <4 x float> [[VB:%.*]], <4 x i1> [[M:%.*]], i32 zeroext [[EVL:%.*]]) {
-; CHECK-NEXT: [[V1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VA]], <4 x float> [[VB]])
-; CHECK-NEXT: ret <4 x float> [[V1]]
-;
- %v = call <4 x float> @llvm.vp.minnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
- ret <4 x float> %v
-}
-
-declare <8 x float> @llvm.vp.minnum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
-define <8 x float> @vfmin_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: define <8 x float> @vfmin_vv_v8f32(
-; CHECK-SAME: <8 x float> [[VA:%.*]], <8 x float> [[VB:%.*]], <8 x i1> [[M:%.*]], i32 zeroext [[EVL:%.*]]) {
-; CHECK-NEXT: [[V1:%.*]] = call <8 x float> @llvm.minnum.v8f32(<8 x float> [[VA]], <8 x float> [[VB]])
-; CHECK-NEXT: ret <8 x float> [[V1]]
-;
- %v = call <8 x float> @llvm.vp.minnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
- ret <8 x float> %v
-}
diff --git a/llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll b/llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll
deleted file mode 100644
index 2e2dba50c845..000000000000
--- a/llvm/test/CodeGen/Generic/expand-vp-gather-scatter.ll
+++ /dev/null
@@ -1,118 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --expandvp -S < %s | FileCheck %s
-
-; Fixed vectors
-define <4 x i32> @vpgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpgather_v4i32(
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EVL:%.*]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i1> [[TMP1]], [[M:%.*]]
-; CHECK-NEXT: [[V1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[PTRS:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> poison)
-; CHECK-NEXT: ret <4 x i32> [[V1]]
-;
- %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
- ret <4 x i32> %v
-}
-
-define <2 x i64> @vpgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpgather_v2i64(
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[M:%.*]]
-; CHECK-NEXT: [[V1:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[PTRS:%.*]], i32 8, <2 x i1> [[TMP2]], <2 x i64> poison)
-; CHECK-NEXT: ret <2 x i64> [[V1]]
-;
- %v = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
- ret <2 x i64> %v
-}
-
-define void @vpscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpscatter_v4i32(
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[EVL:%.*]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i1> [[TMP1]], [[M:%.*]]
-; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[VAL:%.*]], <4 x ptr> [[PTRS:%.*]], i32 4, <4 x i1> [[TMP2]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
- ret void
-}
-
-define void @vpscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpscatter_v2i64(
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[M:%.*]]
-; CHECK-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[VAL:%.*]], <2 x ptr> [[PTRS:%.*]], i32 8, <2 x i1> [[TMP2]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
- ret void
-}
-
-; Scalable vectors
-define <vscale x 2 x i32> @vpgather_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpgather_nxv2i32(
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 0, i32 [[EVL:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i1> [[TMP1]], [[M:%.*]]
-; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 2
-; CHECK-NEXT: [[V1:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[PTRS:%.*]], i32 4, <vscale x 2 x i1> [[TMP2]], <vscale x 2 x i32> poison)
-; CHECK-NEXT: ret <vscale x 2 x i32> [[V1]]
-;
- %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
- ret <vscale x 2 x i32> %v
-}
-
-define <vscale x 1 x i64> @vpgather_nxv1i64(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpgather_nxv1i64(
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], [[M:%.*]]
-; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1
-; CHECK-NEXT: [[V1:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS:%.*]], i32 8, <vscale x 1 x i1> [[TMP2]], <vscale x 1 x i64> poison)
-; CHECK-NEXT: ret <vscale x 1 x i64> [[V1]]
-;
- %v = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
- ret <vscale x 1 x i64> %v
-}
-
-define void @vpscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpscatter_nxv2i32(
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 0, i32 [[EVL:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i1> [[TMP1]], [[M:%.*]]
-; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 2
-; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> [[VAL:%.*]], <vscale x 2 x ptr> [[PTRS:%.*]], i32 4, <vscale x 2 x i1> [[TMP2]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
- ret void
-}
-
-define void @vpscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpscatter_nxv1i64(
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], [[M:%.*]]
-; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1
-; CHECK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[VAL:%.*]], <vscale x 1 x ptr> [[PTRS:%.*]], i32 8, <vscale x 1 x i1> [[TMP2]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
- ret void
-}
-
-declare <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr>, <4 x i1>, i32)
-declare <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr>, <2 x i1>, i32)
-declare void @llvm.vp.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, <4 x i1>, i32)
-declare void @llvm.vp.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, <2 x i1>, i32)
-
-declare <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)
-declare <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)
-declare void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)
-declare void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/Generic/expand-vp-load-store.ll b/llvm/test/CodeGen/Generic/expand-vp-load-store.ll
deleted file mode 100644
index 5c6f1e858ce7..000000000000
--- a/llvm/test/CodeGen/Generic/expand-vp-load-store.ll
+++ /dev/null
@@ -1,205 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --expandvp -S < %s | FileCheck %s
-; RUN: opt --expandvp --expandvp-override-evl-transform=Legal --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s
-
-; Fixed vectors
-define <2 x i64> @vpload_v2i64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpload_v2i64(
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[M:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[PTR:%.*]], i32 1, <2 x i1> [[TMP2]], <2 x i64> poison)
-; CHECK-NEXT: ret <2 x i64> [[TMP3]]
-;
- %load = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
- ret <2 x i64> %load
-}
-
-define <2 x i64> @vpload_v2i64_vlmax(ptr %ptr, <2 x i1> %m) {
-; CHECK-LABEL: @vpload_v2i64_vlmax(
-; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[PTR:%.*]], i32 1, <2 x i1> [[M:%.*]], <2 x i64> poison)
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
-;
- %load = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %ptr, <2 x i1> %m, i32 2)
- ret <2 x i64> %load
-}
-
-define <2 x i64> @vpload_v2i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
-; CHECK-LABEL: @vpload_v2i64_allones_mask(
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], <i1 true, i1 true>
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[PTR:%.*]], i32 1, <2 x i1> [[TMP2]], <2 x i64> poison)
-; CHECK-NEXT: ret <2 x i64> [[TMP3]]
-;
- %load = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %ptr, <2 x i1> <i1 1, i1 1>, i32 %evl)
- ret <2 x i64> %load
-}
-
-define <2 x i64> @vpload_v2i64_allones_mask_vlmax(ptr %ptr) {
-; CHECK-LABEL: @vpload_v2i64_allones_mask_vlmax(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 16
-; CHECK-NEXT: ret <2 x i64> [[TMP1]]
-;
- %load = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %ptr, <2 x i1> <i1 1, i1 1>, i32 2)
- ret <2 x i64> %load
-}
-
-define void @vpstore_v2i64(<2 x i64> %val, ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpstore_v2i64(
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[M:%.*]]
-; CHECK-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[VAL:%.*]], ptr [[PTR:%.*]], i32 1, <2 x i1> [[TMP2]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.vp.store.v2i64.p0(<2 x i64> %val, ptr %ptr, <2 x i1> %m, i32 %evl)
- ret void
-}
-
-define void @vpstore_v2i64_vlmax(<2 x i64> %val, ptr %ptr, <2 x i1> %m) {
-; CHECK-LABEL: @vpstore_v2i64_vlmax(
-; CHECK-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[VAL:%.*]], ptr [[PTR:%.*]], i32 1, <2 x i1> [[M:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.vp.store.v2i64.p0(<2 x i64> %val, ptr %ptr, <2 x i1> %m, i32 2)
- ret void
-}
-
-define void @vpstore_v2i64_allones_mask(<2 x i64> %val, ptr %ptr, i32 zeroext %evl) {
-; CHECK-LABEL: @vpstore_v2i64_allones_mask(
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[EVL:%.*]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], <i1 true, i1 true>
-; CHECK-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[VAL:%.*]], ptr [[PTR:%.*]], i32 1, <2 x i1> [[TMP2]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.vp.store.v2i64.p0(<2 x i64> %val, ptr %ptr, <2 x i1> <i1 1, i1 1>, i32 %evl)
- ret void
-}
-
-define void @vpstore_v2i64_allones_mask_vlmax(<2 x i64> %val, ptr %ptr) {
-; CHECK-LABEL: @vpstore_v2i64_allones_mask_vlmax(
-; CHECK-NEXT: store <2 x i64> [[VAL:%.*]], ptr [[PTR:%.*]], align 16
-; CHECK-NEXT: ret void
-;
- call void @llvm.vp.store.v2i64.p0(<2 x i64> %val, ptr %ptr, <2 x i1> <i1 1, i1 1>, i32 2)
- ret void
-}
-
-; Scalable vectors
-define <vscale x 1 x i64> @vpload_nxv1i64(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpload_nxv1i64(
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], [[M:%.*]]
-; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[TMP2]], <vscale x 1 x i64> poison)
-; CHECK-NEXT: ret <vscale x 1 x i64> [[TMP3]]
-;
- %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
- ret <vscale x 1 x i64> %load
-}
-
-define <vscale x 1 x i64> @vpload_nxv1i64_vscale(ptr %ptr, <vscale x 1 x i1> %m) {
-; CHECK-LABEL: @vpload_nxv1i64_vscale(
-; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[VLMAX:%.*]] = mul nuw i32 [[VSCALE]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[M:%.*]], <vscale x 1 x i64> poison)
-; CHECK-NEXT: ret <vscale x 1 x i64> [[TMP1]]
-;
- %vscale = call i32 @llvm.vscale.i32()
- %vlmax = mul nuw i32 %vscale, 1
- %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %vlmax)
- ret <vscale x 1 x i64> %load
-}
-
-define <vscale x 1 x i64> @vpload_nxv1i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
-; CHECK-LABEL: @vpload_nxv1i64_allones_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[TMP2]], <vscale x 1 x i64> poison)
-; CHECK-NEXT: ret <vscale x 1 x i64> [[TMP3]]
-;
- %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
- ret <vscale x 1 x i64> %load
-}
-
-define <vscale x 1 x i64> @vpload_nxv1i64_allones_mask_vscale(ptr %ptr) {
-; CHECK-LABEL: @vpload_nxv1i64_allones_mask_vscale(
-; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[VLMAX:%.*]] = mul nuw i32 [[VSCALE]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 1 x i64>, ptr [[PTR:%.*]], align 8
-; CHECK-NEXT: ret <vscale x 1 x i64> [[TMP1]]
-;
- %vscale = call i32 @llvm.vscale.i32()
- %vlmax = mul nuw i32 %vscale, 1
- %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %vlmax)
- ret <vscale x 1 x i64> %load
-}
-
-define void @vpstore_nxv1i64(<vscale x 1 x i64> %val, ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpstore_nxv1i64(
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], [[M:%.*]]
-; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1
-; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[VAL:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[TMP2]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
- ret void
-}
-
-define void @vpstore_nxv1i64_vscale(<vscale x 1 x i64> %val, ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: @vpstore_nxv1i64_vscale(
-; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[VLMAX:%.*]] = mul nuw i32 [[VSCALE]], 1
-; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[VAL:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[M:%.*]])
-; CHECK-NEXT: ret void
-;
- %vscale = call i32 @llvm.vscale.i32()
- %vlmax = mul nuw i32 %vscale, 1
- call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %ptr, <vscale x 1 x i1> %m, i32 %vlmax)
- ret void
-}
-
-define void @vpstore_nxv1i64_allones_mask(<vscale x 1 x i64> %val, ptr %ptr, i32 zeroext %evl) {
-; CHECK-LABEL: @vpstore_nxv1i64_allones_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1
-; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[VAL:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[TMP2]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
- ret void
-}
-
-define void @vpstore_nxv1i64_allones_mask_vscale(<vscale x 1 x i64> %val, ptr %ptr) {
-; CHECK-LABEL: @vpstore_nxv1i64_allones_mask_vscale(
-; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[VLMAX:%.*]] = mul nuw i32 [[VSCALE]], 1
-; CHECK-NEXT: store <vscale x 1 x i64> [[VAL:%.*]], ptr [[PTR:%.*]], align 8
-; CHECK-NEXT: ret void
-;
- %vscale = call i32 @llvm.vscale.i32()
- %vlmax = mul nuw i32 %vscale, 1
- call void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64> %val, ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %vlmax)
- ret void
-}
-
-declare i32 @llvm.vscale.i32()
-
-declare <2 x i64> @llvm.vp.load.v2i64.p0(ptr, <2 x i1>, i32)
-declare void @llvm.vp.store.v2i64.p0(<2 x i64>, ptr, <2 x i1>, i32)
-
-declare <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0(ptr, <vscale x 1 x i1>, i32)
-declare void @llvm.vp.store.nxv1i64.p0(<vscale x 1 x i64>, ptr, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/Generic/expand-vp.ll b/llvm/test/CodeGen/Generic/expand-vp.ll
deleted file mode 100644
index 4fee9a533b94..000000000000
--- a/llvm/test/CodeGen/Generic/expand-vp.ll
+++ /dev/null
@@ -1,567 +0,0 @@
-; Partial expansion cases (still VP with parameter expansions).
-; RUN: opt --expandvp --expandvp-override-evl-transform=Legal --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=LEGAL_LEGAL
-; RUN: opt --expandvp --expandvp-override-evl-transform=Discard --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=DISCARD_LEGAL
-; RUN: opt --expandvp --expandvp-override-evl-transform=Convert --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=CONVERT_LEGAL
-; Full expansion cases (all expanded to non-VP).
-; RUN: opt --expandvp --expandvp-override-evl-transform=Discard --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
-; RUN: opt --expandvp -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
-; RUN: opt --expandvp --expandvp-override-evl-transform=Legal --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
-; RUN: opt --expandvp --expandvp-override-evl-transform=Convert --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT
-
-
-; Fixed-width vectors
-; Integer arith
-declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.mul.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.srem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.urem.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.smax.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.smin.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.umax.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.umin.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-; Bit arith
-declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-declare <8 x i32> @llvm.vp.shl.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
-; Reductions
-declare i32 @llvm.vp.reduce.add.v4i32(i32, <4 x i32>, <4 x i1>, i32)
-declare i32 @llvm.vp.reduce.mul.v4i32(i32, <4 x i32>, <4 x i1>, i32)
-declare i32 @llvm.vp.reduce.and.v4i32(i32, <4 x i32>, <4 x i1>, i32)
-declare i32 @llvm.vp.reduce.or.v4i32(i32, <4 x i32>, <4 x i1>, i32)
-declare i32 @llvm.vp.reduce.xor.v4i32(i32, <4 x i32>, <4 x i1>, i32)
-declare i32 @llvm.vp.reduce.smin.v4i32(i32, <4 x i32>, <4 x i1>, i32)
-declare i32 @llvm.vp.reduce.smax.v4i32(i32, <4 x i32>, <4 x i1>, i32)
-declare i32 @llvm.vp.reduce.umin.v4i32(i32, <4 x i32>, <4 x i1>, i32)
-declare i32 @llvm.vp.reduce.umax.v4i32(i32, <4 x i32>, <4 x i1>, i32)
-declare float @llvm.vp.reduce.fmin.v4f32(float, <4 x float>, <4 x i1>, i32)
-declare float @llvm.vp.reduce.fmax.v4f32(float, <4 x float>, <4 x i1>, i32)
-declare float @llvm.vp.reduce.fminimum.v4f32(float, <4 x float>, <4 x i1>, i32)
-declare float @llvm.vp.reduce.fmaximum.v4f32(float, <4 x float>, <4 x i1>, i32)
-declare float @llvm.vp.reduce.fadd.v4f32(float, <4 x float>, <4 x i1>, i32)
-declare float @llvm.vp.reduce.fmul.v4f32(float, <4 x float>, <4 x i1>, i32)
-; Comparisons
-declare <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32>, <8 x i32>, metadata, <8 x i1>, i32)
-declare <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float>, <8 x float>, metadata, <8 x i1>, i32)
-
-; Fixed vector test function.
-define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
- %r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %r7 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %r8 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %r9 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %rA = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %rB = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %rC = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %rD = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %rE = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %rF = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- %r10 = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
- ret void
-}
-
-; Scalable-width vectors
-; Integer arith
-declare <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.srem.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.udiv.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.urem.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.smax.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.smin.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.umax.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.umin.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-; Bit arith
-declare <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-declare <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
-
-; Scalable vector test function.
-define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
- %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %r1 = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %r2 = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %r4 = call <vscale x 4 x i32> @llvm.vp.srem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %r5 = call <vscale x 4 x i32> @llvm.vp.udiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %r6 = call <vscale x 4 x i32> @llvm.vp.urem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %r7 = call <vscale x 4 x i32> @llvm.vp.smax.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %r8 = call <vscale x 4 x i32> @llvm.vp.smin.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %r9 = call <vscale x 4 x i32> @llvm.vp.umax.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %rA = call <vscale x 4 x i32> @llvm.vp.umin.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %rB = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %rC = call <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %rD = call <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %rE = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %rF = call <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- %r10 = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
- ret void
-}
-
-; Fixed vector reduce test function.
-define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {
- %r0 = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
- %r1 = call i32 @llvm.vp.reduce.mul.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
- %r2 = call i32 @llvm.vp.reduce.and.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
- %r3 = call i32 @llvm.vp.reduce.or.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
- %r4 = call i32 @llvm.vp.reduce.xor.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
- %r5 = call i32 @llvm.vp.reduce.smin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
- %r6 = call i32 @llvm.vp.reduce.smax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
- %r7 = call i32 @llvm.vp.reduce.umin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
- %r8 = call i32 @llvm.vp.reduce.umax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
- ret void
-}
-
-define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) {
- %r0 = call float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r1 = call nnan float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r2 = call nnan ninf float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r6 = call float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r7 = call nnan float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r8 = call nnan ninf float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r9 = call float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r10 = call nnan float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r11 = call nnan ninf float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r12 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r13 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r14 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- %r15 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
- ret void
-}
-
-define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
- %r0 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> %m, i32 %n)
- %r1 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"slt", <8 x i1> %m, i32 %n)
- %r2 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %m, i32 %n)
- %r3 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"ult", <8 x i1> %m, i32 %n)
- ret void
-}
-
-; All VP intrinsics have to be lowered into non-VP ops
-; Convert %evl into %mask for non-speculatable VP intrinsics and emit the
-; instruction+select idiom with a non-VP SIMD instruction.
-;
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.add}}
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.sub}}
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.mul}}
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.sdiv}}
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.srem}}
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.udiv}}
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.urem}}
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.and}}
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.or}}
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.xor}}
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.ashr}}
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.lshr}}
-; ALL-CONVERT-NOT: {{call.* @llvm.vp.shl}}
-;
-; ALL-CONVERT: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
-; ALL-CONVERT-NEXT: %{{.*}} = add <8 x i32> %i0, %i1
-; ALL-CONVERT-NEXT: %{{.*}} = sub <8 x i32> %i0, %i1
-; ALL-CONVERT-NEXT: %{{.*}} = mul <8 x i32> %i0, %i1
-; ALL-CONVERT-NEXT: [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i64 0
-; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
-; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
-; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
-; ALL-CONVERT-NEXT: [[SELONE:%.+]] = select <8 x i1> [[NEWM]], <8 x i32> %i1, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; ALL-CONVERT-NEXT: %{{.+}} = sdiv <8 x i32> %i0, [[SELONE]]
-; ALL-CONVERT-NOT: %{{.+}} = srem <8 x i32> %i0, %i1
-; ALL-CONVERT: %{{.+}} = srem <8 x i32> %i0, %{{.+}}
-; ALL-CONVERT-NOT: %{{.+}} = udiv <8 x i32> %i0, %i1
-; ALL-CONVERT: %{{.+}} = udiv <8 x i32> %i0, %{{.+}}
-; ALL-CONVERT-NOT: %{{.+}} = urem <8 x i32> %i0, %i1
-; ALL-CONVERT: %{{.+}} = urem <8 x i32> %i0, %{{.+}}
-; ALL-CONVERT-NEXT: %{{.+}} = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %i0, <8 x i32> %i1)
-; ALL-CONVERT-NEXT: %{{.+}} = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %i0, <8 x i32> %i1)
-; ALL-CONVERT-NEXT: %{{.+}} = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %i0, <8 x i32> %i1)
-; ALL-CONVERT-NEXT: %{{.+}} = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %i0, <8 x i32> %i1)
-; ALL-CONVERT-NEXT: %{{.+}} = and <8 x i32> %i0, %i1
-; ALL-CONVERT-NEXT: %{{.+}} = or <8 x i32> %i0, %i1
-; ALL-CONVERT-NEXT: %{{.+}} = xor <8 x i32> %i0, %i1
-; ALL-CONVERT-NEXT: %{{.+}} = ashr <8 x i32> %i0, %i1
-; ALL-CONVERT-NEXT: %{{.+}} = lshr <8 x i32> %i0, %i1
-; ALL-CONVERT-NEXT: %{{.+}} = shl <8 x i32> %i0, %i1
-; ALL-CONVERT: ret void
-
-
-; Check that reductions use the correct neutral element for masked-off elements
-; ALL-CONVERT: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {
-; ALL-CONVERT-NEXT: [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i64 0
-; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer
-; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
-; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m
-; ALL-CONVERT-NEXT: [[ADD:%.+]] = select <4 x i1> [[NEWM]], <4 x i32> %vi, <4 x i32> zeroinitializer
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[ADD]])
-; ALL-CONVERT-NEXT: %{{.+}} = add i32 [[RED]], %start
-; ALL-CONVERT: [[MUL:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[MUL]])
-; ALL-CONVERT-NEXT: %{{.+}} = mul i32 [[RED]], %start
-; ALL-CONVERT: [[AND:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[AND]])
-; ALL-CONVERT-NEXT: %{{.+}} = and i32 [[RED]], %start
-; ALL-CONVERT: [[OR:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[OR]])
-; ALL-CONVERT-NEXT: %{{.+}} = or i32 [[RED]], %start
-; ALL-CONVERT: [[XOR:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[XOR]])
-; ALL-CONVERT-NEXT: %{{.+}} = xor i32 [[RED]], %start
-; ALL-CONVERT: [[SMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SMIN]])
-; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.smin.i32(i32 [[RED]], i32 %start)
-; ALL-CONVERT: [[SMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SMAX]])
-; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.smax.i32(i32 [[RED]], i32 %start)
-; ALL-CONVERT: [[UMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[UMIN]])
-; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.umin.i32(i32 [[RED]], i32 %start)
-; ALL-CONVERT: [[UMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[UMAX]])
-; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.umax.i32(i32 [[RED]], i32 %start)
-; ALL-CONVERT-NEXT: ret void
-
-; Check that reductions use the correct neutral element for masked-off elements
-; ALL-CONVERT: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) {
-; ALL-CONVERT-NEXT: [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i64 0
-; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer
-; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
-; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m
-; ALL-CONVERT-NEXT: [[FMIN:%.+]] = select <4 x i1> [[NEWM]], <4 x float> %vf, <4 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN]])
-; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.minnum.f32(float [[RED]], float %f)
-; ALL-CONVERT: [[FMIN_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN_NNAN]])
-; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.minnum.f32(float [[RED]], float %f)
-; ALL-CONVERT: [[FMIN_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN_NNAN_NINF]])
-; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.minnum.f32(float [[RED]], float %f)
-; ALL-CONVERT: [[FMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000, float 0xFFF8000000000000, float 0xFFF8000000000000>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX]])
-; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.maxnum.f32(float [[RED]], float %f)
-; ALL-CONVERT: [[FMAX_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN]])
-; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.maxnum.f32(float [[RED]], float %f)
-; ALL-CONVERT: [[FMAX_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN_NINF]])
-; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.maxnum.f32(float [[RED]], float %f)
-
-; ALL-CONVERT: [[FMINIMUM:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[FMINIMUM]])
-; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.minimum.f32(float [[RED]], float %f)
-; ALL-CONVERT: [[FMINIMUM_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[FMINIMUM_NNAN]])
-; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.minimum.f32(float [[RED]], float %f)
-; ALL-CONVERT: [[FMINIMUM_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fminimum.v4f32(<4 x float> [[FMINIMUM_NNAN_NINF]])
-; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.minimum.f32(float [[RED]], float %f)
-
-; ALL-CONVERT: [[FMAXIMUM:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[FMAXIMUM]])
-; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.maximum.f32(float [[RED]], float %f)
-; ALL-CONVERT: [[FMAXIMUM_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[FMAXIMUM_NNAN]])
-; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.maximum.f32(float [[RED]], float %f)
-; ALL-CONVERT: [[FMAXIMUM_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000>
-; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> [[FMAXIMUM_NNAN_NINF]])
-; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.maximum.f32(float [[RED]], float %f)
-
-; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
-; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]])
-; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
-; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]])
-; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]])
-; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]])
-; ALL-CONVERT-NEXT: ret void
-
-; Check that comparisons use the correct condition codes
-; ALL-CONVERT: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
-; ALL-CONVERT-NEXT: %{{.+}} = icmp eq <8 x i32> %i0, %i1
-; ALL-CONVERT-NEXT: %{{.+}} = icmp slt <8 x i32> %i0, %i1
-; ALL-CONVERT-NEXT: %{{.+}} = fcmp oeq <8 x float> %f0, %f1
-; ALL-CONVERT-NEXT: %{{.+}} = fcmp ult <8 x float> %f0, %f1
-; ALL-CONVERT-NEXT: ret void
-
-
-; All legal - don't transform anything.
-
-; LEGAL_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
-; LEGAL_LEGAL-NEXT: %r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r7 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r8 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r9 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %rA = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %rB = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %rC = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %rD = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %rE = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %rF = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r10 = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: ret void
-
-; LEGAL_LEGAL:define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
-; LEGAL_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r1 = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r2 = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r4 = call <vscale x 4 x i32> @llvm.vp.srem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r5 = call <vscale x 4 x i32> @llvm.vp.udiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r6 = call <vscale x 4 x i32> @llvm.vp.urem.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r7 = call <vscale x 4 x i32> @llvm.vp.smax.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r8 = call <vscale x 4 x i32> @llvm.vp.smin.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r9 = call <vscale x 4 x i32> @llvm.vp.umax.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %rA = call <vscale x 4 x i32> @llvm.vp.umin.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %rB = call <vscale x 4 x i32> @llvm.vp.and.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %rC = call <vscale x 4 x i32> @llvm.vp.or.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %rD = call <vscale x 4 x i32> @llvm.vp.xor.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %rE = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %rF = call <vscale x 4 x i32> @llvm.vp.lshr.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r10 = call <vscale x 4 x i32> @llvm.vp.shl.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: ret void
-
-; LEGAL_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {
-; LEGAL_LEGAL-NEXT: %r0 = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r1 = call i32 @llvm.vp.reduce.mul.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r2 = call i32 @llvm.vp.reduce.and.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r3 = call i32 @llvm.vp.reduce.or.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r4 = call i32 @llvm.vp.reduce.xor.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r5 = call i32 @llvm.vp.reduce.smin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r6 = call i32 @llvm.vp.reduce.smax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r7 = call i32 @llvm.vp.reduce.umin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r8 = call i32 @llvm.vp.reduce.umax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: ret void
-
-; LEGAL_LEGAL: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) {
-; LEGAL_LEGAL-NEXT: %r0 = call float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r1 = call nnan float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r2 = call nnan ninf float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r6 = call float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r7 = call nnan float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r8 = call nnan ninf float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r9 = call float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r10 = call nnan float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r11 = call nnan ninf float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r12 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r13 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r14 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r15 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: ret void
-
-; LEGAL_LEGAL: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
-; LEGAL_LEGAL-NEXT: %r0 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r1 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"slt", <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r2 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: %r3 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"ult", <8 x i1> %m, i32 %n)
-; LEGAL_LEGAL-NEXT: ret void
-
-; Drop %evl where possible else fold %evl into %mask (%evl Discard, %mask Legal)
-;
-; There is no caching yet in the ExpandVectorPredication pass and the %evl
-; expansion code is emitted for every non-speculatable intrinsic again. Hence,
-; only check that..
-; (1) The %evl folding code and %mask are correct for the first
-; non-speculatable VP intrinsic.
-; (2) All other non-speculatable VP intrinsics have a modified mask argument.
-; (3) All speculatable VP intrinsics keep their %mask and %evl.
-; (4) All VP intrinsics have an ineffective %evl parameter.
-
-; DISCARD_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
-; DISCARD_LEGAL-NEXT: %r0 = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: %r1 = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: %r2 = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: [[NSPLATINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i64 0
-; DISCARD_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NSPLATINS]], <8 x i32> poison, <8 x i32> zeroinitializer
-; DISCARD_LEGAL-NEXT: [[EVLMASK:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
-; DISCARD_LEGAL-NEXT: [[NEWMASK:%.+]] = and <8 x i1> [[EVLMASK]], %m
-; DISCARD_LEGAL-NEXT: %r3 = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> [[NEWMASK]], i32 8)
-; DISCARD_LEGAL-NOT: %r4 = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NOT: %r5 = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NOT: %r6 = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL: %r7 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL: %r8 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL: %r9 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL: %rA = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: %rB = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: %rC = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: %rD = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: %rE = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: %rF = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: %r10 = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: ret void
-
-; TODO compute vscale only once and use caching.
-; In the meantime, we only check for the correct vscale code for the first VP
-; intrinsic and skip over it for all others.
-
-; DISCARD_LEGAL: define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
-; DISCARD_LEGAL-NEXT: %vscale = call i32 @llvm.vscale.i32()
-; DISCARD_LEGAL-NEXT: %scalable_size = mul nuw i32 %vscale, 4
-; DISCARD_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %scalable_size)
-; DISCARD_LEGAL: %r1 = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %scalable_size{{.*}})
-; DISCARD_LEGAL: %r2 = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> %m, i32 %scalable_size{{.*}})
-; DISCARD_LEGAL: [[EVLM:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
-; DISCARD_LEGAL: [[NEWM:%.+]] = and <vscale x 4 x i1> [[EVLM]], %m
-; DISCARD_LEGAL: %r3 = call <vscale x 4 x i32> @llvm.vp.sdiv.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> [[NEWM]], i32 %scalable_size{{.*}})
-; DISCARD_LEGAL-NOT: %{{.+}} = call <vscale x 4 x i32> @llvm.vp.{{.*}}, i32 %n)
-; DISCARD_LEGAL: ret void
-
-; DISCARD_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {
-; DISCARD_LEGAL-NEXT: [[NSPLATINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i64 0
-; DISCARD_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NSPLATINS]], <4 x i32> poison, <4 x i32> zeroinitializer
-; DISCARD_LEGAL-NEXT: [[EVLMASK:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
-; DISCARD_LEGAL-NEXT: [[NEWMASK:%.+]] = and <4 x i1> [[EVLMASK]], %m
-; DISCARD_LEGAL-NEXT: %r0 = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> [[NEWMASK]], i32 4)
-; DISCARD_LEGAL-NOT: %r1 = call i32 @llvm.vp.reduce.mul.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r2 = call i32 @llvm.vp.reduce.and.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r3 = call i32 @llvm.vp.reduce.or.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r4 = call i32 @llvm.vp.reduce.xor.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r5 = call i32 @llvm.vp.reduce.smin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r6 = call i32 @llvm.vp.reduce.smax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r7 = call i32 @llvm.vp.reduce.umin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r8 = call i32 @llvm.vp.reduce.umax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL: ret void
-
-; DISCARD_LEGAL: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) {
-; DISCARD_LEGAL-NEXT: [[NSPLATINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i64 0
-; DISCARD_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NSPLATINS]], <4 x i32> poison, <4 x i32> zeroinitializer
-; DISCARD_LEGAL-NEXT: [[EVLMASK:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
-; DISCARD_LEGAL-NEXT: [[NEWMASK:%.+]] = and <4 x i1> [[EVLMASK]], %m
-; DISCARD_LEGAL-NEXT: %r0 = call float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> [[NEWMASK]], i32 4)
-; DISCARD_LEGAL-NOT: %r1 = call nnan float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r2 = call nnan ninf float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NOT: %r6 = call float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; DISCARD_LEGAL-NOT: %r7 = call nnan float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; DISCARD_LEGAL-NOT: %r8 = call nnan ninf float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; DISCARD_LEGAL-NOT: %r9 = call float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; DISCARD_LEGAL-NOT: %r10 = call nnan float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; DISCARD_LEGAL-NOT: %r11 = call nnan ninf float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; DISCARD_LEGAL-NOT: %r12 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; DISCARD_LEGAL-NOT: %r13 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; DISCARD_LEGAL-NOT: %r14 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; DISCARD_LEGAL-NOT: %r15 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n)
-; DISCARD_LEGAL: ret void
-
-; DISCARD_LEGAL: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
-; DISCARD_LEGAL-NEXT: %r0 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: %r1 = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"slt", <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: %r2 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %m, i32 8)
-; DISCARD_LEGAL-NEXT: %r3 = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"ult", <8 x i1> %m, i32 8)
-
-; Convert %evl into %mask everywhere (%evl Convert, %mask Legal)
-;
-; For the same reasons as in the (%evl Discard, %mask Legal) case only check that..
-; (1) The %evl folding code and %mask are correct for the first VP intrinsic.
-; (2) All other VP intrinsics have a modified mask argument.
-; (3) All VP intrinsics have an ineffective %evl parameter.
-;
-; CONVERT_LEGAL: define void @test_vp_int_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x i32> %i2, <8 x i32> %f3, <8 x i1> %m, i32 %n) {
-; CONVERT_LEGAL-NEXT: [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i64 0
-; CONVERT_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
-; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
-; CONVERT_LEGAL-NEXT: %{{.+}} = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> [[NEWM]], i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %i0, <8 x i32> %i1, <8 x i1> %m, i32 8)
-; CONVERT_LEGAL: ret void
-
-; Similar to %evl discard, %mask legal but make sure the first VP intrinsic has a legal expansion
-; CONVERT_LEGAL: define void @test_vp_int_vscale(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i32> %i2, <vscale x 4 x i32> %f3, <vscale x 4 x i1> %m, i32 %n) {
-; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
-; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <vscale x 4 x i1> [[EVLM]], %m
-; CONVERT_LEGAL-NEXT: %vscale = call i32 @llvm.vscale.i32()
-; CONVERT_LEGAL-NEXT: %scalable_size = mul nuw i32 %vscale, 4
-; CONVERT_LEGAL-NEXT: %r0 = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %i0, <vscale x 4 x i32> %i1, <vscale x 4 x i1> [[NEWM]], i32 %scalable_size)
-; CONVERT_LEGAL-NOT: %{{.*}} = call <vscale x 4 x i32> @llvm.vp.{{.*}}, i32 %n)
-; CONVERT_LEGAL: ret void
-
-; CONVERT_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {
-; CONVERT_LEGAL-NEXT: [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i64 0
-; CONVERT_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
-; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m
-; CONVERT_LEGAL-NEXT: %{{.+}} = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> [[NEWM]], i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call i32 @llvm.vp.reduce.mul.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call i32 @llvm.vp.reduce.and.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call i32 @llvm.vp.reduce.or.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call i32 @llvm.vp.reduce.xor.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call i32 @llvm.vp.reduce.smin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call i32 @llvm.vp.reduce.smax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call i32 @llvm.vp.reduce.umin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call i32 @llvm.vp.reduce.umax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL: ret void
-
-; CONVERT_LEGAL: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) {
-; CONVERT_LEGAL-NEXT: [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i64 0
-; CONVERT_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
-; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m
-; CONVERT_LEGAL-NEXT: %{{.+}} = call float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> [[NEWM]], i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call nnan float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call nnan ninf float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> [[NEWM]], i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call nnan float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call nnan ninf float @llvm.vp.reduce.fminimum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call nnan float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call nnan ninf float @llvm.vp.reduce.fmaximum.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL-NOT: %{{.+}} = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; CONVERT_LEGAL: ret void
-
-; CONVERT_LEGAL: define void @test_vp_cmp_v8(<8 x i32> %i0, <8 x i32> %i1, <8 x float> %f0, <8 x float> %f1, <8 x i1> %m, i32 %n) {
-; CONVERT_LEGAL-NEXT: [[NINS:%.+]] = insertelement <8 x i32> poison, i32 %n, i64 0
-; CONVERT_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <8 x i32> [[NINS]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CONVERT_LEGAL-NEXT: [[EVLM:%.+]] = icmp ult <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[NSPLAT]]
-; CONVERT_LEGAL-NEXT: [[NEWM:%.+]] = and <8 x i1> [[EVLM]], %m
-; CONVERT_LEGAL-NEXT: %{{.+}} = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> [[NEWM]], i32 8)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"eq", <8 x i1> %m, i32 %n)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %i0, <8 x i32> %i1, metadata !"slt", <8 x i1> %m, i32 %n
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"oeq", <8 x i1> %m, i32 %n)
-; CONVERT_LEGAL-NOT: %{{.+}} = call <8 x i1> @llvm.vp.fcmp.v8f32(<8 x float> %f0, <8 x float> %f1, metadata !"ult", <8 x i1> %m, i32 %n)
-; CONVERT_LEGAL: ret void
diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
index 138f0c81238b..38c1dbcb1075 100644
--- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
@@ -26,7 +26,6 @@
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Remove unreachable blocks from the CFG
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index c5c5342e303c..391888a38daf 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -61,7 +61,6 @@
; LAXX-NEXT: Constant Hoisting
; LAXX-NEXT: Replace intrinsics with calls to vector library
; LAXX-NEXT: Partially inline calls to library functions
-; LAXX-NEXT: Expand vector predication intrinsics
; LAXX-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; LAXX-NEXT: Scalarize Masked Memory Intrinsics
; LAXX-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/M68k/pipeline.ll b/llvm/test/CodeGen/M68k/pipeline.ll
index 0481d5c18e0c..6aa66d09aa78 100644
--- a/llvm/test/CodeGen/M68k/pipeline.ll
+++ b/llvm/test/CodeGen/M68k/pipeline.ll
@@ -32,7 +32,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/and-srl.ll b/llvm/test/CodeGen/Mips/llvm-ir/and-srl.ll
new file mode 100644
index 000000000000..988a0f5ee5ba
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/llvm-ir/and-srl.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64 | FileCheck %s \
+; RUN: -check-prefix=MIPS4
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64r2 | FileCheck %s \
+; RUN: -check-prefix=MIPS64R2
+
+define i64 @foo(i64 noundef %a) {
+; MIPS4-LABEL: foo:
+; MIPS4: # %bb.0: # %entry
+; MIPS4-NEXT: sll $1, $4, 0
+; MIPS4-NEXT: srl $1, $1, 2
+; MIPS4-NEXT: andi $1, $1, 7
+; MIPS4-NEXT: daddiu $2, $zero, 1
+; MIPS4-NEXT: jr $ra
+; MIPS4-NEXT: dsllv $2, $2, $1
+;
+; MIPS64R2-LABEL: foo:
+; MIPS64R2: # %bb.0: # %entry
+; MIPS64R2-NEXT: sll $1, $4, 0
+; MIPS64R2-NEXT: ext $1, $1, 2, 3
+; MIPS64R2-NEXT: daddiu $2, $zero, 1
+; MIPS64R2-NEXT: jr $ra
+; MIPS64R2-NEXT: dsllv $2, $2, $1
+entry:
+ %div1 = lshr i64 %a, 2
+ %and = and i64 %div1, 7
+ %shl = shl nuw nsw i64 1, %and
+ ret i64 %shl
+}
diff --git a/llvm/test/CodeGen/NVPTX/fence-proxy-tensormap.ll b/llvm/test/CodeGen/NVPTX/fence-proxy-tensormap.ll
new file mode 100644
index 000000000000..83a2ca4f481b
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/fence-proxy-tensormap.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx83 | FileCheck --check-prefixes=CHECK %s
+; RUN: %if ptxas-12.3 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx83 | %ptxas-verify -arch=sm_90 %}
+
+; CHECK-LABEL: test_fence_proxy_tensormap_generic_release
+define void @test_fence_proxy_tensormap_generic_release() {
+ ; CHECK: fence.proxy.tensormap::generic.release.cta;
+ call void @llvm.nvvm.fence.proxy.tensormap_generic.release.cta();
+
+ ; CHECK: fence.proxy.tensormap::generic.release.cluster;
+ call void @llvm.nvvm.fence.proxy.tensormap_generic.release.cluster();
+
+ ; CHECK: fence.proxy.tensormap::generic.release.gpu;
+ call void @llvm.nvvm.fence.proxy.tensormap_generic.release.gpu();
+
+ ; CHECK: fence.proxy.tensormap::generic.release.sys;
+ call void @llvm.nvvm.fence.proxy.tensormap_generic.release.sys();
+
+ ret void
+}
+
+; CHECK-LABEL: test_fence_proxy_tensormap_generic_acquire
+define void @test_fence_proxy_tensormap_generic_acquire(ptr addrspace(0) %addr) {
+ ; CHECK: fence.proxy.tensormap::generic.acquire.cta [%rd{{[0-9]+}}], 128;
+ call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cta(ptr addrspace(0) %addr, i32 128);
+
+ ; CHECK: fence.proxy.tensormap::generic.acquire.cluster [%rd{{[0-9]+}}], 128;
+ call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cluster(ptr addrspace(0) %addr, i32 128);
+
+ ; CHECK: fence.proxy.tensormap::generic.acquire.gpu [%rd{{[0-9]+}}], 128;
+ call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.gpu(ptr addrspace(0) %addr, i32 128);
+
+ ; CHECK: fence.proxy.tensormap::generic.acquire.sys [%rd{{[0-9]+}}], 128;
+ call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.sys(ptr addrspace(0) %addr, i32 128);
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/load-store-sm-70.ll b/llvm/test/CodeGen/NVPTX/load-store-sm-70.ll
index 68915b0f2698..9cea33d12027 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-sm-70.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-sm-70.ll
@@ -1,169 +1,7 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s
; RUN: %if ptxas-12.2 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | %ptxas-verify -arch=sm_70 %}
-; CHECK-LABEL: generic_plain
-define void @generic_plain(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
- ; CHECK: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load i8, ptr %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i8 %a.add, ptr %a
-
- ; CHECK: ld.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load i16, ptr %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i16 %b.add, ptr %b
-
- ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load i32, ptr %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store i32 %c.add, ptr %c
-
- ; CHECK: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load i64, ptr %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store i64 %d.add, ptr %d
-
- ; CHECK: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load float, ptr %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store float %e.add, ptr %c
-
- ; CHECK: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load double, ptr %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store double %f.add, ptr %c
-
- ret void
-}
-
-; CHECK-LABEL: generic_volatile
-define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load volatile i8, ptr %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i8 %a.add, ptr %a
-
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load volatile i16, ptr %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i16 %b.add, ptr %b
-
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load volatile i32, ptr %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store volatile i32 %c.add, ptr %c
-
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load volatile i64, ptr %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store volatile i64 %d.add, ptr %d
-
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load volatile float, ptr %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store volatile float %e.add, ptr %c
-
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load volatile double, ptr %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store volatile double %f.add, ptr %c
-
- ret void
-}
-
-; CHECK-LABEL: generic_unordered
-define void @generic_unordered(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr %a unordered, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr %a unordered, align 1
-
- ; CHECK: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr %b unordered, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr %b unordered, align 2
-
- ; CHECK: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr %c unordered, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr %c unordered, align 4
-
- ; CHECK: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr %d unordered, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr %d unordered, align 8
-
- ; CHECK: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr %e unordered, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr %e unordered, align 4
-
- ; CHECK: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr %e unordered, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr %e unordered, align 8
-
- ret void
-}
-
-; CHECK-LABEL: generic_monotonic
-define void @generic_monotonic(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr %a monotonic, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr %a monotonic, align 1
-
- ; CHECK: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr %b monotonic, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr %b monotonic, align 2
-
- ; CHECK: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr %c monotonic, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr %c monotonic, align 4
-
- ; CHECK: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr %d monotonic, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr %d monotonic, align 8
-
- ; CHECK: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr %e monotonic, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr %e monotonic, align 4
-
- ; CHECK: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr %e monotonic, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr %e monotonic, align 8
-
- ret void
-}
+;; generic statespace
; CHECK-LABEL: generic_acq_rel
define void @generic_acq_rel(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
@@ -206,335 +44,154 @@ define void @generic_acq_rel(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnam
ret void
}
-; CHECK-LABEL: generic_unordered_volatile
-define void @generic_unordered_volatile(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr %a unordered, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr %a unordered, align 1
-
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr %b unordered, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr %b unordered, align 2
-
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr %c unordered, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr %c unordered, align 4
-
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr %d unordered, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr %d unordered, align 8
-
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr %e unordered, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr %e unordered, align 4
-
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr %e unordered, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr %e unordered, align 8
-
- ret void
-}
-
-; CHECK-LABEL: generic_monotonic_volatile
-define void @generic_monotonic_volatile(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr %a monotonic, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr %a monotonic, align 1
-
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr %b monotonic, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr %b monotonic, align 2
-
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr %c monotonic, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr %c monotonic, align 4
-
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr %d monotonic, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr %d monotonic, align 8
-
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr %e monotonic, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr %e monotonic, align 4
-
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr %e monotonic, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr %e monotonic, align 8
-
- ret void
-}
-
-;; global statespace
-
-; CHECK-LABEL: global_plain
-define void @global_plain(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
- ; CHECK: ld.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load i8, ptr addrspace(1) %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i8 %a.add, ptr addrspace(1) %a
-
- ; CHECK: ld.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load i16, ptr addrspace(1) %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i16 %b.add, ptr addrspace(1) %b
-
- ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load i32, ptr addrspace(1) %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store i32 %c.add, ptr addrspace(1) %c
-
- ; CHECK: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load i64, ptr addrspace(1) %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store i64 %d.add, ptr addrspace(1) %d
-
- ; CHECK: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load float, ptr addrspace(1) %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store float %e.add, ptr addrspace(1) %c
-
- ; CHECK: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load double, ptr addrspace(1) %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store double %f.add, ptr addrspace(1) %c
-
- ret void
-}
-
-; CHECK-LABEL: global_volatile
-define void @global_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d) local_unnamed_addr {
- ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load volatile i8, ptr addrspace(1) %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i8 %a.add, ptr addrspace(1) %a
-
- ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load volatile i16, ptr addrspace(1) %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i16 %b.add, ptr addrspace(1) %b
-
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load volatile i32, ptr addrspace(1) %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store volatile i32 %c.add, ptr addrspace(1) %c
-
- ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load volatile i64, ptr addrspace(1) %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store volatile i64 %d.add, ptr addrspace(1) %d
-
- ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load volatile float, ptr addrspace(1) %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store volatile float %e.add, ptr addrspace(1) %c
-
- ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load volatile double, ptr addrspace(1) %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store volatile double %f.add, ptr addrspace(1) %c
-
- ret void
-}
-
-; CHECK-LABEL: global_unordered
-define void @global_unordered(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(1) %a unordered, align 1
+; CHECK-LABEL: generic_acq_rel_volatile
+define void @generic_acq_rel_volatile(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
+ ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr %a acquire, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(1) %a unordered, align 1
+ ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr %a release, align 1
- ; CHECK: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(1) %b unordered, align 2
+ ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr %b acquire, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(1) %b unordered, align 2
+ ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr %b release, align 2
- ; CHECK: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(1) %c unordered, align 4
+ ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr %c acquire, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(1) %c unordered, align 4
+ ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr %c release, align 4
- ; CHECK: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(1) %d unordered, align 8
+ ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr %d acquire, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(1) %d unordered, align 8
+ ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr %d release, align 8
- ; CHECK: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(1) %e unordered, align 4
+ ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr %e acquire, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(1) %e unordered, align 4
+ ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr %e release, align 4
- ; CHECK: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(1) %e unordered, align 8
+ ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr %e acquire, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(1) %e unordered, align 8
+ ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr %e release, align 8
ret void
}
-; CHECK-LABEL: global_monotonic
-define void @global_monotonic(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1
+; CHECK-LABEL: generic_sc
+define void @generic_sc(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr %a seq_cst, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr %a seq_cst, align 1
- ; CHECK: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(1) %b monotonic, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr %b seq_cst, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(1) %b monotonic, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr %b seq_cst, align 2
- ; CHECK: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(1) %c monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr %c seq_cst, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(1) %c monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr %c seq_cst, align 4
- ; CHECK: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(1) %d monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr %d seq_cst, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(1) %d monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr %d seq_cst, align 8
- ; CHECK: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(1) %e monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(1) %e monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr %e seq_cst, align 4
- ; CHECK: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(1) %e monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(1) %e monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: global_unordered_volatile
-define void @global_unordered_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(1) %a unordered, align 1
+; CHECK-LABEL: generic_sc_volatile
+define void @generic_sc_volatile(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr %a seq_cst, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(1) %a unordered, align 1
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr %a seq_cst, align 1
- ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(1) %b unordered, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr %b seq_cst, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(1) %b unordered, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr %b seq_cst, align 2
- ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(1) %c unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr %c seq_cst, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(1) %c unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr %c seq_cst, align 4
- ; CHECK: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(1) %d unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr %d seq_cst, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(1) %d unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr %d seq_cst, align 8
- ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(1) %e unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(1) %e unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr %e seq_cst, align 4
- ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(1) %e unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(1) %e unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: global_monotonic_volatile
-define void @global_monotonic_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1
-
- ; CHECK: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(1) %b monotonic, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(1) %b monotonic, align 2
-
- ; CHECK: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(1) %c monotonic, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(1) %c monotonic, align 4
-
- ; CHECK: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(1) %d monotonic, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(1) %d monotonic, align 8
-
- ; CHECK: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(1) %e monotonic, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(1) %e monotonic, align 4
-
- ; CHECK: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(1) %e monotonic, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(1) %e monotonic, align 8
-
- ret void
-}
+;; global statespace
; CHECK-LABEL: global_acq_rel
define void @global_acq_rel(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
@@ -618,253 +275,113 @@ define void @global_acq_rel_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, p
ret void
}
-;; shared statespace
-
-; CHECK-LABEL: shared_plain
-define void @shared_plain(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
- ; CHECK: ld.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load i8, ptr addrspace(3) %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i8 %a.add, ptr addrspace(3) %a
-
- ; CHECK: ld.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load i16, ptr addrspace(3) %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i16 %b.add, ptr addrspace(3) %b
-
- ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load i32, ptr addrspace(3) %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store i32 %c.add, ptr addrspace(3) %c
-
- ; CHECK: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load i64, ptr addrspace(3) %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store i64 %d.add, ptr addrspace(3) %d
-
- ; CHECK: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load float, ptr addrspace(3) %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store float %e.add, ptr addrspace(3) %c
-
- ; CHECK: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load double, ptr addrspace(3) %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store double %f.add, ptr addrspace(3) %c
-
- ret void
-}
-
-; CHECK-LABEL: shared_volatile
-define void @shared_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load volatile i8, ptr addrspace(3) %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i8 %a.add, ptr addrspace(3) %a
-
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load volatile i16, ptr addrspace(3) %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i16 %b.add, ptr addrspace(3) %b
-
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load volatile i32, ptr addrspace(3) %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store volatile i32 %c.add, ptr addrspace(3) %c
-
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load volatile i64, ptr addrspace(3) %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store volatile i64 %d.add, ptr addrspace(3) %d
-
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load volatile float, ptr addrspace(3) %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store volatile float %e.add, ptr addrspace(3) %c
-
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load volatile double, ptr addrspace(3) %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store volatile double %f.add, ptr addrspace(3) %c
-
- ret void
-}
-
-; CHECK-LABEL: shared_unordered
-define void @shared_unordered(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(3) %a unordered, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(3) %a unordered, align 1
-
- ; CHECK: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(3) %b unordered, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(3) %b unordered, align 2
-
- ; CHECK: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(3) %c unordered, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(3) %c unordered, align 4
-
- ; CHECK: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(3) %d unordered, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(3) %d unordered, align 8
-
- ; CHECK: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(3) %e unordered, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(3) %e unordered, align 4
-
- ; CHECK: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(3) %e unordered, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(3) %e unordered, align 8
-
- ret void
-}
-
-; CHECK-LABEL: shared_unordered_volatile
-define void @shared_unordered_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(3) %a unordered, align 1
+; CHECK-LABEL: global_seq_cst
+define void @global_seq_cst(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(1) %a seq_cst, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(3) %a unordered, align 1
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(1) %a seq_cst, align 1
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(3) %b unordered, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(1) %b seq_cst, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(3) %b unordered, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(1) %b seq_cst, align 2
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(3) %c unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(1) %c seq_cst, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(3) %c unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(1) %c seq_cst, align 4
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(3) %d unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(1) %d seq_cst, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(3) %d unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(1) %d seq_cst, align 8
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(3) %e unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(1) %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(3) %e unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(1) %e seq_cst, align 4
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(3) %e unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(1) %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(3) %e unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(1) %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: shared_monotonic
-define void @shared_monotonic(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1
+; CHECK-LABEL: global_seq_cst_volatile
+define void @global_seq_cst_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(1) %a seq_cst, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(1) %a seq_cst, align 1
- ; CHECK: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(3) %b monotonic, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(1) %b seq_cst, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(3) %b monotonic, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(1) %b seq_cst, align 2
- ; CHECK: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(3) %c monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(1) %c seq_cst, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(3) %c monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(1) %c seq_cst, align 4
- ; CHECK: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(3) %d monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(1) %d seq_cst, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(3) %d monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(1) %d seq_cst, align 8
- ; CHECK: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(3) %e monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(1) %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(3) %e monotonic, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(1) %e seq_cst, align 4
- ; CHECK: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(3) %e monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(1) %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(3) %e monotonic, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(1) %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: shared_monotonic_volatile
-define void @shared_monotonic_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(3) %a monotonic, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(3) %a monotonic, align 1
-
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(3) %b monotonic, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(3) %b monotonic, align 2
-
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(3) %c monotonic, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(3) %c monotonic, align 4
-
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(3) %d monotonic, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(3) %d monotonic, align 8
-
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(3) %e monotonic, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(3) %e monotonic, align 4
-
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(3) %e monotonic, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(3) %e monotonic, align 8
-
- ret void
-}
+;; shared statespace
; CHECK-LABEL: shared_acq_rel
define void @shared_acq_rel(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
@@ -948,332 +465,291 @@ define void @shared_acq_rel_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, p
ret void
}
-;; local statespace
-
-; CHECK-LABEL: local_plain
-define void @local_plain(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load i8, ptr addrspace(5) %a
- %a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i8 %a.add, ptr addrspace(5) %a
-
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load i16, ptr addrspace(5) %b
- %b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store i16 %b.add, ptr addrspace(5) %b
-
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load i32, ptr addrspace(5) %c
- %c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store i32 %c.add, ptr addrspace(5) %c
-
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load i64, ptr addrspace(5) %d
- %d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store i64 %d.add, ptr addrspace(5) %d
-
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load float, ptr addrspace(5) %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store float %e.add, ptr addrspace(5) %c
-
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load double, ptr addrspace(5) %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store double %f.add, ptr addrspace(5) %c
-
- ret void
-}
-
-; CHECK-LABEL: local_volatile
-define void @local_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load volatile i8, ptr addrspace(5) %a
+; CHECK-LABEL: shared_seq_cst
+define void @shared_seq_cst(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic i8, ptr addrspace(3) %a seq_cst, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i8 %a.add, ptr addrspace(5) %a
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i8 %a.add, ptr addrspace(3) %a seq_cst, align 1
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load volatile i16, ptr addrspace(5) %b
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic i16, ptr addrspace(3) %b seq_cst, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store volatile i16 %b.add, ptr addrspace(5) %b
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic i16 %b.add, ptr addrspace(3) %b seq_cst, align 2
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load volatile i32, ptr addrspace(5) %c
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic i32, ptr addrspace(3) %c seq_cst, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store volatile i32 %c.add, ptr addrspace(5) %c
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic i32 %c.add, ptr addrspace(3) %c seq_cst, align 4
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load volatile i64, ptr addrspace(5) %d
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic i64, ptr addrspace(3) %d seq_cst, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store volatile i64 %d.add, ptr addrspace(5) %d
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic i64 %d.add, ptr addrspace(3) %d seq_cst, align 8
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load volatile float, ptr addrspace(5) %c
- %e.add = fadd float %e.load, 1.
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store volatile float %e.add, ptr addrspace(5) %c
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic float, ptr addrspace(3) %e seq_cst, align 4
+ %e.add = fadd float %e.load, 1.0
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic float %e.add, ptr addrspace(3) %e seq_cst, align 4
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load volatile double, ptr addrspace(5) %c
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store volatile double %f.add, ptr addrspace(5) %c
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic double, ptr addrspace(3) %e seq_cst, align 8
+ %f.add = fadd double %f.load, 1.
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic double %f.add, ptr addrspace(3) %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: local_unordered
-define void @local_unordered(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(5) %a unordered, align 1
+; CHECK-LABEL: shared_seq_cst_volatile
+define void @shared_seq_cst_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %a.load = load atomic volatile i8, ptr addrspace(3) %a seq_cst, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(5) %a unordered, align 1
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i8 %a.add, ptr addrspace(3) %a seq_cst, align 1
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(5) %b unordered, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %b.load = load atomic volatile i16, ptr addrspace(3) %b seq_cst, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(5) %b unordered, align 2
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ store atomic volatile i16 %b.add, ptr addrspace(3) %b seq_cst, align 2
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(5) %c unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %c.load = load atomic volatile i32, ptr addrspace(3) %c seq_cst, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(5) %c unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store atomic volatile i32 %c.add, ptr addrspace(3) %c seq_cst, align 4
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(5) %d unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %d.load = load atomic volatile i64, ptr addrspace(3) %d seq_cst, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(5) %d unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ store atomic volatile i64 %d.add, ptr addrspace(3) %d seq_cst, align 8
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(5) %e unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %e.load = load atomic volatile float, ptr addrspace(3) %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(5) %e unordered, align 4
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ store atomic volatile float %e.add, ptr addrspace(3) %e seq_cst, align 4
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(5) %e unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: ld.acquire.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %f.load = load atomic volatile double, ptr addrspace(3) %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(5) %e unordered, align 8
+ ; CHECK: fence.sc.sys
+ ; CHECK: st.release.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ store atomic volatile double %f.add, ptr addrspace(3) %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: local_unordered_volatile
-define void @local_unordered_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
- ; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(5) %a unordered, align 1
- %a.add = add i8 %a.load, 1
- ; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(5) %a unordered, align 1
-
- ; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(5) %b unordered, align 2
- %b.add = add i16 %b.load, 1
- ; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(5) %b unordered, align 2
-
- ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(5) %c unordered, align 4
- %c.add = add i32 %c.load, 1
- ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(5) %c unordered, align 4
-
- ; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(5) %d unordered, align 8
- %d.add = add i64 %d.load, 1
- ; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(5) %d unordered, align 8
-
- ; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(5) %e unordered, align 4
- %e.add = fadd float %e.load, 1.0
- ; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(5) %e unordered, align 4
-
- ; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(5) %e unordered, align 8
- %f.add = fadd double %f.load, 1.
- ; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(5) %e unordered, align 8
+;; local statespace
- ret void
-}
+; CHECK-LABEL: local_acq_rel
+define void @local_acq_rel(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using PTX atomic operations.
-; CHECK-LABEL: local_monotonic
-define void @local_monotonic(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1
+ %a.load = load atomic i8, ptr addrspace(5) %a acquire, align 1
%a.add = add i8 %a.load, 1
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(5) %a monotonic, align 1
+ store atomic i8 %a.add, ptr addrspace(5) %a release, align 1
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(5) %b monotonic, align 2
+ %b.load = load atomic i16, ptr addrspace(5) %b acquire, align 2
%b.add = add i16 %b.load, 1
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(5) %b monotonic, align 2
+ store atomic i16 %b.add, ptr addrspace(5) %b release, align 2
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(5) %c monotonic, align 4
+ %c.load = load atomic i32, ptr addrspace(5) %c acquire, align 4
%c.add = add i32 %c.load, 1
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(5) %c monotonic, align 4
+ store atomic i32 %c.add, ptr addrspace(5) %c release, align 4
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(5) %d monotonic, align 8
+ %d.load = load atomic i64, ptr addrspace(5) %d acquire, align 8
%d.add = add i64 %d.load, 1
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(5) %d monotonic, align 8
+ store atomic i64 %d.add, ptr addrspace(5) %d release, align 8
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(5) %e monotonic, align 4
+ %e.load = load atomic float, ptr addrspace(5) %e acquire, align 4
%e.add = fadd float %e.load, 1.0
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(5) %e monotonic, align 4
+ store atomic float %e.add, ptr addrspace(5) %e release, align 4
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(5) %e monotonic, align 8
+ %f.load = load atomic double, ptr addrspace(5) %e acquire, align 8
%f.add = fadd double %f.load, 1.
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(5) %e monotonic, align 8
+ store atomic double %f.add, ptr addrspace(5) %e release, align 8
ret void
}
-; CHECK-LABEL: local_monotonic_volatile
-define void @local_monotonic_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+; CHECK-LABEL: local_acq_rel_volatile
+define void @local_acq_rel_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using PTX atomic operations.
+
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1
+ %a.load = load atomic volatile i8, ptr addrspace(5) %a acquire, align 1
%a.add = add i8 %a.load, 1
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(5) %a monotonic, align 1
+ store atomic volatile i8 %a.add, ptr addrspace(5) %a release, align 1
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(5) %b monotonic, align 2
+ %b.load = load atomic volatile i16, ptr addrspace(5) %b acquire, align 2
%b.add = add i16 %b.load, 1
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(5) %b monotonic, align 2
+ store atomic volatile i16 %b.add, ptr addrspace(5) %b release, align 2
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(5) %c monotonic, align 4
+ %c.load = load atomic volatile i32, ptr addrspace(5) %c acquire, align 4
%c.add = add i32 %c.load, 1
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(5) %c monotonic, align 4
+ store atomic volatile i32 %c.add, ptr addrspace(5) %c release, align 4
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(5) %d monotonic, align 8
+ %d.load = load atomic volatile i64, ptr addrspace(5) %d acquire, align 8
%d.add = add i64 %d.load, 1
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(5) %d monotonic, align 8
+ store atomic volatile i64 %d.add, ptr addrspace(5) %d release, align 8
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(5) %e monotonic, align 4
+ %e.load = load atomic volatile float, ptr addrspace(5) %e acquire, align 4
%e.add = fadd float %e.load, 1.0
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(5) %e monotonic, align 4
+ store atomic volatile float %e.add, ptr addrspace(5) %e release, align 4
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(5) %e monotonic, align 8
+ %f.load = load atomic volatile double, ptr addrspace(5) %e acquire, align 8
%f.add = fadd double %f.load, 1.
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(5) %e monotonic, align 8
+ store atomic volatile double %f.add, ptr addrspace(5) %e release, align 8
ret void
}
-; CHECK-LABEL: local_acq_rel
-define void @local_acq_rel(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+; CHECK-LABEL: local_seq_cst
+define void @local_seq_cst(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using PTX atomic operations.
+
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic i8, ptr addrspace(5) %a acquire, align 1
+ %a.load = load atomic i8, ptr addrspace(5) %a seq_cst, align 1
%a.add = add i8 %a.load, 1
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i8 %a.add, ptr addrspace(5) %a release, align 1
+ store atomic i8 %a.add, ptr addrspace(5) %a seq_cst, align 1
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic i16, ptr addrspace(5) %b acquire, align 2
+ %b.load = load atomic i16, ptr addrspace(5) %b seq_cst, align 2
%b.add = add i16 %b.load, 1
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic i16 %b.add, ptr addrspace(5) %b release, align 2
+ store atomic i16 %b.add, ptr addrspace(5) %b seq_cst, align 2
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic i32, ptr addrspace(5) %c acquire, align 4
+ %c.load = load atomic i32, ptr addrspace(5) %c seq_cst, align 4
%c.add = add i32 %c.load, 1
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic i32 %c.add, ptr addrspace(5) %c release, align 4
+ store atomic i32 %c.add, ptr addrspace(5) %c seq_cst, align 4
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic i64, ptr addrspace(5) %d acquire, align 8
+ %d.load = load atomic i64, ptr addrspace(5) %d seq_cst, align 8
%d.add = add i64 %d.load, 1
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic i64 %d.add, ptr addrspace(5) %d release, align 8
+ store atomic i64 %d.add, ptr addrspace(5) %d seq_cst, align 8
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic float, ptr addrspace(5) %e acquire, align 4
+ %e.load = load atomic float, ptr addrspace(5) %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic float %e.add, ptr addrspace(5) %e release, align 4
+ store atomic float %e.add, ptr addrspace(5) %e seq_cst, align 4
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic double, ptr addrspace(5) %e acquire, align 8
+ %f.load = load atomic double, ptr addrspace(5) %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic double %f.add, ptr addrspace(5) %e release, align 8
+ store atomic double %f.add, ptr addrspace(5) %e seq_cst, align 8
ret void
}
-; CHECK-LABEL: local_acq_rel_volatile
-define void @local_acq_rel_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+; CHECK-LABEL: local_seq_cst_volatile
+define void @local_seq_cst_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using PTX atomic operations.
+
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %a.load = load atomic volatile i8, ptr addrspace(5) %a acquire, align 1
+ %a.load = load atomic volatile i8, ptr addrspace(5) %a seq_cst, align 1
%a.add = add i8 %a.load, 1
; CHECK: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i8 %a.add, ptr addrspace(5) %a release, align 1
+ store atomic volatile i8 %a.add, ptr addrspace(5) %a seq_cst, align 1
; CHECK: ld.local.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
- %b.load = load atomic volatile i16, ptr addrspace(5) %b acquire, align 2
+ %b.load = load atomic volatile i16, ptr addrspace(5) %b seq_cst, align 2
%b.add = add i16 %b.load, 1
; CHECK: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
- store atomic volatile i16 %b.add, ptr addrspace(5) %b release, align 2
+ store atomic volatile i16 %b.add, ptr addrspace(5) %b seq_cst, align 2
; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
- %c.load = load atomic volatile i32, ptr addrspace(5) %c acquire, align 4
+ %c.load = load atomic volatile i32, ptr addrspace(5) %c seq_cst, align 4
%c.add = add i32 %c.load, 1
; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
- store atomic volatile i32 %c.add, ptr addrspace(5) %c release, align 4
+ store atomic volatile i32 %c.add, ptr addrspace(5) %c seq_cst, align 4
; CHECK: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %d.load = load atomic volatile i64, ptr addrspace(5) %d acquire, align 8
+ %d.load = load atomic volatile i64, ptr addrspace(5) %d seq_cst, align 8
%d.add = add i64 %d.load, 1
; CHECK: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
- store atomic volatile i64 %d.add, ptr addrspace(5) %d release, align 8
+ store atomic volatile i64 %d.add, ptr addrspace(5) %d seq_cst, align 8
; CHECK: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
- %e.load = load atomic volatile float, ptr addrspace(5) %e acquire, align 4
+ %e.load = load atomic volatile float, ptr addrspace(5) %e seq_cst, align 4
%e.add = fadd float %e.load, 1.0
; CHECK: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
- store atomic volatile float %e.add, ptr addrspace(5) %e release, align 4
+ store atomic volatile float %e.add, ptr addrspace(5) %e seq_cst, align 4
; CHECK: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load atomic volatile double, ptr addrspace(5) %e acquire, align 8
+ %f.load = load atomic volatile double, ptr addrspace(5) %e seq_cst, align 8
%f.add = fadd double %f.load, 1.
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store atomic volatile double %f.add, ptr addrspace(5) %e release, align 8
+ store atomic volatile double %f.add, ptr addrspace(5) %e seq_cst, align 8
+
+ ; TODO: LLVM IR Verifier does not support atomics on vector types.
ret void
}
+
+; TODO: add plain,atomic,volatile,atomic volatile tests
+; for .const and .param statespaces \ No newline at end of file
diff --git a/llvm/test/CodeGen/NVPTX/load-store.ll b/llvm/test/CodeGen/NVPTX/load-store.ll
index 4c5e0920ce1a..aac73f71a676 100644
--- a/llvm/test/CodeGen/NVPTX/load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store.ll
@@ -1,5 +1,13 @@
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefixes=CHECK,SM60 %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | FileCheck %s -check-prefixes=CHECK,SM70
+; RUN: %if ptxas-12.2 %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx82 | %ptxas-verify -arch=sm_70 %}
+
+; TODO: add i1, <8 x i8>, and <6 x i8> vector tests.
+
+; TODO: add test for vectors that exceed 128-bit length
+; Per https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#vectors
+; vectors cannot exceed 128-bit in length, i.e., .v4.u64 is not allowed.
; generic statespace
@@ -36,10 +44,76 @@ define void @generic_plain(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr {
store float %e.add, ptr %c
; CHECK: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
- %f.load = load double, ptr %c
+ %f.load = load double, ptr %d
%f.add = fadd double %f.load, 1.
; CHECK: st.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
- store double %f.add, ptr %c
+ store double %f.add, ptr %d
+
+ ; TODO: make the lowering of this weak vector ops consistent with
+ ; the ones of the next tests. This test lowers to a weak PTX
+ ; vector op, but next test lowers to a vector PTX op.
+ ; CHECK: ld.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load <2 x i8>, ptr %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <2 x i8> %h.add, ptr %b
+
+ ; TODO: make the lowering of this weak vector ops consistent with
+ ; the ones of the previous test. This test lowers to a weak
+ ; PTX scalar op, but prior test lowers to a vector PTX op.
+ ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load <4 x i8>, ptr %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <4 x i8> %i.add, ptr %c
+
+ ; CHECK: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load <2 x i16>, ptr %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <2 x i16> %j.add, ptr %c
+
+ ; CHECK: ld.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load <4 x i16>, ptr %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <4 x i16> %k.add, ptr %d
+
+ ; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load <2 x i32>, ptr %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <2 x i32> %l.add, ptr %d
+
+ ; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load <4 x i32>, ptr %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <4 x i32> %m.add, ptr %d
+
+ ; CHECK: ld.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load <2 x i64>, ptr %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store <2 x i64> %n.add, ptr %d
+
+ ; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load <2 x float>, ptr %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <2 x float> %o.add, ptr %d
+
+ ; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load <4 x float>, ptr %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <4 x float> %p.add, ptr %d
+
+ ; CHECK: ld.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load <2 x double>, ptr %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store <2 x double> %q.add, ptr %d
ret void
}
@@ -82,45 +156,136 @@ define void @generic_volatile(ptr %a, ptr %b, ptr %c, ptr %d) local_unnamed_addr
; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr %c
+ ; TODO: volatile, atomic, and volatile atomic memory operations on vector types.
+ ; Currently, LLVM:
+ ; - does not allow atomic operations on vectors.
+ ; - it allows volatile operations but not clear what that means.
+ ; Following both semantics make sense in general and PTX supports both:
+ ; - volatile/atomic/volatile atomic applies to the whole vector
+ ; - volatile/atomic/volatile atomic applies elementwise
+ ; Actions required:
+ ; - clarify LLVM semantics for volatile on vectors and align the NVPTX backend with those
+ ; Below tests show that the current implementation picks the semantics in an inconsistent way
+ ; * volatile <2 x i8> lowers to "elementwise volatile"
+ ; * <4 x i8> lowers to "full vector volatile"
+ ; - provide support for vector atomics, e.g., by extending LLVM IR or via intrinsics
+ ; - update tests in load-store-sm70.ll as well.
+
+ ; TODO: make this operation consistent with the one for <4 x i8>
+ ; This operation lowers to a "element wise volatile PTX operation".
+ ; CHECK: ld.volatile.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load volatile <2 x i8>, ptr %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.volatile.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <2 x i8> %h.add, ptr %b
+
+ ; TODO: make this operation consistent with the one for <2 x i8>
+ ; This operation lowers to a "full vector volatile PTX operation".
+ ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load volatile <4 x i8>, ptr %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <4 x i8> %i.add, ptr %c
+
+ ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load volatile <2 x i16>, ptr %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <2 x i16> %j.add, ptr %c
+
+ ; CHECK: ld.volatile.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load volatile <4 x i16>, ptr %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.volatile.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <4 x i16> %k.add, ptr %d
+
+ ; CHECK: ld.volatile.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load volatile <2 x i32>, ptr %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.volatile.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <2 x i32> %l.add, ptr %d
+
+ ; CHECK: ld.volatile.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load volatile <4 x i32>, ptr %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.volatile.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <4 x i32> %m.add, ptr %d
+
+ ; CHECK: ld.volatile.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load volatile <2 x i64>, ptr %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.volatile.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store volatile <2 x i64> %n.add, ptr %d
+
+ ; CHECK: ld.volatile.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load volatile <2 x float>, ptr %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.volatile.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <2 x float> %o.add, ptr %d
+
+ ; CHECK: ld.volatile.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load volatile <4 x float>, ptr %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.volatile.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <4 x float> %p.add, ptr %d
+
+ ; CHECK: ld.volatile.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load volatile <2 x double>, ptr %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.volatile.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store volatile <2 x double> %q.add, ptr %d
+
ret void
}
; CHECK-LABEL: generic_monotonic
define void @generic_monotonic(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr %a monotonic, align 1
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr %b monotonic, align 2
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr %c monotonic, align 4
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr %d monotonic, align 8
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr %e monotonic, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr %e monotonic, align 4
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr %e monotonic, align 8
ret void
@@ -169,40 +334,52 @@ define void @generic_monotonic_volatile(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e)
; CHECK-LABEL: generic_unordered
define void @generic_unordered(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) local_unnamed_addr {
- ; CHECK: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr %a unordered, align 1
- ; CHECK: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr %b unordered, align 2
- ; CHECK: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr %c unordered, align 4
- ; CHECK: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr %d unordered, align 8
- ; CHECK: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.relaxed.sys.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr %e unordered, align 4
- ; CHECK: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr %e unordered, align 8
ret void
@@ -289,6 +466,66 @@ define void @global_plain(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspac
; CHECK: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr addrspace(1) %c
+ ; CHECK: ld.global.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load <2 x i8>, ptr addrspace(1) %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.global.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <2 x i8> %h.add, ptr addrspace(1) %b
+
+ ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load <4 x i8>, ptr addrspace(1) %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <4 x i8> %i.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load <2 x i16>, ptr addrspace(1) %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <2 x i16> %j.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.global.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load <4 x i16>, ptr addrspace(1) %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.global.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <4 x i16> %k.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load <2 x i32>, ptr addrspace(1) %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.global.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <2 x i32> %l.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load <4 x i32>, ptr addrspace(1) %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.global.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <4 x i32> %m.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load <2 x i64>, ptr addrspace(1) %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.global.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store <2 x i64> %n.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load <2 x float>, ptr addrspace(1) %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.global.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <2 x float> %o.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load <4 x float>, ptr addrspace(1) %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.global.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <4 x float> %p.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.global.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load <2 x double>, ptr addrspace(1) %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.global.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store <2 x double> %q.add, ptr addrspace(1) %d
+
ret void
}
@@ -330,45 +567,117 @@ define void @global_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrs
; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr addrspace(1) %c
+ ; CHECK: ld.volatile.global.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load volatile <2 x i8>, ptr addrspace(1) %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.volatile.global.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile<2 x i8> %h.add, ptr addrspace(1) %b
+
+ ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load volatile <4 x i8>, ptr addrspace(1) %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile<4 x i8> %i.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load volatile <2 x i16>, ptr addrspace(1) %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile<2 x i16> %j.add, ptr addrspace(1) %c
+
+ ; CHECK: ld.volatile.global.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load volatile <4 x i16>, ptr addrspace(1) %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.volatile.global.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile<4 x i16> %k.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load volatile <2 x i32>, ptr addrspace(1) %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.volatile.global.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile<2 x i32> %l.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load volatile <4 x i32>, ptr addrspace(1) %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.volatile.global.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile<4 x i32> %m.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load volatile <2 x i64>, ptr addrspace(1) %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.volatile.global.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store volatile<2 x i64> %n.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load volatile <2 x float>, ptr addrspace(1) %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.volatile.global.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile<2 x float> %o.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load volatile <4 x float>, ptr addrspace(1) %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.volatile.global.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile<4 x float> %p.add, ptr addrspace(1) %d
+
+ ; CHECK: ld.volatile.global.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load volatile <2 x double>, ptr addrspace(1) %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.volatile.global.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store volatile<2 x double> %q.add, ptr addrspace(1) %d
+
ret void
}
; CHECK-LABEL: global_monotonic
define void @global_monotonic(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr addrspace(1) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(1) %a monotonic, align 1
- ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(1) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(1) %b monotonic, align 2
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(1) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(1) %c monotonic, align 4
- ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(1) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(1) %d monotonic, align 8
- ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(1) %e monotonic, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(1) %e monotonic, align 4
- ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(1) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(1) %e monotonic, align 8
ret void
@@ -376,40 +685,52 @@ define void @global_monotonic(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addr
; CHECK-LABEL: global_monotonic_volatile
define void @global_monotonic_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic volatile i8, ptr addrspace(1) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(1) %a monotonic, align 1
- ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(1) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(1) %b monotonic, align 2
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(1) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(1) %c monotonic, align 4
- ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(1) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(1) %d monotonic, align 8
- ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(1) %e monotonic, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(1) %e monotonic, align 4
- ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(1) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(1) %e monotonic, align 8
ret void
@@ -417,40 +738,52 @@ define void @global_monotonic_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b,
; CHECK-LABEL: global_unordered
define void @global_unordered(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr addrspace(1) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(1) %a unordered, align 1
- ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(1) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(1) %b unordered, align 2
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(1) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(1) %c unordered, align 4
- ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(1) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(1) %d unordered, align 8
- ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(1) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(1) %e unordered, align 4
- ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(1) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(1) %e unordered, align 8
ret void
@@ -458,40 +791,52 @@ define void @global_unordered(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addr
; CHECK-LABEL: global_unordered_volatile
define void @global_unordered_volatile(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic volatile i8, ptr addrspace(1) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i8 %a.add, ptr addrspace(1) %a unordered, align 1
- ; CHECK: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic volatile i16, ptr addrspace(1) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic volatile i16 %b.add, ptr addrspace(1) %b unordered, align 2
- ; CHECK: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic volatile i32, ptr addrspace(1) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic volatile i32 %c.add, ptr addrspace(1) %c unordered, align 4
- ; CHECK: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic volatile i64, ptr addrspace(1) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic volatile i64 %d.add, ptr addrspace(1) %d unordered, align 8
- ; CHECK: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic volatile float, ptr addrspace(1) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic volatile float %e.add, ptr addrspace(1) %e unordered, align 4
- ; CHECK: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.mmio.relaxed.sys.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic volatile double, ptr addrspace(1) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.mmio.relaxed.sys.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic volatile double %f.add, ptr addrspace(1) %e unordered, align 8
ret void
@@ -537,6 +882,66 @@ define void @shared_plain(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspac
; CHECK: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr addrspace(3) %c
+ ; CHECK: ld.shared.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load <2 x i8>, ptr addrspace(3) %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.shared.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <2 x i8> %h.add, ptr addrspace(3) %b
+
+ ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load <4 x i8>, ptr addrspace(3) %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <4 x i8> %i.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load <2 x i16>, ptr addrspace(3) %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <2 x i16> %j.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.shared.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load <4 x i16>, ptr addrspace(3) %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.shared.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <4 x i16> %k.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load <2 x i32>, ptr addrspace(3) %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.shared.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <2 x i32> %l.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load <4 x i32>, ptr addrspace(3) %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.shared.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <4 x i32> %m.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load <2 x i64>, ptr addrspace(3) %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.shared.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store <2 x i64> %n.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load <2 x float>, ptr addrspace(3) %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.shared.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <2 x float> %o.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load <4 x float>, ptr addrspace(3) %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.shared.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <4 x float> %p.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.shared.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load <2 x double>, ptr addrspace(3) %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.shared.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store <2 x double> %q.add, ptr addrspace(3) %d
+
ret void
}
@@ -578,45 +983,119 @@ define void @shared_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrs
; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr addrspace(3) %c
+ ; CHECK: ld.volatile.shared.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load volatile <2 x i8>, ptr addrspace(3) %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.volatile.shared.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <2 x i8> %h.add, ptr addrspace(3) %b
+
+ ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load volatile <4 x i8>, ptr addrspace(3) %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <4 x i8> %i.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load volatile <2 x i16>, ptr addrspace(3) %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <2 x i16> %j.add, ptr addrspace(3) %c
+
+ ; CHECK: ld.volatile.shared.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load volatile <4 x i16>, ptr addrspace(3) %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.volatile.shared.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <4 x i16> %k.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load volatile <2 x i32>, ptr addrspace(3) %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.volatile.shared.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <2 x i32> %l.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load volatile <4 x i32>, ptr addrspace(3) %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.volatile.shared.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <4 x i32> %m.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load volatile <2 x i64>, ptr addrspace(3) %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.volatile.shared.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store volatile <2 x i64> %n.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load volatile <2 x float>, ptr addrspace(3) %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.volatile.shared.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <2 x float> %o.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load volatile <4 x float>, ptr addrspace(3) %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.volatile.shared.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <4 x float> %p.add, ptr addrspace(3) %d
+
+ ; CHECK: ld.volatile.shared.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load volatile <2 x double>, ptr addrspace(3) %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.volatile.shared.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store volatile <2 x double> %q.add, ptr addrspace(3) %d
+
ret void
}
; CHECK-LABEL: shared_monotonic
define void @shared_monotonic(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; TODO: optimize .sys.shared to .cta.shared or .cluster.shared.
+
+ ; SM60: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr addrspace(3) %a monotonic, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(3) %a monotonic, align 1
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(3) %b monotonic, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(3) %b monotonic, align 2
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(3) %c monotonic, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(3) %c monotonic, align 4
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(3) %d monotonic, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(3) %d monotonic, align 8
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(3) %e monotonic, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(3) %e monotonic, align 4
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(3) %e monotonic, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(3) %e monotonic, align 8
ret void
@@ -665,40 +1144,54 @@ define void @shared_monotonic_volatile(ptr addrspace(3) %a, ptr addrspace(3) %b,
; CHECK-LABEL: shared_unordered
define void @shared_unordered(ptr addrspace(3) %a, ptr addrspace(3) %b, ptr addrspace(3) %c, ptr addrspace(3) %d, ptr addrspace(3) %e) local_unnamed_addr {
- ; CHECK: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; TODO: optimize .sys.shared to .cta.shared or .cluster.shared.
+
+ ; SM60: ld.volatile.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr addrspace(3) %a unordered, align 1
%a.add = add i8 %a.load, 1
- ; CHECK: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i8 %a.add, ptr addrspace(3) %a unordered, align 1
- ; CHECK: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u16 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%b.load = load atomic i16, ptr addrspace(3) %b unordered, align 2
%b.add = add i16 %b.load, 1
- ; CHECK: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM60: st.volatile.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
store atomic i16 %b.add, ptr addrspace(3) %b unordered, align 2
- ; CHECK: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
%c.load = load atomic i32, ptr addrspace(3) %c unordered, align 4
%c.add = add i32 %c.load, 1
- ; CHECK: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM60: st.volatile.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
store atomic i32 %c.add, ptr addrspace(3) %c unordered, align 4
- ; CHECK: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
%d.load = load atomic i64, ptr addrspace(3) %d unordered, align 8
%d.add = add i64 %d.load, 1
- ; CHECK: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM60: st.volatile.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
store atomic i64 %d.add, ptr addrspace(3) %d unordered, align 8
- ; CHECK: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
%e.load = load atomic float, ptr addrspace(3) %e unordered, align 4
%e.add = fadd float %e.load, 1.0
- ; CHECK: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM60: st.volatile.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
store atomic float %e.add, ptr addrspace(3) %e unordered, align 4
- ; CHECK: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM60: ld.volatile.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
+ ; SM70: ld.relaxed.sys.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
%f.load = load atomic double, ptr addrspace(3) %e unordered, align 8
%f.add = fadd double %f.load, 1.
- ; CHECK: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM60: st.volatile.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
+ ; SM70: st.relaxed.sys.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store atomic double %f.add, ptr addrspace(3) %e unordered, align 8
ret void
@@ -785,11 +1278,74 @@ define void @local_plain(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store double %f.add, ptr addrspace(5) %c
+ ; CHECK: ld.local.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load <2 x i8>, ptr addrspace(5) %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.local.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <2 x i8> %h.add, ptr addrspace(5) %b
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load <4 x i8>, ptr addrspace(5) %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <4 x i8> %i.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load <2 x i16>, ptr addrspace(5) %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store <2 x i16> %j.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load <4 x i16>, ptr addrspace(5) %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.local.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store <4 x i16> %k.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load <2 x i32>, ptr addrspace(5) %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.local.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <2 x i32> %l.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load <4 x i32>, ptr addrspace(5) %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.local.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store <4 x i32> %m.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load <2 x i64>, ptr addrspace(5) %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.local.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store <2 x i64> %n.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load <2 x float>, ptr addrspace(5) %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.local.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <2 x float> %o.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load <4 x float>, ptr addrspace(5) %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.local.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store <4 x float> %p.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load <2 x double>, ptr addrspace(5) %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.local.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store <2 x double> %q.add, ptr addrspace(5) %d
+
ret void
}
; CHECK-LABEL: local_volatile
define void @local_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using volatile operations.
+
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load volatile i8, ptr addrspace(5) %a
%a.add = add i8 %a.load, 1
@@ -826,11 +1382,74 @@ define void @local_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrsp
; CHECK: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
store volatile double %f.add, ptr addrspace(5) %c
+ ; CHECK: ld.local.v2.u8 {%rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %h.load = load volatile <2 x i8>, ptr addrspace(5) %b
+ %h.add = add <2 x i8> %h.load, <i8 1, i8 1>
+ ; CHECK: st.local.v2.u8 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <2 x i8> %h.add, ptr addrspace(5) %b
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %i.load = load volatile <4 x i8>, ptr addrspace(5) %c
+ %i.add = add <4 x i8> %i.load, <i8 1, i8 1, i8 1, i8 1>
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <4 x i8> %i.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
+ %j.load = load volatile <2 x i16>, ptr addrspace(5) %c
+ %j.add = add <2 x i16> %j.load, <i16 1, i16 1>
+ ; CHECK: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
+ store volatile <2 x i16> %j.add, ptr addrspace(5) %c
+
+ ; CHECK: ld.local.v4.u16 {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %k.load = load volatile <4 x i16>, ptr addrspace(5) %d
+ %k.add = add <4 x i16> %k.load, <i16 1, i16 1, i16 1, i16 1>
+ ; CHECK: st.local.v4.u16 [%rd{{[0-9]+}}], {%rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}, %rs{{[0-9]+}}}
+ store volatile <4 x i16> %k.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %l.load = load volatile <2 x i32>, ptr addrspace(5) %d
+ %l.add = add <2 x i32> %l.load, <i32 1, i32 1>
+ ; CHECK: st.local.v2.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <2 x i32> %l.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %m.load = load volatile <4 x i32>, ptr addrspace(5) %d
+ %m.add = add <4 x i32> %m.load, <i32 1, i32 1, i32 1, i32 1>
+ ; CHECK: st.local.v4.u32 [%rd{{[0-9]+}}], {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
+ store volatile <4 x i32> %m.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.u64 {%rd{{[0-9]+}}, %rd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %n.load = load volatile <2 x i64>, ptr addrspace(5) %d
+ %n.add = add <2 x i64> %n.load, <i64 1, i64 1>
+ ; CHECK: st.local.v2.u64 [%rd{{[0-9]+}}], {%rd{{[0-9]+}}, %rd{{[0-9]+}}}
+ store volatile <2 x i64> %n.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %o.load = load volatile <2 x float>, ptr addrspace(5) %d
+ %o.add = fadd <2 x float> %o.load, <float 1., float 1.>
+ ; CHECK: st.local.v2.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <2 x float> %o.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %p.load = load volatile <4 x float>, ptr addrspace(5) %d
+ %p.add = fadd <4 x float> %p.load, <float 1., float 1., float 1., float 1.>
+ ; CHECK: st.local.v4.f32 [%rd{{[0-9]+}}], {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
+ store volatile <4 x float> %p.add, ptr addrspace(5) %d
+
+ ; CHECK: ld.local.v2.f64 {%fd{{[0-9]+}}, %fd{{[0-9]+}}}, [%rd{{[0-9]+}}]
+ %q.load = load volatile <2 x double>, ptr addrspace(5) %d
+ %q.add = fadd <2 x double> %q.load, <double 1., double 1.>
+ ; CHECK: st.local.v2.f64 [%rd{{[0-9]+}}], {%fd{{[0-9]+}}, %fd{{[0-9]+}}}
+ store volatile <2 x double> %q.add, ptr addrspace(5) %d
+
ret void
}
; CHECK-LABEL: local_monotonic
define void @local_monotonic(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by using PTX atomic operations.
+
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic i8, ptr addrspace(5) %a monotonic, align 1
%a.add = add i8 %a.load, 1
@@ -872,6 +1491,9 @@ define void @local_monotonic(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrs
; CHECK-LABEL: local_monotonic_volatile
define void @local_monotonic_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
+ ; TODO: generate PTX that preserves Concurrent Forward Progress
+ ; by generating atomic or volatile operations
+
; CHECK: ld.local.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
%a.load = load atomic volatile i8, ptr addrspace(5) %a monotonic, align 1
%a.add = add i8 %a.load, 1
@@ -992,3 +1614,6 @@ define void @local_unordered_volatile(ptr addrspace(5) %a, ptr addrspace(5) %b,
ret void
}
+
+; TODO: add plain,atomic,volatile,atomic volatile tests
+; for .const and .param statespaces \ No newline at end of file
diff --git a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll
index f1c9da078a16..70b421f8c0c5 100644
--- a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll
@@ -25,7 +25,6 @@
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Remove unreachable blocks from the CFG
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
index be0fbf3abcda..f4f492782eb6 100644
--- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
+++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll
@@ -62,7 +62,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
index ab222d770360..5e66e5ec2763 100644
--- a/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-base-pointer.ll
@@ -6,6 +6,7 @@
; Use an overaligned buffer to force base-pointer usage. Test verifies:
; - base pointer register (r30) is saved/defined/restored.
+; - frame pointer register (r31) is saved/defined/restored.
; - stack frame is allocated with correct alignment.
; - Address of %AlignedBuffer is calculated based off offset from the stack
; pointer.
@@ -25,7 +26,9 @@ declare void @callee(ptr)
; 32BIT: subfic 0, 0, -224
; 32BIT: stwux 1, 1, 0
; 32BIT: addi 3, 1, 64
+; 32BIT: stw 31, -12(30)
; 32BIT: bl .callee
+; 32BIT: lwz 31, -12(30)
; 32BIT: mr 1, 30
; 32BIT: lwz 30, -16(1)
@@ -36,6 +39,8 @@ declare void @callee(ptr)
; 64BIT: subfic 0, 0, -288
; 64BIT: stdux 1, 1, 0
; 64BIT: addi 3, 1, 128
+; 64BIT: std 31, -16(30)
; 64BIT: bl .callee
+; 64BIT: ld 31, -16(30)
; 64BIT: mr 1, 30
; 64BIT: ld 30, -24(1)
diff --git a/llvm/test/CodeGen/PowerPC/builtins-bcd-assist.ll b/llvm/test/CodeGen/PowerPC/builtins-bcd-assist.ll
new file mode 100644
index 000000000000..cc5d6bee3c97
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/builtins-bcd-assist.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \
+; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-AIX32
+
+define dso_local i64 @cdtbcd_test(i64 noundef %ll) {
+; CHECK-LABEL: cdtbcd_test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cdtbcd r3, r3
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+; CHECK-AIX32-LABEL: cdtbcd_test:
+; CHECK-AIX32: # %bb.0: # %entry
+; CHECK-AIX32-NEXT: li r3, 0
+; CHECK-AIX32-NEXT: cdtbcd r4, r4
+; CHECK-AIX32-NEXT: blr
+entry:
+ %conv = trunc i64 %ll to i32
+ %0 = tail call i32 @llvm.ppc.cdtbcd(i32 %conv)
+ %conv1 = zext i32 %0 to i64
+ ret i64 %conv1
+}
+
+define dso_local zeroext i32 @cdtbcd_test_ui(i32 noundef zeroext %ui) {
+; CHECK-LABEL: cdtbcd_test_ui:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cdtbcd r3, r3
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+; CHECK-AIX32-LABEL: cdtbcd_test_ui:
+; CHECK-AIX32: # %bb.0: # %entry
+; CHECK-AIX32-NEXT: cdtbcd r3, r3
+; CHECK-AIX32-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.cdtbcd(i32 %ui)
+ ret i32 %0
+}
+
+define dso_local i64 @cbcdtd_test(i64 noundef %ll) {
+; CHECK-LABEL: cbcdtd_test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cbcdtd r3, r3
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+; CHECK-AIX32-LABEL: cbcdtd_test:
+; CHECK-AIX32: # %bb.0: # %entry
+; CHECK-AIX32-NEXT: li r3, 0
+; CHECK-AIX32-NEXT: cbcdtd r4, r4
+; CHECK-AIX32-NEXT: blr
+entry:
+ %conv = trunc i64 %ll to i32
+ %0 = tail call i32@llvm.ppc.cbcdtd(i32 %conv)
+ %conv1 = zext i32 %0 to i64
+ ret i64 %conv1
+}
+
+define dso_local zeroext i32 @cbcdtd_test_ui(i32 noundef zeroext %ui) {
+; CHECK-LABEL: cbcdtd_test_ui:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cbcdtd r3, r3
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+; CHECK-AIX32-LABEL: cbcdtd_test_ui:
+; CHECK-AIX32: # %bb.0: # %entry
+; CHECK-AIX32-NEXT: cbcdtd r3, r3
+; CHECK-AIX32-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.cbcdtd(i32 %ui)
+ ret i32 %0
+}
+
+define dso_local i64 @addg6s_test(i64 noundef %ll, i64 noundef %ll2) {
+; CHECK-LABEL: addg6s_test:
+; CHECK: bb.0: # %entry
+; CHECK-NEXT: addg6s r3, r3, r4
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+; CHECK-AIX32-LABEL: addg6s_test:
+; CHECK-AIX32: # %bb.0: # %entry
+; CHECK-AIX32-NEXT: li r3, 0
+; CHECK-AIX32-NEXT: addg6s r4, r4, r6
+; CHECK-AIX32-NEXT: blr
+entry:
+ %conv = trunc i64 %ll to i32
+ %conv1 = trunc i64 %ll2 to i32
+ %0 = tail call i32 @llvm.ppc.addg6s(i32 %conv, i32 %conv1)
+ %conv2 = zext i32 %0 to i64
+ ret i64 %conv2
+}
+
+define dso_local zeroext i32 @addg6s_test_ui(i32 noundef zeroext %ui, i32 noundef zeroext %ui2) {
+; CHECK-LABEL: addg6s_test_ui:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addg6s r3, r3, r4
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+; CHECK-AIX32-LABEL: addg6s_test_ui:
+; CHECK-AIX32: # %bb.0: # %entry
+; CHECK-AIX32-NEXT: addg6s r3, r3, r4
+; CHECK-AIX32-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.addg6s(i32 %ui, i32 %ui2)
+ ret i32 %0
+}
+
+declare i32 @llvm.ppc.cdtbcd(i32)
+declare i32 @llvm.ppc.cbcdtd(i32)
+declare i32 @llvm.ppc.addg6s(i32, i32)
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-bcd-assist.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-bcd-assist.ll
new file mode 100644
index 000000000000..d188f6014f0c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-bcd-assist.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \
+; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
+; RUN: --ppc-asm-full-reg-names -mcpu=pwr7 < %s | FileCheck %s
+
+define i64 @cdtbcd_test(i64 noundef %ll) {
+; CHECK-LABEL: cdtbcd_test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cdtbcd r3, r3
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.ppc.cdtbcdd(i64 %ll)
+ ret i64 %0
+}
+
+define zeroext i32 @cdtbcd_test_ui(i32 noundef zeroext %ui) {
+; CHECK-LABEL: cdtbcd_test_ui:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cdtbcd r3, r3
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+entry:
+ %conv = zext i32 %ui to i64
+ %0 = tail call i64 @llvm.ppc.cdtbcdd(i64 %conv)
+ %conv1 = trunc i64 %0 to i32
+ ret i32 %conv1
+}
+
+define i64 @cbcdtd_test(i64 noundef %ll) {
+; CHECK-LABEL: cbcdtd_test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cbcdtd r3, r3
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.ppc.cbcdtdd(i64 %ll)
+ ret i64 %0
+}
+
+define zeroext i32 @cbcdtd_test_ui(i32 noundef zeroext %ui) {
+; CHECK-LABEL: cbcdtd_test_ui:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cbcdtd r3, r3
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+entry:
+ %conv = zext i32 %ui to i64
+ %0 = tail call i64 @llvm.ppc.cbcdtdd(i64 %conv)
+ %conv1 = trunc i64 %0 to i32
+ ret i32 %conv1
+}
+
+define i64 @addg6s_test(i64 noundef %ll, i64 noundef %ll2) {
+; CHECK-LABEL: addg6s_test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addg6s r3, r3, r4
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call i64 @llvm.ppc.addg6sd(i64 %ll, i64 %ll2)
+ ret i64 %0
+}
+
+define zeroext i32 @addg6s_test_ui(i32 noundef zeroext %ui, i32 noundef zeroext %ui2) {
+; CHECK-LABEL: addg6s_test_ui:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addg6s r3, r3, r4
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: blr
+entry:
+ %conv = zext i32 %ui to i64
+ %conv1 = zext i32 %ui2 to i64
+ %0 = tail call i64 @llvm.ppc.addg6sd(i64 %conv, i64 %conv1)
+ %conv2 = trunc i64 %0 to i32
+ ret i32 %conv2
+}
+
+declare i64 @llvm.ppc.cdtbcdd(i64)
+declare i64 @llvm.ppc.cbcdtdd(i64)
+declare i64 @llvm.ppc.addg6sd(i64, i64)
diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll
index ccf0e4520f46..b71a360d1be1 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain.ll
@@ -743,219 +743,214 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64
; CHECK-NEXT: std r9, -184(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r4, -160(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r3, -160(r1) # 8-byte Folded Spill
; CHECK-NEXT: ble cr0, .LBB7_7
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: sldi r4, r6, 2
-; CHECK-NEXT: li r6, 1
-; CHECK-NEXT: mr r0, r10
-; CHECK-NEXT: std r10, -192(r1) # 8-byte Folded Spill
-; CHECK-NEXT: cmpdi r4, 1
-; CHECK-NEXT: iselgt r4, r4, r6
-; CHECK-NEXT: addi r7, r4, -1
-; CHECK-NEXT: clrldi r6, r4, 63
-; CHECK-NEXT: cmpldi r7, 3
+; CHECK-NEXT: sldi r6, r6, 2
+; CHECK-NEXT: li r7, 1
+; CHECK-NEXT: mr r30, r10
+; CHECK-NEXT: cmpdi r6, 1
+; CHECK-NEXT: iselgt r7, r6, r7
+; CHECK-NEXT: addi r8, r7, -1
+; CHECK-NEXT: clrldi r6, r7, 63
+; CHECK-NEXT: cmpldi r8, 3
; CHECK-NEXT: blt cr0, .LBB7_4
; CHECK-NEXT: # %bb.2: # %for.body.preheader.new
-; CHECK-NEXT: ld r0, -192(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, -184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r8, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT: rldicl r7, r4, 62, 2
-; CHECK-NEXT: ld r9, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r11, r0, r30
-; CHECK-NEXT: add r4, r0, r0
-; CHECK-NEXT: mulli r23, r0, 24
-; CHECK-NEXT: add r14, r0, r8
-; CHECK-NEXT: sldi r12, r0, 5
-; CHECK-NEXT: add r31, r0, r9
-; CHECK-NEXT: sldi r9, r9, 3
-; CHECK-NEXT: sldi r18, r0, 4
-; CHECK-NEXT: sldi r8, r8, 3
-; CHECK-NEXT: add r10, r4, r4
-; CHECK-NEXT: sldi r4, r30, 3
-; CHECK-NEXT: sldi r11, r11, 3
-; CHECK-NEXT: add r26, r12, r9
-; CHECK-NEXT: add r16, r18, r9
-; CHECK-NEXT: add r29, r12, r8
-; CHECK-NEXT: add r19, r18, r8
-; CHECK-NEXT: add r30, r12, r4
-; CHECK-NEXT: mr r20, r4
-; CHECK-NEXT: std r4, -200(r1) # 8-byte Folded Spill
-; CHECK-NEXT: ld r4, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r15, r5, r11
-; CHECK-NEXT: sldi r11, r14, 3
-; CHECK-NEXT: add r29, r5, r29
-; CHECK-NEXT: add r28, r3, r26
-; CHECK-NEXT: add r19, r5, r19
-; CHECK-NEXT: add r21, r23, r9
-; CHECK-NEXT: add r24, r23, r8
-; CHECK-NEXT: add r14, r5, r11
-; CHECK-NEXT: sldi r11, r31, 3
-; CHECK-NEXT: add r25, r23, r20
-; CHECK-NEXT: add r20, r18, r20
-; CHECK-NEXT: add r30, r5, r30
-; CHECK-NEXT: add r18, r3, r16
-; CHECK-NEXT: add r24, r5, r24
-; CHECK-NEXT: add r23, r3, r21
-; CHECK-NEXT: add r27, r4, r26
-; CHECK-NEXT: add r22, r4, r21
-; CHECK-NEXT: add r17, r4, r16
-; CHECK-NEXT: add r2, r4, r11
-; CHECK-NEXT: rldicl r4, r7, 2, 1
-; CHECK-NEXT: sub r7, r8, r9
-; CHECK-NEXT: ld r8, -200(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r14, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT: mulli r24, r30, 24
+; CHECK-NEXT: ld r16, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r15, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT: rldicl r0, r7, 62, 2
+; CHECK-NEXT: sldi r11, r30, 5
+; CHECK-NEXT: sldi r19, r30, 4
+; CHECK-NEXT: sldi r7, r14, 3
+; CHECK-NEXT: add r14, r30, r14
+; CHECK-NEXT: sldi r10, r16, 3
+; CHECK-NEXT: sldi r12, r15, 3
+; CHECK-NEXT: add r16, r30, r16
+; CHECK-NEXT: add r15, r30, r15
+; CHECK-NEXT: add r27, r11, r7
+; CHECK-NEXT: add r22, r24, r7
+; CHECK-NEXT: add r17, r19, r7
+; CHECK-NEXT: sldi r2, r14, 3
+; CHECK-NEXT: add r26, r24, r10
+; CHECK-NEXT: add r25, r24, r12
+; CHECK-NEXT: add r21, r19, r10
+; CHECK-NEXT: add r20, r19, r12
+; CHECK-NEXT: add r8, r11, r10
+; CHECK-NEXT: sldi r16, r16, 3
+; CHECK-NEXT: add r29, r5, r27
+; CHECK-NEXT: add r28, r4, r27
+; CHECK-NEXT: add r27, r3, r27
+; CHECK-NEXT: add r24, r5, r22
+; CHECK-NEXT: add r23, r4, r22
+; CHECK-NEXT: add r22, r3, r22
+; CHECK-NEXT: add r19, r5, r17
+; CHECK-NEXT: add r18, r4, r17
+; CHECK-NEXT: add r17, r3, r17
+; CHECK-NEXT: add r14, r5, r2
+; CHECK-NEXT: add r31, r4, r2
+; CHECK-NEXT: add r2, r3, r2
+; CHECK-NEXT: add r9, r5, r8
+; CHECK-NEXT: add r8, r11, r12
; CHECK-NEXT: add r26, r5, r26
; CHECK-NEXT: add r25, r5, r25
; CHECK-NEXT: add r21, r5, r21
; CHECK-NEXT: add r20, r5, r20
; CHECK-NEXT: add r16, r5, r16
-; CHECK-NEXT: add r31, r5, r11
-; CHECK-NEXT: add r11, r3, r11
-; CHECK-NEXT: addi r4, r4, -4
-; CHECK-NEXT: rldicl r4, r4, 62, 2
-; CHECK-NEXT: sub r8, r8, r9
-; CHECK-NEXT: li r9, 0
-; CHECK-NEXT: addi r4, r4, 1
-; CHECK-NEXT: mtctr r4
+; CHECK-NEXT: add r8, r5, r8
+; CHECK-NEXT: rldicl r3, r0, 2, 1
+; CHECK-NEXT: addi r3, r3, -4
+; CHECK-NEXT: sub r0, r12, r7
+; CHECK-NEXT: sub r12, r10, r7
+; CHECK-NEXT: li r7, 0
+; CHECK-NEXT: mr r10, r30
+; CHECK-NEXT: sldi r15, r15, 3
+; CHECK-NEXT: add r15, r5, r15
+; CHECK-NEXT: rldicl r3, r3, 62, 2
+; CHECK-NEXT: addi r3, r3, 1
+; CHECK-NEXT: mtctr r3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB7_3: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: lfd f0, 0(r11)
-; CHECK-NEXT: lfd f1, 0(r2)
-; CHECK-NEXT: add r0, r0, r10
-; CHECK-NEXT: xsmuldp f0, f0, f1
+; CHECK-NEXT: lfd f0, 0(r2)
; CHECK-NEXT: lfd f1, 0(r31)
+; CHECK-NEXT: add r3, r10, r30
+; CHECK-NEXT: add r3, r3, r30
+; CHECK-NEXT: xsmuldp f0, f0, f1
+; CHECK-NEXT: lfd f1, 0(r14)
+; CHECK-NEXT: add r3, r3, r30
+; CHECK-NEXT: add r10, r3, r30
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfd f0, 0(r31)
-; CHECK-NEXT: add r31, r31, r12
-; CHECK-NEXT: lfdx f0, r11, r7
-; CHECK-NEXT: lfdx f1, r2, r7
+; CHECK-NEXT: stfd f0, 0(r14)
+; CHECK-NEXT: add r14, r14, r11
+; CHECK-NEXT: lfdx f0, r2, r0
+; CHECK-NEXT: lfdx f1, r31, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r14, r9
+; CHECK-NEXT: lfdx f1, r15, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r14, r9
-; CHECK-NEXT: lfdx f0, r11, r8
-; CHECK-NEXT: lfdx f1, r2, r8
-; CHECK-NEXT: add r11, r11, r12
-; CHECK-NEXT: add r2, r2, r12
+; CHECK-NEXT: stfdx f0, r15, r7
+; CHECK-NEXT: lfdx f0, r2, r12
+; CHECK-NEXT: lfdx f1, r31, r12
+; CHECK-NEXT: add r2, r2, r11
+; CHECK-NEXT: add r31, r31, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r15, r9
+; CHECK-NEXT: lfdx f1, r16, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r15, r9
-; CHECK-NEXT: lfd f0, 0(r18)
-; CHECK-NEXT: lfd f1, 0(r17)
+; CHECK-NEXT: stfdx f0, r16, r7
+; CHECK-NEXT: lfd f0, 0(r17)
+; CHECK-NEXT: lfd f1, 0(r18)
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r16, r9
+; CHECK-NEXT: lfdx f1, r19, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r16, r9
-; CHECK-NEXT: lfdx f0, r18, r7
-; CHECK-NEXT: lfdx f1, r17, r7
+; CHECK-NEXT: stfdx f0, r19, r7
+; CHECK-NEXT: lfdx f0, r17, r0
+; CHECK-NEXT: lfdx f1, r18, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r19, r9
+; CHECK-NEXT: lfdx f1, r20, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r19, r9
-; CHECK-NEXT: lfdx f0, r18, r8
-; CHECK-NEXT: lfdx f1, r17, r8
-; CHECK-NEXT: add r18, r18, r12
-; CHECK-NEXT: add r17, r17, r12
+; CHECK-NEXT: stfdx f0, r20, r7
+; CHECK-NEXT: lfdx f0, r17, r12
+; CHECK-NEXT: lfdx f1, r18, r12
+; CHECK-NEXT: add r17, r17, r11
+; CHECK-NEXT: add r18, r18, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r20, r9
+; CHECK-NEXT: lfdx f1, r21, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r20, r9
-; CHECK-NEXT: lfd f0, 0(r23)
-; CHECK-NEXT: lfd f1, 0(r22)
+; CHECK-NEXT: stfdx f0, r21, r7
+; CHECK-NEXT: lfd f0, 0(r22)
+; CHECK-NEXT: lfd f1, 0(r23)
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r21, r9
+; CHECK-NEXT: lfdx f1, r24, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r21, r9
-; CHECK-NEXT: lfdx f0, r23, r7
-; CHECK-NEXT: lfdx f1, r22, r7
+; CHECK-NEXT: stfdx f0, r24, r7
+; CHECK-NEXT: lfdx f0, r22, r0
+; CHECK-NEXT: lfdx f1, r23, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r24, r9
+; CHECK-NEXT: lfdx f1, r25, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r24, r9
-; CHECK-NEXT: lfdx f0, r23, r8
-; CHECK-NEXT: lfdx f1, r22, r8
-; CHECK-NEXT: add r23, r23, r12
-; CHECK-NEXT: add r22, r22, r12
+; CHECK-NEXT: stfdx f0, r25, r7
+; CHECK-NEXT: lfdx f0, r22, r12
+; CHECK-NEXT: lfdx f1, r23, r12
+; CHECK-NEXT: add r22, r22, r11
+; CHECK-NEXT: add r23, r23, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r25, r9
+; CHECK-NEXT: lfdx f1, r26, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r25, r9
-; CHECK-NEXT: lfd f0, 0(r28)
-; CHECK-NEXT: lfd f1, 0(r27)
+; CHECK-NEXT: stfdx f0, r26, r7
+; CHECK-NEXT: lfd f0, 0(r27)
+; CHECK-NEXT: lfd f1, 0(r28)
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r26, r9
+; CHECK-NEXT: lfdx f1, r29, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r26, r9
-; CHECK-NEXT: lfdx f0, r28, r7
-; CHECK-NEXT: lfdx f1, r27, r7
+; CHECK-NEXT: stfdx f0, r29, r7
+; CHECK-NEXT: lfdx f0, r27, r0
+; CHECK-NEXT: lfdx f1, r28, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r29, r9
+; CHECK-NEXT: lfdx f1, r8, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r29, r9
-; CHECK-NEXT: lfdx f0, r28, r8
-; CHECK-NEXT: lfdx f1, r27, r8
-; CHECK-NEXT: add r28, r28, r12
-; CHECK-NEXT: add r27, r27, r12
+; CHECK-NEXT: stfdx f0, r8, r7
+; CHECK-NEXT: lfdx f0, r27, r12
+; CHECK-NEXT: lfdx f1, r28, r12
+; CHECK-NEXT: add r27, r27, r11
+; CHECK-NEXT: add r28, r28, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r30, r9
+; CHECK-NEXT: lfdx f1, r9, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r30, r9
-; CHECK-NEXT: add r9, r9, r12
+; CHECK-NEXT: stfdx f0, r9, r7
+; CHECK-NEXT: add r7, r7, r11
; CHECK-NEXT: bdnz .LBB7_3
; CHECK-NEXT: .LBB7_4: # %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT: ld r7, -192(r1) # 8-byte Folded Reload
; CHECK-NEXT: cmpldi r6, 0
; CHECK-NEXT: beq cr0, .LBB7_7
; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader
-; CHECK-NEXT: ld r4, -184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: sldi r7, r7, 3
-; CHECK-NEXT: add r4, r0, r4
-; CHECK-NEXT: sldi r4, r4, 3
-; CHECK-NEXT: add r3, r5, r4
-; CHECK-NEXT: add r8, r29, r4
-; CHECK-NEXT: add r9, r30, r4
-; CHECK-NEXT: ld r4, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r4, r0, r4
-; CHECK-NEXT: sldi r4, r4, 3
-; CHECK-NEXT: add r10, r5, r4
-; CHECK-NEXT: add r11, r29, r4
-; CHECK-NEXT: add r12, r30, r4
-; CHECK-NEXT: ld r4, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r4, r0, r4
-; CHECK-NEXT: sldi r0, r4, 3
-; CHECK-NEXT: add r5, r5, r0
-; CHECK-NEXT: add r4, r29, r0
-; CHECK-NEXT: add r30, r30, r0
-; CHECK-NEXT: li r0, 0
+; CHECK-NEXT: ld r3, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r0, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT: sldi r8, r30, 3
+; CHECK-NEXT: add r3, r10, r3
+; CHECK-NEXT: sldi r3, r3, 3
+; CHECK-NEXT: add r7, r5, r3
+; CHECK-NEXT: add r9, r4, r3
+; CHECK-NEXT: add r11, r0, r3
+; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT: add r3, r10, r3
+; CHECK-NEXT: sldi r3, r3, 3
+; CHECK-NEXT: add r12, r5, r3
+; CHECK-NEXT: add r30, r4, r3
+; CHECK-NEXT: add r29, r0, r3
+; CHECK-NEXT: ld r3, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT: add r3, r10, r3
+; CHECK-NEXT: li r10, 0
+; CHECK-NEXT: sldi r3, r3, 3
+; CHECK-NEXT: add r5, r5, r3
+; CHECK-NEXT: add r4, r4, r3
+; CHECK-NEXT: add r3, r0, r3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB7_6: # %for.body.epil
; CHECK-NEXT: #
-; CHECK-NEXT: lfdx f0, r30, r0
-; CHECK-NEXT: lfdx f1, r4, r0
+; CHECK-NEXT: lfdx f0, r3, r10
+; CHECK-NEXT: lfdx f1, r4, r10
; CHECK-NEXT: addi r6, r6, -1
; CHECK-NEXT: cmpldi r6, 0
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfd f1, 0(r5)
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfd f0, 0(r5)
-; CHECK-NEXT: add r5, r5, r7
-; CHECK-NEXT: lfdx f0, r12, r0
-; CHECK-NEXT: lfdx f1, r11, r0
+; CHECK-NEXT: add r5, r5, r8
+; CHECK-NEXT: lfdx f0, r29, r10
+; CHECK-NEXT: lfdx f1, r30, r10
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r10, r0
+; CHECK-NEXT: lfdx f1, r12, r10
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r10, r0
-; CHECK-NEXT: lfdx f0, r9, r0
-; CHECK-NEXT: lfdx f1, r8, r0
+; CHECK-NEXT: stfdx f0, r12, r10
+; CHECK-NEXT: lfdx f0, r11, r10
+; CHECK-NEXT: lfdx f1, r9, r10
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r3, r0
+; CHECK-NEXT: lfdx f1, r7, r10
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r3, r0
-; CHECK-NEXT: add r0, r0, r7
+; CHECK-NEXT: stfdx f0, r7, r10
+; CHECK-NEXT: add r10, r10, r8
; CHECK-NEXT: bne cr0, .LBB7_6
; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup
; CHECK-NEXT: ld r2, -152(r1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index fd2ba49ea861..9be03d557bd8 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -26,7 +26,6 @@
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Remove unreachable blocks from the CFG
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
@@ -69,6 +68,7 @@
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Stack Frame Layout Analysis
+; CHECK-NEXT: RISC-V Indirect Branch Tracking
; CHECK-NEXT: RISC-V pseudo instruction expansion pass
; CHECK-NEXT: RISC-V atomic pseudo instruction expansion pass
; CHECK-NEXT: Unpack machine instruction bundles
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index d6d0cca6ddae..7bad290bf313 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -62,7 +62,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
@@ -195,6 +194,7 @@
; CHECK-NEXT: Stack Frame Layout Analysis
; CHECK-NEXT: RISC-V Zcmp move merging pass
; CHECK-NEXT: RISC-V Zcmp Push/Pop optimization pass
+; CHECK-NEXT: RISC-V Indirect Branch Tracking
; CHECK-NEXT: RISC-V pseudo instruction expansion pass
; CHECK-NEXT: RISC-V atomic pseudo instruction expansion pass
; CHECK-NEXT: Unpack machine instruction bundles
diff --git a/llvm/test/CodeGen/RISCV/jumptable-swguarded.ll b/llvm/test/CodeGen/RISCV/jumptable-swguarded.ll
index 9d57ca74cd78..0e87d8d6f82f 100644
--- a/llvm/test/CodeGen/RISCV/jumptable-swguarded.ll
+++ b/llvm/test/CodeGen/RISCV/jumptable-swguarded.ll
@@ -8,6 +8,7 @@
define void @above_threshold(i32 signext %in, ptr %out) nounwind {
; CHECK-LABEL: above_threshold:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lpad 0
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: li a2, 5
; CHECK-NEXT: bltu a2, a0, .LBB0_9
diff --git a/llvm/test/CodeGen/RISCV/lpad.ll b/llvm/test/CodeGen/RISCV/lpad.ll
new file mode 100644
index 000000000000..de82a9ee4e34
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/lpad.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -mattr=+experimental-zicfilp < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple riscv64 -mattr=+experimental-zicfilp < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+; Check indirectbr.
+@__const.indirctbr.addr = private unnamed_addr constant [2 x ptr] [ptr blockaddress(@indirctbr, %labelA), ptr blockaddress(@indirctbr, %labelB)], align 8
+define void @indirctbr(i32 %i, ptr %p) {
+; RV32-LABEL: indirctbr:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: lpad 0
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: lui a2, %hi(.L__const.indirctbr.addr)
+; RV32-NEXT: addi a2, a2, %lo(.L__const.indirctbr.addr)
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: lw a0, 0(a0)
+; RV32-NEXT: jr a0
+; RV32-NEXT: .p2align 2
+; RV32-NEXT: .Ltmp0: # Block address taken
+; RV32-NEXT: .LBB0_1: # %labelA
+; RV32-NEXT: lpad 0
+; RV32-NEXT: li a0, 1
+; RV32-NEXT: sw a0, 0(a1)
+; RV32-NEXT: .p2align 2
+; RV32-NEXT: .Ltmp1: # Block address taken
+; RV32-NEXT: .LBB0_2: # %labelB
+; RV32-NEXT: lpad 0
+; RV32-NEXT: li a0, 2
+; RV32-NEXT: sw a0, 0(a1)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: indirctbr:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: lpad 0
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: lui a2, %hi(.L__const.indirctbr.addr)
+; RV64-NEXT: addi a2, a2, %lo(.L__const.indirctbr.addr)
+; RV64-NEXT: add a0, a2, a0
+; RV64-NEXT: ld a0, 0(a0)
+; RV64-NEXT: jr a0
+; RV64-NEXT: .p2align 2
+; RV64-NEXT: .Ltmp0: # Block address taken
+; RV64-NEXT: .LBB0_1: # %labelA
+; RV64-NEXT: lpad 0
+; RV64-NEXT: li a0, 1
+; RV64-NEXT: sw a0, 0(a1)
+; RV64-NEXT: .p2align 2
+; RV64-NEXT: .Ltmp1: # Block address taken
+; RV64-NEXT: .LBB0_2: # %labelB
+; RV64-NEXT: lpad 0
+; RV64-NEXT: li a0, 2
+; RV64-NEXT: sw a0, 0(a1)
+; RV64-NEXT: ret
+entry:
+ %arrayidx = getelementptr inbounds [2 x ptr], ptr @__const.indirctbr.addr, i64 0, i32 %i
+ %0 = load ptr, ptr %arrayidx
+ indirectbr ptr %0, [label %labelA, label %labelB]
+
+labelA: ; preds = %entry
+ store volatile i32 1, ptr %p
+ br label %labelB
+
+labelB: ; preds = %labelA, %entry
+ store volatile i32 2, ptr %p
+ ret void
+}
+
+; Check external linkage function.
+define void @external() {
+; CHECK-LABEL: external:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lpad 0
+; CHECK-NEXT: ret
+ ret void
+}
+
+; Check internal linkage function.
+define internal void @internal() {
+; CHECK-LABEL: internal:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ ret void
+}
+
+; Check internal linkage function with taken address.
+@foo = constant ptr @internal2
+define internal void @internal2() {
+; CHECK-LABEL: internal2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lpad 0
+; CHECK-NEXT: ret
+ ret void
+}
+
+; Check interrupt function does not need landing pad.
+define void @interrupt() "interrupt"="user" {
+; CHECK-LABEL: interrupt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mret
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcompress.ll b/llvm/test/CodeGen/RISCV/rvv/vcompress.ll
index 85663f08db6a..b763e116a9f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vcompress.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcompress.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfhmin,+zvfbfmin \
; RUN: -verify-machineinstrs | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zvfbfmin \
; RUN: -verify-machineinstrs | FileCheck %s
declare <vscale x 1 x i8> @llvm.riscv.vcompress.nxv1i8(
@@ -817,3 +817,136 @@ entry:
ret <vscale x 8 x double> %a
}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vcompress.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vcompress_vm_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_vm_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vcompress.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vcompress.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vcompress_vm_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_vm_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vcompress.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vcompress.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vcompress_vm_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_vm_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vcompress.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vcompress.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vcompress_vm_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_vm_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v10, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vcompress.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vcompress.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vcompress_vm_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_vm_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v12, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vcompress.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vcompress.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vcompress_vm_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vcompress_vm_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v16, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vcompress.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-select.ll b/llvm/test/CodeGen/RISCV/rvv/vp-select.ll
new file mode 100644
index 000000000000..c8a048971a80
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-select.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+define <vscale x 1 x i64> @all_ones(<vscale x 1 x i64> %true, <vscale x 1 x i64> %false, i32 %evl) {
+; CHECK-LABEL: all_ones:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> %true, <vscale x 1 x i64> %false, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @all_zeroes(<vscale x 1 x i64> %true, <vscale x 1 x i64> %false, i32 %evl) {
+; CHECK-LABEL: all_zeroes:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 false), <vscale x 1 x i64> %true, <vscale x 1 x i64> %false, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/vrgather.ll
index d11e172b2503..5d700e683a96 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrgather.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+d,+zfhmin,+zvfh,+zvfbfmin \
; RUN: -verify-machineinstrs | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+d,+zfhmin,+zvfh,+zvfbfmin \
; RUN: -verify-machineinstrs | FileCheck %s
declare <vscale x 1 x i8> @llvm.riscv.vrgather.vv.nxv1i8.iXLen(
@@ -4820,3 +4820,785 @@ entry:
ret <vscale x 8 x double> %a
}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.nxv1bf16.iXLen(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i16>,
+ iXLen)
+
+define <vscale x 1 x bfloat> @intrinsic_vrgather_vv_nxv1bf16_nxv1bf16_nxv1i16(<vscale x 1 x bfloat> %0, <vscale x 1 x i16> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vv_nxv1bf16_nxv1bf16_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vrgather.vv v10, v8, v9
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.nxv1bf16.iXLen(
+ <vscale x 1 x bfloat> undef,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x i16> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv1bf16.iXLen(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i16>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen)
+
+define <vscale x 1 x bfloat> @intrinsic_vrgather_mask_vv_nxv1bf16_nxv1bf16_nxv1i16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i16> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv1bf16_nxv1bf16_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv1bf16.iXLen(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i16> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.nxv2bf16.iXLen(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i16>,
+ iXLen)
+
+define <vscale x 2 x bfloat> @intrinsic_vrgather_vv_nxv2bf16_nxv2bf16_nxv2i16(<vscale x 2 x bfloat> %0, <vscale x 2 x i16> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vv_nxv2bf16_nxv2bf16_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vrgather.vv v10, v8, v9
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.nxv2bf16.iXLen(
+ <vscale x 2 x bfloat> undef,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x i16> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv2bf16.iXLen(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i16>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen)
+
+define <vscale x 2 x bfloat> @intrinsic_vrgather_mask_vv_nxv2bf16_nxv2bf16_nxv2i16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i16> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv2bf16_nxv2bf16_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv2bf16.iXLen(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i16> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.nxv4bf16.iXLen(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i16>,
+ iXLen)
+
+define <vscale x 4 x bfloat> @intrinsic_vrgather_vv_nxv4bf16_nxv4bf16_nxv4i16(<vscale x 4 x bfloat> %0, <vscale x 4 x i16> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vv_nxv4bf16_nxv4bf16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vrgather.vv v10, v8, v9
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.nxv4bf16.iXLen(
+ <vscale x 4 x bfloat> undef,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x i16> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv4bf16.iXLen(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i16>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen)
+
+define <vscale x 4 x bfloat> @intrinsic_vrgather_mask_vv_nxv4bf16_nxv4bf16_nxv4i16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i16> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv4bf16_nxv4bf16_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv4bf16.iXLen(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i16> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.nxv8bf16.iXLen(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i16>,
+ iXLen)
+
+define <vscale x 8 x bfloat> @intrinsic_vrgather_vv_nxv8bf16_nxv8bf16_nxv8i16(<vscale x 8 x bfloat> %0, <vscale x 8 x i16> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vv_nxv8bf16_nxv8bf16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vrgather.vv v12, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.nxv8bf16.iXLen(
+ <vscale x 8 x bfloat> undef,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x i16> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv8bf16.iXLen(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i16>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen)
+
+define <vscale x 8 x bfloat> @intrinsic_vrgather_mask_vv_nxv8bf16_nxv8bf16_nxv8i16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv8bf16_nxv8bf16_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vrgather.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv8bf16.iXLen(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i16> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.nxv16bf16.iXLen(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i16>,
+ iXLen)
+
+define <vscale x 16 x bfloat> @intrinsic_vrgather_vv_nxv16bf16_nxv16bf16_nxv16i16(<vscale x 16 x bfloat> %0, <vscale x 16 x i16> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vv_nxv16bf16_nxv16bf16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vrgather.vv v16, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.nxv16bf16.iXLen(
+ <vscale x 16 x bfloat> undef,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x i16> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv16bf16.iXLen(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i16>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen)
+
+define <vscale x 16 x bfloat> @intrinsic_vrgather_mask_vv_nxv16bf16_nxv16bf16_nxv16i16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv16bf16_nxv16bf16_nxv16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vrgather.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv16bf16.iXLen(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i16> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.nxv32bf16.iXLen(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i16>,
+ iXLen)
+
+define <vscale x 32 x bfloat> @intrinsic_vrgather_vv_nxv32bf16_nxv32bf16_nxv32i16(<vscale x 32 x bfloat> %0, <vscale x 32 x i16> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vv_nxv32bf16_nxv32bf16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vrgather.vv v24, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.nxv32bf16.iXLen(
+ <vscale x 32 x bfloat> undef,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x i16> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv32bf16.iXLen(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i16>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen)
+
+define <vscale x 32 x bfloat> @intrinsic_vrgather_mask_vv_nxv32bf16_nxv32bf16_nxv32i16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv32bf16_nxv32bf16_nxv32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
+; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv32bf16.iXLen(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i16> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.nxv1bf16.iXLen(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen,
+ iXLen)
+
+define <vscale x 1 x bfloat> @intrinsic_vrgather_vx_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vx_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vrgather.vx v9, v8, a0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.nxv1bf16.iXLen(
+ <vscale x 1 x bfloat> undef,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv1bf16.iXLen(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen)
+
+define <vscale x 1 x bfloat> @intrinsic_vrgather_mask_vx_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vx_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vrgather.vx v8, v9, a0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv1bf16.iXLen(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.nxv2bf16.iXLen(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen,
+ iXLen)
+
+define <vscale x 2 x bfloat> @intrinsic_vrgather_vx_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vx_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vrgather.vx v9, v8, a0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.nxv2bf16.iXLen(
+ <vscale x 2 x bfloat> undef,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv2bf16.iXLen(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen)
+
+define <vscale x 2 x bfloat> @intrinsic_vrgather_mask_vx_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vx_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vrgather.vx v8, v9, a0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv2bf1bf16XLen(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.nxv4bf16.iXLen(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen,
+ iXLen)
+
+define <vscale x 4 x bfloat> @intrinsic_vrgather_vx_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vx_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vrgather.vx v9, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.nxv4bf16.iXLen(
+ <vscale x 4 x bfloat> undef,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv4bf16.iXLen(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen)
+
+define <vscale x 4 x bfloat> @intrinsic_vrgather_mask_vx_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vx_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vrgather.vx v8, v9, a0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv4bf16.iXLen(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.nxv8bf16.iXLen(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen,
+ iXLen)
+
+define <vscale x 8 x bfloat> @intrinsic_vrgather_vx_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vx_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vrgather.vx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.nxv8bf16.iXLen(
+ <vscale x 8 x bfloat> undef,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv8bf16.iXLen(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen)
+
+define <vscale x 8 x bfloat> @intrinsic_vrgather_mask_vx_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vx_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vrgather.vx v8, v10, a0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv8bf16.iXLen(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.nxv16bf16.iXLen(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen,
+ iXLen)
+
+define <vscale x 16 x bfloat> @intrinsic_vrgather_vx_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vx_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vrgather.vx v12, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.nxv16bf16.iXLen(
+ <vscale x 16 x bfloat> undef,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv16bf16.iXLen(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen)
+
+define <vscale x 16 x bfloat> @intrinsic_vrgather_mask_vx_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vx_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vrgather.vx v8, v12, a0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv16bf16.iXLen(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.nxv32bf16.iXLen(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen,
+ iXLen)
+
+define <vscale x 32 x bfloat> @intrinsic_vrgather_vx_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vx_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vrgather.vx v16, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.nxv32bf16.iXLen(
+ <vscale x 32 x bfloat> undef,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv32bf16.iXLen(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen)
+
+define <vscale x 32 x bfloat> @intrinsic_vrgather_mask_vx_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vx_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
+; CHECK-NEXT: vrgather.vx v8, v16, a0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv32bf16.iXLen(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vrgather_vi_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vi_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vrgather.vi v9, v8, 9
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.nxv1bf16.iXLen(
+ <vscale x 1 x bfloat> undef,
+ <vscale x 1 x bfloat> %0,
+ iXLen 9,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vrgather_mask_vi_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vi_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vrgather.vi v8, v9, 9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv1bf16.iXLen(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen 9,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vrgather_vi_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vi_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vrgather.vi v9, v8, 9
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.nxv2bf16.iXLen(
+ <vscale x 2 x bfloat> undef,
+ <vscale x 2 x bfloat> %0,
+ iXLen 9,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vrgather_mask_vi_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vi_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vrgather.vi v8, v9, 9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv2bf16.iXLen(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen 9,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vrgather_vi_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vi_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vrgather.vi v9, v8, 9
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.nxv4bf16.iXLen(
+ <vscale x 4 x bfloat> undef,
+ <vscale x 4 x bfloat> %0,
+ iXLen 9,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vrgather_mask_vi_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vi_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vrgather.vi v8, v9, 9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv4bf16.iXLen(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen 9,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vrgather_vi_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vi_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vrgather.vi v10, v8, 9
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.nxv8bf16.iXLen(
+ <vscale x 8 x bfloat> undef,
+ <vscale x 8 x bfloat> %0,
+ iXLen 9,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vrgather_mask_vi_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vi_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vrgather.vi v8, v10, 9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv8bf16.iXLen(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen 9,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vrgather_vi_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vi_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vrgather.vi v12, v8, 9
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.nxv16bf16.iXLen(
+ <vscale x 16 x bfloat> undef,
+ <vscale x 16 x bfloat> %0,
+ iXLen 9,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vrgather_mask_vi_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vi_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vrgather.vi v8, v12, 9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv16bf16.iXLen(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen 9,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @intrinsic_vrgather_vi_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_vi_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vrgather.vi v16, v8, 9
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.nxv32bf16.iXLen(
+ <vscale x 32 x bfloat> undef,
+ <vscale x 32 x bfloat> %0,
+ iXLen 9,
+ iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @intrinsic_vrgather_mask_vi_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vrgather_mask_vi_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT: vrgather.vi v8, v16, 9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv32bf16.iXLen(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen 9,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
index 4749cc656693..0d96fbfa8127 100644
--- a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
@@ -276,9 +276,8 @@ define i64 @sraiw_andi(i32 signext %0, i32 signext %1) nounwind {
; RV64-LABEL: sraiw_andi:
; RV64: # %bb.0: # %entry
; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srai a0, a0, 2
-; RV64-NEXT: srli a0, a0, 61
+; RV64-NEXT: sraiw a0, a0, 31
+; RV64-NEXT: andi a0, a0, 7
; RV64-NEXT: ret
entry:
%3 = add i32 %0, %1
diff --git a/llvm/test/CodeGen/WebAssembly/offset.ll b/llvm/test/CodeGen/WebAssembly/offset.ll
index 65de341780e3..763c60cef818 100644
--- a/llvm/test/CodeGen/WebAssembly/offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/offset.ll
@@ -40,6 +40,26 @@ define i32 @load_i32_with_folded_gep_offset(ptr %p) {
ret i32 %t
}
+; Same for nusw.
+
+; CHECK-LABEL: load_i32_with_folded_gep_offset_nusw:
+; CHECK: i32.load $push0=, 24($0){{$}}
+define i32 @load_i32_with_folded_gep_offset_nusw(ptr %p) {
+ %s = getelementptr nusw i32, ptr %p, i32 6
+ %t = load i32, ptr %s
+ ret i32 %t
+}
+
+; For nuw we don't need the offset to be positive.
+
+; CHECK-LABEL: load_i32_with_folded_gep_offset_nuw:
+; CHECK: i32.load $push0=, -24($0){{$}}
+define i32 @load_i32_with_folded_gep_offset_nuw(ptr %p) {
+ %s = getelementptr nuw i32, ptr %p, i32 -6
+ %t = load i32, ptr %s
+ ret i32 %t
+}
+
; We can't fold a negative offset though, even with an inbounds gep.
; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset:
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index 29d3c2795ffb..98b86384b844 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -26,7 +26,6 @@
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
; CHECK-NEXT: Remove unreachable blocks from the CFG
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/X86/apx/and.ll b/llvm/test/CodeGen/X86/apx/and.ll
index 51858ad59160..23aed77b948b 100644
--- a/llvm/test/CodeGen/X86/apx/and.ll
+++ b/llvm/test/CodeGen/X86/apx/and.ll
@@ -482,17 +482,17 @@ define i1 @andflag16rr(i16 %a, i16 %b) {
define i1 @andflag32rr(i32 %a, i32 %b) {
; CHECK-LABEL: andflag32rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x21,0xf7]
+; CHECK-NEXT: andl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x21,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag32rr:
; NF: # %bb.0:
-; NF-NEXT: andl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x21,0xf7]
+; NF-NEXT: andl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x21,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i32 %a, %b ; 0xff << 50
@@ -504,17 +504,17 @@ define i1 @andflag32rr(i32 %a, i32 %b) {
define i1 @andflag64rr(i64 %a, i64 %b) {
; CHECK-LABEL: andflag64rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: andq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x21,0xf7]
+; CHECK-NEXT: andq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x21,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag64rr:
; NF: # %bb.0:
-; NF-NEXT: andq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x21,0xf7]
+; NF-NEXT: andq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x21,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, %b ; 0xff << 50
@@ -578,17 +578,17 @@ define i1 @andflag16rm(ptr %ptr, i16 %b) {
define i1 @andflag32rm(ptr %ptr, i32 %b) {
; CHECK-LABEL: andflag32rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x23,0x37]
+; CHECK-NEXT: andl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x23,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag32rm:
; NF: # %bb.0:
-; NF-NEXT: andl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x23,0x37]
+; NF-NEXT: andl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x23,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i32, ptr %ptr
@@ -601,17 +601,17 @@ define i1 @andflag32rm(ptr %ptr, i32 %b) {
define i1 @andflag64rm(ptr %ptr, i64 %b) {
; CHECK-LABEL: andflag64rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: andq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x23,0x37]
+; CHECK-NEXT: andq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x23,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag64rm:
; NF: # %bb.0:
-; NF-NEXT: andq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x23,0x37]
+; NF-NEXT: andq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x23,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i64, ptr %ptr
@@ -672,19 +672,19 @@ define i1 @andflag16ri(i16 %a) {
define i1 @andflag32ri(i32 %a) {
; CHECK-LABEL: andflag32ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: andl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xe7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag32ri:
; NF: # %bb.0:
-; NF-NEXT: andl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00]
+; NF-NEXT: andl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xe7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i32 %a, 123456 ; 0xff << 50
@@ -696,19 +696,19 @@ define i1 @andflag32ri(i32 %a) {
define i1 @andflag64ri(i64 %a) {
; CHECK-LABEL: andflag64ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: andq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: andq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xe7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag64ri:
; NF: # %bb.0:
-; NF-NEXT: andq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00]
+; NF-NEXT: andq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xe7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 123456 ; 0xff << 50
@@ -743,17 +743,17 @@ define i1 @andflag16ri8(i16 %a) {
define i1 @andflag32ri8(i32 %a) {
; CHECK-LABEL: andflag32ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xe7,0x7b]
+; CHECK-NEXT: andl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xe7,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag32ri8:
; NF: # %bb.0:
-; NF-NEXT: andl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xe7,0x7b]
+; NF-NEXT: andl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xe7,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i32 %a, 123 ; 0xff << 50
@@ -765,17 +765,17 @@ define i1 @andflag32ri8(i32 %a) {
define i1 @andflag64ri8(i64 %a) {
; CHECK-LABEL: andflag64ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xe7,0x7b]
+; CHECK-NEXT: andq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xe7,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag64ri8:
; NF: # %bb.0:
-; NF-NEXT: andq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xe7,0x7b]
+; NF-NEXT: andq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xe7,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 123 ; 0xff << 50
diff --git a/llvm/test/CodeGen/X86/apx/cmov.ll b/llvm/test/CodeGen/X86/apx/cmov.ll
index 7a6a63f813c0..7b846120d3f7 100644
--- a/llvm/test/CodeGen/X86/apx/cmov.ll
+++ b/llvm/test/CodeGen/X86/apx/cmov.ll
@@ -5,10 +5,10 @@ define i8 @cmov8(i8 %a, i8 %b, i8 %x, ptr %y.ptr) {
; CHECK-LABEL: cmov8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb %sil, %dil # encoding: [0x40,0x38,0xf7]
-; CHECK-NEXT: cmoval %edi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x47,0xd7]
-; CHECK-NEXT: movzbl (%rcx), %ecx # encoding: [0x0f,0xb6,0x09]
-; CHECK-NEXT: cmovbel %edx, %ecx # EVEX TO LEGACY Compression encoding: [0x0f,0x46,0xca]
-; CHECK-NEXT: addb %cl, %al # EVEX TO LEGACY Compression encoding: [0x00,0xc8]
+; CHECK-NEXT: cmovbel %edx, %edi # EVEX TO LEGACY Compression encoding: [0x0f,0x46,0xfa]
+; CHECK-NEXT: movzbl (%rcx), %eax # encoding: [0x0f,0xb6,0x01]
+; CHECK-NEXT: cmovbel %edx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x46,0xc2]
+; CHECK-NEXT: addb %dil, %al # EVEX TO LEGACY Compression encoding: [0x40,0x00,0xf8]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%cond = icmp ugt i8 %a, %b
@@ -23,9 +23,9 @@ define i16 @cmov16(i16 %a, i16 %b, i16 %x, ptr %y.ptr) {
; CHECK-LABEL: cmov16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpw %si, %di # encoding: [0x66,0x39,0xf7]
-; CHECK-NEXT: cmoval %edi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x47,0xd7]
-; CHECK-NEXT: cmovaw (%rcx), %dx, %cx # encoding: [0x62,0xf4,0x75,0x18,0x47,0x11]
-; CHECK-NEXT: addw %cx, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x01,0xc8]
+; CHECK-NEXT: cmovbel %edx, %edi # EVEX TO LEGACY Compression encoding: [0x0f,0x46,0xfa]
+; CHECK-NEXT: cmovaw (%rcx), %dx, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x47,0x11]
+; CHECK-NEXT: addw %di, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x01,0xf8]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%cond = icmp ugt i16 %a, %b
@@ -41,8 +41,8 @@ define i32 @cmov32(i32 %a, i32 %b, i32 %x, ptr %y.ptr) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
; CHECK-NEXT: cmoval %edi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x47,0xd7]
-; CHECK-NEXT: cmoval (%rcx), %edx, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x47,0x11]
-; CHECK-NEXT: addl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xc8]
+; CHECK-NEXT: cmoval (%rcx), %edx # EVEX TO LEGACY Compression encoding: [0x0f,0x47,0x11]
+; CHECK-NEXT: addl %edx, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xd0]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%cond = icmp ugt i32 %a, %b
@@ -58,8 +58,8 @@ define i64 @cmov64(i64 %a, i64 %b, i64 %x, ptr %y.ptr) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpq %rsi, %rdi # encoding: [0x48,0x39,0xf7]
; CHECK-NEXT: cmovaq %rdi, %rdx, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x47,0xd7]
-; CHECK-NEXT: cmovaq (%rcx), %rdx, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x47,0x11]
-; CHECK-NEXT: addq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xc8]
+; CHECK-NEXT: cmovaq (%rcx), %rdx # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x47,0x11]
+; CHECK-NEXT: addq %rdx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xd0]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%cond = icmp ugt i64 %a, %b
diff --git a/llvm/test/CodeGen/X86/apx/mul-i1024.ll b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
index 2b99c44fc769..a4d15a1b21d6 100644
--- a/llvm/test/CodeGen/X86/apx/mul-i1024.ll
+++ b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
@@ -1041,41 +1041,41 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: pushq %r13
; EGPR-NDD-NEXT: pushq %r12
; EGPR-NDD-NEXT: pushq %rbx
-; EGPR-NDD-NEXT: subq $104, %rsp
+; EGPR-NDD-NEXT: subq $96, %rsp
; EGPR-NDD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %rsi, %r15
; EGPR-NDD-NEXT: movq %rdi, %r20
-; EGPR-NDD-NEXT: movq (%rdi), %r16
-; EGPR-NDD-NEXT: movq 8(%rdi), %r14
+; EGPR-NDD-NEXT: movq (%rdi), %r17
+; EGPR-NDD-NEXT: movq 8(%rdi), %r11
; EGPR-NDD-NEXT: movq 24(%rdi), %r9
; EGPR-NDD-NEXT: movq 16(%rdi), %r10
; EGPR-NDD-NEXT: movq 40(%rdi), %rdi
-; EGPR-NDD-NEXT: movq 32(%r20), %r11
-; EGPR-NDD-NEXT: movq 56(%r20), %r17
-; EGPR-NDD-NEXT: movq 48(%r20), %r15
-; EGPR-NDD-NEXT: movq 24(%rsi), %r18
+; EGPR-NDD-NEXT: movq 32(%r20), %r16
+; EGPR-NDD-NEXT: movq 56(%r20), %r18
+; EGPR-NDD-NEXT: movq 48(%r20), %r23
+; EGPR-NDD-NEXT: movq 24(%rsi), %r14
; EGPR-NDD-NEXT: movq 16(%rsi), %r24
; EGPR-NDD-NEXT: movq (%rsi), %r22
; EGPR-NDD-NEXT: movq 8(%rsi), %r21
-; EGPR-NDD-NEXT: movq %rsi, %r23
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: movq %rdx, %r25
; EGPR-NDD-NEXT: movq %rax, %r19
-; EGPR-NDD-NEXT: movq %r17, %rax
+; EGPR-NDD-NEXT: movq %r18, %rax
; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: addq %r25, %rax, %rcx
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rcx, %rax, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %rsi, %rcx
+; EGPR-NDD-NEXT: addq %r25, %rax, %rsi
+; EGPR-NDD-NEXT: adcq %rdx, %rcx
; EGPR-NDD-NEXT: setb %al
-; EGPR-NDD-NEXT: movzbl %al, %esi
-; EGPR-NDD-NEXT: movq %r17, %rax
+; EGPR-NDD-NEXT: movzbl %al, %r8d
+; EGPR-NDD-NEXT: movq %r18, %rax
; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %rcx, %rax, %r27
-; EGPR-NDD-NEXT: adcq %rdx, %rsi
-; EGPR-NDD-NEXT: movq %r11, %rax
+; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: movq %rdx, %r26
; EGPR-NDD-NEXT: movq %rax, %r25
@@ -1083,7 +1083,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: addq %r26, %rax, %rcx
; EGPR-NDD-NEXT: adcq $0, %rdx, %r26
-; EGPR-NDD-NEXT: movq %r11, %rax
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %rax, %rcx
; EGPR-NDD-NEXT: adcq %rdx, %r26
@@ -1094,58 +1094,59 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %r26, %rax
; EGPR-NDD-NEXT: adcq %r28, %rdx
; EGPR-NDD-NEXT: addq %rax, %r19, %r28
-; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: adcq %rdx, %rsi, %r29
; EGPR-NDD-NEXT: adcq $0, %r27
-; EGPR-NDD-NEXT: adcq $0, %rsi, %r29
-; EGPR-NDD-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r11, %rax
+; EGPR-NDD-NEXT: adcq $0, %r8
+; EGPR-NDD-NEXT: movq %r16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: movq %rdx, %r19
; EGPR-NDD-NEXT: movq %rax, %r26
; EGPR-NDD-NEXT: movq %rdi, %rax
; EGPR-NDD-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: mulq %r24
-; EGPR-NDD-NEXT: addq %r19, %rax, %rsi
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r19
-; EGPR-NDD-NEXT: movq %r11, %rax
-; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %rsi, %rax, %r30
-; EGPR-NDD-NEXT: adcq %rdx, %r19, %rsi
+; EGPR-NDD-NEXT: addq %rax, %r19
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
+; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: mulq %r14
+; EGPR-NDD-NEXT: addq %rax, %r19
+; EGPR-NDD-NEXT: adcq %rdx, %rsi
; EGPR-NDD-NEXT: setb %al
-; EGPR-NDD-NEXT: movzbl %al, %r19d
+; EGPR-NDD-NEXT: movzbl %al, %r30d
; EGPR-NDD-NEXT: movq %rdi, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %rsi, %rax
-; EGPR-NDD-NEXT: adcq %r19, %rdx
+; EGPR-NDD-NEXT: adcq %r30, %rdx
; EGPR-NDD-NEXT: addq %r28, %r26, %rsi
-; EGPR-NDD-NEXT: adcq %r8, %r30, %r28
+; EGPR-NDD-NEXT: adcq %r29, %r19, %r28
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r27, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %r29, %r27
+; EGPR-NDD-NEXT: addq %rax, %r27
+; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: setb %al
; EGPR-NDD-NEXT: movzbl %al, %r31d
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq %r23, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: movq %rdx, %r19
; EGPR-NDD-NEXT: movq %rax, %r26
-; EGPR-NDD-NEXT: movq %r17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r17, %rax
+; EGPR-NDD-NEXT: movq %r18, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r18, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: addq %rax, %r19
; EGPR-NDD-NEXT: adcq $0, %rdx, %r29
-; EGPR-NDD-NEXT: movq %r15, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %rax, %r19
; EGPR-NDD-NEXT: adcq %rdx, %r29
; EGPR-NDD-NEXT: setb %al
; EGPR-NDD-NEXT: movzbl %al, %r30d
-; EGPR-NDD-NEXT: movq %r17, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: movq %r18, %rax
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %r29, %rax
; EGPR-NDD-NEXT: adcq %r30, %rdx
-; EGPR-NDD-NEXT: addq %r8, %r26, %r29
-; EGPR-NDD-NEXT: adcq %r27, %r19, %r30
+; EGPR-NDD-NEXT: addq %r27, %r26, %r29
+; EGPR-NDD-NEXT: adcq %r8, %r19, %r30
; EGPR-NDD-NEXT: adcq %rax, %r31
; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
; EGPR-NDD-NEXT: movq %r10, %rax
@@ -1154,69 +1155,69 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %rax, %r26
; EGPR-NDD-NEXT: movq %r9, %rax
; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: addq %r19, %rax, %r8
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r19
+; EGPR-NDD-NEXT: addq %rax, %r19
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rax, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %r19
+; EGPR-NDD-NEXT: addq %rax, %r19
+; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: setb %al
; EGPR-NDD-NEXT: movzbl %al, %r27d
; EGPR-NDD-NEXT: movq %r9, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rax, %r19
+; EGPR-NDD-NEXT: addq %rax, %r8
; EGPR-NDD-NEXT: adcq %r27, %rdx, %rbx
-; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: movq %rdx, %r27
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: addq %rax, %r27
; EGPR-NDD-NEXT: adcq $0, %rdx, %r12
-; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %r27, %rax
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %rdx, %r12, %r27
-; EGPR-NDD-NEXT: setb %bpl
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: adcq %rdx, %r12
+; EGPR-NDD-NEXT: setb %r27b
+; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %r27, %rax
-; EGPR-NDD-NEXT: movzbl %bpl, %r27d
+; EGPR-NDD-NEXT: addq %r12, %rax
+; EGPR-NDD-NEXT: movzbl %r27b, %r27d
; EGPR-NDD-NEXT: adcq %r27, %rdx
; EGPR-NDD-NEXT: addq %rax, %r26, %r12
-; EGPR-NDD-NEXT: adcq %rdx, %r8
-; EGPR-NDD-NEXT: adcq $0, %r19
+; EGPR-NDD-NEXT: adcq %rdx, %r19
+; EGPR-NDD-NEXT: adcq $0, %r8
; EGPR-NDD-NEXT: adcq $0, %rbx
-; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: movq %rdx, %r26
; EGPR-NDD-NEXT: movq %rax, %r27
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: addq %rax, %r26
; EGPR-NDD-NEXT: adcq $0, %rdx, %r13
-; EGPR-NDD-NEXT: movq %r16, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: movq %r17, %rax
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %rax, %r26
; EGPR-NDD-NEXT: adcq %rdx, %r13
; EGPR-NDD-NEXT: setb %bpl
-; EGPR-NDD-NEXT: movq %r14, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: movq %r11, %rax
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %r13, %rax
; EGPR-NDD-NEXT: movzbl %bpl, %r13d
; EGPR-NDD-NEXT: adcq %r13, %rdx
-; EGPR-NDD-NEXT: addq %r12, %r27, %r11
-; EGPR-NDD-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r26, %r8
-; EGPR-NDD-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: addq %r12, %r27
+; EGPR-NDD-NEXT: movq %r27, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %r26, %r19
+; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r19, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %rbx, %r19
-; EGPR-NDD-NEXT: setb %bl
-; EGPR-NDD-NEXT: movq %r10, %r17
+; EGPR-NDD-NEXT: addq %rax, %r8
+; EGPR-NDD-NEXT: adcq %rdx, %rbx
+; EGPR-NDD-NEXT: setb %r19b
+; EGPR-NDD-NEXT: movq %r10, %r16
; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: movq %rdx, %r26
@@ -1226,32 +1227,31 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %rax, %r26
; EGPR-NDD-NEXT: adcq $0, %rdx, %r12
; EGPR-NDD-NEXT: movq %r10, %rax
-; EGPR-NDD-NEXT: movq %r18, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %rax, %r26
; EGPR-NDD-NEXT: adcq %rdx, %r12
; EGPR-NDD-NEXT: setb %bpl
; EGPR-NDD-NEXT: movq %r9, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %r12, %rax
; EGPR-NDD-NEXT: movzbl %bpl, %r12d
; EGPR-NDD-NEXT: adcq %r12, %rdx
; EGPR-NDD-NEXT: addq %r27, %r8
-; EGPR-NDD-NEXT: adcq %r26, %r19
-; EGPR-NDD-NEXT: movzbl %bl, %r26d
-; EGPR-NDD-NEXT: adcq %r26, %rax
+; EGPR-NDD-NEXT: adcq %r26, %rbx
+; EGPR-NDD-NEXT: movzbl %r19b, %r19d
+; EGPR-NDD-NEXT: adcq %r19, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
; EGPR-NDD-NEXT: addq %r8, %r25, %r12
-; EGPR-NDD-NEXT: movq 32(%r23), %r26
-; EGPR-NDD-NEXT: adcq %r19, %rcx, %r13
+; EGPR-NDD-NEXT: movq 32(%r15), %r26
+; EGPR-NDD-NEXT: adcq %rbx, %rcx, %r13
; EGPR-NDD-NEXT: adcq %rax, %rsi, %rbp
; EGPR-NDD-NEXT: adcq %rdx, %r28, %rbx
-; EGPR-NDD-NEXT: adcq $0, %r29, %rax
-; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq $0, %r29
+; EGPR-NDD-NEXT: movq %r29, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq $0, %r30
; EGPR-NDD-NEXT: adcq $0, %r31
-; EGPR-NDD-NEXT: adcq $0, %rdi, %rax
-; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq $0, %rdi
+; EGPR-NDD-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r25
@@ -1259,341 +1259,333 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %r9, %r19
; EGPR-NDD-NEXT: movq %r9, %rax
; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %r25, %rax, %rcx
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
-; EGPR-NDD-NEXT: movq 40(%r23), %r18
-; EGPR-NDD-NEXT: movq %r23, %r11
+; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
+; EGPR-NDD-NEXT: movq 40(%r15), %r18
; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %rcx, %rax, %rdi
-; EGPR-NDD-NEXT: adcq %rdx, %r8
-; EGPR-NDD-NEXT: setb %r25b
+; EGPR-NDD-NEXT: addq %r25, %rax, %r29
+; EGPR-NDD-NEXT: adcq %rdx, %rcx
+; EGPR-NDD-NEXT: setb %r8b
; EGPR-NDD-NEXT: movq %r9, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r8, %rax, %r29
-; EGPR-NDD-NEXT: movzbl %r25b, %eax
+; EGPR-NDD-NEXT: addq %rcx, %rax, %rdi
+; EGPR-NDD-NEXT: movzbl %r8b, %eax
; EGPR-NDD-NEXT: adcq %rax, %rdx, %rsi
-; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r28
; EGPR-NDD-NEXT: movq %rax, %r25
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: movq %r11, %r10
+; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: addq %r28, %rax, %r8
; EGPR-NDD-NEXT: adcq $0, %rdx, %r28
-; EGPR-NDD-NEXT: movq %r16, %rax
-; EGPR-NDD-NEXT: movq %r16, %r10
+; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r18
; EGPR-NDD-NEXT: addq %r8, %rax, %r23
; EGPR-NDD-NEXT: adcq %rdx, %r28
; EGPR-NDD-NEXT: setb %cl
-; EGPR-NDD-NEXT: movq %r14, %rax
-; EGPR-NDD-NEXT: movq %r14, %r16
+; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r18
; EGPR-NDD-NEXT: addq %r28, %rax
; EGPR-NDD-NEXT: movzbl %cl, %ecx
; EGPR-NDD-NEXT: adcq %rdx, %rcx
; EGPR-NDD-NEXT: addq %rax, %r27
-; EGPR-NDD-NEXT: adcq %rcx, %rdi
-; EGPR-NDD-NEXT: adcq $0, %r29, %r8
+; EGPR-NDD-NEXT: adcq %rcx, %r29, %r8
+; EGPR-NDD-NEXT: adcq $0, %rdi
; EGPR-NDD-NEXT: adcq $0, %rsi, %r9
-; EGPR-NDD-NEXT: movq %r11, %r14
-; EGPR-NDD-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq 48(%r11), %r11
-; EGPR-NDD-NEXT: movq %r10, %rsi
-; EGPR-NDD-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r10, %rax
+; EGPR-NDD-NEXT: movq 48(%r15), %r11
+; EGPR-NDD-NEXT: movq %r17, %rsi
+; EGPR-NDD-NEXT: movq %r17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: movq %rdx, %r28
; EGPR-NDD-NEXT: movq %rax, %r29
-; EGPR-NDD-NEXT: movq %r16, %rax
-; EGPR-NDD-NEXT: movq %r16, %r10
-; EGPR-NDD-NEXT: movq %r16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r10, %rax
+; EGPR-NDD-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: addq %rax, %r28
; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
-; EGPR-NDD-NEXT: movq 56(%r14), %r16
+; EGPR-NDD-NEXT: movq 56(%r15), %r17
; EGPR-NDD-NEXT: movq %rsi, %rax
-; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: mulq %r17
; EGPR-NDD-NEXT: addq %rax, %r28
; EGPR-NDD-NEXT: adcq %rdx, %rcx
; EGPR-NDD-NEXT: setb %sil
; EGPR-NDD-NEXT: movq %r10, %rax
-; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: mulq %r17
; EGPR-NDD-NEXT: addq %rcx, %rax
; EGPR-NDD-NEXT: movzbl %sil, %ecx
; EGPR-NDD-NEXT: adcq %rdx, %rcx
-; EGPR-NDD-NEXT: addq %r27, %r29, %r10
-; EGPR-NDD-NEXT: adcq %r28, %rdi
+; EGPR-NDD-NEXT: addq %r29, %r27
+; EGPR-NDD-NEXT: adcq %r8, %r28, %r10
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rcx
-; EGPR-NDD-NEXT: addq %rax, %r8
-; EGPR-NDD-NEXT: adcq %rcx, %r9, %rsi
-; EGPR-NDD-NEXT: setb %r9b
-; EGPR-NDD-NEXT: movq %r17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r17, %rax
+; EGPR-NDD-NEXT: addq %rax, %rdi
+; EGPR-NDD-NEXT: adcq %rcx, %r9, %r8
+; EGPR-NDD-NEXT: setb %sil
+; EGPR-NDD-NEXT: movq %r16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: movq %rdx, %r28
; EGPR-NDD-NEXT: movq %rax, %r29
+; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r19, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: addq %r28, %rax, %r27
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r28
-; EGPR-NDD-NEXT: movq %r17, %rax
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %rax, %r27
-; EGPR-NDD-NEXT: adcq %rdx, %r28
+; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
+; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: mulq %r17
+; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: adcq %rdx, %r9
; EGPR-NDD-NEXT: setb %cl
; EGPR-NDD-NEXT: movq %r19, %rax
-; EGPR-NDD-NEXT: movq %r19, %r17
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %r28, %rax
+; EGPR-NDD-NEXT: mulq %r17
+; EGPR-NDD-NEXT: addq %r9, %rax
; EGPR-NDD-NEXT: movzbl %cl, %ecx
; EGPR-NDD-NEXT: adcq %rdx, %rcx
-; EGPR-NDD-NEXT: addq %r8, %r29, %rdx
-; EGPR-NDD-NEXT: adcq %r27, %rsi
-; EGPR-NDD-NEXT: movzbl %r9b, %r8d
-; EGPR-NDD-NEXT: adcq %r8, %rax
+; EGPR-NDD-NEXT: addq %r29, %rdi
+; EGPR-NDD-NEXT: adcq %r28, %r8
+; EGPR-NDD-NEXT: movzbl %sil, %edx
+; EGPR-NDD-NEXT: adcq %rdx, %rax
; EGPR-NDD-NEXT: adcq $0, %rcx
-; EGPR-NDD-NEXT: addq %r12, %r25, %r8
-; EGPR-NDD-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r13, %r23, %r8
-; EGPR-NDD-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %rbp, %r10, %r8
-; EGPR-NDD-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %rbx, %rdi
-; EGPR-NDD-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: adcq $0, %rsi
+; EGPR-NDD-NEXT: addq %r12, %r25
+; EGPR-NDD-NEXT: movq %r25, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %r13, %r23, %r19
+; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %rbp, %r27
+; EGPR-NDD-NEXT: movq %r27, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %rbx, %r10
+; EGPR-NDD-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq $0, %rdi
+; EGPR-NDD-NEXT: adcq $0, %r8
; EGPR-NDD-NEXT: adcq $0, %rax
-; EGPR-NDD-NEXT: adcq $0, %rcx, %rdi
-; EGPR-NDD-NEXT: addq %rdx, {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %rsi, %r30, %r19
-; EGPR-NDD-NEXT: adcq %rax, %r31, %r30
-; EGPR-NDD-NEXT: adcq %rdi, {{[-0-9]+}}(%r{{[sb]}}p), %r31 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: setb %bpl
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: adcq $0, %rcx
+; EGPR-NDD-NEXT: addq %rdi, {{[-0-9]+}}(%r{{[sb]}}p), %r19 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r8, %r30
+; EGPR-NDD-NEXT: adcq %rax, %r31
+; EGPR-NDD-NEXT: adcq %rcx, {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; EGPR-NDD-NEXT: setb %r8b
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; EGPR-NDD-NEXT: movq %r13, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r25
; EGPR-NDD-NEXT: movq %rax, %r28
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r9, %rax
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %r25, %rax, %rsi
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
-; EGPR-NDD-NEXT: movq %r15, %rax
-; EGPR-NDD-NEXT: movq %r15, %r13
+; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
+; EGPR-NDD-NEXT: movq %r13, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %rax, %rsi
-; EGPR-NDD-NEXT: adcq %rdx, %rdi
-; EGPR-NDD-NEXT: setb %r8b
-; EGPR-NDD-NEXT: movq %r9, %rax
-; EGPR-NDD-NEXT: movq %r9, %r23
+; EGPR-NDD-NEXT: addq %r25, %rax, %rdi
+; EGPR-NDD-NEXT: adcq %rdx, %rsi
+; EGPR-NDD-NEXT: setb %r9b
+; EGPR-NDD-NEXT: movq %r10, %rax
+; EGPR-NDD-NEXT: movq %r10, %r16
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %rax, %rdi
-; EGPR-NDD-NEXT: movzbl %r8b, %eax
-; EGPR-NDD-NEXT: adcq %rax, %rdx, %r8
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: addq %rax, %rsi
+; EGPR-NDD-NEXT: movzbl %r9b, %eax
+; EGPR-NDD-NEXT: adcq %rax, %rdx, %r9
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r29
; EGPR-NDD-NEXT: movq %rax, %r25
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %r29, %rax, %r9
+; EGPR-NDD-NEXT: addq %rax, %r29
; EGPR-NDD-NEXT: adcq $0, %rdx, %r10
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r9, %rax, %rbx
-; EGPR-NDD-NEXT: adcq %rdx, %r10, %r9
-; EGPR-NDD-NEXT: setb %r10b
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: addq %r29, %rax, %rbx
+; EGPR-NDD-NEXT: adcq %rdx, %r10
+; EGPR-NDD-NEXT: setb %r27b
+; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r9, %rax
-; EGPR-NDD-NEXT: movzbl %r10b, %r9d
-; EGPR-NDD-NEXT: adcq %r9, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r28, %r9
-; EGPR-NDD-NEXT: adcq %rdx, %rsi
-; EGPR-NDD-NEXT: adcq $0, %rdi
-; EGPR-NDD-NEXT: adcq $0, %r8
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: addq %r10, %rax
+; EGPR-NDD-NEXT: movzbl %r27b, %r10d
+; EGPR-NDD-NEXT: adcq %r10, %rdx
+; EGPR-NDD-NEXT: addq %rax, %r28, %r10
+; EGPR-NDD-NEXT: adcq %rdx, %rdi
+; EGPR-NDD-NEXT: adcq $0, %rsi
+; EGPR-NDD-NEXT: adcq $0, %r9
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: movq %rdx, %r28
; EGPR-NDD-NEXT: movq %rax, %r29
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: addq %r28, %rax, %r10
+; EGPR-NDD-NEXT: addq %rax, %r28
; EGPR-NDD-NEXT: adcq $0, %rdx, %r27
-; EGPR-NDD-NEXT: movq %r14, %rax
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %rax, %r10
+; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: mulq %r17
+; EGPR-NDD-NEXT: addq %rax, %r28
; EGPR-NDD-NEXT: adcq %rdx, %r27
-; EGPR-NDD-NEXT: setb %r28b
-; EGPR-NDD-NEXT: movq %r15, %rax
-; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: setb %bpl
+; EGPR-NDD-NEXT: movq %r12, %rax
+; EGPR-NDD-NEXT: mulq %r17
; EGPR-NDD-NEXT: addq %r27, %rax
-; EGPR-NDD-NEXT: movzbl %r28b, %r27d
+; EGPR-NDD-NEXT: movzbl %bpl, %r27d
; EGPR-NDD-NEXT: adcq %r27, %rdx
-; EGPR-NDD-NEXT: addq %r29, %r9
-; EGPR-NDD-NEXT: adcq %r10, %rsi
+; EGPR-NDD-NEXT: addq %r29, %r10
+; EGPR-NDD-NEXT: adcq %r28, %rdi
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: addq %rax, %rdi
-; EGPR-NDD-NEXT: adcq %rdx, %r8
-; EGPR-NDD-NEXT: setb %r10b
+; EGPR-NDD-NEXT: addq %rax, %rsi
+; EGPR-NDD-NEXT: adcq %rdx, %r9
+; EGPR-NDD-NEXT: setb %r27b
; EGPR-NDD-NEXT: movq %r13, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: movq %rdx, %r28
; EGPR-NDD-NEXT: movq %rax, %r29
-; EGPR-NDD-NEXT: movq %r23, %r14
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: addq %r28, %rax, %r27
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r28
+; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r12
; EGPR-NDD-NEXT: movq %r13, %rax
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %rax, %r27
-; EGPR-NDD-NEXT: adcq %rdx, %r28
-; EGPR-NDD-NEXT: setb %r15b
-; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %r28, %rax
-; EGPR-NDD-NEXT: movzbl %r15b, %r28d
-; EGPR-NDD-NEXT: adcq %r28, %rdx
-; EGPR-NDD-NEXT: addq %r29, %rdi
-; EGPR-NDD-NEXT: adcq %r27, %r8
-; EGPR-NDD-NEXT: movzbl %r10b, %r10d
-; EGPR-NDD-NEXT: adcq %r10, %rax
+; EGPR-NDD-NEXT: mulq %r17
+; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: adcq %rdx, %r12
+; EGPR-NDD-NEXT: setb %bpl
+; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: mulq %r17
+; EGPR-NDD-NEXT: addq %r12, %rax
+; EGPR-NDD-NEXT: movzbl %bpl, %r12d
+; EGPR-NDD-NEXT: adcq %r12, %rdx
+; EGPR-NDD-NEXT: addq %r29, %rsi
+; EGPR-NDD-NEXT: adcq %r28, %r9
+; EGPR-NDD-NEXT: movzbl %r27b, %r27d
+; EGPR-NDD-NEXT: adcq %r27, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: addq %r25, %rcx
-; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r19, %rbx, %rcx
-; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r30, %r9, %rcx
-; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r31, %rsi, %rcx
-; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movzbl %bpl, %ecx
+; EGPR-NDD-NEXT: addq %r25, %r19
+; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %rbx, %r30
+; EGPR-NDD-NEXT: movq %r30, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %r31, %r10
+; EGPR-NDD-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq %rdi, %rcx
; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq $0, %r8, %rcx
+; EGPR-NDD-NEXT: movzbl %r8b, %ecx
+; EGPR-NDD-NEXT: adcq %rsi, %rcx
; EGPR-NDD-NEXT: movq %rcx, (%rsp) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq $0, %r9
+; EGPR-NDD-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rax
-; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq $0, %rdx
+; EGPR-NDD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq 64(%r20), %r28
; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %r28
; EGPR-NDD-NEXT: movq %rdx, %r25
; EGPR-NDD-NEXT: movq %rax, %r30
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %r14, %rax
; EGPR-NDD-NEXT: mulq %r28
-; EGPR-NDD-NEXT: addq %r25, %rax, %rcx
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
+; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
; EGPR-NDD-NEXT: movq 72(%r20), %r29
; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %r29
-; EGPR-NDD-NEXT: addq %rax, %rcx
-; EGPR-NDD-NEXT: adcq %rdx, %rsi
-; EGPR-NDD-NEXT: setb %dil
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: adcq %rdx, %rcx
+; EGPR-NDD-NEXT: setb %sil
+; EGPR-NDD-NEXT: movq %r14, %rax
; EGPR-NDD-NEXT: mulq %r29
-; EGPR-NDD-NEXT: addq %rax, %rsi
-; EGPR-NDD-NEXT: movzbl %dil, %eax
-; EGPR-NDD-NEXT: adcq %rax, %rdx, %rdi
+; EGPR-NDD-NEXT: addq %rax, %rcx
+; EGPR-NDD-NEXT: movzbl %sil, %eax
+; EGPR-NDD-NEXT: adcq %rax, %rdx, %rsi
; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %r28
; EGPR-NDD-NEXT: movq %rdx, %r31
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r21, %rax
; EGPR-NDD-NEXT: mulq %r28
-; EGPR-NDD-NEXT: addq %r31, %rax, %r8
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
+; EGPR-NDD-NEXT: addq %rax, %r31
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %r29
-; EGPR-NDD-NEXT: addq %r8, %rax
+; EGPR-NDD-NEXT: addq %r31, %rax
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %rdx, %r9, %r8
-; EGPR-NDD-NEXT: setb %r9b
+; EGPR-NDD-NEXT: adcq %rdx, %rdi
+; EGPR-NDD-NEXT: setb %r8b
; EGPR-NDD-NEXT: movq %r21, %rax
; EGPR-NDD-NEXT: mulq %r29
-; EGPR-NDD-NEXT: addq %r8, %rax
-; EGPR-NDD-NEXT: movzbl %r9b, %r8d
-; EGPR-NDD-NEXT: adcq %r8, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r30, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %rcx
+; EGPR-NDD-NEXT: addq %rdi, %rax
+; EGPR-NDD-NEXT: movzbl %r8b, %edi
+; EGPR-NDD-NEXT: adcq %rdi, %rdx
+; EGPR-NDD-NEXT: addq %rax, %r30, %rdi
+; EGPR-NDD-NEXT: adcq %rdx, %r25
+; EGPR-NDD-NEXT: adcq $0, %rcx
; EGPR-NDD-NEXT: adcq $0, %rsi
-; EGPR-NDD-NEXT: adcq $0, %rdi
-; EGPR-NDD-NEXT: movq 80(%r20), %rbx
+; EGPR-NDD-NEXT: movq 80(%r20), %r8
; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: mulq %rbx
+; EGPR-NDD-NEXT: mulq %r8
; EGPR-NDD-NEXT: movq %rdx, %r30
; EGPR-NDD-NEXT: movq %rax, %r31
; EGPR-NDD-NEXT: movq %r21, %rax
-; EGPR-NDD-NEXT: mulq %rbx
-; EGPR-NDD-NEXT: addq %r30, %rax, %r9
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r10
-; EGPR-NDD-NEXT: movq 88(%r20), %r15
+; EGPR-NDD-NEXT: mulq %r8
+; EGPR-NDD-NEXT: addq %rax, %r30
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
+; EGPR-NDD-NEXT: movq 88(%r20), %rbx
; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: mulq %r15
-; EGPR-NDD-NEXT: addq %rax, %r9
-; EGPR-NDD-NEXT: adcq %rdx, %r10
-; EGPR-NDD-NEXT: setb %r19b
+; EGPR-NDD-NEXT: mulq %rbx
+; EGPR-NDD-NEXT: addq %rax, %r30
+; EGPR-NDD-NEXT: adcq %rdx, %r9
+; EGPR-NDD-NEXT: setb %r10b
; EGPR-NDD-NEXT: movq %r21, %rax
-; EGPR-NDD-NEXT: mulq %r15
-; EGPR-NDD-NEXT: addq %r10, %rax
-; EGPR-NDD-NEXT: movzbl %r19b, %r10d
-; EGPR-NDD-NEXT: adcq %r10, %rdx
-; EGPR-NDD-NEXT: addq %r31, %r8
-; EGPR-NDD-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r9, %rcx
-; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: mulq %rbx
+; EGPR-NDD-NEXT: addq %r9, %rax
+; EGPR-NDD-NEXT: movzbl %r10b, %r9d
+; EGPR-NDD-NEXT: adcq %r9, %rdx
+; EGPR-NDD-NEXT: addq %r31, %rdi
+; EGPR-NDD-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %r25, %r30, %rbp
; EGPR-NDD-NEXT: adcq $0, %rax
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
-; EGPR-NDD-NEXT: addq %rax, %rsi
-; EGPR-NDD-NEXT: adcq %rdi, %rcx
+; EGPR-NDD-NEXT: adcq $0, %rdx
+; EGPR-NDD-NEXT: addq %rax, %rcx
+; EGPR-NDD-NEXT: adcq %rdx, %rsi
; EGPR-NDD-NEXT: setb %dil
; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %rbx
+; EGPR-NDD-NEXT: mulq %r8
; EGPR-NDD-NEXT: movq %rdx, %r30
; EGPR-NDD-NEXT: movq %rax, %r31
-; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %rbx
-; EGPR-NDD-NEXT: addq %r30, %rax, %r8
+; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: mulq %r8
+; EGPR-NDD-NEXT: addq %rax, %r30
; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %r15
-; EGPR-NDD-NEXT: addq %rax, %r8
+; EGPR-NDD-NEXT: mulq %rbx
+; EGPR-NDD-NEXT: addq %rax, %r30
; EGPR-NDD-NEXT: adcq %rdx, %r9
; EGPR-NDD-NEXT: setb %r10b
-; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r15
+; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: mulq %rbx
; EGPR-NDD-NEXT: addq %r9, %rax
; EGPR-NDD-NEXT: movzbl %r10b, %r9d
; EGPR-NDD-NEXT: adcq %r9, %rdx
-; EGPR-NDD-NEXT: addq %rsi, %r31, %r25
-; EGPR-NDD-NEXT: adcq %rcx, %r8, %r19
-; EGPR-NDD-NEXT: movzbl %dil, %ecx
-; EGPR-NDD-NEXT: adcq %rcx, %rax, %r31
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r12
-; EGPR-NDD-NEXT: imulq %r15, %r26, %rcx
+; EGPR-NDD-NEXT: addq %rcx, %r31, %r25
+; EGPR-NDD-NEXT: adcq %rsi, %r30, %r12
+; EGPR-NDD-NEXT: movzbl %dil, %r19d
+; EGPR-NDD-NEXT: adcq %rax, %r19
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r31
+; EGPR-NDD-NEXT: imulq %r26, %rbx
; EGPR-NDD-NEXT: movq %r26, %rax
-; EGPR-NDD-NEXT: mulq %rbx
+; EGPR-NDD-NEXT: mulq %r8
; EGPR-NDD-NEXT: movq %rax, %r30
-; EGPR-NDD-NEXT: addq %rcx, %rdx, %rax
-; EGPR-NDD-NEXT: imulq %rbx, %r18, %rcx
-; EGPR-NDD-NEXT: addq %rax, %rcx
-; EGPR-NDD-NEXT: imulq %r29, %r11, %rsi
+; EGPR-NDD-NEXT: addq %rbx, %rdx
+; EGPR-NDD-NEXT: imulq %r18, %r8
+; EGPR-NDD-NEXT: addq %rdx, %r8
+; EGPR-NDD-NEXT: imulq %r29, %r11, %rcx
; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r28
-; EGPR-NDD-NEXT: addq %rsi, %rdx
-; EGPR-NDD-NEXT: imulq %r28, %r16, %rsi
-; EGPR-NDD-NEXT: addq %rsi, %rdx
+; EGPR-NDD-NEXT: addq %rdx, %rcx
+; EGPR-NDD-NEXT: imulq %r28, %r17, %r16
+; EGPR-NDD-NEXT: addq %r16, %rcx
; EGPR-NDD-NEXT: addq %r30, %rax, %rsi
-; EGPR-NDD-NEXT: adcq %rcx, %rdx, %rdi
+; EGPR-NDD-NEXT: adcq %rcx, %r8
; EGPR-NDD-NEXT: movq %r28, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r30
@@ -1601,215 +1593,215 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %r29, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: addq %r30, %rax, %rcx
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
; EGPR-NDD-NEXT: movq %r28, %rax
; EGPR-NDD-NEXT: mulq %r18
; EGPR-NDD-NEXT: addq %rax, %rcx
-; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: adcq %rdx, %rdi
; EGPR-NDD-NEXT: setb %r9b
; EGPR-NDD-NEXT: movq %r29, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r8, %rax
-; EGPR-NDD-NEXT: movzbl %r9b, %r8d
-; EGPR-NDD-NEXT: adcq %r8, %rdx
+; EGPR-NDD-NEXT: addq %rdi, %rax
+; EGPR-NDD-NEXT: movzbl %r9b, %edi
+; EGPR-NDD-NEXT: adcq %rdi, %rdx
; EGPR-NDD-NEXT: addq %rax, %rsi
-; EGPR-NDD-NEXT: adcq %rdi, %rdx, %r29
+; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: movq 112(%r20), %rdi
; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %rdi
; EGPR-NDD-NEXT: movq %rax, %r26
-; EGPR-NDD-NEXT: imulq %rdi, %r21, %rax
-; EGPR-NDD-NEXT: addq %rdx, %rax
-; EGPR-NDD-NEXT: imulq 120(%r20), %r22, %rdx
-; EGPR-NDD-NEXT: addq %rdx, %rax, %r8
+; EGPR-NDD-NEXT: imulq %r21, %rdi
+; EGPR-NDD-NEXT: addq %rdi, %rdx
+; EGPR-NDD-NEXT: imulq 120(%r20), %r22, %rax
+; EGPR-NDD-NEXT: addq %rax, %rdx, %r9
; EGPR-NDD-NEXT: movq 96(%r20), %r28
; EGPR-NDD-NEXT: movq 104(%r20), %rdi
-; EGPR-NDD-NEXT: imulq %rdi, %r24, %r9
+; EGPR-NDD-NEXT: imulq %rdi, %r24, %r10
; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %r28
-; EGPR-NDD-NEXT: addq %r9, %rdx
-; EGPR-NDD-NEXT: imulq %r28, %r23, %r9
-; EGPR-NDD-NEXT: addq %r9, %rdx
-; EGPR-NDD-NEXT: addq %r26, %rax, %r9
-; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: addq %r10, %rdx
+; EGPR-NDD-NEXT: imulq %r28, %r14, %r23
+; EGPR-NDD-NEXT: addq %r23, %rdx
+; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: adcq %rdx, %r9
; EGPR-NDD-NEXT: movq %r28, %rax
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: movq %rdx, %r23
; EGPR-NDD-NEXT: movq %rax, %r24
; EGPR-NDD-NEXT: movq %rdi, %rax
; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: addq %r23, %rax, %r10
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r11
+; EGPR-NDD-NEXT: addq %rax, %r23
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r10
; EGPR-NDD-NEXT: movq %r28, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rax, %r10
-; EGPR-NDD-NEXT: adcq %rdx, %r11
-; EGPR-NDD-NEXT: setb %r16b
+; EGPR-NDD-NEXT: addq %rax, %r23
+; EGPR-NDD-NEXT: adcq %rdx, %r10
+; EGPR-NDD-NEXT: setb %r11b
; EGPR-NDD-NEXT: movq %rdi, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %r11, %rax
-; EGPR-NDD-NEXT: movzbl %r16b, %edi
+; EGPR-NDD-NEXT: addq %r10, %rax
+; EGPR-NDD-NEXT: movzbl %r11b, %edi
; EGPR-NDD-NEXT: adcq %rdi, %rdx
-; EGPR-NDD-NEXT: addq %r9, %rax
-; EGPR-NDD-NEXT: adcq %r8, %rdx
-; EGPR-NDD-NEXT: addq %r27, %r24, %rdi
-; EGPR-NDD-NEXT: adcq %r10, %rcx
+; EGPR-NDD-NEXT: addq %r26, %rax
+; EGPR-NDD-NEXT: adcq %r9, %rdx
+; EGPR-NDD-NEXT: addq %r27, %r24
+; EGPR-NDD-NEXT: adcq %r23, %rcx
; EGPR-NDD-NEXT: adcq %rsi, %rax
-; EGPR-NDD-NEXT: adcq %r29, %rdx
-; EGPR-NDD-NEXT: addq %rdi, %r25, %r15
-; EGPR-NDD-NEXT: adcq %rcx, %r19, %rbx
-; EGPR-NDD-NEXT: adcq %rax, %r31, %rbp
-; EGPR-NDD-NEXT: adcq %rdx, %r12, %r30
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r18 # 8-byte Reload
-; EGPR-NDD-NEXT: movq 80(%r18), %r22
+; EGPR-NDD-NEXT: adcq %r8, %rdx
+; EGPR-NDD-NEXT: addq %r24, %r25, %rbx
+; EGPR-NDD-NEXT: adcq %rcx, %r12
+; EGPR-NDD-NEXT: adcq %rax, %r19, %r13
+; EGPR-NDD-NEXT: adcq %rdx, %r31, %r30
+; EGPR-NDD-NEXT: movq 80(%r15), %r22
; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Reload
-; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
+; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: movq %rax, %r26
; EGPR-NDD-NEXT: movq %rdx, %rdi
-; EGPR-NDD-NEXT: movq 88(%r18), %r20
+; EGPR-NDD-NEXT: movq 88(%r15), %r20
; EGPR-NDD-NEXT: movq %r20, %rax
-; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rdi, %rax, %rcx
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
+; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: addq %rax, %rdi
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
-; EGPR-NDD-NEXT: mulq %r12
-; EGPR-NDD-NEXT: addq %rax, %rcx
-; EGPR-NDD-NEXT: adcq %rdx, %rsi
-; EGPR-NDD-NEXT: setb %dil
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Reload
+; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: addq %rax, %rdi
+; EGPR-NDD-NEXT: adcq %rdx, %rcx
+; EGPR-NDD-NEXT: setb %sil
; EGPR-NDD-NEXT: movq %r20, %rax
-; EGPR-NDD-NEXT: mulq %r12
-; EGPR-NDD-NEXT: addq %rax, %rsi
-; EGPR-NDD-NEXT: movzbl %dil, %eax
-; EGPR-NDD-NEXT: adcq %rax, %rdx, %rdi
-; EGPR-NDD-NEXT: movq 64(%r18), %r24
-; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: addq %rax, %rcx
+; EGPR-NDD-NEXT: movzbl %sil, %eax
+; EGPR-NDD-NEXT: adcq %rax, %rdx, %rsi
+; EGPR-NDD-NEXT: movq 64(%r15), %r24
+; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: movq %rax, %r29
; EGPR-NDD-NEXT: movq %rdx, %r27
-; EGPR-NDD-NEXT: movq 72(%r18), %r23
+; EGPR-NDD-NEXT: movq 72(%r15), %r23
; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %r27, %rax, %r8
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
+; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: addq %rax, %r27
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %r12
-; EGPR-NDD-NEXT: addq %r8, %rax, %r31
-; EGPR-NDD-NEXT: adcq %rdx, %r9, %r8
+; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: addq %r27, %rax, %r31
+; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: setb %r9b
; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r12
+; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %r8, %rax
; EGPR-NDD-NEXT: movzbl %r9b, %r8d
; EGPR-NDD-NEXT: adcq %r8, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r26, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %rcx
+; EGPR-NDD-NEXT: addq %rax, %r26, %r28
+; EGPR-NDD-NEXT: adcq %rdx, %rdi
+; EGPR-NDD-NEXT: adcq $0, %rcx
; EGPR-NDD-NEXT: adcq $0, %rsi
-; EGPR-NDD-NEXT: adcq $0, %rdi
; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; EGPR-NDD-NEXT: mulq %r10
; EGPR-NDD-NEXT: movq %rdx, %r26
; EGPR-NDD-NEXT: movq %rax, %r27
; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %r26, %rax, %r9
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r10
+; EGPR-NDD-NEXT: mulq %r10
+; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %rax, %r9
-; EGPR-NDD-NEXT: adcq %rdx, %r10
-; EGPR-NDD-NEXT: setb %r11b
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; EGPR-NDD-NEXT: mulq %r11
+; EGPR-NDD-NEXT: addq %r26, %rax, %r25
+; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: setb %r9b
; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %r10, %rax
-; EGPR-NDD-NEXT: movzbl %r11b, %r10d
-; EGPR-NDD-NEXT: adcq %r10, %rdx
-; EGPR-NDD-NEXT: addq %r8, %r27, %r28
-; EGPR-NDD-NEXT: adcq %rcx, %r9, %r25
+; EGPR-NDD-NEXT: mulq %r11
+; EGPR-NDD-NEXT: addq %r8, %rax
+; EGPR-NDD-NEXT: movzbl %r9b, %r8d
+; EGPR-NDD-NEXT: adcq %r8, %rdx
+; EGPR-NDD-NEXT: addq %r27, %r28
+; EGPR-NDD-NEXT: adcq %rdi, %r25
; EGPR-NDD-NEXT: adcq $0, %rax
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
-; EGPR-NDD-NEXT: addq %rax, %rsi
-; EGPR-NDD-NEXT: adcq %rdi, %rcx
+; EGPR-NDD-NEXT: adcq $0, %rdx
+; EGPR-NDD-NEXT: addq %rax, %rcx
+; EGPR-NDD-NEXT: adcq %rdx, %rsi
; EGPR-NDD-NEXT: setb %dil
; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: mulq %r10
; EGPR-NDD-NEXT: movq %rdx, %r26
; EGPR-NDD-NEXT: movq %rax, %r27
; EGPR-NDD-NEXT: movq %r20, %rax
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %r26, %rax, %r8
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
+; EGPR-NDD-NEXT: mulq %r10
+; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %rax, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %r9
-; EGPR-NDD-NEXT: setb %r10b
+; EGPR-NDD-NEXT: mulq %r11
+; EGPR-NDD-NEXT: addq %r26, %rax, %r19
+; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: setb %r9b
; EGPR-NDD-NEXT: movq %r20, %rax
-; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %r9, %rax
-; EGPR-NDD-NEXT: movzbl %r10b, %r9d
-; EGPR-NDD-NEXT: adcq %r9, %rdx
-; EGPR-NDD-NEXT: addq %rsi, %r27
-; EGPR-NDD-NEXT: adcq %rcx, %r8, %r19
+; EGPR-NDD-NEXT: mulq %r11
+; EGPR-NDD-NEXT: addq %r8, %rax
+; EGPR-NDD-NEXT: movzbl %r9b, %r8d
+; EGPR-NDD-NEXT: adcq %r8, %rdx
+; EGPR-NDD-NEXT: addq %rcx, %r27
+; EGPR-NDD-NEXT: adcq %rsi, %r19
; EGPR-NDD-NEXT: movzbl %dil, %ecx
; EGPR-NDD-NEXT: adcq %rax, %rcx
; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
-; EGPR-NDD-NEXT: movq %r18, %r9
-; EGPR-NDD-NEXT: movq 96(%r18), %r26
-; EGPR-NDD-NEXT: imulq %r17, %r26, %rsi
+; EGPR-NDD-NEXT: movq 96(%r15), %r26
+; EGPR-NDD-NEXT: imulq %r11, %r26, %rsi
; EGPR-NDD-NEXT: movq %r26, %rax
-; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: mulq %r10
; EGPR-NDD-NEXT: movq %rax, %r18
-; EGPR-NDD-NEXT: addq %rsi, %rdx, %rax
-; EGPR-NDD-NEXT: movq 104(%r9), %r8
-; EGPR-NDD-NEXT: imulq %r16, %r8, %rdx
-; EGPR-NDD-NEXT: addq %rdx, %rax, %rsi
-; EGPR-NDD-NEXT: movq 112(%r9), %rax
-; EGPR-NDD-NEXT: movq %r9, %r11
-; EGPR-NDD-NEXT: imulq %r12, %rax, %r9
-; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: addq %rsi, %rdx
+; EGPR-NDD-NEXT: movq 104(%r15), %r8
+; EGPR-NDD-NEXT: imulq %r10, %r8, %rax
+; EGPR-NDD-NEXT: addq %rax, %rdx, %rsi
+; EGPR-NDD-NEXT: movq 112(%r15), %rax
+; EGPR-NDD-NEXT: imulq %r21, %rax, %r9
+; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: addq %r9, %rdx
-; EGPR-NDD-NEXT: imulq 120(%r11), %r21, %r9
+; EGPR-NDD-NEXT: imulq 120(%r15), %r16, %r9
; EGPR-NDD-NEXT: addq %r9, %rdx
-; EGPR-NDD-NEXT: addq %r18, %rax, %r9
-; EGPR-NDD-NEXT: adcq %rsi, %rdx, %r16
-; EGPR-NDD-NEXT: movq %r21, %rax
+; EGPR-NDD-NEXT: addq %r18, %rax, %r10
+; EGPR-NDD-NEXT: adcq %rsi, %rdx, %r9
+; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: movq %r16, %r18
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r17
; EGPR-NDD-NEXT: movq %rax, %rsi
-; EGPR-NDD-NEXT: movq %r12, %rax
-; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %r17, %rax, %r10
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r17
; EGPR-NDD-NEXT: movq %r21, %rax
+; EGPR-NDD-NEXT: mulq %r26
+; EGPR-NDD-NEXT: addq %r17, %rax, %r11
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r16
+; EGPR-NDD-NEXT: movq %r18, %rax
; EGPR-NDD-NEXT: mulq %r8
-; EGPR-NDD-NEXT: addq %r10, %rax, %r11
-; EGPR-NDD-NEXT: adcq %rdx, %r17, %r10
+; EGPR-NDD-NEXT: addq %rax, %r11
+; EGPR-NDD-NEXT: adcq %rdx, %r16
; EGPR-NDD-NEXT: setb %r17b
-; EGPR-NDD-NEXT: movq %r12, %rax
+; EGPR-NDD-NEXT: movq %r21, %rax
; EGPR-NDD-NEXT: mulq %r8
-; EGPR-NDD-NEXT: addq %r10, %rax
+; EGPR-NDD-NEXT: addq %r16, %rax
; EGPR-NDD-NEXT: movzbl %r17b, %r8d
; EGPR-NDD-NEXT: adcq %r8, %rdx
-; EGPR-NDD-NEXT: addq %r9, %rax, %r10
-; EGPR-NDD-NEXT: adcq %r16, %rdx, %r17
-; EGPR-NDD-NEXT: imulq %r14, %r24, %r8
+; EGPR-NDD-NEXT: addq %rax, %r10
+; EGPR-NDD-NEXT: adcq %r9, %rdx, %r17
+; EGPR-NDD-NEXT: imulq {{[-0-9]+}}(%r{{[sb]}}p), %r24, %r8 # 8-byte Folded Reload
; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %r13
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
+; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: movq %rax, %r9
-; EGPR-NDD-NEXT: addq %r8, %rdx, %rax
-; EGPR-NDD-NEXT: imulq %r13, %r23, %rdx
-; EGPR-NDD-NEXT: addq %rdx, %rax, %r8
+; EGPR-NDD-NEXT: addq %r8, %rdx
+; EGPR-NDD-NEXT: imulq %r16, %r23, %rax
+; EGPR-NDD-NEXT: addq %rax, %rdx, %r8
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Reload
; EGPR-NDD-NEXT: imulq %r21, %r22, %r16
; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r26 # 8-byte Reload
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: addq %r16, %rdx
-; EGPR-NDD-NEXT: imulq %r26, %r20, %r16
-; EGPR-NDD-NEXT: addq %r16, %rdx
+; EGPR-NDD-NEXT: imulq %r26, %r20
+; EGPR-NDD-NEXT: addq %r20, %rdx
; EGPR-NDD-NEXT: addq %r9, %rax, %r16
; EGPR-NDD-NEXT: adcq %r8, %rdx, %r18
; EGPR-NDD-NEXT: movq %r26, %rax
@@ -1840,49 +1832,49 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %r27, %rsi
; EGPR-NDD-NEXT: adcq %r19, %r8
; EGPR-NDD-NEXT: adcq %rcx, %rax
-; EGPR-NDD-NEXT: adcq %rdx, %rdi, %rcx
-; EGPR-NDD-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r29, %rdx # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r31, %rdi # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r28, %r9 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r25, %r10 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %r15, %rsi
-; EGPR-NDD-NEXT: adcq %rbx, %r8
-; EGPR-NDD-NEXT: adcq %rbp, %rax
-; EGPR-NDD-NEXT: adcq %r30, %rcx
-; EGPR-NDD-NEXT: addq %rdx, {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %rdi, {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %r9, {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %r10, {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %rsi, {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %r8, (%rsp), %r8 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %rdi, %rdx
+; EGPR-NDD-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r29 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r31 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r28 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %rbp, %r25
+; EGPR-NDD-NEXT: adcq %rbx, %rsi
+; EGPR-NDD-NEXT: adcq %r12, %r8
+; EGPR-NDD-NEXT: adcq %r13, %rax
+; EGPR-NDD-NEXT: adcq %r30, %rdx
+; EGPR-NDD-NEXT: addq %r29, {{[-0-9]+}}(%r{{[sb]}}p), %r29 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r31, {{[-0-9]+}}(%r{{[sb]}}p), %r31 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r28, {{[-0-9]+}}(%r{{[sb]}}p), %r28 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r25, {{[-0-9]+}}(%r{{[sb]}}p), %r25 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %rsi, (%rsp), %rsi # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r8, {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
; EGPR-NDD-NEXT: adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %rcx, {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, (%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 8(%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 16(%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 24(%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 32(%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 40(%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 48(%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 56(%r11)
-; EGPR-NDD-NEXT: movq %rdx, 64(%r11)
-; EGPR-NDD-NEXT: movq %rdi, 72(%r11)
-; EGPR-NDD-NEXT: movq %r9, 80(%r11)
-; EGPR-NDD-NEXT: movq %r10, 88(%r11)
-; EGPR-NDD-NEXT: movq %rsi, 96(%r11)
-; EGPR-NDD-NEXT: movq %r8, 104(%r11)
-; EGPR-NDD-NEXT: movq %rax, 112(%r11)
-; EGPR-NDD-NEXT: movq %rcx, 120(%r11)
-; EGPR-NDD-NEXT: addq $104, %rsp
+; EGPR-NDD-NEXT: adcq %rdx, {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, (%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 8(%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 16(%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 24(%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 32(%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 40(%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 48(%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 56(%rcx)
+; EGPR-NDD-NEXT: movq %r29, 64(%rcx)
+; EGPR-NDD-NEXT: movq %r31, 72(%rcx)
+; EGPR-NDD-NEXT: movq %r28, 80(%rcx)
+; EGPR-NDD-NEXT: movq %r25, 88(%rcx)
+; EGPR-NDD-NEXT: movq %rsi, 96(%rcx)
+; EGPR-NDD-NEXT: movq %r8, 104(%rcx)
+; EGPR-NDD-NEXT: movq %rax, 112(%rcx)
+; EGPR-NDD-NEXT: movq %rdx, 120(%rcx)
+; EGPR-NDD-NEXT: addq $96, %rsp
; EGPR-NDD-NEXT: popq %rbx
; EGPR-NDD-NEXT: popq %r12
; EGPR-NDD-NEXT: popq %r13
diff --git a/llvm/test/CodeGen/X86/apx/or.ll b/llvm/test/CodeGen/X86/apx/or.ll
index 6a3db295c8c1..e51ba9d9bf03 100644
--- a/llvm/test/CodeGen/X86/apx/or.ll
+++ b/llvm/test/CodeGen/X86/apx/or.ll
@@ -478,17 +478,17 @@ define i1 @orflag16rr(i16 %a, i16 %b) {
define i1 @orflag32rr(i32 %a, i32 %b) {
; CHECK-LABEL: orflag32rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x09,0xf7]
+; CHECK-NEXT: orl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x09,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag32rr:
; NF: # %bb.0:
-; NF-NEXT: orl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x09,0xf7]
+; NF-NEXT: orl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x09,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i32 %a, %b ; 0xff << 50
@@ -500,17 +500,17 @@ define i1 @orflag32rr(i32 %a, i32 %b) {
define i1 @orflag64rr(i64 %a, i64 %b) {
; CHECK-LABEL: orflag64rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: orq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x09,0xf7]
+; CHECK-NEXT: orq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x09,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag64rr:
; NF: # %bb.0:
-; NF-NEXT: orq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x09,0xf7]
+; NF-NEXT: orq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x09,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i64 %a, %b ; 0xff << 50
@@ -574,17 +574,17 @@ define i1 @orflag16rm(ptr %ptr, i16 %b) {
define i1 @orflag32rm(ptr %ptr, i32 %b) {
; CHECK-LABEL: orflag32rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x0b,0x37]
+; CHECK-NEXT: orl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x0b,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag32rm:
; NF: # %bb.0:
-; NF-NEXT: orl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x0b,0x37]
+; NF-NEXT: orl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x0b,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i32, ptr %ptr
@@ -597,17 +597,17 @@ define i1 @orflag32rm(ptr %ptr, i32 %b) {
define i1 @orflag64rm(ptr %ptr, i64 %b) {
; CHECK-LABEL: orflag64rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: orq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x0b,0x37]
+; CHECK-NEXT: orq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x0b,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag64rm:
; NF: # %bb.0:
-; NF-NEXT: orq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x0b,0x37]
+; NF-NEXT: orq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x0b,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i64, ptr %ptr
@@ -668,19 +668,19 @@ define i1 @orflag16ri(i16 %a) {
define i1 @orflag32ri(i32 %a) {
; CHECK-LABEL: orflag32ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xcf,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: orl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xcf,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag32ri:
; NF: # %bb.0:
-; NF-NEXT: orl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xcf,0x40,0xe2,0x01,0x00]
+; NF-NEXT: orl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xcf,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i32 %a, 123456 ; 0xff << 50
@@ -692,19 +692,19 @@ define i1 @orflag32ri(i32 %a) {
define i1 @orflag64ri(i64 %a) {
; CHECK-LABEL: orflag64ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: orq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xcf,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: orq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xcf,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag64ri:
; NF: # %bb.0:
-; NF-NEXT: orq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xcf,0x40,0xe2,0x01,0x00]
+; NF-NEXT: orq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xcf,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i64 %a, 123456 ; 0xff << 50
@@ -739,17 +739,17 @@ define i1 @orflag16ri8(i16 %a) {
define i1 @orflag32ri8(i32 %a) {
; CHECK-LABEL: orflag32ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xcf,0x7b]
+; CHECK-NEXT: orl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xcf,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag32ri8:
; NF: # %bb.0:
-; NF-NEXT: orl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xcf,0x7b]
+; NF-NEXT: orl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xcf,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i32 %a, 123 ; 0xff << 50
@@ -761,17 +761,17 @@ define i1 @orflag32ri8(i32 %a) {
define i1 @orflag64ri8(i64 %a) {
; CHECK-LABEL: orflag64ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: orq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xcf,0x7b]
+; CHECK-NEXT: orq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xcf,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag64ri8:
; NF: # %bb.0:
-; NF-NEXT: orq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xcf,0x7b]
+; NF-NEXT: orq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xcf,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i64 %a, 123 ; 0xff << 50
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
index aa5c54d30e3b..f20c4c1ae278 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll
@@ -43,8 +43,12 @@ define void @widget(float %arg) nounwind {
; FRAME-NEXT: xorl %r8d, %r8d
; FRAME-NEXT: callq *%rsi
; FRAME-NEXT: movss %xmm6, 0
+; FRAME-NEXT: pushq %rbp
+; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
+; FRAME-NEXT: popq %rax
+; FRAME-NEXT: popq %rbp
; FRAME-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; FRAME-NEXT: addq $48, %rsp
; FRAME-NEXT: pop2 %r15, %rsi
diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2.ll b/llvm/test/CodeGen/X86/apx/push2-pop2.ll
index 25139f1da827..6bd9f525090e 100644
--- a/llvm/test/CodeGen/X86/apx/push2-pop2.ll
+++ b/llvm/test/CodeGen/X86/apx/push2-pop2.ll
@@ -24,8 +24,12 @@ define void @csr1() nounwind {
; FRAME: # %bb.0: # %entry
; FRAME-NEXT: pushq %rbp
; FRAME-NEXT: movq %rsp, %rbp
+; FRAME-NEXT: pushq %rbp
+; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
+; FRAME-NEXT: popq %rax
+; FRAME-NEXT: popq %rbp
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
entry:
@@ -59,8 +63,12 @@ define void @csr2() nounwind {
; FRAME-NEXT: pushq %rbp
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: pushq %r15
+; FRAME-NEXT: pushq %rbp
+; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
+; FRAME-NEXT: popq %rax
+; FRAME-NEXT: popq %rbp
; FRAME-NEXT: popq %r15
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
@@ -95,8 +103,12 @@ define void @csr3() nounwind {
; FRAME-NEXT: pushq %rbp
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: push2 %r14, %r15
+; FRAME-NEXT: pushq %rbp
+; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
+; FRAME-NEXT: popq %rax
+; FRAME-NEXT: popq %rbp
; FRAME-NEXT: pop2 %r15, %r14
; FRAME-NEXT: popq %rbp
; FRAME-NEXT: retq
@@ -136,8 +148,12 @@ define void @csr4() nounwind {
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: push2 %r14, %r15
; FRAME-NEXT: pushq %r13
+; FRAME-NEXT: pushq %rbp
+; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
+; FRAME-NEXT: popq %rax
+; FRAME-NEXT: popq %rbp
; FRAME-NEXT: popq %r13
; FRAME-NEXT: pop2 %r15, %r14
; FRAME-NEXT: popq %rbp
@@ -178,8 +194,12 @@ define void @csr5() nounwind {
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: push2 %r14, %r15
; FRAME-NEXT: push2 %r12, %r13
+; FRAME-NEXT: pushq %rbp
+; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
+; FRAME-NEXT: popq %rax
+; FRAME-NEXT: popq %rbp
; FRAME-NEXT: pop2 %r13, %r12
; FRAME-NEXT: pop2 %r15, %r14
; FRAME-NEXT: popq %rbp
@@ -225,8 +245,12 @@ define void @csr6() nounwind {
; FRAME-NEXT: push2 %r14, %r15
; FRAME-NEXT: push2 %r12, %r13
; FRAME-NEXT: pushq %rbx
+; FRAME-NEXT: pushq %rbp
+; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
+; FRAME-NEXT: popq %rax
+; FRAME-NEXT: popq %rbp
; FRAME-NEXT: popq %rbx
; FRAME-NEXT: pop2 %r13, %r12
; FRAME-NEXT: pop2 %r15, %r14
diff --git a/llvm/test/CodeGen/X86/apx/pushp-popp.ll b/llvm/test/CodeGen/X86/apx/pushp-popp.ll
index ad4306fccce6..625e70b07198 100644
--- a/llvm/test/CodeGen/X86/apx/pushp-popp.ll
+++ b/llvm/test/CodeGen/X86/apx/pushp-popp.ll
@@ -18,8 +18,12 @@ define void @csr2() nounwind {
; FRAME-NEXT: pushp %rbp
; FRAME-NEXT: movq %rsp, %rbp
; FRAME-NEXT: pushp %r15
+; FRAME-NEXT: pushp %rbp
+; FRAME-NEXT: pushq %rax
; FRAME-NEXT: #APP
; FRAME-NEXT: #NO_APP
+; FRAME-NEXT: popq %rax
+; FRAME-NEXT: popp %rbp
; FRAME-NEXT: popp %r15
; FRAME-NEXT: popp %rbp
; FRAME-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/apx/shift-eflags.ll b/llvm/test/CodeGen/X86/apx/shift-eflags.ll
index 5da5090307e6..2659f8031ef7 100644
--- a/llvm/test/CodeGen/X86/apx/shift-eflags.ll
+++ b/llvm/test/CodeGen/X86/apx/shift-eflags.ll
@@ -7,7 +7,7 @@
define i32 @ashr_const(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: ashr_const:
; CHECK: # %bb.0:
-; CHECK-NEXT: sarl $14, %edi, %eax
+; CHECK-NEXT: sarl $14, %edi
; CHECK-NEXT: cmovel %edx, %ecx, %eax
; CHECK-NEXT: retq
%s = ashr i32 %a0, 14
@@ -85,7 +85,7 @@ define i32 @shl_const_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
define i32 @ashr_const1(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: ashr_const1:
; CHECK: # %bb.0:
-; CHECK-NEXT: sarl %edi, %eax
+; CHECK-NEXT: sarl %edi
; CHECK-NEXT: cmovel %edx, %ecx, %eax
; CHECK-NEXT: retq
%s = ashr i32 %a0, 1
@@ -166,8 +166,8 @@ define i32 @ashr_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: sarl %cl, %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: sarl %cl, %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovel %edx, %eax
; CHECK-NEXT: retq
%s = ashr i32 %a0, %a1
@@ -183,8 +183,8 @@ define i32 @lshr_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: shrl %cl, %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: shrl %cl, %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovel %edx, %eax
; CHECK-NEXT: retq
%s = lshr i32 %a0, %a1
@@ -200,8 +200,8 @@ define i32 @shl_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: shll %cl, %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: shll %cl, %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovel %edx, %eax
; CHECK-NEXT: retq
%s = shl i32 %a0, %a1
@@ -264,8 +264,8 @@ define i32 @ashr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: orb $1, %sil, %cl
-; CHECK-NEXT: sarl %cl, %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: sarl %cl, %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovel %edx, %eax
; CHECK-NEXT: retq
%a = or i32 %a1, 1
@@ -281,8 +281,8 @@ define i32 @lshr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: orb $1, %sil, %cl
-; CHECK-NEXT: shrl %cl, %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: shrl %cl, %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovel %edx, %eax
; CHECK-NEXT: retq
%a = or i32 %a1, 1
@@ -298,8 +298,8 @@ define i32 @shl_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: orb $1, %sil, %cl
-; CHECK-NEXT: shll %cl, %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: shll %cl, %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovel %edx, %eax
; CHECK-NEXT: retq
%a = or i32 %a1, 1
diff --git a/llvm/test/CodeGen/X86/apx/sub.ll b/llvm/test/CodeGen/X86/apx/sub.ll
index 75d705557cdf..9519fab4ee51 100644
--- a/llvm/test/CodeGen/X86/apx/sub.ll
+++ b/llvm/test/CodeGen/X86/apx/sub.ll
@@ -451,16 +451,16 @@ define i16 @subflag16rr(i16 noundef %a, i16 noundef %b) {
; CHECK-LABEL: subflag16rr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subw %si, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x29,0xf7]
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: subw %si, %di # EVEX TO LEGACY Compression encoding: [0x66,0x29,0xf7]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag16rr:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subw %si, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x29,0xf7]
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: subw %si, %di # EVEX TO LEGACY Compression encoding: [0x66,0x29,0xf7]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: # kill: def $ax killed $ax killed $eax
; NF-NEXT: retq # encoding: [0xc3]
entry:
@@ -472,15 +472,15 @@ define i32 @subflag32rr(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: subflag32rr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xf7]
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: subl %esi, %edi # EVEX TO LEGACY Compression encoding: [0x29,0xf7]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag32rr:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xf7]
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: subl %esi, %edi # EVEX TO LEGACY Compression encoding: [0x29,0xf7]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 %b)
@@ -491,15 +491,15 @@ define i64 @subflag64rr(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: subflag64rr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x29,0xf7]
-; CHECK-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; CHECK-NEXT: subq %rsi, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x29,0xf7]
+; CHECK-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag64rr:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x29,0xf7]
-; NF-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; NF-NEXT: subq %rsi, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x29,0xf7]
+; NF-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 %b)
@@ -534,16 +534,16 @@ define i16 @subflag16rm(i16 noundef %a, ptr %b) {
; CHECK-LABEL: subflag16rm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subw (%rsi), %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x2b,0x3e]
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: subw (%rsi), %di # EVEX TO LEGACY Compression encoding: [0x66,0x2b,0x3e]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag16rm:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subw (%rsi), %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x2b,0x3e]
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: subw (%rsi), %di # EVEX TO LEGACY Compression encoding: [0x66,0x2b,0x3e]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: # kill: def $ax killed $ax killed $eax
; NF-NEXT: retq # encoding: [0xc3]
entry:
@@ -556,15 +556,15 @@ define i32 @subflag32rm(i32 noundef %a, ptr %b) {
; CHECK-LABEL: subflag32rm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subl (%rsi), %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x2b,0x3e]
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: subl (%rsi), %edi # EVEX TO LEGACY Compression encoding: [0x2b,0x3e]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag32rm:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subl (%rsi), %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x2b,0x3e]
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: subl (%rsi), %edi # EVEX TO LEGACY Compression encoding: [0x2b,0x3e]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%t = load i32, ptr %b
@@ -576,15 +576,15 @@ define i64 @subflag64rm(i64 noundef %a, ptr %b) {
; CHECK-LABEL: subflag64rm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subq (%rsi), %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x2b,0x3e]
-; CHECK-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; CHECK-NEXT: subq (%rsi), %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x2b,0x3e]
+; CHECK-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag64rm:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subq (%rsi), %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x2b,0x3e]
-; NF-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; NF-NEXT: subq (%rsi), %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x2b,0x3e]
+; NF-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%t = load i64, ptr %b
@@ -596,16 +596,16 @@ define i16 @subflag16ri8(i16 noundef %a) {
; CHECK-LABEL: subflag16ri8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subw $123, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xef,0x7b]
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: subw $123, %di # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xef,0x7b]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag16ri8:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subw $123, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xef,0x7b]
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: subw $123, %di # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xef,0x7b]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: # kill: def $ax killed $ax killed $eax
; NF-NEXT: retq # encoding: [0xc3]
entry:
@@ -617,15 +617,15 @@ define i32 @subflag32ri8(i32 noundef %a) {
; CHECK-LABEL: subflag32ri8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xef,0x7b]
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: subl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xef,0x7b]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag32ri8:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xef,0x7b]
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: subl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xef,0x7b]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 123)
@@ -636,15 +636,15 @@ define i64 @subflag64ri8(i64 noundef %a) {
; CHECK-LABEL: subflag64ri8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xef,0x7b]
-; CHECK-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; CHECK-NEXT: subq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xef,0x7b]
+; CHECK-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag64ri8:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xef,0x7b]
-; NF-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; NF-NEXT: subq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xef,0x7b]
+; NF-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 123)
@@ -678,18 +678,18 @@ define i16 @subflag16ri(i16 noundef %a) {
; CHECK-LABEL: subflag16ri:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subw $1234, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x81,0xef,0xd2,0x04]
+; CHECK-NEXT: subw $1234, %di # EVEX TO LEGACY Compression encoding: [0x66,0x81,0xef,0xd2,0x04]
; CHECK-NEXT: # imm = 0x4D2
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag16ri:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subw $1234, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x81,0xef,0xd2,0x04]
+; NF-NEXT: subw $1234, %di # EVEX TO LEGACY Compression encoding: [0x66,0x81,0xef,0xd2,0x04]
; NF-NEXT: # imm = 0x4D2
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: # kill: def $ax killed $ax killed $eax
; NF-NEXT: retq # encoding: [0xc3]
entry:
@@ -701,17 +701,17 @@ define i32 @subflag32ri(i32 noundef %a) {
; CHECK-LABEL: subflag32ri:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xef,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: subl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xef,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag32ri:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xef,0x40,0xe2,0x01,0x00]
+; NF-NEXT: subl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xef,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 123456)
@@ -722,17 +722,17 @@ define i64 @subflag64ri(i64 noundef %a) {
; CHECK-LABEL: subflag64ri:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xef,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: subq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xef,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
-; CHECK-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; CHECK-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag64ri:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xef,0x40,0xe2,0x01,0x00]
+; NF-NEXT: subq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xef,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
-; NF-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; NF-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 123456)
diff --git a/llvm/test/CodeGen/X86/apx/xor.ll b/llvm/test/CodeGen/X86/apx/xor.ll
index 3426f9cc92ce..d908849e2848 100644
--- a/llvm/test/CodeGen/X86/apx/xor.ll
+++ b/llvm/test/CodeGen/X86/apx/xor.ll
@@ -428,8 +428,8 @@ entry:
define i1 @xorflag8rr(i8 %a, i8 %b) {
; CHECK-LABEL: xorflag8rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %edi, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x31,0xfe]
-; CHECK-NEXT: xorb $-1, %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xf0,0xff]
+; CHECK-NEXT: xorl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x31,0xfe]
+; CHECK-NEXT: xorb $-1, %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xf6,0xff]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
@@ -437,8 +437,8 @@ define i1 @xorflag8rr(i8 %a, i8 %b) {
;
; NF-LABEL: xorflag8rr:
; NF: # %bb.0:
-; NF-NEXT: {nf} xorl %edi, %esi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x31,0xfe]
-; NF-NEXT: xorb $-1, %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xf0,0xff]
+; NF-NEXT: xorl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x31,0xfe]
+; NF-NEXT: xorb $-1, %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xf6,0xff]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
@@ -453,8 +453,8 @@ define i1 @xorflag8rr(i8 %a, i8 %b) {
define i1 @xorflag16rr(i16 %a, i16 %b) {
; CHECK-LABEL: xorflag16rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %edi, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x31,0xfe]
-; CHECK-NEXT: xorw $-1, %ax, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xf0,0xff]
+; CHECK-NEXT: xorl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x31,0xfe]
+; CHECK-NEXT: xorw $-1, %si, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xf6,0xff]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
@@ -462,8 +462,8 @@ define i1 @xorflag16rr(i16 %a, i16 %b) {
;
; NF-LABEL: xorflag16rr:
; NF: # %bb.0:
-; NF-NEXT: {nf} xorl %edi, %esi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x31,0xfe]
-; NF-NEXT: xorw $-1, %ax, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xf0,0xff]
+; NF-NEXT: xorl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x31,0xfe]
+; NF-NEXT: xorw $-1, %si, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xf6,0xff]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
@@ -478,17 +478,17 @@ define i1 @xorflag16rr(i16 %a, i16 %b) {
define i1 @xorflag32rr(i32 %a, i32 %b) {
; CHECK-LABEL: xorflag32rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x31,0xf7]
+; CHECK-NEXT: xorl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x31,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag32rr:
; NF: # %bb.0:
-; NF-NEXT: xorl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x31,0xf7]
+; NF-NEXT: xorl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x31,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = xor i32 %a, %b ; 0xff << 50
@@ -500,17 +500,17 @@ define i1 @xorflag32rr(i32 %a, i32 %b) {
define i1 @xorflag64rr(i64 %a, i64 %b) {
; CHECK-LABEL: xorflag64rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x31,0xf7]
+; CHECK-NEXT: xorq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x31,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag64rr:
; NF: # %bb.0:
-; NF-NEXT: xorq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x31,0xf7]
+; NF-NEXT: xorq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x31,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = xor i64 %a, %b ; 0xff << 50
@@ -574,17 +574,17 @@ define i1 @xorflag16rm(ptr %ptr, i16 %b) {
define i1 @xorflag32rm(ptr %ptr, i32 %b) {
; CHECK-LABEL: xorflag32rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x33,0x37]
+; CHECK-NEXT: xorl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x33,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag32rm:
; NF: # %bb.0:
-; NF-NEXT: xorl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x33,0x37]
+; NF-NEXT: xorl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x33,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i32, ptr %ptr
@@ -597,17 +597,17 @@ define i1 @xorflag32rm(ptr %ptr, i32 %b) {
define i1 @xorflag64rm(ptr %ptr, i64 %b) {
; CHECK-LABEL: xorflag64rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x33,0x37]
+; CHECK-NEXT: xorq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x33,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag64rm:
; NF: # %bb.0:
-; NF-NEXT: xorq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x33,0x37]
+; NF-NEXT: xorq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x33,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i64, ptr %ptr
@@ -668,19 +668,19 @@ define i1 @xorflag16ri(i16 %a) {
define i1 @xorflag32ri(i32 %a) {
; CHECK-LABEL: xorflag32ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xf7,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: xorl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xf7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag32ri:
; NF: # %bb.0:
-; NF-NEXT: xorl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xf7,0x40,0xe2,0x01,0x00]
+; NF-NEXT: xorl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xf7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = xor i32 %a, 123456 ; 0xff << 50
@@ -692,19 +692,19 @@ define i1 @xorflag32ri(i32 %a) {
define i1 @xorflag64ri(i64 %a) {
; CHECK-LABEL: xorflag64ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xf7,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: xorq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xf7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag64ri:
; NF: # %bb.0:
-; NF-NEXT: xorq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xf7,0x40,0xe2,0x01,0x00]
+; NF-NEXT: xorq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xf7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = xor i64 %a, 123456 ; 0xff << 50
@@ -739,17 +739,17 @@ define i1 @xorflag16ri8(i16 %a) {
define i1 @xorflag32ri8(i32 %a) {
; CHECK-LABEL: xorflag32ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xf7,0x7b]
+; CHECK-NEXT: xorl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xf7,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag32ri8:
; NF: # %bb.0:
-; NF-NEXT: xorl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xf7,0x7b]
+; NF-NEXT: xorl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xf7,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = xor i32 %a, 123 ; 0xff << 50
@@ -761,17 +761,17 @@ define i1 @xorflag32ri8(i32 %a) {
define i1 @xorflag64ri8(i64 %a) {
; CHECK-LABEL: xorflag64ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xf7,0x7b]
+; CHECK-NEXT: xorq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xf7,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag64ri8:
; NF: # %bb.0:
-; NF-NEXT: xorq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xf7,0x7b]
+; NF-NEXT: xorq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xf7,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = xor i64 %a, 123 ; 0xff << 50
diff --git a/llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll
new file mode 100644
index 000000000000..19860530c030
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll
@@ -0,0 +1,1003 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64 --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686 --show-mc-encoding -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
+
+define dso_local <8 x i64> @test_mm512_ipcvtnebf16_epi8(<32 x bfloat> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvtnebf16_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtnebf162ibs %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7f,0x48,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.vcvtnebf162ibs512(<32 x bfloat> %__A)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvtnebf16_epi8(<8 x i64> noundef %__S, i32 noundef %__A, <32 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvtnebf16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtnebf162ibs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x49,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvtnebf16_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtnebf162ibs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x49,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <32 x i16>
+ %1 = tail call <32 x i16> @llvm.x86.avx10.vcvtnebf162ibs512(<32 x bfloat> %__B)
+ %2 = bitcast i32 %__A to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %0
+ %4 = bitcast <32 x i16> %3 to <8 x i64>
+ ret <8 x i64> %4
+}
+
+declare <32 x i16> @llvm.x86.avx10.vcvtnebf162ibs512(<32 x bfloat>)
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvtnebf16_epi8(i32 noundef %__A, <32 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvtnebf16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtnebf162ibs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xc9,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvtnebf16_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtnebf162ibs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xc9,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.vcvtnebf162ibs512(<32 x bfloat> %__B)
+ %1 = bitcast i32 %__A to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x i16> %0, <32 x i16> zeroinitializer
+ %3 = bitcast <32 x i16> %2 to <8 x i64>
+ ret <8 x i64> %3
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvtnebf16_epu8(<32 x bfloat> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvtnebf16_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtnebf162iubs %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7f,0x48,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.vcvtnebf162iubs512(<32 x bfloat> %__A)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvtnebf16_epu8(<8 x i64> noundef %__S, i32 noundef %__A, <32 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvtnebf16_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtnebf162iubs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x49,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvtnebf16_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtnebf162iubs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x49,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <32 x i16>
+ %1 = tail call <32 x i16> @llvm.x86.avx10.vcvtnebf162iubs512(<32 x bfloat> %__B)
+ %2 = bitcast i32 %__A to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %0
+ %4 = bitcast <32 x i16> %3 to <8 x i64>
+ ret <8 x i64> %4
+}
+
+declare <32 x i16> @llvm.x86.avx10.vcvtnebf162iubs512(<32 x bfloat>)
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvtnebf16_epu8(i32 noundef %__A, <32 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvtnebf16_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtnebf162iubs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xc9,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvtnebf16_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtnebf162iubs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xc9,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.vcvtnebf162iubs512(<32 x bfloat> %__B)
+ %1 = bitcast i32 %__A to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x i16> %0, <32 x i16> zeroinitializer
+ %3 = bitcast <32 x i16> %2 to <8 x i64>
+ ret <8 x i64> %3
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvtph_epi8(<32 x half> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvtph_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtph2ibs %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvtph2ibs512(<32 x half> %__A, <32 x i16> zeroinitializer, i32 -1, i32 4)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvtph_epi8(<8 x i64> noundef %__S, i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvtph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2ibs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvtph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2ibs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <32 x i16>
+ %1 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvtph2ibs512(<32 x half> %__B, <32 x i16> %0, i32 %__A, i32 4)
+ %2 = bitcast <32 x i16> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+declare <32 x i16> @llvm.x86.avx10.mask.vcvtph2ibs512(<32 x half>, <32 x i16>, i32, i32)
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvtph_epi8(i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvtph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2ibs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvtph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2ibs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvtph2ibs512(<32 x half> %__B, <32 x i16> zeroinitializer, i32 %__A, i32 4)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvt_roundph_epi8(<32 x half> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvt_roundph_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtph2ibs {rz-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7c,0x78,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvtph2ibs512(<32 x half> %__A, <32 x i16> zeroinitializer, i32 -1, i32 11)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvt_roundph_epi8(<8 x i64> noundef %__S, i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvt_roundph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2ibs {rz-sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x79,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvt_roundph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2ibs {rz-sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x79,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <32 x i16>
+ %1 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvtph2ibs512(<32 x half> %__B, <32 x i16> %0, i32 %__A, i32 11)
+ %2 = bitcast <32 x i16> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvt_roundph_epi8(i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvt_roundph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2ibs {rz-sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xf9,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvt_roundph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2ibs {rz-sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xf9,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvtph2ibs512(<32 x half> %__B, <32 x i16> zeroinitializer, i32 %__A, i32 11)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvtph_epu8(<32 x half> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvtph_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtph2iubs %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvtph2iubs512(<32 x half> %__A, <32 x i16> zeroinitializer, i32 -1, i32 4)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvtph_epu8(<8 x i64> noundef %__S, i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvtph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2iubs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvtph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2iubs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <32 x i16>
+ %1 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvtph2iubs512( <32 x half> %__B, <32 x i16> %0, i32 %__A, i32 4)
+ %2 = bitcast <32 x i16> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvtph_epu8(i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvtph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2iubs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvtph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2iubs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvtph2iubs512(<32 x half> %__B, <32 x i16> zeroinitializer, i32 %__A, i32 4)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvt_roundph_epu8(<32 x half> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvt_roundph_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtph2iubs {rz-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7c,0x78,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvtph2iubs512(<32 x half> %__A, <32 x i16> zeroinitializer, i32 -1, i32 11)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvt_roundph_epu8(<8 x i64> noundef %__S, i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvt_roundph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2iubs {rz-sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x79,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvt_roundph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2iubs {rz-sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x79,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <32 x i16>
+ %1 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvtph2iubs512(<32 x half> %__B, <32 x i16> %0, i32 %__A, i32 11)
+ %2 = bitcast <32 x i16> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+declare <32 x i16> @llvm.x86.avx10.mask.vcvtph2iubs512(<32 x half>, <32 x i16>, i32, i32)
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvt_roundph_epu8(i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvt_roundph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2iubs {rz-sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xf9,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvt_roundph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2iubs {rz-sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xf9,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvtph2iubs512(<32 x half> %__B, <32 x i16> zeroinitializer, i32 %__A, i32 11)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvtps_epi8(<16 x float> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvtps_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtps2ibs %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvtps2ibs512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 4)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvtps_epi8(<8 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvtps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2ibs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvtps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2ibs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <16 x i32>
+ %1 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvtps2ibs512(<16 x float> %__B, <16 x i32> %0, i16 %__A, i32 4)
+ %2 = bitcast <16 x i32> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+declare <16 x i32> @llvm.x86.avx10.mask.vcvtps2ibs512(<16 x float>, <16 x i32>, i16, i32)
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvtps_epi8(i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvtps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2ibs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvtps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2ibs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvtps2ibs512(<16 x float> %__B, <16 x i32> zeroinitializer, i16 %__A, i32 4)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvt_roundps_epi8(<16 x float> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvt_roundps_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtps2ibs {rz-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x78,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvtps2ibs512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 11)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvt_roundps_epi8(<8 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvt_roundps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2ibs {rz-sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x79,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvt_roundps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2ibs {rz-sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x79,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <16 x i32>
+ %1 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvtps2ibs512(<16 x float> %__B, <16 x i32> %0, i16 %__A, i32 11)
+ %2 = bitcast <16 x i32> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvt_roundps_epi8(i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvt_roundps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2ibs {rz-sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xf9,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvt_roundps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2ibs {rz-sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xf9,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvtps2ibs512(<16 x float> %__B, <16 x i32> zeroinitializer, i16 %__A, i32 11)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvtps_epu8(<16 x float> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvtps_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtps2iubs %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvtps2iubs512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 4)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvtps_epu8(<8 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvtps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2iubs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvtps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2iubs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <16 x i32>
+ %1 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvtps2iubs512(<16 x float> %__B, <16 x i32> %0, i16 %__A, i32 4)
+ %2 = bitcast <16 x i32> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvtps_epu8(i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvtps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2iubs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvtps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2iubs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvtps2iubs512(<16 x float> %__B, <16 x i32> zeroinitializer, i16 %__A, i32 4)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvt_roundps_epu8(<16 x float> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvt_roundps_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtps2iubs {rz-sae}, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x78,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvtps2iubs512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 11)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvt_roundps_epu8(<8 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvt_roundps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2iubs {rz-sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x79,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvt_roundps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2iubs {rz-sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x79,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <16 x i32>
+ %1 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvtps2iubs512(<16 x float> %__B, <16 x i32> %0, i16 %__A, i32 11)
+ %2 = bitcast <16 x i32> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+declare <16 x i32> @llvm.x86.avx10.mask.vcvtps2iubs512(<16 x float>, <16 x i32>, i16, i32)
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvt_roundps_epu8(i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvt_roundps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2iubs {rz-sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xf9,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvt_roundps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2iubs {rz-sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xf9,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvtps2iubs512(<16 x float> %__B, <16 x i32> zeroinitializer, i16 %__A, i32 11)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvttnebf16_epi8(<32 x bfloat> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvttnebf16_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttnebf162ibs %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7f,0x48,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.vcvttnebf162ibs512(<32 x bfloat> %__A)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvttnebf16_epi8(<8 x i64> noundef %__S, i32 noundef %__A, <32 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvttnebf16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttnebf162ibs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x49,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvttnebf16_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttnebf162ibs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x49,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <32 x i16>
+ %1 = tail call <32 x i16> @llvm.x86.avx10.vcvttnebf162ibs512(<32 x bfloat> %__B)
+ %2 = bitcast i32 %__A to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %0
+ %4 = bitcast <32 x i16> %3 to <8 x i64>
+ ret <8 x i64> %4
+}
+
+declare <32 x i16> @llvm.x86.avx10.vcvttnebf162ibs512(<32 x bfloat>)
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvttnebf16_epi8(i32 noundef %__A, <32 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvttnebf16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttnebf162ibs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xc9,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvttnebf16_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttnebf162ibs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xc9,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.vcvttnebf162ibs512(<32 x bfloat> %__B)
+ %1 = bitcast i32 %__A to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x i16> %0, <32 x i16> zeroinitializer
+ %3 = bitcast <32 x i16> %2 to <8 x i64>
+ ret <8 x i64> %3
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvttnebf16_epu8(<32 x bfloat> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvttnebf16_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttnebf162iubs %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7f,0x48,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.vcvttnebf162iubs512(<32 x bfloat> %__A)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvttnebf16_epu8(<8 x i64> noundef %__S, i32 noundef %__A, <32 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvttnebf16_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttnebf162iubs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x49,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvttnebf16_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttnebf162iubs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x49,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <32 x i16>
+ %1 = tail call <32 x i16> @llvm.x86.avx10.vcvttnebf162iubs512(<32 x bfloat> %__B)
+ %2 = bitcast i32 %__A to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %0
+ %4 = bitcast <32 x i16> %3 to <8 x i64>
+ ret <8 x i64> %4
+}
+
+declare <32 x i16> @llvm.x86.avx10.vcvttnebf162iubs512(<32 x bfloat>)
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvttnebf16_epu8(i32 noundef %__A, <32 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvttnebf16_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttnebf162iubs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xc9,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvttnebf16_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttnebf162iubs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xc9,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.vcvttnebf162iubs512(<32 x bfloat> %__B)
+ %1 = bitcast i32 %__A to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x i16> %0, <32 x i16> zeroinitializer
+ %3 = bitcast <32 x i16> %2 to <8 x i64>
+ ret <8 x i64> %3
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvttph_epi8(<32 x half> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvttph_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttph2ibs %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvttph2ibs512(<32 x half> %__A, <32 x i16> zeroinitializer, i32 -1, i32 4)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvttph_epi8(<8 x i64> noundef %__S, i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvttph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2ibs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvttph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2ibs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <32 x i16>
+ %1 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvttph2ibs512(<32 x half> %__B, <32 x i16> %0, i32 %__A, i32 4)
+ %2 = bitcast <32 x i16> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvttph_epi8(i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvttph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2ibs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvttph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2ibs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvttph2ibs512(<32 x half> %__B, <32 x i16> zeroinitializer, i32 %__A, i32 4)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvtt_roundph_epi8(<32 x half> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvtt_roundph_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttph2ibs {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7c,0x18,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvttph2ibs512(<32 x half> %__A, <32 x i16> zeroinitializer, i32 -1, i32 8)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvtt_roundph_epi8(<8 x i64> noundef %__S, i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvtt_roundph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2ibs {sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x19,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvtt_roundph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2ibs {sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x19,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <32 x i16>
+ %1 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvttph2ibs512(<32 x half> %__B, <32 x i16> %0, i32 %__A, i32 8)
+ %2 = bitcast <32 x i16> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+declare <32 x i16> @llvm.x86.avx10.mask.vcvttph2ibs512(<32 x half>, <32 x i16>, i32, i32)
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvtt_roundph_epi8(i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvtt_roundph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2ibs {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x99,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvtt_roundph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2ibs {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x99,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvttph2ibs512(<32 x half> %__B, <32 x i16> zeroinitializer, i32 %__A, i32 8)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvttph_epu8(<32 x half> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvttph_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttph2iubs %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7c,0x48,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvttph2iubs512(<32 x half> %__A, <32 x i16> zeroinitializer, i32 -1, i32 4)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvttph_epu8(<8 x i64> noundef %__S, i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvttph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2iubs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvttph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2iubs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x49,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <32 x i16>
+ %1 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvttph2iubs512(<32 x half> %__B, <32 x i16> %0, i32 %__A, i32 4)
+ %2 = bitcast <32 x i16> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvttph_epu8(i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvttph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2iubs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvttph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2iubs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xc9,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvttph2iubs512(<32 x half> %__B, <32 x i16> zeroinitializer, i32 %__A, i32 4)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvtt_roundph_epu8(<32 x half> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvtt_roundph_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttph2iubs {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7c,0x18,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvttph2iubs512(<32 x half> %__A, <32 x i16> zeroinitializer, i32 -1, i32 8)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvtt_roundph_epu8(<8 x i64> noundef %__S, i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvtt_roundph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2iubs {sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x19,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvtt_roundph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2iubs {sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x19,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <32 x i16>
+ %1 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvttph2iubs512(<32 x half> %__B, <32 x i16> %0, i32 %__A, i32 8)
+ %2 = bitcast <32 x i16> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+declare <32 x i16> @llvm.x86.avx10.mask.vcvttph2iubs512(<32 x half>, <32 x i16>, i32, i32)
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvtt_roundph_epu8(i32 noundef %__A, <32 x half> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvtt_roundph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2iubs {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x99,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvtt_roundph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2iubs {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x99,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.mask.vcvttph2iubs512(<32 x half> %__B, <32 x i16> zeroinitializer, i32 %__A, i32 8)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvttps_epi8(<16 x float> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvttps_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttps2ibs %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvttps2ibs512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 4)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvttps_epi8(<8 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvttps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2ibs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvttps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2ibs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <16 x i32>
+ %1 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvttps2ibs512(<16 x float> %__B, <16 x i32> %0, i16 %__A, i32 4)
+ %2 = bitcast <16 x i32> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvttps_epi8(i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvttps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2ibs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvttps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2ibs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvttps2ibs512(<16 x float> %__B, <16 x i32> zeroinitializer, i16 %__A, i32 4)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvtt_roundps_epi8(<16 x float> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvtt_roundps_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttps2ibs {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x18,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvttps2ibs512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 8)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvtt_roundps_epi8(<8 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvtt_roundps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2ibs {sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x19,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvtt_roundps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2ibs {sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x19,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <16 x i32>
+ %1 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvttps2ibs512(<16 x float> %__B, <16 x i32> %0, i16 %__A, i32 8)
+ %2 = bitcast <16 x i32> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+declare <16 x i32> @llvm.x86.avx10.mask.vcvttps2ibs512(<16 x float>, <16 x i32>, i16, i32)
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvtt_roundps_epi8(i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvtt_roundps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2ibs {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x99,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvtt_roundps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2ibs {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x99,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvttps2ibs512(<16 x float> %__B, <16 x i32> zeroinitializer, i16 %__A, i32 8)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvttps_epu8(<16 x float> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvttps_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttps2iubs %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x48,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvttps2iubs512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 4)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvttps_epu8(<8 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvttps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2iubs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvttps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2iubs %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x49,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <16 x i32>
+ %1 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvttps2iubs512(<16 x float> %__B, <16 x i32> %0, i16 %__A, i32 4)
+ %2 = bitcast <16 x i32> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvttps_epu8(i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvttps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2iubs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvttps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2iubs %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xc9,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvttps2iubs512(<16 x float> %__B, <16 x i32> zeroinitializer, i16 %__A, i32 4)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_ipcvtt_roundps_epu8(<16 x float> noundef %__A) {
+; CHECK-LABEL: test_mm512_ipcvtt_roundps_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttps2iubs {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf5,0x7d,0x18,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvttps2iubs512(<16 x float> %__A, <16 x i32> zeroinitializer, i16 -1, i32 8)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define dso_local <8 x i64> @test_mm512_mask_ipcvtt_roundps_epu8(<8 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_mask_ipcvtt_roundps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2iubs {sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x19,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_mask_ipcvtt_roundps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2iubs {sae}, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x19,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <8 x i64> %__S to <16 x i32>
+ %1 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvttps2iubs512(<16 x float> %__B, <16 x i32> %0, i16 %__A, i32 8)
+ %2 = bitcast <16 x i32> %1 to <8 x i64>
+ ret <8 x i64> %2
+}
+
+declare <16 x i32> @llvm.x86.avx10.mask.vcvttps2iubs512(<16 x float>, <16 x i32>, i16, i32)
+
+define dso_local <8 x i64> @test_mm512_maskz_ipcvtt_roundps_epu8(i16 noundef zeroext %__A, <16 x float> noundef %__B) {
+; X64-LABEL: test_mm512_maskz_ipcvtt_roundps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2iubs {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x99,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm512_maskz_ipcvtt_roundps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2iubs {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x99,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.mask.vcvttps2iubs512(<16 x float> %__B, <16 x i32> zeroinitializer, i16 %__A, i32 8)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
diff --git a/llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll
new file mode 100644
index 000000000000..e16aa9d2de31
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll
@@ -0,0 +1,1618 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64 --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686 --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=CHECK,X86
+
+define dso_local <2 x i64> @test_mm_ipcvtnebf16_epi8(<8 x bfloat> noundef %__A) {
+; CHECK-LABEL: test_mm_ipcvtnebf16_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtnebf162ibs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7f,0x08,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vcvtnebf162ibs128(<8 x bfloat> %__A)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <2 x i64> @test_mm_mask_ipcvtnebf16_epi8(<2 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm_mask_ipcvtnebf16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtnebf162ibs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x09,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_ipcvtnebf16_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtnebf162ibs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x09,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <2 x i64> %__S to <8 x i16>
+ %1 = tail call <8 x i16> @llvm.x86.avx10.vcvtnebf162ibs128(<8 x bfloat> %__B)
+ %2 = bitcast i8 %__A to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %0
+ %4 = bitcast <8 x i16> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+declare <8 x i16> @llvm.x86.avx10.vcvtnebf162ibs128(<8 x bfloat>)
+
+define dso_local <2 x i64> @test_mm_maskz_ipcvtnebf16_epi8(i8 noundef zeroext %__A, <8 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm_maskz_ipcvtnebf16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtnebf162ibs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0x89,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_ipcvtnebf16_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtnebf162ibs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0x89,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vcvtnebf162ibs128(<8 x bfloat> %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvtnebf16_epi8(<16 x bfloat> noundef %__A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_ipcvtnebf16_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtnebf162ibs %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7f,0x28,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vcvtnebf162ibs256(<16 x bfloat> %__A)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvtnebf16_epi8(<4 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x bfloat> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_mask_ipcvtnebf16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtnebf162ibs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x29,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvtnebf16_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtnebf162ibs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x29,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <16 x i16>
+ %1 = tail call <16 x i16> @llvm.x86.avx10.vcvtnebf162ibs256(<16 x bfloat> %__B)
+ %2 = bitcast i16 %__A to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %0
+ %4 = bitcast <16 x i16> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+declare <16 x i16> @llvm.x86.avx10.vcvtnebf162ibs256(<16 x bfloat>)
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvtnebf16_epi8(i16 noundef zeroext %__A, <16 x bfloat> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_maskz_ipcvtnebf16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtnebf162ibs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xa9,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvtnebf16_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtnebf162ibs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xa9,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vcvtnebf162ibs256(<16 x bfloat> %__B)
+ %1 = bitcast i16 %__A to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer
+ %3 = bitcast <16 x i16> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define dso_local <2 x i64> @test_mm_ipcvtnebf16_epu8(<8 x bfloat> noundef %__A) {
+; CHECK-LABEL: test_mm_ipcvtnebf16_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtnebf162iubs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7f,0x08,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vcvtnebf162iubs128(<8 x bfloat> %__A)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <2 x i64> @test_mm_mask_ipcvtnebf16_epu8(<2 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm_mask_ipcvtnebf16_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtnebf162iubs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x09,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_ipcvtnebf16_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtnebf162iubs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x09,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <2 x i64> %__S to <8 x i16>
+ %1 = tail call <8 x i16> @llvm.x86.avx10.vcvtnebf162iubs128(<8 x bfloat> %__B)
+ %2 = bitcast i8 %__A to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %0
+ %4 = bitcast <8 x i16> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+declare <8 x i16> @llvm.x86.avx10.vcvtnebf162iubs128(<8 x bfloat>)
+
+define dso_local <2 x i64> @test_mm_maskz_ipcvtnebf16_epu8(i8 noundef zeroext %__A, <8 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm_maskz_ipcvtnebf16_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtnebf162iubs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0x89,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_ipcvtnebf16_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtnebf162iubs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0x89,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vcvtnebf162iubs128(<8 x bfloat> %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvtnebf16_epu8(<16 x bfloat> noundef %__A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_ipcvtnebf16_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtnebf162iubs %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7f,0x28,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vcvtnebf162iubs256(<16 x bfloat> %__A)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvtnebf16_epu8(<4 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x bfloat> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_mask_ipcvtnebf16_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtnebf162iubs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x29,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvtnebf16_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtnebf162iubs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x29,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <16 x i16>
+ %1 = tail call <16 x i16> @llvm.x86.avx10.vcvtnebf162iubs256(<16 x bfloat> %__B)
+ %2 = bitcast i16 %__A to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %0
+ %4 = bitcast <16 x i16> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvtnebf16_epu8(i16 noundef zeroext %__A, <16 x bfloat> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_maskz_ipcvtnebf16_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtnebf162iubs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xa9,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvtnebf16_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtnebf162iubs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xa9,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vcvtnebf162iubs256(<16 x bfloat> %__B)
+ %1 = bitcast i16 %__A to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer
+ %3 = bitcast <16 x i16> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+declare <16 x i16> @llvm.x86.avx10.vcvtnebf162iubs256(<16 x bfloat>)
+
+define dso_local <2 x i64> @test_mm_ipcvtph_epi8(<8 x half> noundef %__A) {
+; CHECK-LABEL: test_mm_ipcvtph_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtph2ibs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7c,0x08,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.mask.vcvtph2ibs128(<8 x half> %__A, <8 x i16> zeroinitializer, i8 -1)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <2 x i64> @test_mm_mask_ipcvtph_epi8(<2 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x half> noundef %__B) {
+; X64-LABEL: test_mm_mask_ipcvtph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2ibs %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_ipcvtph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2ibs %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <2 x i64> %__S to <8 x i16>
+ %1 = tail call <8 x i16> @llvm.x86.avx10.mask.vcvtph2ibs128(<8 x half> %__B, <8 x i16> zeroinitializer, i8 %__A)
+ %2 = bitcast <8 x i16> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+declare <8 x i16> @llvm.x86.avx10.mask.vcvtph2ibs128(<8 x half>, <8 x i16>, i8)
+
+define dso_local <2 x i64> @test_mm_maskz_ipcvtph_epi8(i8 noundef zeroext %__A, <8 x half> noundef %__B) {
+; X64-LABEL: test_mm_maskz_ipcvtph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2ibs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_ipcvtph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2ibs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.mask.vcvtph2ibs128(<8 x half> %__B, <8 x i16> zeroinitializer, i8 %__A)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvtph_epi8(<16 x half> noundef %__A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_ipcvtph_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtph2ibs %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7c,0x28,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvtph2ibs256(<16 x half> %__A, <16 x i16> zeroinitializer, i16 -1, i32 4)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvtph_epi8(<4 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x half> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_mask_ipcvtph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2ibs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvtph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2ibs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <16 x i16>
+ %1 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvtph2ibs256(<16 x half> %__B, <16 x i16> %0, i16 %__A, i32 4)
+ %2 = bitcast <16 x i16> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvtph_epi8(i16 noundef zeroext %__A, <16 x half> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_maskz_ipcvtph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2ibs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvtph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2ibs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvtph2ibs256(<16 x half> %__B, <16 x i16> zeroinitializer, i16 %__A, i32 4)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvtph_epi8_round(<16 x half> noundef %__A) {
+; CHECK-LABEL: test_mm256_ipcvtph_epi8_round:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtph2ibs {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x78,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvtph2ibs256(<16 x half> %__A, <16 x i16> zeroinitializer, i16 -1, i32 11)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvtph_epi8_round(<4 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x half> noundef %__B) {
+; X64-LABEL: test_mm256_mask_ipcvtph_epi8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2ibs {rz-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x79,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvtph_epi8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2ibs {rz-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x79,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <16 x i16>
+ %1 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvtph2ibs256(<16 x half> %__B, <16 x i16> %0, i16 %__A, i32 11)
+ %2 = bitcast <16 x i16> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvtph_epi8_round(i16 noundef zeroext %__A, <16 x half> noundef %__B) {
+; X64-LABEL: test_mm256_maskz_ipcvtph_epi8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2ibs {rz-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xf9,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvtph_epi8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2ibs {rz-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xf9,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvtph2ibs256(<16 x half> %__B, <16 x i16> zeroinitializer, i16 %__A, i32 11)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+declare <16 x i16> @llvm.x86.avx10.mask.vcvtph2ibs256(<16 x half>, <16 x i16>, i16, i32)
+
+define dso_local <2 x i64> @test_mm_ipcvtph_epu8(<8 x half> noundef %__A) {
+; CHECK-LABEL: test_mm_ipcvtph_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtph2iubs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7c,0x08,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.mask.vcvtph2iubs128(<8 x half> %__A, <8 x i16> zeroinitializer, i8 -1)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <2 x i64> @test_mm_mask_ipcvtph_epu8(<2 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x half> noundef %__B) {
+; X64-LABEL: test_mm_mask_ipcvtph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2iubs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x09,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_ipcvtph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2iubs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x09,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <2 x i64> %__S to <8 x i16>
+ %1 = tail call <8 x i16> @llvm.x86.avx10.mask.vcvtph2iubs128(<8 x half> %__B, <8 x i16> %0, i8 %__A)
+ %2 = bitcast <8 x i16> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+declare <8 x i16> @llvm.x86.avx10.mask.vcvtph2iubs128(<8 x half>, <8 x i16>, i8)
+
+define dso_local <2 x i64> @test_mm_maskz_ipcvtph_epu8(i8 noundef zeroext %__A, <8 x half> noundef %__B) {
+; X64-LABEL: test_mm_maskz_ipcvtph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2iubs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_ipcvtph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2iubs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.mask.vcvtph2iubs128(<8 x half> %__B, <8 x i16> zeroinitializer, i8 %__A)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvtph_epu8(<16 x half> noundef %__A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_ipcvtph_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtph2iubs %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7c,0x28,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvtph2iubs256(<16 x half> %__A, <16 x i16> zeroinitializer, i16 -1, i32 4)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvtph_epu8(<4 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x half> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_mask_ipcvtph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2iubs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvtph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2iubs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <16 x i16>
+ %1 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvtph2iubs256(<16 x half> %__B, <16 x i16> %0, i16 %__A, i32 4)
+ %2 = bitcast <16 x i16> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvtph_epu8(i16 noundef zeroext %__A, <16 x half> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_maskz_ipcvtph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2iubs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvtph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2iubs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvtph2iubs256(<16 x half> %__B, <16 x i16> zeroinitializer, i16 %__A, i32 4)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvtph_epu8_round(<16 x half> noundef %__A) {
+; CHECK-LABEL: test_mm256_ipcvtph_epu8_round:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtph2iubs {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x78,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvtph2iubs256(<16 x half> %__A, <16 x i16> zeroinitializer, i16 -1, i32 11)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvtph_epu8_round(<4 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x half> noundef %__B) {
+; X64-LABEL: test_mm256_mask_ipcvtph_epu8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2iubs {rz-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x79,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvtph_epu8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2iubs {rz-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x79,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <16 x i16>
+ %1 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvtph2iubs256(<16 x half> %__B, <16 x i16> %0, i16 %__A, i32 11)
+ %2 = bitcast <16 x i16> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvtph_epu8_round(i16 noundef zeroext %__A, <16 x half> noundef %__B) {
+; X64-LABEL: test_mm256_maskz_ipcvtph_epu8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtph2iubs {rz-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xf9,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvtph_epu8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtph2iubs {rz-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0xf9,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvtph2iubs256(<16 x half> %__B, <16 x i16> zeroinitializer, i16 %__A, i32 11)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+declare <16 x i16> @llvm.x86.avx10.mask.vcvtph2iubs256(<16 x half>, <16 x i16>, i16, i32)
+
+define dso_local <2 x i64> @test_mm_ipcvtps_epi8(<4 x float> noundef %__A) {
+; CHECK-LABEL: test_mm_ipcvtps_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtps2ibs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.mask.vcvtps2ibs128(<4 x float> %__A, <4 x i32> zeroinitializer, i8 -1)
+ %1 = bitcast <4 x i32> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <2 x i64> @test_mm_mask_ipcvtps_epi8(<2 x i64> noundef %__S, i8 noundef zeroext %__A, <4 x float> noundef %__B) {
+; X64-LABEL: test_mm_mask_ipcvtps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2ibs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_ipcvtps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2ibs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <2 x i64> %__S to <4 x i32>
+ %1 = tail call <4 x i32> @llvm.x86.avx10.mask.vcvtps2ibs128(<4 x float> %__B, <4 x i32> %0, i8 %__A)
+ %2 = bitcast <4 x i32> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define dso_local <2 x i64> @test_mm_maskz_ipcvtps_epi8(i8 noundef zeroext %__A, <4 x float> noundef %__B) {
+; X64-LABEL: test_mm_maskz_ipcvtps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2ibs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_ipcvtps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2ibs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.mask.vcvtps2ibs128(<4 x float> %__B, <4 x i32> zeroinitializer, i8 %__A)
+ %1 = bitcast <4 x i32> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.x86.avx10.mask.vcvtps2ibs128(<4 x float>, <4 x i32>, i8)
+
+define dso_local <4 x i64> @test_mm256_ipcvtps_epi8(<8 x float> noundef %__A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_ipcvtps_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtps2ibs %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvtps2ibs256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 -1, i32 4)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvtps_epi8(<4 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x float> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_mask_ipcvtps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2ibs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvtps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2ibs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <8 x i32>
+ %1 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvtps2ibs256(<8 x float> %__B, <8 x i32> %0, i8 %__A, i32 4)
+ %2 = bitcast <8 x i32> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvtps_epi8(i8 noundef zeroext %__A, <8 x float> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_maskz_ipcvtps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2ibs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvtps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2ibs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvtps2ibs256(<8 x float> %__B, <8 x i32> zeroinitializer, i8 %__A, i32 4)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvtps_epi8_round(<8 x float> noundef %__A) {
+; CHECK-LABEL: test_mm256_ipcvtps_epi8_round:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtps2ibs {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x79,0x78,0x69,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvtps2ibs256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 -1, i32 11)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvtps_epi8_round(<4 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x float> noundef %__B) {
+; X64-LABEL: test_mm256_mask_ipcvtps_epi8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2ibs {rz-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x79,0x69,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvtps_epi8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2ibs {rz-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x79,0x69,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <8 x i32>
+ %1 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvtps2ibs256(<8 x float> %__B, <8 x i32> %0, i8 %__A, i32 11)
+ %2 = bitcast <8 x i32> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvtps_epi8_round(i8 noundef zeroext %__A, <8 x float> noundef %__B) {
+; X64-LABEL: test_mm256_maskz_ipcvtps_epi8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2ibs {rz-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xf9,0x69,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvtps_epi8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2ibs {rz-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xf9,0x69,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvtps2ibs256(<8 x float> %__B, <8 x i32> zeroinitializer, i8 %__A, i32 11)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+declare <8 x i32> @llvm.x86.avx10.mask.vcvtps2ibs256(<8 x float>, <8 x i32>, i8, i32)
+
+define dso_local <2 x i64> @test_mm_ipcvtps_epu8(<4 x float> noundef %__A) {
+; CHECK-LABEL: test_mm_ipcvtps_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtps2iubs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.mask.vcvtps2iubs128(<4 x float> %__A, <4 x i32> zeroinitializer, i8 -1)
+ %1 = bitcast <4 x i32> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <2 x i64> @test_mm_mask_ipcvtps_epu8(<2 x i64> noundef %__S, i8 noundef zeroext %__A, <4 x float> noundef %__B) {
+; X64-LABEL: test_mm_mask_ipcvtps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2iubs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_ipcvtps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2iubs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <2 x i64> %__S to <4 x i32>
+ %1 = tail call <4 x i32> @llvm.x86.avx10.mask.vcvtps2iubs128(<4 x float> %__B, <4 x i32> %0, i8 %__A)
+ %2 = bitcast <4 x i32> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define dso_local <2 x i64> @test_mm_maskz_ipcvtps_epu8(i8 noundef zeroext %__A, <4 x float> noundef %__B) {
+; X64-LABEL: test_mm_maskz_ipcvtps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2iubs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_ipcvtps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2iubs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.mask.vcvtps2iubs128(<4 x float> %__B, <4 x i32> zeroinitializer, i8 %__A)
+ %1 = bitcast <4 x i32> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.x86.avx10.mask.vcvtps2iubs128(<4 x float>, <4 x i32>, i8)
+
+define dso_local <4 x i64> @test_mm256_ipcvtps_epu8(<8 x float> noundef %__A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_ipcvtps_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtps2iubs %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvtps2iubs256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 -1, i32 4)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvtps_epu8(<4 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x float> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_mask_ipcvtps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2iubs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvtps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2iubs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <8 x i32>
+ %1 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvtps2iubs256(<8 x float> %__B, <8 x i32> %0, i8 %__A, i32 4)
+ %2 = bitcast <8 x i32> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvtps_epu8(i8 noundef zeroext %__A, <8 x float> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_maskz_ipcvtps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2iubs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvtps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2iubs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvtps2iubs256(<8 x float> %__B, <8 x i32> zeroinitializer, i8 %__A, i32 4)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvtps_epu8_round(<8 x float> noundef %__A) {
+; CHECK-LABEL: test_mm256_ipcvtps_epu8_round:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtps2iubs {rz-sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x79,0x78,0x6b,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvtps2iubs256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 -1, i32 11)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvtps_epu8_round(<4 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x float> noundef %__B) {
+; X64-LABEL: test_mm256_mask_ipcvtps_epu8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2iubs {rz-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x79,0x6b,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvtps_epu8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2iubs {rz-sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x79,0x6b,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <8 x i32>
+ %1 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvtps2iubs256(<8 x float> %__B, <8 x i32> %0, i8 %__A, i32 11)
+ %2 = bitcast <8 x i32> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvtps_epu8_round(i8 noundef zeroext %__A, <8 x float> noundef %__B) {
+; X64-LABEL: test_mm256_maskz_ipcvtps_epu8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtps2iubs {rz-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xf9,0x6b,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvtps_epu8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtps2iubs {rz-sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0xf9,0x6b,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvtps2iubs256(<8 x float> %__B, <8 x i32> zeroinitializer, i8 %__A, i32 11)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+declare <8 x i32> @llvm.x86.avx10.mask.vcvtps2iubs256(<8 x float>, <8 x i32>, i8, i32)
+
+define dso_local <2 x i64> @test_mm_ipcvttnebf16_epi8(<8 x bfloat> noundef %__A) {
+; CHECK-LABEL: test_mm_ipcvttnebf16_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttnebf162ibs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7f,0x08,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vcvttnebf162ibs128(<8 x bfloat> %__A)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <2 x i64> @test_mm_mask_ipcvttnebf16_epi8(<2 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm_mask_ipcvttnebf16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttnebf162ibs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x09,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_ipcvttnebf16_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttnebf162ibs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x09,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <2 x i64> %__S to <8 x i16>
+ %1 = tail call <8 x i16> @llvm.x86.avx10.vcvttnebf162ibs128(<8 x bfloat> %__B)
+ %2 = bitcast i8 %__A to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %0
+ %4 = bitcast <8 x i16> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+declare <8 x i16> @llvm.x86.avx10.vcvttnebf162ibs128(<8 x bfloat>)
+
+define dso_local <2 x i64> @test_mm_maskz_ipcvttnebf16_epi8(i8 noundef zeroext %__A, <8 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm_maskz_ipcvttnebf16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttnebf162ibs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0x89,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_ipcvttnebf16_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttnebf162ibs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0x89,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vcvttnebf162ibs128(<8 x bfloat> %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvttnebf16_epi8(<16 x bfloat> noundef %__A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_ipcvttnebf16_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttnebf162ibs %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7f,0x28,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vcvttnebf162ibs256(<16 x bfloat> %__A)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvttnebf16_epi8(<4 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x bfloat> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_mask_ipcvttnebf16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttnebf162ibs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x29,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvttnebf16_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttnebf162ibs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x29,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <16 x i16>
+ %1 = tail call <16 x i16> @llvm.x86.avx10.vcvttnebf162ibs256(<16 x bfloat> %__B)
+ %2 = bitcast i16 %__A to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %0
+ %4 = bitcast <16 x i16> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvttnebf16_epi8(i16 noundef zeroext %__A, <16 x bfloat> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_maskz_ipcvttnebf16_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttnebf162ibs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xa9,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvttnebf16_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttnebf162ibs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xa9,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vcvttnebf162ibs256(<16 x bfloat> %__B)
+ %1 = bitcast i16 %__A to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer
+ %3 = bitcast <16 x i16> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+declare <16 x i16> @llvm.x86.avx10.vcvttnebf162ibs256(<16 x bfloat>)
+
+define dso_local <2 x i64> @test_mm_ipcvttnebf16_epu8(<8 x bfloat> noundef %__A) {
+; CHECK-LABEL: test_mm_ipcvttnebf16_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttnebf162iubs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7f,0x08,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vcvttnebf162iubs128(<8 x bfloat> %__A)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <2 x i64> @test_mm_mask_ipcvttnebf16_epu8(<2 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm_mask_ipcvttnebf16_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttnebf162iubs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x09,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_ipcvttnebf16_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttnebf162iubs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x09,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <2 x i64> %__S to <8 x i16>
+ %1 = tail call <8 x i16> @llvm.x86.avx10.vcvttnebf162iubs128(<8 x bfloat> %__B)
+ %2 = bitcast i8 %__A to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %0
+ %4 = bitcast <8 x i16> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+declare <8 x i16> @llvm.x86.avx10.vcvttnebf162iubs128(<8 x bfloat>)
+
+define dso_local <2 x i64> @test_mm_maskz_ipcvttnebf16_epu8(i8 noundef zeroext %__A, <8 x bfloat> noundef %__B) {
+; X64-LABEL: test_mm_maskz_ipcvttnebf16_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttnebf162iubs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0x89,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_ipcvttnebf16_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttnebf162iubs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0x89,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vcvttnebf162iubs128(<8 x bfloat> %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvttnebf16_epu8(<16 x bfloat> noundef %__A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_ipcvttnebf16_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttnebf162iubs %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7f,0x28,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vcvttnebf162iubs256(<16 x bfloat> %__A)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvttnebf16_epu8(<4 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x bfloat> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_mask_ipcvttnebf16_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttnebf162iubs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x29,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvttnebf16_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttnebf162iubs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x29,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <16 x i16>
+ %1 = tail call <16 x i16> @llvm.x86.avx10.vcvttnebf162iubs256(<16 x bfloat> %__B)
+ %2 = bitcast i16 %__A to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %0
+ %4 = bitcast <16 x i16> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvttnebf16_epu8(i16 noundef zeroext %__A, <16 x bfloat> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_maskz_ipcvttnebf16_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttnebf162iubs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xa9,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvttnebf16_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttnebf162iubs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xa9,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vcvttnebf162iubs256(<16 x bfloat> %__B)
+ %1 = bitcast i16 %__A to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer
+ %3 = bitcast <16 x i16> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+declare <16 x i16> @llvm.x86.avx10.vcvttnebf162iubs256(<16 x bfloat>)
+
+define dso_local <2 x i64> @test_mm_ipcvttph_epi8(<8 x half> noundef %__A) {
+; CHECK-LABEL: test_mm_ipcvttph_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttph2ibs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7c,0x08,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.mask.vcvttph2ibs128(<8 x half> %__A, <8 x i16> zeroinitializer, i8 -1)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <2 x i64> @test_mm_mask_ipcvttph_epi8(<2 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x half> noundef %__B) {
+; X64-LABEL: test_mm_mask_ipcvttph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2ibs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x09,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_ipcvttph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2ibs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x09,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <2 x i64> %__S to <8 x i16>
+ %1 = tail call <8 x i16> @llvm.x86.avx10.mask.vcvttph2ibs128(<8 x half> %__B, <8 x i16> %0, i8 %__A)
+ %2 = bitcast <8 x i16> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+declare <8 x i16> @llvm.x86.avx10.mask.vcvttph2ibs128(<8 x half>, <8 x i16>, i8)
+
+define dso_local <2 x i64> @test_mm_maskz_ipcvttph_epi8(i8 noundef zeroext %__A, <8 x half> noundef %__B) {
+; X64-LABEL: test_mm_maskz_ipcvttph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2ibs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_ipcvttph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2ibs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.mask.vcvttph2ibs128(<8 x half> %__B, <8 x i16> zeroinitializer, i8 %__A)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvttph_epi8(<16 x half> noundef %__A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_ipcvttph_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttph2ibs %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7c,0x28,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvttph2ibs256(<16 x half> %__A, <16 x i16> zeroinitializer, i16 -1, i32 4)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvttph_epi8(<4 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x half> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_mask_ipcvttph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2ibs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvttph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2ibs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <16 x i16>
+ %1 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvttph2ibs256(<16 x half> %__B, <16 x i16> %0, i16 %__A, i32 4)
+ %2 = bitcast <16 x i16> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvttph_epi8(i16 noundef zeroext %__A, <16 x half> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_maskz_ipcvttph_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2ibs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvttph_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2ibs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvttph2ibs256(<16 x half> %__B, <16 x i16> zeroinitializer, i16 %__A, i32 4)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvttph_epi8_round(<16 x half> noundef %__A) {
+; CHECK-LABEL: test_mm256_ipcvttph_epi8_round:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttph2ibs {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x18,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvttph2ibs256(<16 x half> %__A, <16 x i16> zeroinitializer, i16 -1, i32 8)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvttph_epi8_round(<4 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x half> noundef %__B) {
+; X64-LABEL: test_mm256_mask_ipcvttph_epi8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2ibs {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x19,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvttph_epi8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2ibs {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x19,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <16 x i16>
+ %1 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvttph2ibs256(<16 x half> %__B, <16 x i16> %0, i16 %__A, i32 8)
+ %2 = bitcast <16 x i16> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvttph_epi8_round(i16 noundef zeroext %__A, <16 x half> noundef %__B) {
+; X64-LABEL: test_mm256_maskz_ipcvttph_epi8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2ibs {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvttph_epi8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2ibs {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvttph2ibs256(<16 x half> %__B, <16 x i16> zeroinitializer, i16 %__A, i32 8)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+declare <16 x i16> @llvm.x86.avx10.mask.vcvttph2ibs256(<16 x half>, <16 x i16>, i16, i32)
+
+define dso_local <2 x i64> @test_mm_ipcvttph_epu8(<8 x half> noundef %__A) {
+; CHECK-LABEL: test_mm_ipcvttph_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttph2iubs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7c,0x08,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.mask.vcvttph2iubs128(<8 x half> %__A, <8 x i16> zeroinitializer, i8 -1)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <2 x i64> @test_mm_mask_ipcvttph_epu8(<2 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x half> noundef %__B) {
+; X64-LABEL: test_mm_mask_ipcvttph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2iubs %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_ipcvttph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2iubs %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <2 x i64> %__S to <8 x i16>
+ %1 = tail call <8 x i16> @llvm.x86.avx10.mask.vcvttph2iubs128(<8 x half> %__B, <8 x i16> zeroinitializer, i8 %__A)
+ %2 = bitcast <8 x i16> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+declare <8 x i16> @llvm.x86.avx10.mask.vcvttph2iubs128(<8 x half>, <8 x i16>, i8)
+
+define dso_local <2 x i64> @test_mm_maskz_ipcvttph_epu8(i8 noundef zeroext %__A, <8 x half> noundef %__B) {
+; X64-LABEL: test_mm_maskz_ipcvttph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2iubs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_ipcvttph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2iubs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0x89,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.mask.vcvttph2iubs128(<8 x half> %__B, <8 x i16> zeroinitializer, i8 %__A)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvttph_epu8(<16 x half> noundef %__A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_ipcvttph_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttph2iubs %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7c,0x28,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvttph2iubs256(<16 x half> %__A, <16 x i16> zeroinitializer, i16 -1, i32 4)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvttph_epu8(<4 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x half> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_mask_ipcvttph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2iubs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvttph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2iubs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7c,0x29,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <16 x i16>
+ %1 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvttph2iubs256(<16 x half> %__B, <16 x i16> %0, i16 %__A, i32 4)
+ %2 = bitcast <16 x i16> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvttph_epu8(i16 noundef zeroext %__A, <16 x half> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_maskz_ipcvttph_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2iubs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvttph_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2iubs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7c,0xa9,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvttph2iubs256(<16 x half> %__B, <16 x i16> zeroinitializer, i16 %__A, i32 4)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvttph_epu8_round(<16 x half> noundef %__A) {
+; CHECK-LABEL: test_mm256_ipcvttph_epu8_round:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttph2iubs {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x78,0x18,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvttph2iubs256(<16 x half> %__A, <16 x i16> zeroinitializer, i16 -1, i32 8)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvttph_epu8_round(<4 x i64> noundef %__S, i16 noundef zeroext %__A, <16 x half> noundef %__B) {
+; X64-LABEL: test_mm256_mask_ipcvttph_epu8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2iubs {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x19,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvttph_epu8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2iubs {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x78,0x19,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <16 x i16>
+ %1 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvttph2iubs256(<16 x half> %__B, <16 x i16> %0, i16 %__A, i32 8)
+ %2 = bitcast <16 x i16> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvttph_epu8_round(i16 noundef zeroext %__A, <16 x half> noundef %__B) {
+; X64-LABEL: test_mm256_maskz_ipcvttph_epu8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttph2iubs {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvttph_epu8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttph2iubs {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x78,0x99,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.mask.vcvttph2iubs256(<16 x half> %__B, <16 x i16> zeroinitializer, i16 %__A, i32 8)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+declare <16 x i16> @llvm.x86.avx10.mask.vcvttph2iubs256(<16 x half>, <16 x i16>, i16, i32)
+
+define dso_local <2 x i64> @test_mm_ipcvttps_epi8(<4 x float> noundef %__A) {
+; CHECK-LABEL: test_mm_ipcvttps_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttps2ibs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.mask.vcvttps2ibs128(<4 x float> %__A, <4 x i32> zeroinitializer, i8 -1)
+ %1 = bitcast <4 x i32> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <2 x i64> @test_mm_mask_ipcvttps_epi8(<2 x i64> noundef %__S, i8 noundef zeroext %__A, <4 x float> noundef %__B) {
+; X64-LABEL: test_mm_mask_ipcvttps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2ibs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_ipcvttps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2ibs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <2 x i64> %__S to <4 x i32>
+ %1 = tail call <4 x i32> @llvm.x86.avx10.mask.vcvttps2ibs128(<4 x float> %__B, <4 x i32> %0, i8 %__A)
+ %2 = bitcast <4 x i32> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define dso_local <2 x i64> @test_mm_maskz_ipcvttps_epi8(i8 noundef zeroext %__A, <4 x float> noundef %__B) {
+; X64-LABEL: test_mm_maskz_ipcvttps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2ibs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_ipcvttps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2ibs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.mask.vcvttps2ibs128(<4 x float> %__B, <4 x i32> zeroinitializer, i8 %__A)
+ %1 = bitcast <4 x i32> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.x86.avx10.mask.vcvttps2ibs128(<4 x float>, <4 x i32>, i8)
+
+define dso_local <4 x i64> @test_mm256_ipcvttps_epi8(<8 x float> noundef %__A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_ipcvttps_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttps2ibs %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvttps2ibs256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 -1, i32 4)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvttps_epi8(<4 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x float> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_mask_ipcvttps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2ibs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvttps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2ibs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <8 x i32>
+ %1 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvttps2ibs256(<8 x float> %__B, <8 x i32> %0, i8 %__A, i32 4)
+ %2 = bitcast <8 x i32> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvttps_epi8(i8 noundef zeroext %__A, <8 x float> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_maskz_ipcvttps_epi8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2ibs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvttps_epi8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2ibs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvttps2ibs256(<8 x float> %__B, <8 x i32> zeroinitializer, i8 %__A, i32 4)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvttps_epi8_round(<8 x float> noundef %__A) {
+; CHECK-LABEL: test_mm256_ipcvttps_epi8_round:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttps2ibs {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x79,0x18,0x68,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvttps2ibs256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 -1, i32 8)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvttps_epi8_round(<4 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x float> noundef %__B) {
+; X64-LABEL: test_mm256_mask_ipcvttps_epi8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2ibs {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x19,0x68,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvttps_epi8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2ibs {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x19,0x68,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <8 x i32>
+ %1 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvttps2ibs256(<8 x float> %__B, <8 x i32> %0, i8 %__A, i32 8)
+ %2 = bitcast <8 x i32> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvttps_epi8_round(i8 noundef zeroext %__A, <8 x float> noundef %__B) {
+; X64-LABEL: test_mm256_maskz_ipcvttps_epi8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2ibs {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0x99,0x68,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvttps_epi8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2ibs {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0x99,0x68,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvttps2ibs256(<8 x float> %__B, <8 x i32> zeroinitializer, i8 %__A, i32 8)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+declare <8 x i32> @llvm.x86.avx10.mask.vcvttps2ibs256(<8 x float>, <8 x i32>, i8, i32)
+
+define dso_local <2 x i64> @test_mm_ipcvttps_epu8(<4 x float> noundef %__A) {
+; CHECK-LABEL: test_mm_ipcvttps_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttps2iubs %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7d,0x08,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.mask.vcvttps2iubs128(<4 x float> %__A, <4 x i32> zeroinitializer, i8 -1)
+ %1 = bitcast <4 x i32> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define dso_local <2 x i64> @test_mm_mask_ipcvttps_epu8(<2 x i64> noundef %__S, i8 noundef zeroext %__A, <4 x float> noundef %__B) {
+; X64-LABEL: test_mm_mask_ipcvttps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2iubs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_mask_ipcvttps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2iubs %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x09,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <2 x i64> %__S to <4 x i32>
+ %1 = tail call <4 x i32> @llvm.x86.avx10.mask.vcvttps2iubs128(<4 x float> %__B, <4 x i32> %0, i8 %__A)
+ %2 = bitcast <4 x i32> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define dso_local <2 x i64> @test_mm_maskz_ipcvttps_epu8(i8 noundef zeroext %__A, <4 x float> noundef %__B) {
+; X64-LABEL: test_mm_maskz_ipcvttps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2iubs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm_maskz_ipcvttps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2iubs %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0x89,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.mask.vcvttps2iubs128(<4 x float> %__B, <4 x i32> zeroinitializer, i8 %__A)
+ %1 = bitcast <4 x i32> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.x86.avx10.mask.vcvttps2iubs128(<4 x float>, <4 x i32>, i8)
+
+define dso_local <4 x i64> @test_mm256_ipcvttps_epu8(<8 x float> noundef %__A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_ipcvttps_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttps2iubs %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvttps2iubs256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 -1, i32 4)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvttps_epu8(<4 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x float> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_mask_ipcvttps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2iubs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvttps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2iubs %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x7d,0x29,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <8 x i32>
+ %1 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvttps2iubs256(<8 x float> %__B, <8 x i32> %0, i8 %__A, i32 4)
+ %2 = bitcast <8 x i32> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvttps_epu8(i8 noundef zeroext %__A, <8 x float> noundef %__B) local_unnamed_addr #2 {
+; X64-LABEL: test_mm256_maskz_ipcvttps_epu8:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2iubs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvttps_epu8:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2iubs %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7d,0xa9,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvttps2iubs256(<8 x float> %__B, <8 x i32> zeroinitializer, i8 %__A, i32 4)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_ipcvttps_epu8_round(<8 x float> noundef %__A) {
+; CHECK-LABEL: test_mm256_ipcvttps_epu8_round:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvttps2iubs {sae}, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x79,0x18,0x6a,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvttps2iubs256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 -1, i32 8)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define dso_local <4 x i64> @test_mm256_mask_ipcvttps_epu8_round(<4 x i64> noundef %__S, i8 noundef zeroext %__A, <8 x float> noundef %__B) {
+; X64-LABEL: test_mm256_mask_ipcvttps_epu8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2iubs {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x19,0x6a,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_mask_ipcvttps_epu8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2iubs {sae}, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf5,0x79,0x19,0x6a,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = bitcast <4 x i64> %__S to <8 x i32>
+ %1 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvttps2iubs256(<8 x float> %__B, <8 x i32> %0, i8 %__A, i32 8)
+ %2 = bitcast <8 x i32> %1 to <4 x i64>
+ ret <4 x i64> %2
+}
+
+define dso_local <4 x i64> @test_mm256_maskz_ipcvttps_epu8_round(i8 noundef zeroext %__A, <8 x float> noundef %__B) {
+; X64-LABEL: test_mm256_maskz_ipcvttps_epu8_round:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvttps2iubs {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0x99,0x6a,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mm256_maskz_ipcvttps_epu8_round:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvttps2iubs {sae}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x79,0x99,0x6a,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.mask.vcvttps2iubs256(<8 x float> %__B, <8 x i32> zeroinitializer, i8 %__A, i32 8)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+declare <8 x i32> @llvm.x86.avx10.mask.vcvttps2iubs256(<8 x float>, <8 x i32>, i8, i32)
diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
index 25d182afd66e..78870278eeac 100644
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -69,8 +69,12 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
; X64-NEXT: andq $-64, %rsp
; X64-NEXT: subq $128, %rsp
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; X64-NEXT: pushq %rbp
+; X64-NEXT: pushq %rax
; X64-NEXT: movq %rsp, %rdi
; X64-NEXT: callq _func_float16_ptr
+; X64-NEXT: addq $8, %rsp
+; X64-NEXT: popq %rbp
; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0
; X64-NEXT: leaq -16(%rbp), %rsp
; X64-NEXT: popq %r12
@@ -149,8 +153,12 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
; X64-NEXT: subq $128, %rsp
; X64-NEXT: vmovaps %zmm1, %zmm16
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; X64-NEXT: pushq %rbp
+; X64-NEXT: pushq %rax
; X64-NEXT: movq %rsp, %rdi
; X64-NEXT: callq _func_float16_ptr
+; X64-NEXT: addq $8, %rsp
+; X64-NEXT: popq %rbp
; X64-NEXT: vaddps %zmm16, %zmm0, %zmm0
; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0
; X64-NEXT: leaq -16(%rbp), %rsp
diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
new file mode 100644
index 000000000000..2c39560f02d1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-gnu"
+
+; This function uses esi as base pointer, the inline asm clobbers esi, so we
+; should save esi using esp before the inline asm, and restore esi after the
+; inline asm.
+
+define i32 @clober_bp() {
+; CHECK-LABEL: clober_bp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .cfi_offset %ebp, -8
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: .cfi_def_cfa_register %ebp
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: andl $-16, %esp
+; CHECK-NEXT: subl $16, %esp
+; CHECK-NEXT: movl %esp, %esi
+; CHECK-NEXT: .cfi_offset %esi, -16
+; CHECK-NEXT: .cfi_offset %edi, -12
+; CHECK-NEXT: movl $4, 12(%esi)
+; CHECK-NEXT: movl 12(%esi), %eax
+; CHECK-NEXT: addl $3, %eax
+; CHECK-NEXT: andl $-4, %eax
+; CHECK-NEXT: calll __alloca
+; CHECK-NEXT: movl %esp, %eax
+; CHECK-NEXT: andl $-16, %eax
+; CHECK-NEXT: movl %eax, %esp
+; CHECK-NEXT: movl $1, (%eax)
+; CHECK-NEXT: leal 8(%esi), %edi
+; CHECK-NEXT: movl $4, %ecx
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: movl %eax, %esi
+; CHECK-NEXT: #APP
+; CHECK-NEXT: rep movsb (%esi), %es:(%edi)
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: movl 8(%esi), %eax
+; CHECK-NEXT: leal -8(%ebp), %esp
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: retl
+entry:
+ %size = alloca i32, align 4
+ %g = alloca i32, align 4
+ store volatile i32 4, ptr %size, align 4
+ %len = load volatile i32, ptr %size, align 4
+ %var_array = alloca i8, i32 %len, align 16
+ store i32 1, ptr %var_array, align 16
+ %nil = call { ptr, ptr, i32 } asm "rep movsb", "={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(ptr %g, ptr %var_array, i32 4)
+ %retval = load i32, ptr %g, align 4
+ ret i32 %retval
+}
+
+; This function has the same code except the inline asm also clobbers
+; frame pointer.
+
+define i32 @clobber_bpfp() {
+; CHECK-LABEL: clobber_bpfp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .cfi_offset %ebp, -8
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: .cfi_def_cfa_register %ebp
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: andl $-16, %esp
+; CHECK-NEXT: subl $16, %esp
+; CHECK-NEXT: movl %esp, %esi
+; CHECK-NEXT: .cfi_offset %esi, -16
+; CHECK-NEXT: .cfi_offset %edi, -12
+; CHECK-NEXT: movl $4, 12(%esi)
+; CHECK-NEXT: movl 12(%esi), %eax
+; CHECK-NEXT: addl $3, %eax
+; CHECK-NEXT: andl $-4, %eax
+; CHECK-NEXT: calll __alloca
+; CHECK-NEXT: movl %esp, %eax
+; CHECK-NEXT: andl $-16, %eax
+; CHECK-NEXT: movl %eax, %esp
+; CHECK-NEXT: movl $1, (%eax)
+; CHECK-NEXT: leal 8(%esi), %edi
+; CHECK-NEXT: movl $4, %ecx
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: .cfi_remember_state
+; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x74, 0x04, 0x06, 0x11, 0x08, 0x22 #
+; CHECK-NEXT: movl %eax, %esi
+; CHECK-NEXT: #APP
+; CHECK-NEXT: rep movsb (%esi), %es:(%edi)
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: movl 8(%esi), %eax
+; CHECK-NEXT: leal -8(%ebp), %esp
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: retl
+entry:
+ %size = alloca i32, align 4
+ %g = alloca i32, align 4
+ store volatile i32 4, ptr %size, align 4
+ %len = load volatile i32, ptr %size, align 4
+ %var_array = alloca i8, i32 %len, align 16
+ store i32 1, ptr %var_array, align 16
+ %nil = call { ptr, ptr, i32 } asm "rep movsb", "={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags},~{ebp}"(ptr %g, ptr %var_array, i32 4)
+ %retval = load i32, ptr %g, align 4
+ ret i32 %retval
+}
+
diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
new file mode 100644
index 000000000000..6209e1a85e9e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -verify-machineinstrs < %s | FileCheck %s
+
+; Calling convention ghccc uses ebp to pass parameter, so calling a function
+; using ghccc clobbers ebp. We should save and restore ebp around such a call
+; if ebp is used as frame pointer.
+
+declare ghccc i32 @external(i32)
+
+; Basic test with ghccc calling convention.
+define i32 @test1(i32 %0, i32 %1) {
+; CHECK-LABEL: test1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: andq $-16, %rsp
+; CHECK-NEXT: subq $16, %rsp
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_remember_state
+; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 #
+; CHECK-NEXT: movl %esi, %ebp
+; CHECK-NEXT: movq %rdi, %r13
+; CHECK-NEXT: callq external@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: leaq -40(%rbp), %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-NEXT: retq
+ %x = call ghccc i32 @external(i32 %0, i32 %1)
+ ret i32 %x
+}
+
+; Calling convention hipe has similar behavior. It clobbers rbp but not rbx.
+
+declare cc 11 i64 @hipe1(i64)
+declare cc 11 i64 @hipe2(i64, i64, i64, i64, i64, i64, i64)
+
+; Basic test with hipe calling convention.
+define i64 @test2(i64 %a0, i64 %a1) {
+; CHECK-LABEL: test2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: andq $-16, %rsp
+; CHECK-NEXT: subq $16, %rsp
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_remember_state
+; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 #
+; CHECK-NEXT: movq %rsi, %rbp
+; CHECK-NEXT: movq %rdi, %r15
+; CHECK-NEXT: callq hipe1@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: movq %r15, %rax
+; CHECK-NEXT: leaq -40(%rbp), %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-NEXT: retq
+ %x = call cc 11 i64 @hipe1(i64 %a0, i64 %a1)
+ ret i64 %x
+}
+
+; Test with more arguments, so some of them are passed from stack. The spilling
+; of rbp should not disturb stack arguments.
+; fixme: current generated code is wrong because rbp is used to load passed in
+; argument after rbp is assigned argument for function call, it is caused
+; by x86-cf-opt.
+define i64 @test3(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) {
+; CHECK-LABEL: test3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: andq $-16, %rsp
+; CHECK-NEXT: subq $16, %rsp
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_remember_state
+; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 #
+; CHECK-NEXT: movq %rsi, %rbp
+; CHECK-NEXT: movq %rdi, %r15
+; CHECK-NEXT: movq %rdx, %rsi
+; CHECK-NEXT: movq %rcx, %rdx
+; CHECK-NEXT: movq %r8, %rcx
+; CHECK-NEXT: movq %r9, %r8
+; CHECK-NEXT: pushq 24(%rbp)
+; CHECK-NEXT: pushq 16(%rbp)
+; CHECK-NEXT: callq hipe2@PLT
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: addq $16, %rsp
+; CHECK-NEXT: movq %r15, %rax
+; CHECK-NEXT: leaq -40(%rbp), %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-NEXT: retq
+ %x = call cc 11 i64 @hipe2(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7)
+ ret i64 %x
+}
diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll
new file mode 100644
index 000000000000..25c951d8b1a1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s | FileCheck %s
+
+target triple = "x86_64-linux-gnux32"
+
+define i32 @foo() {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-NEXT: subq $16, %rsp
+; CHECK-NEXT: movl $4, -8(%rbp)
+; CHECK-NEXT: movl $5, -4(%rbp)
+; CHECK-NEXT: movl -8(%rbp), %eax
+; CHECK-NEXT: movq %rsp, %rcx
+; CHECK-NEXT: addq $15, %rax
+; CHECK-NEXT: andq $-16, %rax
+; CHECK-NEXT: movq %rcx, %rdx
+; CHECK-NEXT: subq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %rsp
+; CHECK-NEXT: negq %rax
+; CHECK-NEXT: movl $1, (%rcx,%rax)
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_remember_state
+; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 #
+; CHECK-NEXT: movl $123, %ebp
+; CHECK-NEXT: #APP
+; CHECK-NEXT: nop
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: movl -4(%rbp), %eax
+; CHECK-NEXT: movq %rbp, %rsp
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-NEXT: retq
+entry:
+ %size = alloca i32, align 4
+ %g = alloca i32, align 4
+ store volatile i32 4, ptr %size, align 4
+ store volatile i32 5, ptr %g, align 4
+ %len = load volatile i32, ptr %size, align 4
+ %var_array = alloca i8, i32 %len, align 16
+ store i32 1, ptr %var_array, align 16
+ call void asm "nop", "{ebp},~{memory}"(i32 123)
+ %retval = load i32, ptr %g, align 4
+ ret i32 %retval
+}
diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
index 5a63d36a6be4..0965b1c7208f 100644
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -178,7 +178,7 @@ define i32 @test7(i64 %res) nounwind {
; NDD-LABEL: test7:
; NDD: # %bb.0: # %entry
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shrq $32, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x20]
+; NDD-NEXT: shrq $32, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x20]
; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
entry:
@@ -198,9 +198,9 @@ define i32 @test8(i64 %res) nounwind {
;
; NDD-LABEL: test8:
; NDD: # %bb.0:
-; NDD-NEXT: shrq $32, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x20]
+; NDD-NEXT: shrq $32, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x20]
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: cmpl $3, %ecx # encoding: [0x83,0xf9,0x03]
+; NDD-NEXT: cmpl $3, %edi # encoding: [0x83,0xff,0x03]
; NDD-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%lnot = icmp ult i64 %res, 12884901888
@@ -219,7 +219,7 @@ define i32 @test9(i64 %res) nounwind {
; NDD-LABEL: test9:
; NDD: # %bb.0:
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shrq $33, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x21]
+; NDD-NEXT: shrq $33, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x21]
; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%lnot = icmp ult i64 %res, 8589934592
@@ -238,7 +238,7 @@ define i32 @test10(i64 %res) nounwind {
; NDD-LABEL: test10:
; NDD: # %bb.0:
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shrq $32, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x20]
+; NDD-NEXT: shrq $32, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x20]
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%lnot = icmp uge i64 %res, 4294967296
@@ -257,9 +257,9 @@ define i32 @test11(i64 %l) nounwind {
;
; NDD-LABEL: test11:
; NDD: # %bb.0:
-; NDD-NEXT: shrq $47, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x2f]
+; NDD-NEXT: shrq $47, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x2f]
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: cmpl $1, %ecx # encoding: [0x83,0xf9,0x01]
+; NDD-NEXT: cmpl $1, %edi # encoding: [0x83,0xff,0x01]
; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%shr.mask = and i64 %l, -140737488355328
@@ -331,7 +331,7 @@ define i32 @test14(i32 %mask, i32 %base, i32 %intra) {
;
; NDD-LABEL: test14:
; NDD: # %bb.0:
-; NDD-NEXT: shrl $7, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xef,0x07]
+; NDD-NEXT: shrl $7, %edi # EVEX TO LEGACY Compression encoding: [0xc1,0xef,0x07]
; NDD-NEXT: cmovnsl %edx, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x49,0xf2]
; NDD-NEXT: retq # encoding: [0xc3]
%s = lshr i32 %mask, 7
@@ -353,10 +353,10 @@ define zeroext i1 @test15(i32 %bf.load, i32 %n) {
;
; NDD-LABEL: test15:
; NDD: # %bb.0:
-; NDD-NEXT: shrl $16, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xef,0x10]
-; NDD-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
-; NDD-NEXT: cmpl %esi, %eax # encoding: [0x39,0xf0]
-; NDD-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; NDD-NEXT: shrl $16, %edi # EVEX TO LEGACY Compression encoding: [0xc1,0xef,0x10]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
+; NDD-NEXT: setae %cl # encoding: [0x0f,0x93,0xc1]
; NDD-NEXT: orb %cl, %al # EVEX TO LEGACY Compression encoding: [0x08,0xc8]
; NDD-NEXT: retq # encoding: [0xc3]
%bf.lshr = lshr i32 %bf.load, 16
@@ -482,7 +482,7 @@ define i32 @highmask_i64_mask64(i64 %val) {
; NDD-LABEL: highmask_i64_mask64:
; NDD: # %bb.0:
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shrq $41, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x29]
+; NDD-NEXT: shrq $41, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x29]
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, -2199023255552
@@ -526,7 +526,7 @@ define i32 @highmask_i64_mask32(i64 %val) {
; NDD-LABEL: highmask_i64_mask32:
; NDD: # %bb.0:
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shrq $20, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x14]
+; NDD-NEXT: shrq $20, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x14]
; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, -1048576
@@ -584,7 +584,7 @@ define i32 @lowmask_i64_mask64(i64 %val) {
; NDD-LABEL: lowmask_i64_mask64:
; NDD: # %bb.0:
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shlq $16, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xe7,0x10]
+; NDD-NEXT: shlq $16, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xe7,0x10]
; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, 281474976710655
@@ -628,7 +628,7 @@ define i32 @lowmask_i64_mask32(i64 %val) {
; NDD-LABEL: lowmask_i64_mask32:
; NDD: # %bb.0:
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shlq $44, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xe7,0x2c]
+; NDD-NEXT: shlq $44, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xe7,0x2c]
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, 1048575
@@ -739,8 +739,8 @@ define i1 @shifted_mask64_testb(i64 %a) {
;
; NDD-LABEL: shifted_mask64_testb:
; NDD: # %bb.0:
-; NDD-NEXT: shrq $50, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x32]
-; NDD-NEXT: testb %al, %al # encoding: [0x84,0xc0]
+; NDD-NEXT: shrq $50, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x32]
+; NDD-NEXT: testb %dil, %dil # encoding: [0x40,0x84,0xff]
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 287104476244869120 ; 0xff << 50
@@ -758,8 +758,8 @@ define i1 @shifted_mask64_testw(i64 %a) {
;
; NDD-LABEL: shifted_mask64_testw:
; NDD: # %bb.0:
-; NDD-NEXT: shrq $33, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x21]
-; NDD-NEXT: testw %ax, %ax # encoding: [0x66,0x85,0xc0]
+; NDD-NEXT: shrq $33, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x21]
+; NDD-NEXT: testw %di, %di # encoding: [0x66,0x85,0xff]
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 562941363486720 ; 0xffff << 33
@@ -777,8 +777,8 @@ define i1 @shifted_mask64_testl(i64 %a) {
;
; NDD-LABEL: shifted_mask64_testl:
; NDD: # %bb.0:
-; NDD-NEXT: shrq $7, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x07]
-; NDD-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; NDD-NEXT: shrq $7, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x07]
+; NDD-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 549755813760 ; 0xffffffff << 7
@@ -817,9 +817,9 @@ define i1 @shifted_mask64_extra_use_and(i64 %a) {
; NDD: # %bb.0:
; NDD-NEXT: movabsq $287104476244869120, %rax # encoding: [0x48,0xb8,0x00,0x00,0x00,0x00,0x00,0x00,0xfc,0x03]
; NDD-NEXT: # imm = 0x3FC000000000000
-; NDD-NEXT: andq %rax, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x21,0xc7]
+; NDD-NEXT: andq %rax, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x21,0xc7]
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
-; NDD-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NDD-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NDD-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 287104476244869120 ; 0xff << 50
@@ -868,10 +868,10 @@ define i1 @shifted_mask32_extra_use_and(i64 %a) {
;
; NDD-LABEL: shifted_mask32_extra_use_and:
; NDD: # %bb.0:
-; NDD-NEXT: andq $66846720, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xe7,0x00,0x00,0xfc,0x03]
+; NDD-NEXT: andq $66846720, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xe7,0x00,0x00,0xfc,0x03]
; NDD-NEXT: # imm = 0x3FC0000
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
-; NDD-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NDD-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NDD-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 66846720 ; 0xff << 50
diff --git a/llvm/test/CodeGen/X86/combine-srem.ll b/llvm/test/CodeGen/X86/combine-srem.ll
index 4ed00a9d66bd..8bfaa6118b79 100644
--- a/llvm/test/CodeGen/X86/combine-srem.ll
+++ b/llvm/test/CodeGen/X86/combine-srem.ll
@@ -83,7 +83,7 @@ define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_vec_srem_by_minsigned:
@@ -93,7 +93,7 @@ define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
%1 = srem <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
ret <4 x i32> %1
@@ -225,24 +225,28 @@ define <4 x i32> @combine_vec_srem_by_pow2a_neg(<4 x i32> %x) {
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: psrld $30, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
-; SSE-NEXT: psrld $2, %xmm1
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: psubd %xmm1, %xmm2
-; SSE-NEXT: pslld $2, %xmm2
-; SSE-NEXT: paddd %xmm2, %xmm0
+; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_vec_srem_by_pow2a_neg:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
-; AVX-NEXT: vpsrld $30, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm1
-; AVX-NEXT: vpsrld $2, %xmm1, %xmm1
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpsubd %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vpslld $2, %xmm1, %xmm1
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: combine_vec_srem_by_pow2a_neg:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX1-NEXT: vpsrld $30, %xmm1, %xmm1
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_vec_srem_by_pow2a_neg:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
+; AVX2-NEXT: vpsrld $30, %xmm1, %xmm1
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [4294967292,4294967292,4294967292,4294967292]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
%1 = srem <4 x i32> %x, <i32 -4, i32 -4, i32 -4, i32 -4>
ret <4 x i32> %1
}
diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll
index 08e4bde7353a..777eb838b84c 100644
--- a/llvm/test/CodeGen/X86/i386-baseptr.ll
+++ b/llvm/test/CodeGen/X86/i386-baseptr.ll
@@ -109,10 +109,14 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
; CHECK-NEXT: subl %eax, %edx
; CHECK-NEXT: movl %edx, %esp
; CHECK-NEXT: negl %eax
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: subl $28, %esp
; CHECK-NEXT: movl $405, %esi # imm = 0x195
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: addl $28, %esp
+; CHECK-NEXT: popl %esi
; CHECK-NEXT: movl $405, %ebx # imm = 0x195
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
diff --git a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
index 3c98eead8d18..d3ca872509ad 100644
--- a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll
@@ -37,6 +37,8 @@ define void @func() local_unnamed_addr #0 {
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
; CHECK-NEXT: calll static_func
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: #APP
; CHECK-EMPTY:
; CHECK-NEXT: calll static_func
@@ -52,6 +54,8 @@ define void @func() local_unnamed_addr #0 {
; CHECK-NEXT: shrl $0, %esp
; CHECK-EMPTY:
; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: addl $12, %esp
+; CHECK-NEXT: popl %ebp
entry:
%call = tail call i32 @static_func()
;; We test call, CALL, and jmp.
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 275a42e02d74..6fcc1ed068f4 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -59,7 +59,6 @@
; CHECK-NEXT: Constant Hoisting
; CHECK-NEXT: Replace intrinsics with calls to vector library
; CHECK-NEXT: Partially inline calls to library functions
-; CHECK-NEXT: Expand vector predication intrinsics
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index 13fa639dc63b..35c7c0e09f39 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -182,11 +182,11 @@ define i32 @cnt32(i32 %x) nounwind readnone {
; X64-NDD: # %bb.0:
; X64-NDD-NEXT: shrl %edi, %eax
; X64-NDD-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NDD-NEXT: subl %eax, %edi, %eax
-; X64-NDD-NEXT: andl $858993459, %eax, %ecx # imm = 0x33333333
-; X64-NDD-NEXT: shrl $2, %eax
-; X64-NDD-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: subl %eax, %edi
+; X64-NDD-NEXT: andl $858993459, %edi, %eax # imm = 0x33333333
+; X64-NDD-NEXT: shrl $2, %edi
+; X64-NDD-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X64-NDD-NEXT: addl %edi, %eax
; X64-NDD-NEXT: shrl $4, %eax, %ecx
; X64-NDD-NEXT: addl %ecx, %eax
; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
@@ -277,12 +277,12 @@ define i64 @cnt64(i64 %x) nounwind readnone {
; X64-NDD-NEXT: shrq %rdi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rdi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rcx, %rax, %rdx
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: addq %rdx, %rax
+; X64-NDD-NEXT: subq %rax, %rdi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
@@ -491,32 +491,32 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; X64-NDD-NEXT: shrq %rsi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rsi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rdx, %rax, %rsi
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rdx, %rax
-; X64-NDD-NEXT: addq %rsi, %rax
-; X64-NDD-NEXT: shrq $4, %rax, %rsi
-; X64-NDD-NEXT: addq %rsi, %rax
+; X64-NDD-NEXT: subq %rax, %rsi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rsi, %rdx
+; X64-NDD-NEXT: shrq $2, %rsi
+; X64-NDD-NEXT: andq %rax, %rsi
+; X64-NDD-NEXT: addq %rsi, %rdx
+; X64-NDD-NEXT: shrq $4, %rdx, %rsi
+; X64-NDD-NEXT: addq %rsi, %rdx
; X64-NDD-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
-; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: andq %rsi, %rdx
; X64-NDD-NEXT: movabsq $72340172838076673, %r8 # imm = 0x101010101010101
-; X64-NDD-NEXT: imulq %r8, %rax
-; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: imulq %r8, %rdx
+; X64-NDD-NEXT: shrq $56, %rdx
; X64-NDD-NEXT: shrq %rdi, %r9
; X64-NDD-NEXT: andq %r9, %rcx
-; X64-NDD-NEXT: subq %rcx, %rdi, %rcx
-; X64-NDD-NEXT: andq %rdx, %rcx, %rdi
-; X64-NDD-NEXT: shrq $2, %rcx
-; X64-NDD-NEXT: andq %rdx, %rcx
-; X64-NDD-NEXT: addq %rdi, %rcx
-; X64-NDD-NEXT: shrq $4, %rcx, %rdx
-; X64-NDD-NEXT: addq %rdx, %rcx
-; X64-NDD-NEXT: andq %rsi, %rcx
-; X64-NDD-NEXT: imulq %r8, %rcx
-; X64-NDD-NEXT: shrq $56, %rcx
+; X64-NDD-NEXT: subq %rcx, %rdi
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: imulq %r8, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: addq %rdx, %rax
; X64-NDD-NEXT: xorl %edx, %edx
; X64-NDD-NEXT: retq
;
@@ -685,12 +685,12 @@ define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat {
; X64-NDD-NEXT: shrq %rdi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rdi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rcx, %rax, %rdx
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: addq %rdx, %rax
+; X64-NDD-NEXT: subq %rax, %rdi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
@@ -759,12 +759,12 @@ define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize {
; X64-NDD: # %bb.0:
; X64-NDD-NEXT: shrl %edi, %eax
; X64-NDD-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NDD-NEXT: subl %eax, %edi, %eax
-; X64-NDD-NEXT: movl $858993459, %ecx # imm = 0x33333333
-; X64-NDD-NEXT: andl %ecx, %eax, %edx
-; X64-NDD-NEXT: shrl $2, %eax
-; X64-NDD-NEXT: andl %ecx, %eax
-; X64-NDD-NEXT: addl %edx, %eax
+; X64-NDD-NEXT: subl %eax, %edi
+; X64-NDD-NEXT: movl $858993459, %eax # imm = 0x33333333
+; X64-NDD-NEXT: andl %eax, %edi, %ecx
+; X64-NDD-NEXT: shrl $2, %edi
+; X64-NDD-NEXT: andl %edi, %eax
+; X64-NDD-NEXT: addl %ecx, %eax
; X64-NDD-NEXT: shrl $4, %eax, %ecx
; X64-NDD-NEXT: addl %ecx, %eax
; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
@@ -864,12 +864,12 @@ define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize {
; X64-NDD-NEXT: shrq %rdi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rdi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rcx, %rax, %rdx
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: addq %rdx, %rax
+; X64-NDD-NEXT: subq %rax, %rdi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
@@ -1087,32 +1087,32 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X64-NDD-NEXT: shrq %rsi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rsi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rdx, %rax, %rsi
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rdx, %rax
-; X64-NDD-NEXT: addq %rsi, %rax
-; X64-NDD-NEXT: shrq $4, %rax, %rsi
-; X64-NDD-NEXT: addq %rsi, %rax
+; X64-NDD-NEXT: subq %rax, %rsi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rsi, %rdx
+; X64-NDD-NEXT: shrq $2, %rsi
+; X64-NDD-NEXT: andq %rax, %rsi
+; X64-NDD-NEXT: addq %rsi, %rdx
+; X64-NDD-NEXT: shrq $4, %rdx, %rsi
+; X64-NDD-NEXT: addq %rsi, %rdx
; X64-NDD-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
-; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: andq %rsi, %rdx
; X64-NDD-NEXT: movabsq $72340172838076673, %r8 # imm = 0x101010101010101
-; X64-NDD-NEXT: imulq %r8, %rax
-; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: imulq %r8, %rdx
+; X64-NDD-NEXT: shrq $56, %rdx
; X64-NDD-NEXT: shrq %rdi, %r9
; X64-NDD-NEXT: andq %r9, %rcx
-; X64-NDD-NEXT: subq %rcx, %rdi, %rcx
-; X64-NDD-NEXT: andq %rdx, %rcx, %rdi
-; X64-NDD-NEXT: shrq $2, %rcx
-; X64-NDD-NEXT: andq %rdx, %rcx
-; X64-NDD-NEXT: addq %rdi, %rcx
-; X64-NDD-NEXT: shrq $4, %rcx, %rdx
-; X64-NDD-NEXT: addq %rdx, %rcx
-; X64-NDD-NEXT: andq %rsi, %rcx
-; X64-NDD-NEXT: imulq %r8, %rcx
-; X64-NDD-NEXT: shrq $56, %rcx
+; X64-NDD-NEXT: subq %rcx, %rdi
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: imulq %r8, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: addq %rdx, %rax
; X64-NDD-NEXT: xorl %edx, %edx
; X64-NDD-NEXT: retq
;
@@ -1257,11 +1257,11 @@ define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 {
; X64-NDD: # %bb.0:
; X64-NDD-NEXT: shrl %edi, %eax
; X64-NDD-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NDD-NEXT: subl %eax, %edi, %eax
-; X64-NDD-NEXT: andl $858993459, %eax, %ecx # imm = 0x33333333
-; X64-NDD-NEXT: shrl $2, %eax
-; X64-NDD-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: subl %eax, %edi
+; X64-NDD-NEXT: andl $858993459, %edi, %eax # imm = 0x33333333
+; X64-NDD-NEXT: shrl $2, %edi
+; X64-NDD-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X64-NDD-NEXT: addl %edi, %eax
; X64-NDD-NEXT: shrl $4, %eax, %ecx
; X64-NDD-NEXT: addl %ecx, %eax
; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
@@ -1352,12 +1352,12 @@ define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
; X64-NDD-NEXT: shrq %rdi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rdi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rcx, %rax, %rdx
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: addq %rdx, %rax
+; X64-NDD-NEXT: subq %rax, %rdi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
@@ -1568,32 +1568,32 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X64-NDD-NEXT: shrq %rsi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rsi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rdx, %rax, %rsi
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rdx, %rax
-; X64-NDD-NEXT: addq %rsi, %rax
-; X64-NDD-NEXT: shrq $4, %rax, %rsi
-; X64-NDD-NEXT: addq %rsi, %rax
+; X64-NDD-NEXT: subq %rax, %rsi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rsi, %rdx
+; X64-NDD-NEXT: shrq $2, %rsi
+; X64-NDD-NEXT: andq %rax, %rsi
+; X64-NDD-NEXT: addq %rsi, %rdx
+; X64-NDD-NEXT: shrq $4, %rdx, %rsi
+; X64-NDD-NEXT: addq %rsi, %rdx
; X64-NDD-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
-; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: andq %rsi, %rdx
; X64-NDD-NEXT: movabsq $72340172838076673, %r8 # imm = 0x101010101010101
-; X64-NDD-NEXT: imulq %r8, %rax
-; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: imulq %r8, %rdx
+; X64-NDD-NEXT: shrq $56, %rdx
; X64-NDD-NEXT: shrq %rdi, %r9
; X64-NDD-NEXT: andq %r9, %rcx
-; X64-NDD-NEXT: subq %rcx, %rdi, %rcx
-; X64-NDD-NEXT: andq %rdx, %rcx, %rdi
-; X64-NDD-NEXT: shrq $2, %rcx
-; X64-NDD-NEXT: andq %rdx, %rcx
-; X64-NDD-NEXT: addq %rdi, %rcx
-; X64-NDD-NEXT: shrq $4, %rcx, %rdx
-; X64-NDD-NEXT: addq %rdx, %rcx
-; X64-NDD-NEXT: andq %rsi, %rcx
-; X64-NDD-NEXT: imulq %r8, %rcx
-; X64-NDD-NEXT: shrq $56, %rcx
+; X64-NDD-NEXT: subq %rcx, %rdi
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: imulq %r8, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: addq %rdx, %rax
; X64-NDD-NEXT: xorl %edx, %edx
; X64-NDD-NEXT: retq
;
@@ -1739,11 +1739,11 @@ define i32 @popcount_zext_i32(i16 zeroext %x) {
; X64-NDD: # %bb.0:
; X64-NDD-NEXT: shrl %edi, %eax
; X64-NDD-NEXT: andl $21845, %eax # imm = 0x5555
-; X64-NDD-NEXT: subl %eax, %edi, %eax
-; X64-NDD-NEXT: andl $858993459, %eax, %ecx # imm = 0x33333333
-; X64-NDD-NEXT: shrl $2, %eax
-; X64-NDD-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: subl %eax, %edi
+; X64-NDD-NEXT: andl $858993459, %edi, %eax # imm = 0x33333333
+; X64-NDD-NEXT: shrl $2, %edi
+; X64-NDD-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X64-NDD-NEXT: addl %edi, %eax
; X64-NDD-NEXT: shrl $4, %eax, %ecx
; X64-NDD-NEXT: addl %ecx, %eax
; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
diff --git a/llvm/test/CodeGen/X86/select_const_i128.ll b/llvm/test/CodeGen/X86/select_const_i128.ll
index d7859baec815..f0f0c584a7fc 100644
--- a/llvm/test/CodeGen/X86/select_const_i128.ll
+++ b/llvm/test/CodeGen/X86/select_const_i128.ll
@@ -23,8 +23,8 @@ define i128 @select_eq_i128(ptr %a) {
; NDD-NEXT: ptest %xmm0, %xmm0
; NDD-NEXT: setne %al
; NDD-NEXT: addq $-1, %rax
-; NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; NDD-NEXT: adcq $0, %rcx, %rdx
+; NDD-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF
+; NDD-NEXT: adcq $0, %rdx
; NDD-NEXT: retq
%1 = load i128, ptr %a, align 16
%cmp = icmp eq i128 %1, 1
diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
index d2a1e5e42812..33592027dee9 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
@@ -624,7 +624,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -637,7 +637,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; CHECK-AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -649,7 +649,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
; CHECK-AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
-; CHECK-AVX512VL-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
index 3c3944c2082b..a0f937e2c323 100644
--- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
@@ -108,8 +108,10 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-16, %esp
; CHECK-NEXT: cld
+; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: popl %ebp
; CHECK-NEXT: leal -12(%ebp), %esp
; CHECK-NEXT: popl %eax
; CHECK-NEXT: popl %ebx
@@ -127,8 +129,10 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-16, %esp
; CHECK0-NEXT: cld
+; CHECK0-NEXT: pushl %ebp
; CHECK0-NEXT: #APP
; CHECK0-NEXT: #NO_APP
+; CHECK0-NEXT: popl %ebp
; CHECK0-NEXT: leal -12(%ebp), %esp
; CHECK0-NEXT: popl %eax
; CHECK0-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
index 8cda4ba2814b..020004def6e7 100644
--- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll
@@ -136,10 +136,14 @@ define void @clobber_base() #0 {
; X32ABI-NEXT: subl %eax, %edx
; X32ABI-NEXT: negl %eax
; X32ABI-NEXT: movl %edx, %esp
+; X32ABI-NEXT: pushq %rbx
+; X32ABI-NEXT: subl $24, %esp
; X32ABI-NEXT: movl $405, %ebx # imm = 0x195
; X32ABI-NEXT: #APP
; X32ABI-NEXT: nop
; X32ABI-NEXT: #NO_APP
+; X32ABI-NEXT: addl $24, %esp
+; X32ABI-NEXT: popq %rbx
; X32ABI-NEXT: movl $8, %edx
; X32ABI-NEXT: #APP
; X32ABI-NEXT: movl %edx, (%ebx)
@@ -268,6 +272,8 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
; X32ABI-NEXT: subl %eax, %edx
; X32ABI-NEXT: negl %eax
; X32ABI-NEXT: movl %edx, %esp
+; X32ABI-NEXT: pushq %rbx
+; X32ABI-NEXT: subl $24, %esp
; X32ABI-NEXT: movl $405, %ebx # imm = 0x195
; X32ABI-NEXT: #APP
; X32ABI-NEXT: nop
@@ -275,6 +281,8 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
; X32ABI-NEXT: #APP
; X32ABI-NEXT: nop
; X32ABI-NEXT: #NO_APP
+; X32ABI-NEXT: addl $24, %esp
+; X32ABI-NEXT: popq %rbx
; X32ABI-NEXT: movl $8, %edx
; X32ABI-NEXT: #APP
; X32ABI-NEXT: movl %edx, (%ebx)
@@ -385,10 +393,14 @@ define void @vmw_host_printf(ptr %fmt, ...) nounwind {
; X32ABI-NEXT: movl $48, (%eax)
; X32ABI-NEXT: movl $8, (%eax)
; X32ABI-NEXT: xorl %eax, %eax
+; X32ABI-NEXT: pushq %rbx
+; X32ABI-NEXT: subl $24, %esp
; X32ABI-NEXT: xorl %ebx, %ebx
; X32ABI-NEXT: xorl %ecx, %ecx
; X32ABI-NEXT: #APP
; X32ABI-NEXT: #NO_APP
+; X32ABI-NEXT: addl $24, %esp
+; X32ABI-NEXT: popq %rbx
; X32ABI-NEXT: leal -8(%ebp), %esp
; X32ABI-NEXT: popq %rbx
; X32ABI-NEXT: popq %rbp
diff --git a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
index 47aefdbf0e46..b4c18dd7f457 100644
--- a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll
@@ -94,6 +94,8 @@ define i64 @read_flags_reg_pressure() nounwind {
; WIN64-NEXT: pushq %rbx
; WIN64-NEXT: subq $16, %rsp
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
+; WIN64-NEXT: pushq %rbp
+; WIN64-NEXT: pushq %rax
; WIN64-NEXT: #APP
; WIN64-NEXT: #NO_APP
; WIN64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -103,6 +105,8 @@ define i64 @read_flags_reg_pressure() nounwind {
; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; WIN64-NEXT: #APP
; WIN64-NEXT: #NO_APP
+; WIN64-NEXT: addq $8, %rsp
+; WIN64-NEXT: popq %rbp
; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; WIN64-NEXT: addq $16, %rsp
; WIN64-NEXT: popq %rbx
@@ -177,6 +181,8 @@ define void @write_flags_reg_pressure(i64 noundef %0) nounwind {
; WIN64-NEXT: subq $16, %rsp
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; WIN64-NEXT: pushq %rbp
+; WIN64-NEXT: pushq %rax
; WIN64-NEXT: #APP
; WIN64-NEXT: #NO_APP
; WIN64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -186,6 +192,8 @@ define void @write_flags_reg_pressure(i64 noundef %0) nounwind {
; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; WIN64-NEXT: #APP
; WIN64-NEXT: #NO_APP
+; WIN64-NEXT: popq %rax
+; WIN64-NEXT: popq %rbp
; WIN64-NEXT: addq $16, %rsp
; WIN64-NEXT: popq %rbx
; WIN64-NEXT: popq %rdi