From e14f327d8094e02134efa98625acaf6fd43fee08 Mon Sep 17 00:00:00 2001 From: Liao Chunyu Date: Tue, 17 Jun 2025 23:32:01 -0400 Subject: [RISCV] Pre-test for #144461 --- llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll index 371ec7c790dd..522c83fd9fa9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll @@ -470,6 +470,28 @@ define @select_nxv2i64( %a, %v } +define @select_nxv2i64_constant_true( %a, %b, i32 zeroext %evl) { +; CHECK-LABEL: select_nxv2i64_constant_true: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 +; CHECK-NEXT: ret + %v = call @llvm.vp.select.nxv2i64( %a, splat (i64 -1), %b, i32 %evl) + ret %v +} + +define @select_nxv2i64_constant_false( %a, %b, i32 zeroext %evl) { +; CHECK-LABEL: select_nxv2i64_constant_false: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 100 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: ret + %v = call @llvm.vp.select.nxv2i64( %a, %b, splat (i64 100), i32 %evl) + ret %v +} + declare @llvm.vp.select.nxv4i64(, , , i32) define @select_nxv4i64( %a, %b, %c, i32 zeroext %evl) { @@ -702,10 +724,10 @@ define @select_nxv16f64( %a, Date: Wed, 18 Jun 2025 13:23:17 +0900 Subject: PowerPC: Add baseline tests for more f128 libcall handling (#144381) Some of these incorrectly call the l suffixed version of libm functions and others assert. 
--- llvm/test/CodeGen/PowerPC/f128-arith.ll | 445 ++++++++++++++++++++++++++++++++ 1 file changed, 445 insertions(+) diff --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll index decc4a38f7cc..ffa7ac6cb007 100644 --- a/llvm/test/CodeGen/PowerPC/f128-arith.ll +++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll @@ -1403,3 +1403,448 @@ entry: ret fp128 %3 } declare { fp128, i32 } @llvm.frexp.f128.i32(fp128) + + +define dso_local fp128 @acos_f128(fp128 %x) { +; CHECK-LABEL: acos_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl acosl +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: acos_f128: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: stdu r1, -32(r1) +; CHECK-P8-NEXT: std r0, 48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: bl acosl +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr + %result = call fp128 @llvm.acos.f128(fp128 %x) + ret fp128 %result +} + +define dso_local fp128 @asin_f128(fp128 %x) { +; CHECK-LABEL: asin_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl asinl +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: asin_f128: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: stdu r1, -32(r1) +; CHECK-P8-NEXT: std r0, 48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: bl asinl +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: addi 
r1, r1, 32 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr + %result = call fp128 @llvm.asin.f128(fp128 %x) + ret fp128 %result +} + +define dso_local fp128 @atan_f128(fp128 %x) { +; CHECK-LABEL: atan_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl atanl +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: atan_f128: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: stdu r1, -32(r1) +; CHECK-P8-NEXT: std r0, 48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: bl atanl +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr + %result = call fp128 @llvm.atan.f128(fp128 %x) + ret fp128 %result +} + +define dso_local fp128 @atan2_f128(fp128 %x, fp128 %y) { +; CHECK-LABEL: atan2_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl atan2l +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: atan2_f128: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: stdu r1, -32(r1) +; CHECK-P8-NEXT: std r0, 48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: bl atan2l +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr + %result = call fp128 @llvm.atan2.f128(fp128 %x, fp128 %y) + ret fp128 %result +} + +define dso_local fp128 @copysign_f128(fp128 %x, fp128 %y) { +; CHECK-LABEL: 
copysign_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: xscpsgnqp v2, v3, v2 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: copysign_f128: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: addi r3, r1, -16 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: addi r3, r1, -32 +; CHECK-P8-NEXT: stxvd2x vs1, 0, r3 +; CHECK-P8-NEXT: lbz r4, -1(r1) +; CHECK-P8-NEXT: lbz r5, -17(r1) +; CHECK-P8-NEXT: rlwimi r5, r4, 0, 0, 24 +; CHECK-P8-NEXT: stb r5, -17(r1) +; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: blr + %result = call fp128 @llvm.copysign.f128(fp128 %x, fp128 %y) + ret fp128 %result +} + +define dso_local fp128 @cosh_f128(fp128 %x) { +; CHECK-LABEL: cosh_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl coshl +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: cosh_f128: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: stdu r1, -32(r1) +; CHECK-P8-NEXT: std r0, 48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: bl coshl +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr + %result = call fp128 @llvm.cosh.f128(fp128 %x) + ret fp128 %result +} + +define dso_local fp128 @exp10_f128(fp128 %x) { +; CHECK-LABEL: exp10_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl exp10l +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: exp10_f128: +; CHECK-P8: # %bb.0: +; 
CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: stdu r1, -32(r1) +; CHECK-P8-NEXT: std r0, 48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: bl exp10l +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr + %result = call fp128 @llvm.exp10.f128(fp128 %x) + ret fp128 %result +} + +; FIXME: Asserts +; define dso_local fp128 @maximum_f128(fp128 %x, fp128 %y) { +; %result = call fp128 @llvm.maximum.f128(fp128 %x, fp128 %y) +; ret fp128 %result +; } + +; FIXME: Asserts +; define dso_local fp128 @minimum_f128(fp128 %x, fp128 %y) { +; %result = call fp128 @llvm.minimum.f128(fp128 %x, fp128 %y) +; ret fp128 %result +; } + +; FIXME: Asserts +; define dso_local fp128 @maximumnum_f128(fp128 %x, fp128 %y) { +; %result = call fp128 @llvm.maximumnum.f128(fp128 %x, fp128 %y) +; ret fp128 %result +; } + +; FIXME: Asserts +; define dso_local fp128 @minimumnum_f128(fp128 %x, fp128 %y) { +; %result = call fp128 @llvm.minimumnum.f128(fp128 %x, fp128 %y) +; ret fp128 %result +; } + +define dso_local fp128 @ldexp_f128(fp128 %x, i32 %y) { +; CHECK-LABEL: ldexp_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: extsw r5, r5 +; CHECK-NEXT: bl ldexpl +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: ldexp_f128: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: stdu r1, -32(r1) +; CHECK-P8-NEXT: std r0, 48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: extsw r5, r5 +; CHECK-P8-NEXT: bl ldexpl +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr + %result = call fp128 
@llvm.ldexp.f128.i32(fp128 %x, i32 %y) + ret fp128 %result +} + +define dso_local { fp128, fp128 } @modf_f128(fp128 %x) { +; CHECK-LABEL: modf_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: addi r5, r1, 32 +; CHECK-NEXT: bl modfl +; CHECK-NEXT: nop +; CHECK-NEXT: lxv v3, 32(r1) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: modf_f128: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: .cfi_offset r30, -16 +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stdu r1, -64(r1) +; CHECK-P8-NEXT: addi r30, r1, 32 +; CHECK-P8-NEXT: std r0, 80(r1) +; CHECK-P8-NEXT: mr r5, r30 +; CHECK-P8-NEXT: bl modfl +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: lxvd2x vs0, 0, r30 +; CHECK-P8-NEXT: xxswapd v3, vs0 +; CHECK-P8-NEXT: addi r1, r1, 64 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr + %result = call { fp128, fp128 } @llvm.modf.f128(fp128 %x) + ret { fp128, fp128 } %result +} + +define dso_local fp128 @roundeven_f128(fp128 %x) { +; CHECK-LABEL: roundeven_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl roundevenl +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: roundeven_f128: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: stdu r1, -32(r1) +; CHECK-P8-NEXT: std r0, 48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: bl roundevenl +; CHECK-P8-NEXT: 
nop +; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr + %result = call fp128 @llvm.roundeven.f128(fp128 %x) + ret fp128 %result +} + +define dso_local fp128 @sinh_f128(fp128 %x) { +; CHECK-LABEL: sinh_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl sinhl +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: sinh_f128: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: stdu r1, -32(r1) +; CHECK-P8-NEXT: std r0, 48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: bl sinhl +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr + %result = call fp128 @llvm.sinh.f128(fp128 %x) + ret fp128 %result +} + +define dso_local fp128 @tanh_f128(fp128 %x) { +; CHECK-LABEL: tanh_f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl tanhl +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: tanh_f128: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: stdu r1, -32(r1) +; CHECK-P8-NEXT: std r0, 48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: bl tanhl +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr + %result = call fp128 @llvm.tanh.f128(fp128 %x) + ret fp128 %result +} + +define dso_local fp128 @tan_f128(fp128 %x) { +; CHECK-LABEL: tan_f128: +; CHECK: # 
%bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: bl tanl +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: tan_f128: +; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: stdu r1, -32(r1) +; CHECK-P8-NEXT: std r0, 48(r1) +; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: bl tanl +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr + %result = call fp128 @llvm.tan.f128(fp128 %x) + ret fp128 %result +} -- cgit v1.2.3 From 7b9d10d2e6410029fd0750b2e0566432dbf03dc7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2025 13:26:15 +0900 Subject: PowerPC: Fix using long double libm functions for f128 intrinsics (#144382) This wasn't setting the correct libcall names, which default to the l suffixed libm names. 
--- llvm/lib/IR/RuntimeLibcalls.cpp | 143 +++++++++++++++----------------- llvm/test/CodeGen/PowerPC/f128-arith.ll | 48 +++++------ 2 files changed, 91 insertions(+), 100 deletions(-) diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 7396626a03d4..a57b08919346 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -281,6 +281,69 @@ void RuntimeLibcallsInfo::initSoftFloatCmpLibcallPredicates() { SoftFloatCompareLibcallPredicates[RTLIB::UO_PPCF128] = CmpInst::ICMP_NE; } +static void setLongDoubleIsF128Libm(RuntimeLibcallsInfo &Info, + bool FiniteOnlyFuncs = false) { + Info.setLibcallName(RTLIB::REM_F128, "fmodf128"); + Info.setLibcallName(RTLIB::FMA_F128, "fmaf128"); + Info.setLibcallName(RTLIB::SQRT_F128, "sqrtf128"); + Info.setLibcallName(RTLIB::CBRT_F128, "cbrtf128"); + Info.setLibcallName(RTLIB::LOG_F128, "logf128"); + Info.setLibcallName(RTLIB::LOG2_F128, "log2f128"); + Info.setLibcallName(RTLIB::LOG10_F128, "log10f128"); + Info.setLibcallName(RTLIB::EXP_F128, "expf128"); + Info.setLibcallName(RTLIB::EXP2_F128, "exp2f128"); + Info.setLibcallName(RTLIB::EXP10_F128, "exp10f128"); + Info.setLibcallName(RTLIB::SIN_F128, "sinf128"); + Info.setLibcallName(RTLIB::COS_F128, "cosf128"); + Info.setLibcallName(RTLIB::TAN_F128, "tanf128"); + Info.setLibcallName(RTLIB::SINCOS_F128, "sincosf128"); + Info.setLibcallName(RTLIB::ASIN_F128, "asinf128"); + Info.setLibcallName(RTLIB::ACOS_F128, "acosf128"); + Info.setLibcallName(RTLIB::ATAN_F128, "atanf128"); + Info.setLibcallName(RTLIB::ATAN2_F128, "atan2f128"); + Info.setLibcallName(RTLIB::SINH_F128, "sinhf128"); + Info.setLibcallName(RTLIB::COSH_F128, "coshf128"); + Info.setLibcallName(RTLIB::TANH_F128, "tanhf128"); + Info.setLibcallName(RTLIB::POW_F128, "powf128"); + Info.setLibcallName(RTLIB::CEIL_F128, "ceilf128"); + Info.setLibcallName(RTLIB::TRUNC_F128, "truncf128"); + Info.setLibcallName(RTLIB::RINT_F128, "rintf128"); + 
Info.setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128"); + Info.setLibcallName(RTLIB::ROUND_F128, "roundf128"); + Info.setLibcallName(RTLIB::ROUNDEVEN_F128, "roundevenf128"); + Info.setLibcallName(RTLIB::FLOOR_F128, "floorf128"); + Info.setLibcallName(RTLIB::COPYSIGN_F128, "copysignf128"); + Info.setLibcallName(RTLIB::FMIN_F128, "fminf128"); + Info.setLibcallName(RTLIB::FMAX_F128, "fmaxf128"); + Info.setLibcallName(RTLIB::FMINIMUM_F128, "fminimumf128"); + Info.setLibcallName(RTLIB::FMAXIMUM_F128, "fmaximumf128"); + Info.setLibcallName(RTLIB::FMINIMUM_NUM_F128, "fminimum_numf128"); + Info.setLibcallName(RTLIB::FMAXIMUM_NUM_F128, "fmaximum_numf128"); + Info.setLibcallName(RTLIB::LROUND_F128, "lroundf128"); + Info.setLibcallName(RTLIB::LLROUND_F128, "llroundf128"); + Info.setLibcallName(RTLIB::LRINT_F128, "lrintf128"); + Info.setLibcallName(RTLIB::LLRINT_F128, "llrintf128"); + Info.setLibcallName(RTLIB::LDEXP_F128, "ldexpf128"); + Info.setLibcallName(RTLIB::FREXP_F128, "frexpf128"); + Info.setLibcallName(RTLIB::MODF_F128, "modff128"); + + if (FiniteOnlyFuncs) { + Info.setLibcallName(RTLIB::LOG_FINITE_F128, "__logf128_finite"); + Info.setLibcallName(RTLIB::LOG2_FINITE_F128, "__log2f128_finite"); + Info.setLibcallName(RTLIB::LOG10_FINITE_F128, "__log10f128_finite"); + Info.setLibcallName(RTLIB::EXP_FINITE_F128, "__expf128_finite"); + Info.setLibcallName(RTLIB::EXP2_FINITE_F128, "__exp2f128_finite"); + Info.setLibcallName(RTLIB::POW_FINITE_F128, "__powf128_finite"); + } else { + Info.setLibcallName(RTLIB::LOG_FINITE_F128, nullptr); + Info.setLibcallName(RTLIB::LOG2_FINITE_F128, nullptr); + Info.setLibcallName(RTLIB::LOG10_FINITE_F128, nullptr); + Info.setLibcallName(RTLIB::EXP_FINITE_F128, nullptr); + Info.setLibcallName(RTLIB::EXP2_FINITE_F128, nullptr); + Info.setLibcallName(RTLIB::POW_FINITE_F128, nullptr); + } +} + /// Set default libcall names. If a target wants to opt-out of a libcall it /// should be placed here. 
void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) { @@ -295,57 +358,8 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) { #undef LIBCALL_NO_NAME // Use the f128 variants of math functions on x86 - if (TT.isX86() && TT.isGNUEnvironment()) { - setLibcallName(RTLIB::REM_F128, "fmodf128"); - setLibcallName(RTLIB::FMA_F128, "fmaf128"); - setLibcallName(RTLIB::SQRT_F128, "sqrtf128"); - setLibcallName(RTLIB::CBRT_F128, "cbrtf128"); - setLibcallName(RTLIB::LOG_F128, "logf128"); - setLibcallName(RTLIB::LOG_FINITE_F128, "__logf128_finite"); - setLibcallName(RTLIB::LOG2_F128, "log2f128"); - setLibcallName(RTLIB::LOG2_FINITE_F128, "__log2f128_finite"); - setLibcallName(RTLIB::LOG10_F128, "log10f128"); - setLibcallName(RTLIB::LOG10_FINITE_F128, "__log10f128_finite"); - setLibcallName(RTLIB::EXP_F128, "expf128"); - setLibcallName(RTLIB::EXP_FINITE_F128, "__expf128_finite"); - setLibcallName(RTLIB::EXP2_F128, "exp2f128"); - setLibcallName(RTLIB::EXP2_FINITE_F128, "__exp2f128_finite"); - setLibcallName(RTLIB::EXP10_F128, "exp10f128"); - setLibcallName(RTLIB::SIN_F128, "sinf128"); - setLibcallName(RTLIB::COS_F128, "cosf128"); - setLibcallName(RTLIB::TAN_F128, "tanf128"); - setLibcallName(RTLIB::SINCOS_F128, "sincosf128"); - setLibcallName(RTLIB::ASIN_F128, "asinf128"); - setLibcallName(RTLIB::ACOS_F128, "acosf128"); - setLibcallName(RTLIB::ATAN_F128, "atanf128"); - setLibcallName(RTLIB::ATAN2_F128, "atan2f128"); - setLibcallName(RTLIB::SINH_F128, "sinhf128"); - setLibcallName(RTLIB::COSH_F128, "coshf128"); - setLibcallName(RTLIB::TANH_F128, "tanhf128"); - setLibcallName(RTLIB::POW_F128, "powf128"); - setLibcallName(RTLIB::POW_FINITE_F128, "__powf128_finite"); - setLibcallName(RTLIB::CEIL_F128, "ceilf128"); - setLibcallName(RTLIB::TRUNC_F128, "truncf128"); - setLibcallName(RTLIB::RINT_F128, "rintf128"); - setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128"); - setLibcallName(RTLIB::ROUND_F128, "roundf128"); - setLibcallName(RTLIB::ROUNDEVEN_F128, "roundevenf128"); 
- setLibcallName(RTLIB::FLOOR_F128, "floorf128"); - setLibcallName(RTLIB::COPYSIGN_F128, "copysignf128"); - setLibcallName(RTLIB::FMIN_F128, "fminf128"); - setLibcallName(RTLIB::FMAX_F128, "fmaxf128"); - setLibcallName(RTLIB::FMINIMUM_F128, "fminimumf128"); - setLibcallName(RTLIB::FMAXIMUM_F128, "fmaximumf128"); - setLibcallName(RTLIB::FMINIMUM_NUM_F128, "fminimum_numf128"); - setLibcallName(RTLIB::FMAXIMUM_NUM_F128, "fmaximum_numf128"); - setLibcallName(RTLIB::LROUND_F128, "lroundf128"); - setLibcallName(RTLIB::LLROUND_F128, "llroundf128"); - setLibcallName(RTLIB::LRINT_F128, "lrintf128"); - setLibcallName(RTLIB::LLRINT_F128, "llrintf128"); - setLibcallName(RTLIB::LDEXP_F128, "ldexpf128"); - setLibcallName(RTLIB::FREXP_F128, "frexpf128"); - setLibcallName(RTLIB::MODF_F128, "modff128"); - } + if (TT.isX86() && TT.isGNUEnvironment()) + setLongDoubleIsF128Libm(*this, /*FiniteOnlyFuncs=*/true); // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf". if (TT.isPPC()) { @@ -379,31 +393,8 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) { setLibcallName(RTLIB::OGT_F128, "__gtkf2"); setLibcallName(RTLIB::UO_F128, "__unordkf2"); - setLibcallName(RTLIB::LOG_F128, "logf128"); - setLibcallName(RTLIB::LOG2_F128, "log2f128"); - setLibcallName(RTLIB::LOG10_F128, "log10f128"); - setLibcallName(RTLIB::EXP_F128, "expf128"); - setLibcallName(RTLIB::EXP2_F128, "exp2f128"); - setLibcallName(RTLIB::SIN_F128, "sinf128"); - setLibcallName(RTLIB::COS_F128, "cosf128"); - setLibcallName(RTLIB::SINCOS_F128, "sincosf128"); - setLibcallName(RTLIB::POW_F128, "powf128"); - setLibcallName(RTLIB::FMIN_F128, "fminf128"); - setLibcallName(RTLIB::FMAX_F128, "fmaxf128"); - setLibcallName(RTLIB::REM_F128, "fmodf128"); - setLibcallName(RTLIB::SQRT_F128, "sqrtf128"); - setLibcallName(RTLIB::CEIL_F128, "ceilf128"); - setLibcallName(RTLIB::FLOOR_F128, "floorf128"); - setLibcallName(RTLIB::TRUNC_F128, "truncf128"); - setLibcallName(RTLIB::ROUND_F128, "roundf128"); - 
setLibcallName(RTLIB::LROUND_F128, "lroundf128"); - setLibcallName(RTLIB::LLROUND_F128, "llroundf128"); - setLibcallName(RTLIB::RINT_F128, "rintf128"); - setLibcallName(RTLIB::LRINT_F128, "lrintf128"); - setLibcallName(RTLIB::LLRINT_F128, "llrintf128"); - setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128"); - setLibcallName(RTLIB::FMA_F128, "fmaf128"); - setLibcallName(RTLIB::FREXP_F128, "frexpf128"); + // TODO: Do the finite only functions exist? + setLongDoubleIsF128Libm(*this, /*FiniteOnlyFuncs=*/false); if (TT.isOSAIX()) { bool isPPC64 = TT.isPPC64(); diff --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll index ffa7ac6cb007..f9c953d483ff 100644 --- a/llvm/test/CodeGen/PowerPC/f128-arith.ll +++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll @@ -1413,7 +1413,7 @@ define dso_local fp128 @acos_f128(fp128 %x) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: bl acosl +; CHECK-NEXT: bl acosf128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) @@ -1427,7 +1427,7 @@ define dso_local fp128 @acos_f128(fp128 %x) { ; CHECK-P8-NEXT: std r0, 48(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: bl acosl +; CHECK-P8-NEXT: bl acosf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) @@ -1445,7 +1445,7 @@ define dso_local fp128 @asin_f128(fp128 %x) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: bl asinl +; CHECK-NEXT: bl asinf128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) @@ -1459,7 +1459,7 @@ define dso_local fp128 @asin_f128(fp128 %x) { ; CHECK-P8-NEXT: std r0, 48(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: bl asinl +; CHECK-P8-NEXT: bl asinf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 ; 
CHECK-P8-NEXT: ld r0, 16(r1) @@ -1477,7 +1477,7 @@ define dso_local fp128 @atan_f128(fp128 %x) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: bl atanl +; CHECK-NEXT: bl atanf128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) @@ -1491,7 +1491,7 @@ define dso_local fp128 @atan_f128(fp128 %x) { ; CHECK-P8-NEXT: std r0, 48(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: bl atanl +; CHECK-P8-NEXT: bl atanf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) @@ -1509,7 +1509,7 @@ define dso_local fp128 @atan2_f128(fp128 %x, fp128 %y) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: bl atan2l +; CHECK-NEXT: bl atan2f128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) @@ -1523,7 +1523,7 @@ define dso_local fp128 @atan2_f128(fp128 %x, fp128 %y) { ; CHECK-P8-NEXT: std r0, 48(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: bl atan2l +; CHECK-P8-NEXT: bl atan2f128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) @@ -1566,7 +1566,7 @@ define dso_local fp128 @cosh_f128(fp128 %x) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: bl coshl +; CHECK-NEXT: bl coshf128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) @@ -1580,7 +1580,7 @@ define dso_local fp128 @cosh_f128(fp128 %x) { ; CHECK-P8-NEXT: std r0, 48(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: bl coshl +; CHECK-P8-NEXT: bl coshf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) @@ -1598,7 +1598,7 @@ define dso_local fp128 @exp10_f128(fp128 %x) { ; CHECK-NEXT: std r0, 48(r1) ; 
CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: bl exp10l +; CHECK-NEXT: bl exp10f128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) @@ -1612,7 +1612,7 @@ define dso_local fp128 @exp10_f128(fp128 %x) { ; CHECK-P8-NEXT: std r0, 48(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: bl exp10l +; CHECK-P8-NEXT: bl exp10f128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) @@ -1655,7 +1655,7 @@ define dso_local fp128 @ldexp_f128(fp128 %x, i32 %y) { ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: extsw r5, r5 -; CHECK-NEXT: bl ldexpl +; CHECK-NEXT: bl ldexpf128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) @@ -1670,7 +1670,7 @@ define dso_local fp128 @ldexp_f128(fp128 %x, i32 %y) { ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 ; CHECK-P8-NEXT: extsw r5, r5 -; CHECK-P8-NEXT: bl ldexpl +; CHECK-P8-NEXT: bl ldexpf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) @@ -1689,7 +1689,7 @@ define dso_local { fp128, fp128 } @modf_f128(fp128 %x) { ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: addi r5, r1, 32 -; CHECK-NEXT: bl modfl +; CHECK-NEXT: bl modff128 ; CHECK-NEXT: nop ; CHECK-NEXT: lxv v3, 32(r1) ; CHECK-NEXT: addi r1, r1, 48 @@ -1708,7 +1708,7 @@ define dso_local { fp128, fp128 } @modf_f128(fp128 %x) { ; CHECK-P8-NEXT: addi r30, r1, 32 ; CHECK-P8-NEXT: std r0, 80(r1) ; CHECK-P8-NEXT: mr r5, r30 -; CHECK-P8-NEXT: bl modfl +; CHECK-P8-NEXT: bl modff128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: lxvd2x vs0, 0, r30 ; CHECK-P8-NEXT: xxswapd v3, vs0 @@ -1729,7 +1729,7 @@ define dso_local fp128 @roundeven_f128(fp128 %x) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: bl roundevenl +; CHECK-NEXT: bl 
roundevenf128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) @@ -1743,7 +1743,7 @@ define dso_local fp128 @roundeven_f128(fp128 %x) { ; CHECK-P8-NEXT: std r0, 48(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: bl roundevenl +; CHECK-P8-NEXT: bl roundevenf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) @@ -1761,7 +1761,7 @@ define dso_local fp128 @sinh_f128(fp128 %x) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: bl sinhl +; CHECK-NEXT: bl sinhf128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) @@ -1775,7 +1775,7 @@ define dso_local fp128 @sinh_f128(fp128 %x) { ; CHECK-P8-NEXT: std r0, 48(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: bl sinhl +; CHECK-P8-NEXT: bl sinhf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) @@ -1793,7 +1793,7 @@ define dso_local fp128 @tanh_f128(fp128 %x) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: bl tanhl +; CHECK-NEXT: bl tanhf128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) @@ -1807,7 +1807,7 @@ define dso_local fp128 @tanh_f128(fp128 %x) { ; CHECK-P8-NEXT: std r0, 48(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: bl tanhl +; CHECK-P8-NEXT: bl tanhf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) @@ -1825,7 +1825,7 @@ define dso_local fp128 @tan_f128(fp128 %x) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: bl tanl +; CHECK-NEXT: bl tanf128 ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 ; CHECK-NEXT: ld r0, 16(r1) @@ -1839,7 +1839,7 @@ define dso_local fp128 @tan_f128(fp128 
%x) { ; CHECK-P8-NEXT: std r0, 48(r1) ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 ; CHECK-P8-NEXT: .cfi_offset lr, 16 -; CHECK-P8-NEXT: bl tanl +; CHECK-P8-NEXT: bl tanf128 ; CHECK-P8-NEXT: nop ; CHECK-P8-NEXT: addi r1, r1, 32 ; CHECK-P8-NEXT: ld r0, 16(r1) -- cgit v1.2.3 From ad9e591fd53f2cf91a2744973b59669d873658af Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 17 Jun 2025 21:33:50 -0700 Subject: [SelectionDAG][RISCV] Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)) in getNode. (#144565) We already have shl/mul vscale related folds in getNode. This is an alternative to the DAGCombine proposed in #144507. --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 7 + llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll | 4 - llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll | 32 +- llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll | 283 +++---- llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll | 9 +- llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll | 3 +- .../CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll | 2 +- .../CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll | 184 ++--- llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll | 18 - .../CodeGen/RISCV/rvv/vector-interleave-store.ll | 9 +- llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll | 817 ++++++++++----------- llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll | 12 +- .../RISCV/rvv/vp-vector-interleaved-access.ll | 100 ++- 14 files changed, 682 insertions(+), 810 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 45a37622a531..b0e3f534e2aa 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7377,6 +7377,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.getScalarType() == MVT::i1) return getNode(ISD::XOR, DL, VT, N1, N2); + // Fold (add (vscale * C0), (vscale * 
C1)) to (vscale * (C0 + C1)). + if (Opcode == ISD::ADD && N1.getOpcode() == ISD::VSCALE && + N2.getOpcode() == ISD::VSCALE) { + const APInt &C1 = N1->getConstantOperandAPInt(0); + const APInt &C2 = N2->getConstantOperandAPInt(0); + return getVScale(DL, VT, C1 + C2); + } break; case ISD::MUL: assert(VT.isInteger() && "This operator does not apply to FP types!"); diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll index d42c42c7ce03..7c9a283dd54b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll @@ -488,8 +488,6 @@ define @extract_nxv6f16_nxv12f16_6( %in) ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v13, v10, a0 ; CHECK-NEXT: vslidedown.vx v12, v9, a0 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v12, v10, a0 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret @@ -543,8 +541,6 @@ define @extract_nxv6bf16_nxv12bf16_6( @insert_nxv1i8_nxv4i8_3( %vec, @llvm.vector.insert.nxv1i8.nxv4i8( %vec, %subvec, i64 3) @@ -246,8 +245,7 @@ define @insert_nxv16i32_nxv1i32_1( %vec, ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv1i32.nxv16i32( %vec, %subvec, i64 1) @@ -282,8 +280,8 @@ define @insert_nxv16i8_nxv1i8_1( %vec, @insert_nxv16i8_nxv1i8_3( %vec, @llvm.vector.insert.nxv1i8.nxv16i8( %vec, %subvec, i64 3) ret %v @@ -363,8 +361,7 @@ define @insert_nxv32f16_nxv2f16_2( %vec ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v16, a0 ; CHECK-NEXT: ret %v 
= call @llvm.vector.insert.nxv2f16.nxv32f16( %vec, %subvec, i64 2) @@ -376,8 +373,7 @@ define @insert_nxv32f16_nxv2f16_26( %ve ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v14, v16, a0 ; CHECK-NEXT: ret %v = call @llvm.vector.insert.nxv2f16.nxv32f16( %vec, %subvec, i64 26) @@ -422,8 +418,8 @@ define @insert_nxv32i1_nxv8i1_8( %v, @insert_nxv32bf16_nxv2bf16_2( @llvm.vector.insert.nxv2bf16.nxv32bf16( %vec, %subvec, i64 2) @@ -583,8 +578,7 @@ define @insert_nxv32bf16_nxv2bf16_26( @llvm.vector.insert.nxv2bf16.nxv32bf16( %vec, %subvec, i64 26) diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index 28b27bb75f21..9972df97ad9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -1371,6 +1371,8 @@ define @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vf_nx16f64( %va) { ; RV32-NEXT: vmfeq.vf v24, v16, fa5 ; RV32-NEXT: vmfeq.vf v0, v8, fa5 ; RV32-NEXT: srli a0, a0, 3 -; RV32-NEXT: add a1, a0, a0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV32-NEXT: vslideup.vx v0, v24, a0 ; RV32-NEXT: ret ; @@ -4293,8 +4292,7 @@ define @fcmp_oeq_vf_nx16f64( %va) { ; RV64-NEXT: vmfeq.vf v24, v16, fa5 ; RV64-NEXT: vmfeq.vf v0, v8, fa5 ; RV64-NEXT: srli a0, a0, 3 -; RV64-NEXT: add a1, a0, a0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV64-NEXT: vslideup.vx v0, v24, a0 ; 
RV64-NEXT: ret ; @@ -4306,8 +4304,7 @@ define @fcmp_oeq_vf_nx16f64( %va) { ; ZVFHMIN32-NEXT: vmfeq.vf v24, v16, fa5 ; ZVFHMIN32-NEXT: vmfeq.vf v0, v8, fa5 ; ZVFHMIN32-NEXT: srli a0, a0, 3 -; ZVFHMIN32-NEXT: add a1, a0, a0 -; ZVFHMIN32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; ZVFHMIN32-NEXT: vslideup.vx v0, v24, a0 ; ZVFHMIN32-NEXT: ret ; @@ -4319,8 +4316,7 @@ define @fcmp_oeq_vf_nx16f64( %va) { ; ZVFHMIN64-NEXT: vmfeq.vf v24, v16, fa5 ; ZVFHMIN64-NEXT: vmfeq.vf v0, v8, fa5 ; ZVFHMIN64-NEXT: srli a0, a0, 3 -; ZVFHMIN64-NEXT: add a1, a0, a0 -; ZVFHMIN64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; ZVFHMIN64-NEXT: vslideup.vx v0, v24, a0 ; ZVFHMIN64-NEXT: ret %vc = fcmp oeq %va, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll index ef560a7631de..13c63d9c80a9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -2246,8 +2246,7 @@ define @icmp_eq_vv_nxv32i32( %va, @icmp_eq_vx_nxv32i32( %va, i32 %b, ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: add a0, a2, a2 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v16, v25, a2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -2316,8 +2314,7 @@ define @icmp_eq_vx_swap_nxv32i32( %va, i32 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: add a0, a2, a2 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v16, v25, a2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll index 
bd3c29b0c6ef..a85b471530cc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll @@ -3001,9 +3001,8 @@ define @icmp_eq_vi_nx16i64( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vmseq.vi v24, v16, 0 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vx v0, v24, a0 ; CHECK-NEXT: ret %vc = icmp eq %va, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll index c9f9a7973300..790cd56ee952 100644 --- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll +++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll @@ -48,10 +48,10 @@ define internal void @SubRegLivenessUndefInPhi(i64 %cond) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a1, a0, 2 ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: vadd.vi v10, v9, 1 ; CHECK-NEXT: vadd.vi v11, v9, 3 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: vslideup.vx v12, v10, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index baace6d26f14..4753ab915bdf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -191,8 +191,7 @@ define {<2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave3_v2i32_v6i32(<6 x ; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 4 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v12, a0 ; CHECK-NEXT: 
addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -222,8 +221,7 @@ define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave4_v2i32_ ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v8, 2 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v12, v10, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: addi a0, sp, 16 @@ -254,15 +252,13 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vector_deinterle ; CHECK-NEXT: vslidedown.vi v14, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 8 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v13, v12, a1 -; CHECK-NEXT: vslideup.vx v8, v14, a1 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v13, a0 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v13, v12, a0 +; CHECK-NEXT: vslideup.vx v8, v14, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v13, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v10 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -292,16 +288,14 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vecto ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 10 ; CHECK-NEXT: vslidedown.vi v12, v8, 8 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v15, v14, a1 -; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: vslideup.vx v12, v10, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; 
CHECK-NEXT: vslideup.vx v8, v15, a0 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v15, v14, a0 +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: vslideup.vx v12, v10, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v15, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v12 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -330,22 +324,19 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @v ; CHECK-NEXT: vslidedown.vi v12, v8, 2 ; CHECK-NEXT: vslidedown.vi v13, v8, 4 ; CHECK-NEXT: vslidedown.vi v14, v8, 6 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a2, a0, 2 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a2, a0, 3 ; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: add a3, a1, a1 -; CHECK-NEXT: add a4, a2, a1 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vx v10, v9, a2 +; CHECK-NEXT: add a3, a1, a2 +; CHECK-NEXT: vslideup.vx v8, v12, a2 ; CHECK-NEXT: vsetvli zero, a3, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v10, v9, a1 -; CHECK-NEXT: vslideup.vx v8, v12, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: add a1, a4, a1 -; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v10, v11, a2 -; CHECK-NEXT: vslideup.vx v8, v13, a2 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v14, a4 -; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v10, v11, a1 +; CHECK-NEXT: vslideup.vx v8, v13, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v8, v14, a3 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a0) @@ -374,23 +365,20 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 ; CHECK-NEXT: vslidedown.vi v13, v8, 2 ; CHECK-NEXT: vslidedown.vi v14, v8, 4 ; CHECK-NEXT: 
vslidedown.vi v15, v8, 6 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a2, a0, 2 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a2, a0, 3 ; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: add a3, a1, a1 -; CHECK-NEXT: add a4, a2, a1 -; CHECK-NEXT: add a5, a0, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vx v10, v9, a2 +; CHECK-NEXT: add a3, a1, a2 +; CHECK-NEXT: vslideup.vx v8, v13, a2 ; CHECK-NEXT: vsetvli zero, a3, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v10, v9, a1 -; CHECK-NEXT: vslideup.vx v8, v13, a1 -; CHECK-NEXT: add a1, a4, a1 -; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v10, v11, a2 -; CHECK-NEXT: vslideup.vx v8, v14, a2 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v10, v12, a4 -; CHECK-NEXT: vslideup.vx v8, v15, a4 -; CHECK-NEXT: vsetvli zero, a5, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v10, v11, a1 +; CHECK-NEXT: vslideup.vx v8, v14, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v10, v12, a3 +; CHECK-NEXT: vslideup.vx v8, v15, a3 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a0) @@ -551,8 +539,7 @@ define {<2 x float>, <2 x float>, <2 x float>} @vector_deinterleave3_v6f32_v2f32 ; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 4 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v12, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -586,8 +573,7 @@ define {<2 x float>, <2 x float>, <2 x float>, <2 x float>} @vector_deinterleave ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v8, 2 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, 
zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v12, v10, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: addi a0, sp, 16 @@ -622,15 +608,13 @@ define {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @vector_dein ; CHECK-NEXT: vslidedown.vi v14, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 8 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v13, v12, a1 -; CHECK-NEXT: vslideup.vx v8, v14, a1 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v13, a0 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v13, v12, a0 +; CHECK-NEXT: vslideup.vx v8, v14, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v13, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v10 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -664,16 +648,14 @@ define {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 10 ; CHECK-NEXT: vslidedown.vi v12, v8, 8 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v15, v14, a1 -; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: vslideup.vx v12, v10, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v15, a0 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v15, v14, a0 +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: vslideup.vx v12, v10, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v15, a1 ; CHECK-NEXT: 
addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v12 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -707,21 +689,18 @@ define {<1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vslidedown.vi v13, v8, 5 ; CHECK-NEXT: vslidedown.vi v14, v8, 6 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v11, v9, a1 -; CHECK-NEXT: vslideup.vx v10, v12, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v10, v11, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v11, v9, a0 +; CHECK-NEXT: vslideup.vx v10, v12, a0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v10, v11, a1 ; CHECK-NEXT: vslidedown.vi v11, v8, 4 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v11, v13, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v11, v14, a0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v11, v13, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v11, v14, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs2r.v v10, (a0) ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma @@ -751,25 +730,22 @@ define {<1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, ; CHECK-NEXT: vslidedown.vi v10, v8, 7 ; CHECK-NEXT: vslidedown.vi v11, v8, 6 ; CHECK-NEXT: vslidedown.vi v12, v8, 5 -; CHECK-NEXT: srli a1, a0, 3 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: vslidedown.vi v9, v8, 4 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v11, v10, a1 -; CHECK-NEXT: 
vslideup.vx v9, v12, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v9, v11, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v11, v10, a0 +; CHECK-NEXT: vslideup.vx v9, v12, a0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v9, v11, a1 ; CHECK-NEXT: vslidedown.vi v10, v8, 3 ; CHECK-NEXT: vslidedown.vi v11, v8, 2 ; CHECK-NEXT: vslidedown.vi v12, v8, 1 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v11, v10, a1 -; CHECK-NEXT: vslideup.vx v8, v12, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v11, a0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v11, v10, a0 +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v11, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs2r.v v8, (a0) ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index 75f92c86ff09..6144f916ea52 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -2705,16 +2705,10 @@ define {, , , , , , , , , %a, @vector_interleave_nxv32i1_nxv16i1( ; V-NEXT: vmv1r.v v0, v8 ; V-NEXT: vmv.v.i v10, 0 ; V-NEXT: li a0, -1 -; V-NEXT: csrr a1, vlenb ; V-NEXT: vmerge.vim v12, v10, 1, v0 ; V-NEXT: vmv1r.v v0, v9 ; V-NEXT: vmerge.vim v14, v10, 1, v0 -; V-NEXT: srli a1, a1, 2 ; V-NEXT: vwaddu.vv v8, v14, v12 ; V-NEXT: vwmaccu.vx v8, a0, v12 +; V-NEXT: csrr a0, vlenb ; V-NEXT: vmsne.vi v12, v10, 0 ; V-NEXT: vmsne.vi v0, v8, 0 -; V-NEXT: add a0, a1, a1 -; V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; V-NEXT: vslideup.vx v0, v12, a1 +; V-NEXT: srli a0, a0, 2 +; V-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; V-NEXT: vslideup.vx v0, v12, a0 ; V-NEXT: ret ; 
; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1: @@ -38,17 +37,16 @@ define @vector_interleave_nxv32i1_nxv16i1( ; ZVBB-NEXT: vmv1r.v v0, v8 ; ZVBB-NEXT: vmv.v.i v10, 0 ; ZVBB-NEXT: li a0, 1 -; ZVBB-NEXT: csrr a1, vlenb ; ZVBB-NEXT: vmerge.vim v10, v10, 1, v0 -; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vwsll.vi v12, v10, 8 ; ZVBB-NEXT: vmv1r.v v0, v9 ; ZVBB-NEXT: vwaddu.wx v12, v12, a0, v0.t +; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: vmsne.vi v8, v14, 0 ; ZVBB-NEXT: vmsne.vi v0, v12, 0 -; ZVBB-NEXT: add a0, a1, a1 -; ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v8, a1 +; ZVBB-NEXT: srli a0, a0, 2 +; ZVBB-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v8, a0 ; ZVBB-NEXT: ret ; ; ZIP-LABEL: vector_interleave_nxv32i1_nxv16i1: @@ -61,13 +59,12 @@ define @vector_interleave_nxv32i1_nxv16i1( ; ZIP-NEXT: vmerge.vim v12, v10, 1, v0 ; ZIP-NEXT: vmv1r.v v0, v9 ; ZIP-NEXT: vmerge.vim v8, v10, 1, v0 -; ZIP-NEXT: srli a0, a0, 2 ; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12 ; ZIP-NEXT: ri.vzip2a.vv v14, v8, v12 ; ZIP-NEXT: vmsne.vi v8, v10, 0 ; ZIP-NEXT: vmsne.vi v0, v14, 0 -; ZIP-NEXT: add a1, a0, a0 -; ZIP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; ZIP-NEXT: srli a0, a0, 2 +; ZIP-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZIP-NEXT: vslideup.vx v0, v8, a0 ; ZIP-NEXT: ret %res = call @llvm.vector.interleave2.nxv32i1( %a, %b) @@ -508,19 +505,17 @@ define @vector_interleave_nxv48i1_nxv16i1( ; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: vsseg3e8.v v14, (a0) ; CHECK-NEXT: vl2r.v v8, (a2) -; CHECK-NEXT: srli a2, a1, 2 -; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vl2r.v v10, (a3) ; CHECK-NEXT: vl2r.v v12, (a0) -; CHECK-NEXT: add a0, a2, a2 +; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: vmsne.vi v14, v8, 0 ; CHECK-NEXT: vmsne.vi v8, v10, 0 ; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v0, v8, a2 -; CHECK-NEXT: add a0, a1, a1 -; CHECK-NEXT: vsetvli zero, a0, e8, 
m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v14, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v0, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v14, a2 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 6 ; CHECK-NEXT: mul a0, a0, a1 @@ -551,19 +546,17 @@ define @vector_interleave_nxv48i1_nxv16i1( ; ZVBB-NEXT: add a2, a3, a2 ; ZVBB-NEXT: vsseg3e8.v v14, (a0) ; ZVBB-NEXT: vl2r.v v8, (a2) -; ZVBB-NEXT: srli a2, a1, 2 -; ZVBB-NEXT: srli a1, a1, 1 +; ZVBB-NEXT: srli a2, a1, 1 ; ZVBB-NEXT: vl2r.v v10, (a3) ; ZVBB-NEXT: vl2r.v v12, (a0) -; ZVBB-NEXT: add a0, a2, a2 +; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vmsne.vi v14, v8, 0 ; ZVBB-NEXT: vmsne.vi v8, v10, 0 ; ZVBB-NEXT: vmsne.vi v0, v12, 0 -; ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v8, a2 -; ZVBB-NEXT: add a0, a1, a1 -; ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v14, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v8, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v14, a2 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: li a1, 6 ; ZVBB-NEXT: mul a0, a0, a1 @@ -812,22 +805,20 @@ define @vector_interleave_nxv64i1_nxv16i1( ; CHECK-NEXT: add a2, a4, a2 ; CHECK-NEXT: vsseg4e8.v v14, (a0) ; CHECK-NEXT: vl2r.v v8, (a2) -; CHECK-NEXT: srli a2, a1, 2 -; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: srli a2, a1, 1 +; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: vl2r.v v10, (a4) -; CHECK-NEXT: add a4, a2, a2 ; CHECK-NEXT: vl2r.v v12, (a3) ; CHECK-NEXT: vl2r.v v14, (a0) ; CHECK-NEXT: vmsne.vi v16, v8, 0 ; CHECK-NEXT: vmsne.vi v8, v10, 0 ; CHECK-NEXT: vmsne.vi v9, v12, 0 ; CHECK-NEXT: vmsne.vi v0, v14, 0 -; CHECK-NEXT: vsetvli zero, a4, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a2 -; CHECK-NEXT: vslideup.vx v0, v9, a2 -; CHECK-NEXT: add a0, a1, a1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v8, a1 +; 
CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v8, v16, a1 +; CHECK-NEXT: vslideup.vx v0, v9, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v8, a2 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 @@ -859,22 +850,20 @@ define @vector_interleave_nxv64i1_nxv16i1( ; ZVBB-NEXT: add a2, a4, a2 ; ZVBB-NEXT: vsseg4e8.v v14, (a0) ; ZVBB-NEXT: vl2r.v v8, (a2) -; ZVBB-NEXT: srli a2, a1, 2 -; ZVBB-NEXT: srli a1, a1, 1 +; ZVBB-NEXT: srli a2, a1, 1 +; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vl2r.v v10, (a4) -; ZVBB-NEXT: add a4, a2, a2 ; ZVBB-NEXT: vl2r.v v12, (a3) ; ZVBB-NEXT: vl2r.v v14, (a0) ; ZVBB-NEXT: vmsne.vi v16, v8, 0 ; ZVBB-NEXT: vmsne.vi v8, v10, 0 ; ZVBB-NEXT: vmsne.vi v9, v12, 0 ; ZVBB-NEXT: vmsne.vi v0, v14, 0 -; ZVBB-NEXT: vsetvli zero, a4, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v8, v16, a2 -; ZVBB-NEXT: vslideup.vx v0, v9, a2 -; ZVBB-NEXT: add a0, a1, a1 -; ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v8, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; ZVBB-NEXT: vslideup.vx v8, v16, a1 +; ZVBB-NEXT: vslideup.vx v0, v9, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v8, a2 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 3 ; ZVBB-NEXT: add sp, sp, a0 @@ -1114,7 +1103,7 @@ define @vector_interleave_nxv80i1_nxv16i1( ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v18, v12, 1, v0 ; CHECK-NEXT: add a2, a4, a1 -; CHECK-NEXT: srli a3, a1, 2 +; CHECK-NEXT: srli a3, a1, 1 ; CHECK-NEXT: vmv2r.v v20, v14 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v16, v12, 1, v0 @@ -1144,11 +1133,9 @@ define @vector_interleave_nxv80i1_nxv16i1( ; CHECK-NEXT: add a5, a4, a1 ; CHECK-NEXT: vl1r.v v16, (a5) ; CHECK-NEXT: add a5, a5, a1 -; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: vl1r.v v11, (a2) -; CHECK-NEXT: add a2, a3, a3 ; CHECK-NEXT: vl1r.v v15, (a4) -; CHECK-NEXT: add 
a4, a1, a1 ; CHECK-NEXT: vl1r.v v13, (a0) ; CHECK-NEXT: vl1r.v v17, (a5) ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -1156,11 +1143,11 @@ define @vector_interleave_nxv80i1_nxv16i1( ; CHECK-NEXT: vmsne.vi v0, v10, 0 ; CHECK-NEXT: vmsne.vi v8, v14, 0 ; CHECK-NEXT: vmsne.vi v9, v12, 0 -; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v0, v18, a3 -; CHECK-NEXT: vslideup.vx v9, v8, a3 -; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v9, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v0, v18, a1 +; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v9, a3 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmsne.vi v8, v16, 0 ; CHECK-NEXT: csrr a0, vlenb @@ -1190,7 +1177,7 @@ define @vector_interleave_nxv80i1_nxv16i1( ; ZVBB-NEXT: vmv1r.v v0, v8 ; ZVBB-NEXT: vmerge.vim v18, v12, 1, v0 ; ZVBB-NEXT: add a2, a4, a1 -; ZVBB-NEXT: srli a3, a1, 2 +; ZVBB-NEXT: srli a3, a1, 1 ; ZVBB-NEXT: vmv2r.v v20, v14 ; ZVBB-NEXT: vmv1r.v v0, v9 ; ZVBB-NEXT: vmerge.vim v16, v12, 1, v0 @@ -1220,11 +1207,9 @@ define @vector_interleave_nxv80i1_nxv16i1( ; ZVBB-NEXT: add a5, a4, a1 ; ZVBB-NEXT: vl1r.v v16, (a5) ; ZVBB-NEXT: add a5, a5, a1 -; ZVBB-NEXT: srli a1, a1, 1 +; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vl1r.v v11, (a2) -; ZVBB-NEXT: add a2, a3, a3 ; ZVBB-NEXT: vl1r.v v15, (a4) -; ZVBB-NEXT: add a4, a1, a1 ; ZVBB-NEXT: vl1r.v v13, (a0) ; ZVBB-NEXT: vl1r.v v17, (a5) ; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -1232,11 +1217,11 @@ define @vector_interleave_nxv80i1_nxv16i1( ; ZVBB-NEXT: vmsne.vi v0, v10, 0 ; ZVBB-NEXT: vmsne.vi v8, v14, 0 ; ZVBB-NEXT: vmsne.vi v9, v12, 0 -; ZVBB-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v18, a3 -; ZVBB-NEXT: vslideup.vx v9, v8, a3 -; ZVBB-NEXT: vsetvli zero, a4, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v9, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; 
ZVBB-NEXT: vslideup.vx v0, v18, a1 +; ZVBB-NEXT: vslideup.vx v9, v8, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v9, a3 ; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; ZVBB-NEXT: vmsne.vi v8, v16, 0 ; ZVBB-NEXT: csrr a0, vlenb @@ -2340,47 +2325,45 @@ define @vector_interleave_nxv96i1_nxv16i1( ; CHECK-NEXT: vmv1r.v v17, v9 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v24, v20, 1, v0 -; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: addi a4, sp, 16 ; CHECK-NEXT: vmv1r.v v18, v25 ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmerge.vim v26, v20, 1, v0 -; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: vmv1r.v v19, v27 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vmerge.vim v10, v20, 1, v0 -; CHECK-NEXT: add a3, a0, a2 +; CHECK-NEXT: add a2, a0, a1 ; CHECK-NEXT: vmv1r.v v20, v11 -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vsseg6e8.v v15, (a0) ; CHECK-NEXT: vmv1r.v v15, v22 -; CHECK-NEXT: add a4, a5, a2 +; CHECK-NEXT: add a5, a4, a1 ; CHECK-NEXT: vmv1r.v v16, v8 -; CHECK-NEXT: srli a1, a2, 2 +; CHECK-NEXT: srli a3, a1, 1 ; CHECK-NEXT: vmv1r.v v17, v24 -; CHECK-NEXT: add a6, a4, a2 +; CHECK-NEXT: add a6, a5, a1 ; CHECK-NEXT: vmv1r.v v18, v26 -; CHECK-NEXT: add a7, a3, a2 +; CHECK-NEXT: add a7, a2, a1 ; CHECK-NEXT: vmv1r.v v19, v10 -; CHECK-NEXT: vsseg6e8.v v14, (a5) +; CHECK-NEXT: vsseg6e8.v v14, (a4) ; CHECK-NEXT: vl1r.v v8, (a0) -; CHECK-NEXT: add a0, a6, a2 +; CHECK-NEXT: add a0, a6, a1 ; CHECK-NEXT: vl1r.v v10, (a6) -; CHECK-NEXT: add a6, a7, a2 -; CHECK-NEXT: vl1r.v v12, (a5) -; CHECK-NEXT: add a5, a0, a2 +; CHECK-NEXT: add a6, a7, a1 +; CHECK-NEXT: vl1r.v v12, (a4) +; CHECK-NEXT: add a4, a0, a1 ; CHECK-NEXT: vl1r.v v14, (a7) -; CHECK-NEXT: add a7, a6, a2 -; CHECK-NEXT: vl1r.v v16, (a5) -; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a6, a1 +; CHECK-NEXT: vl1r.v v16, (a4) +; CHECK-NEXT: add a4, a4, a1 ; CHECK-NEXT: vl1r.v v18, (a7) -; 
CHECK-NEXT: add a7, a7, a2 -; CHECK-NEXT: srli a2, a2, 1 -; CHECK-NEXT: vl1r.v v9, (a3) -; CHECK-NEXT: add a3, a1, a1 -; CHECK-NEXT: vl1r.v v17, (a5) -; CHECK-NEXT: add a5, a2, a2 +; CHECK-NEXT: add a7, a7, a1 +; CHECK-NEXT: srli a1, a1, 2 +; CHECK-NEXT: vl1r.v v9, (a2) +; CHECK-NEXT: vl1r.v v17, (a4) ; CHECK-NEXT: vl1r.v v11, (a0) -; CHECK-NEXT: vl1r.v v13, (a4) +; CHECK-NEXT: vl1r.v v13, (a5) ; CHECK-NEXT: vl1r.v v19, (a7) ; CHECK-NEXT: vl1r.v v15, (a6) ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -2390,12 +2373,12 @@ define @vector_interleave_nxv96i1_nxv16i1( ; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: vmsne.vi v10, v18, 0 ; CHECK-NEXT: vmsne.vi v8, v14, 0 -; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v9, v20, a1 ; CHECK-NEXT: vslideup.vx v0, v16, a1 -; CHECK-NEXT: vsetvli zero, a5, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v9, a2 -; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v9, a3 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 12 @@ -2427,47 +2410,45 @@ define @vector_interleave_nxv96i1_nxv16i1( ; ZVBB-NEXT: vmv1r.v v17, v9 ; ZVBB-NEXT: vmv1r.v v0, v10 ; ZVBB-NEXT: vmerge.vim v24, v20, 1, v0 -; ZVBB-NEXT: addi a5, sp, 16 +; ZVBB-NEXT: addi a4, sp, 16 ; ZVBB-NEXT: vmv1r.v v18, v25 ; ZVBB-NEXT: vmv1r.v v0, v11 ; ZVBB-NEXT: vmerge.vim v26, v20, 1, v0 -; ZVBB-NEXT: csrr a2, vlenb +; ZVBB-NEXT: csrr a1, vlenb ; ZVBB-NEXT: vmv1r.v v19, v27 ; ZVBB-NEXT: vmv1r.v v0, v12 ; ZVBB-NEXT: vmerge.vim v10, v20, 1, v0 -; ZVBB-NEXT: add a3, a0, a2 +; ZVBB-NEXT: add a2, a0, a1 ; ZVBB-NEXT: vmv1r.v v20, v11 -; ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; ZVBB-NEXT: vsseg6e8.v v15, (a0) ; ZVBB-NEXT: vmv1r.v v15, v22 -; ZVBB-NEXT: add a4, a5, a2 +; ZVBB-NEXT: add a5, a4, a1 ; 
ZVBB-NEXT: vmv1r.v v16, v8 -; ZVBB-NEXT: srli a1, a2, 2 +; ZVBB-NEXT: srli a3, a1, 1 ; ZVBB-NEXT: vmv1r.v v17, v24 -; ZVBB-NEXT: add a6, a4, a2 +; ZVBB-NEXT: add a6, a5, a1 ; ZVBB-NEXT: vmv1r.v v18, v26 -; ZVBB-NEXT: add a7, a3, a2 +; ZVBB-NEXT: add a7, a2, a1 ; ZVBB-NEXT: vmv1r.v v19, v10 -; ZVBB-NEXT: vsseg6e8.v v14, (a5) +; ZVBB-NEXT: vsseg6e8.v v14, (a4) ; ZVBB-NEXT: vl1r.v v8, (a0) -; ZVBB-NEXT: add a0, a6, a2 +; ZVBB-NEXT: add a0, a6, a1 ; ZVBB-NEXT: vl1r.v v10, (a6) -; ZVBB-NEXT: add a6, a7, a2 -; ZVBB-NEXT: vl1r.v v12, (a5) -; ZVBB-NEXT: add a5, a0, a2 +; ZVBB-NEXT: add a6, a7, a1 +; ZVBB-NEXT: vl1r.v v12, (a4) +; ZVBB-NEXT: add a4, a0, a1 ; ZVBB-NEXT: vl1r.v v14, (a7) -; ZVBB-NEXT: add a7, a6, a2 -; ZVBB-NEXT: vl1r.v v16, (a5) -; ZVBB-NEXT: add a5, a5, a2 +; ZVBB-NEXT: add a7, a6, a1 +; ZVBB-NEXT: vl1r.v v16, (a4) +; ZVBB-NEXT: add a4, a4, a1 ; ZVBB-NEXT: vl1r.v v18, (a7) -; ZVBB-NEXT: add a7, a7, a2 -; ZVBB-NEXT: srli a2, a2, 1 -; ZVBB-NEXT: vl1r.v v9, (a3) -; ZVBB-NEXT: add a3, a1, a1 -; ZVBB-NEXT: vl1r.v v17, (a5) -; ZVBB-NEXT: add a5, a2, a2 +; ZVBB-NEXT: add a7, a7, a1 +; ZVBB-NEXT: srli a1, a1, 2 +; ZVBB-NEXT: vl1r.v v9, (a2) +; ZVBB-NEXT: vl1r.v v17, (a4) ; ZVBB-NEXT: vl1r.v v11, (a0) -; ZVBB-NEXT: vl1r.v v13, (a4) +; ZVBB-NEXT: vl1r.v v13, (a5) ; ZVBB-NEXT: vl1r.v v19, (a7) ; ZVBB-NEXT: vl1r.v v15, (a6) ; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -2477,12 +2458,12 @@ define @vector_interleave_nxv96i1_nxv16i1( ; ZVBB-NEXT: vmsne.vi v0, v12, 0 ; ZVBB-NEXT: vmsne.vi v10, v18, 0 ; ZVBB-NEXT: vmsne.vi v8, v14, 0 -; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v20, a1 ; ZVBB-NEXT: vslideup.vx v0, v16, a1 -; ZVBB-NEXT: vsetvli zero, a5, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v9, a2 -; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v9, a3 +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; 
ZVBB-NEXT: vslideup.vx v8, v10, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: li a1, 12 @@ -3676,23 +3657,21 @@ define @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv128i1_nxv16i1( @vector_interleave_nxv128i1_nxv16i1( @vector_interleave_nxv4bf16_nxv2bf16( @vector_interleave_nxv4bf16_nxv2bf16( @vector_interleave_nxv4bf16_nxv2bf16( @vector_interleave_nxv4f16_nxv2f16( @vector_interleave_nxv4f16_nxv2f16( @vector_interleave_nxv4f16_nxv2f16( @vector_interleave_nxv6f16_nxv2f16( @vector_interleave_nxv6f16_nxv2f16( @vector_interleave_nxv6bf16_nxv2bf16( @vector_interleave_nxv6bf16_nxv2bf16( @vector_interleave_nxv3f32_nxv1f32( @vector_interleave_nxv3f32_nxv1f32( @vector_interleave_nxv8f16_nxv2f16( @vector_interleave_nxv8f16_nxv2f16( @vector_interleave_nxv8bf16_nxv2bf16( @vector_interleave_nxv8bf16_nxv2bf16( @vector_interleave_nxv4f32_nxv1f32( @vector_interleave_nxv4f32_nxv1f32( @vector_interleave_nxv10f16_nxv2f16( @vector_interleave_nxv10f16_nxv2f16( @vector_interleave_nxv10bf16_nxv2bf16( @vector_interleave_nxv10bf16_nxv2bf16( @vector_interleave_nxv5f32_nxv1f32( @vector_interleave_nxv5f32_nxv1f32( @vector_interleave_nxv12f16_nxv2f16( @vector_interleave_nxv12f16_nxv2f16( @vector_interleave_nxv12bf16_nxv2bf16( @vector_interleave_nxv12bf16_nxv2bf16( @vector_interleave_nxv6f32_nxv1f32( @vector_interleave_nxv6f32_nxv1f32( @vector_interleave_nxv14f16_nxv2f16( @vector_interleave_nxv14f16_nxv2f16( @vector_interleave_nxv14f16_nxv2f16( @vector_interleave_nxv14f16_nxv2f16( @vector_interleave_nxv14bf16_nxv2bf16( @vector_interleave_nxv14bf16_nxv2bf16( @vector_interleave_nxv14bf16_nxv2bf16( @vector_interleave_nxv14bf16_nxv2bf16( @vector_interleave_nxv7f32_nxv1f32( @vector_interleave_nxv7f32_nxv1f32( @vector_interleave_nxv7f32_nxv1f32( @vector_interleave_nxv7f32_nxv1f32( 
@vector_interleave_nxv16f16_nxv2f16( @vector_interleave_nxv16f16_nxv2f16( @vector_interleave_nxv16bf16_nxv2bf16( @vector_interleave_nxv16bf16_nxv2bf16( @vector_interleave_nxv8f32_nxv1f32( @vector_interleave_nxv8f32_nxv1f32( @vfptosi_nxv32bf16_nxv32i1( %va) ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16 ; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v24 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vand.vi v12, v12, 1 ; CHECK-NEXT: vmsne.vi v16, v8, 0 ; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v0, v16, a0 ; CHECK-NEXT: ret %evec = fptosi %va to @@ -656,12 +655,11 @@ define @vfptoui_nxv32bf16_nxv32i1( %va) ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16 ; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v24 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vand.vi v12, v12, 1 ; CHECK-NEXT: vmsne.vi v16, v8, 0 ; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v0, v16, a0 ; CHECK-NEXT: ret %evec = fptoui %va to @@ -1654,12 +1652,11 @@ define @vfptosi_nxv32f16_nxv32i1( %va) { ; ZVFHMIN-NEXT: srli a0, a0, 2 ; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v16 ; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v12, v24 -; ZVFHMIN-NEXT: add a1, a0, a0 ; ZVFHMIN-NEXT: vand.vi v8, v8, 1 ; ZVFHMIN-NEXT: vand.vi v12, v12, 1 ; ZVFHMIN-NEXT: vmsne.vi v16, v8, 0 ; ZVFHMIN-NEXT: vmsne.vi v0, v12, 0 -; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0 ; ZVFHMIN-NEXT: ret %evec = fptosi %va to @@ -1684,12 +1681,11 @@ define @vfptoui_nxv32f16_nxv32i1( %va) { ; ZVFHMIN-NEXT: srli a0, a0, 2 ; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v16 ; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v12, v24 -; ZVFHMIN-NEXT: add a1, a0, a0 ; ZVFHMIN-NEXT: vand.vi v8, v8, 
1 ; ZVFHMIN-NEXT: vand.vi v12, v12, 1 ; ZVFHMIN-NEXT: vmsne.vi v16, v8, 0 ; ZVFHMIN-NEXT: vmsne.vi v0, v12, 0 -; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0 ; ZVFHMIN-NEXT: ret %evec = fptoui %va to diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll index 142ee5256f9e..186815405227 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll @@ -567,38 +567,37 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( % ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v8, v0 -; RV32-NEXT: slli a2, a1, 1 ; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: li a1, -1 +; RV32-NEXT: li a2, -1 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vmerge.vim v11, v9, 1, v0 -; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: vwaddu.vv v12, v11, v11 -; RV32-NEXT: vwmaccu.vx v12, a1, v11 +; RV32-NEXT: vwmaccu.vx v12, a2, v11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: srli a2, a2, 2 ; RV32-NEXT: vmsne.vi v0, v12, 0 -; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vx v11, v12, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v11, v12, a2 ; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vmsne.vi v0, v11, 0 -; RV32-NEXT: add a1, a3, a3 +; RV32-NEXT: slli a3, a1, 1 ; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vx v10, v9, a3 -; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma +; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV32-NEXT: 
vslideup.vx v10, v9, a2 +; RV32-NEXT: vsetvli zero, a3, e8, mf2, ta, ma ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vle32.v v10, (a0), v0.t ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; RV32-NEXT: vnsrl.wx v13, v10, a1 ; RV32-NEXT: vmv.x.s a1, v10 ; RV32-NEXT: vnsrl.wi v12, v10, 0 -; RV32-NEXT: srli a2, a2, 1 +; RV32-NEXT: srli a3, a3, 1 ; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m1, ta, ma ; RV32-NEXT: vsseg2e32.v v12, (a0), v0.t ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret @@ -611,26 +610,24 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( % ; RV64-NEXT: li a2, -1 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a4, a1, 33 -; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmerge.vim v11, v9, 1, v0 -; RV64-NEXT: srli a3, a3, 2 ; RV64-NEXT: vwaddu.vv v12, v11, v11 ; RV64-NEXT: vwmaccu.vx v12, a2, v11 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: srli a2, a2, 2 ; RV64-NEXT: vmsne.vi v0, v12, 0 -; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v11, v12, a3 +; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vx v11, v12, a2 ; RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmsne.vi v0, v11, 0 -; RV64-NEXT: add a1, a3, a3 +; RV64-NEXT: slli a3, a1, 33 ; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vx v10, v9, a3 ; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-NEXT: vslideup.vx v10, v9, a2 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: srli a1, a4, 32 +; RV64-NEXT: srli a1, a3, 32 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV64-NEXT: vle32.v v10, (a0), v0.t ; RV64-NEXT: 
li a1, 32 @@ -638,9 +635,9 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( % ; RV64-NEXT: vnsrl.wx v13, v10, a1 ; RV64-NEXT: vmv.x.s a1, v10 ; RV64-NEXT: vnsrl.wi v12, v10, 0 -; RV64-NEXT: srli a4, a4, 33 +; RV64-NEXT: srli a3, a3, 33 ; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vsetvli zero, a4, e32, m1, ta, ma +; RV64-NEXT: vsetvli zero, a3, e32, m1, ta, ma ; RV64-NEXT: vsseg2e32.v v12, (a0), v0.t ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret @@ -807,10 +804,7 @@ define void @not_balanced_store_tree( %v0, ; RV32-NEXT: srli a3, a3, 3 ; RV32-NEXT: vsetvli a4, zero, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vx v8, v12, a3 -; RV32-NEXT: add a4, a3, a3 -; RV32-NEXT: vsetvli zero, a4, e32, m1, ta, ma ; RV32-NEXT: vslideup.vx v12, v8, a3 -; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; RV32-NEXT: vwaddu.vv v16, v12, v9 ; RV32-NEXT: vwmaccu.vx v16, a2, v9 ; RV32-NEXT: vsetvli a3, zero, e32, m2, ta, ma @@ -831,10 +825,7 @@ define void @not_balanced_store_tree( %v0, ; RV64-NEXT: srli a3, a3, 3 ; RV64-NEXT: vsetvli a4, zero, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vx v8, v12, a3 -; RV64-NEXT: add a4, a3, a3 -; RV64-NEXT: vsetvli zero, a4, e32, m1, ta, ma ; RV64-NEXT: vslideup.vx v12, v8, a3 -; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; RV64-NEXT: vwaddu.vv v16, v12, v9 ; RV64-NEXT: vwmaccu.vx v16, a2, v9 ; RV64-NEXT: vsetvli a3, zero, e32, m2, ta, ma @@ -858,29 +849,28 @@ define {, } @not_same_mask( ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v9, v0 ; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: li a2, -1 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vmerge.vim v11, v8, 1, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmerge.vim v9, v8, 1, v0 -; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: vwaddu.vv v12, v9, v11 ; RV32-NEXT: 
vwmaccu.vx v12, a2, v11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: srli a2, a2, 2 ; RV32-NEXT: vmsne.vi v0, v12, 0 -; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vx v9, v12, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v9, v12, a2 ; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vmsne.vi v0, v9, 0 -; RV32-NEXT: add a2, a3, a3 +; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vx v10, v8, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslideup.vx v10, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vle32.v v10, (a0), v0.t @@ -899,26 +889,24 @@ define {, } @not_same_mask( ; RV64-NEXT: li a2, -1 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a1, a1, 33 -; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmerge.vim v11, v8, 1, v0 ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vmerge.vim v9, v8, 1, v0 -; RV64-NEXT: srli a3, a3, 2 ; RV64-NEXT: vwaddu.vv v12, v9, v11 ; RV64-NEXT: vwmaccu.vx v12, a2, v11 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: srli a2, a2, 2 ; RV64-NEXT: vmsne.vi v0, v12, 0 -; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v9, v12, a3 +; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vx v9, v12, a2 ; RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmsne.vi v0, v9, 0 -; RV64-NEXT: add a2, a3, a3 +; RV64-NEXT: slli a1, a1, 33 ; RV64-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vx v10, v8, a3 -; RV64-NEXT: vsetvli a2, 
zero, e8, mf2, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV64-NEXT: vslideup.vx v10, v8, a2 ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma -- cgit v1.2.3 From 74687180dde07312521db09c6f6454fe9d1e5662 Mon Sep 17 00:00:00 2001 From: Kirill Chibisov Date: Wed, 18 Jun 2025 14:38:47 +0900 Subject: [mlir][emitc] Make CExpression trait into interface (#142771) By defining `CExpressionInterface`, we move the side effect detection logic from `emitc.expression` into the individual operations implementing the interface allowing operations to gradually tune the side effect. It also allows checking for side effects each operation individually. --- mlir/include/mlir/Dialect/EmitC/IR/CMakeLists.txt | 6 ++ mlir/include/mlir/Dialect/EmitC/IR/EmitC.h | 2 +- mlir/include/mlir/Dialect/EmitC/IR/EmitC.td | 107 ++++++++++++--------- .../mlir/Dialect/EmitC/IR/EmitCInterfaces.h | 31 ++++++ .../mlir/Dialect/EmitC/IR/EmitCInterfaces.td | 48 +++++++++ mlir/include/mlir/Dialect/EmitC/IR/EmitCTraits.h | 30 ------ mlir/lib/Dialect/EmitC/IR/EmitC.cpp | 6 +- .../Dialect/EmitC/Transforms/FormExpressions.cpp | 2 +- mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp | 3 +- mlir/lib/Target/Cpp/TranslateToCpp.cpp | 6 +- 10 files changed, 159 insertions(+), 82 deletions(-) create mode 100644 mlir/include/mlir/Dialect/EmitC/IR/EmitCInterfaces.h create mode 100644 mlir/include/mlir/Dialect/EmitC/IR/EmitCInterfaces.td delete mode 100644 mlir/include/mlir/Dialect/EmitC/IR/EmitCTraits.h diff --git a/mlir/include/mlir/Dialect/EmitC/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/EmitC/IR/CMakeLists.txt index 610170f5944e..299cee76cb1b 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/EmitC/IR/CMakeLists.txt @@ -1,6 +1,12 @@ add_mlir_dialect(EmitC emitc) add_mlir_doc(EmitC EmitC Dialects/ -gen-dialect-doc -dialect emitc) +set(LLVM_TARGET_DEFINITIONS EmitCInterfaces.td) 
+mlir_tablegen(EmitCInterfaces.h.inc -gen-op-interface-decls) +mlir_tablegen(EmitCInterfaces.cpp.inc -gen-op-interface-defs) +add_public_tablegen_target(MLIREmitCInterfacesIncGen) +add_dependencies(mlir-generic-headers MLIREmitCInterfacesIncGen) + set(LLVM_TARGET_DEFINITIONS EmitCAttributes.td) mlir_tablegen(EmitCEnums.h.inc -gen-enum-decls) mlir_tablegen(EmitCEnums.cpp.inc -gen-enum-defs) diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.h b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.h index 57029c64ffd0..1984ed8a7f06 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.h +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.h @@ -14,7 +14,7 @@ #define MLIR_DIALECT_EMITC_IR_EMITC_H #include "mlir/Bytecode/BytecodeOpInterface.h" -#include "mlir/Dialect/EmitC/IR/EmitCTraits.h" +#include "mlir/Dialect/EmitC/IR/EmitCInterfaces.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td index e53d3e45875d..9ecdb74f4d82 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td @@ -14,6 +14,7 @@ #define MLIR_DIALECT_EMITC_IR_EMITC include "mlir/Dialect/EmitC/IR/EmitCAttributes.td" +include "mlir/Dialect/EmitC/IR/EmitCInterfaces.td" include "mlir/Dialect/EmitC/IR/EmitCTypes.td" include "mlir/Interfaces/CallInterfaces.td" @@ -35,22 +36,31 @@ class EmitC_Op traits = []> // Base class for unary operations. class EmitC_UnaryOp traits = []> : - EmitC_Op { + EmitC_Op { let arguments = (ins EmitCType); let results = (outs EmitCType); let assemblyFormat = "operands attr-dict `:` functional-type(operands, results)"; + + let extraClassDeclaration = [{ + bool hasSideEffects() { + return false; + } + }]; } // Base class for binary operations. 
class EmitC_BinaryOp traits = []> : - EmitC_Op { + EmitC_Op { let arguments = (ins EmitCType:$lhs, EmitCType:$rhs); let results = (outs EmitCType); let assemblyFormat = "operands attr-dict `:` functional-type(operands, results)"; -} -// EmitC OpTrait -def CExpression : NativeOpTrait<"emitc::CExpression">; + let extraClassDeclaration = [{ + bool hasSideEffects() { + return false; + } + }]; +} // Types only used in binary arithmetic operations. def IntegerIndexOrOpaqueType : Type, @@ -103,7 +113,7 @@ def EmitC_FileOp let skipDefaultBuilders = 1; } -def EmitC_AddOp : EmitC_BinaryOp<"add", [CExpression]> { +def EmitC_AddOp : EmitC_BinaryOp<"add", []> { let summary = "Addition operation"; let description = [{ With the `emitc.add` operation the arithmetic operator + (addition) can @@ -126,7 +136,7 @@ def EmitC_AddOp : EmitC_BinaryOp<"add", [CExpression]> { let hasVerifier = 1; } -def EmitC_ApplyOp : EmitC_Op<"apply", [CExpression]> { +def EmitC_ApplyOp : EmitC_Op<"apply", [CExpressionInterface]> { let summary = "Apply operation"; let description = [{ With the `emitc.apply` operation the operators & (address of) and * (contents of) @@ -152,10 +162,17 @@ def EmitC_ApplyOp : EmitC_Op<"apply", [CExpression]> { let assemblyFormat = [{ $applicableOperator `(` $operand `)` attr-dict `:` functional-type($operand, results) }]; + + let extraClassDeclaration = [{ + bool hasSideEffects() { + return getApplicableOperator() == "*"; + } + }]; + let hasVerifier = 1; } -def EmitC_BitwiseAndOp : EmitC_BinaryOp<"bitwise_and", [CExpression]> { +def EmitC_BitwiseAndOp : EmitC_BinaryOp<"bitwise_and", []> { let summary = "Bitwise and operation"; let description = [{ With the `emitc.bitwise_and` operation the bitwise operator & (and) can @@ -173,8 +190,7 @@ def EmitC_BitwiseAndOp : EmitC_BinaryOp<"bitwise_and", [CExpression]> { }]; } -def EmitC_BitwiseLeftShiftOp : EmitC_BinaryOp<"bitwise_left_shift", - [CExpression]> { +def EmitC_BitwiseLeftShiftOp : EmitC_BinaryOp<"bitwise_left_shift", []> { 
let summary = "Bitwise left shift operation"; let description = [{ With the `emitc.bitwise_left_shift` operation the bitwise operator << @@ -192,7 +208,7 @@ def EmitC_BitwiseLeftShiftOp : EmitC_BinaryOp<"bitwise_left_shift", }]; } -def EmitC_BitwiseNotOp : EmitC_UnaryOp<"bitwise_not", [CExpression]> { +def EmitC_BitwiseNotOp : EmitC_UnaryOp<"bitwise_not", []> { let summary = "Bitwise not operation"; let description = [{ With the `emitc.bitwise_not` operation the bitwise operator ~ (not) can @@ -210,7 +226,7 @@ def EmitC_BitwiseNotOp : EmitC_UnaryOp<"bitwise_not", [CExpression]> { }]; } -def EmitC_BitwiseOrOp : EmitC_BinaryOp<"bitwise_or", [CExpression]> { +def EmitC_BitwiseOrOp : EmitC_BinaryOp<"bitwise_or", []> { let summary = "Bitwise or operation"; let description = [{ With the `emitc.bitwise_or` operation the bitwise operator | (or) @@ -228,8 +244,7 @@ def EmitC_BitwiseOrOp : EmitC_BinaryOp<"bitwise_or", [CExpression]> { }]; } -def EmitC_BitwiseRightShiftOp : EmitC_BinaryOp<"bitwise_right_shift", - [CExpression]> { +def EmitC_BitwiseRightShiftOp : EmitC_BinaryOp<"bitwise_right_shift", []> { let summary = "Bitwise right shift operation"; let description = [{ With the `emitc.bitwise_right_shift` operation the bitwise operator >> @@ -247,7 +262,7 @@ def EmitC_BitwiseRightShiftOp : EmitC_BinaryOp<"bitwise_right_shift", }]; } -def EmitC_BitwiseXorOp : EmitC_BinaryOp<"bitwise_xor", [CExpression]> { +def EmitC_BitwiseXorOp : EmitC_BinaryOp<"bitwise_xor", []> { let summary = "Bitwise xor operation"; let description = [{ With the `emitc.bitwise_xor` operation the bitwise operator ^ (xor) @@ -265,7 +280,7 @@ def EmitC_BitwiseXorOp : EmitC_BinaryOp<"bitwise_xor", [CExpression]> { }]; } -def EmitC_CallOpaqueOp : EmitC_Op<"call_opaque", [CExpression]> { +def EmitC_CallOpaqueOp : EmitC_Op<"call_opaque", [CExpressionInterface]> { let summary = "Opaque call operation"; let description = [{ The `emitc.call_opaque` operation represents a C++ function call. 
The callee @@ -312,7 +327,7 @@ def EmitC_CallOpaqueOp : EmitC_Op<"call_opaque", [CExpression]> { } def EmitC_CastOp : EmitC_Op<"cast", - [CExpression, + [CExpressionInterface, DeclareOpInterfaceMethods]> { let summary = "Cast operation"; let description = [{ @@ -335,9 +350,15 @@ def EmitC_CastOp : EmitC_Op<"cast", let arguments = (ins EmitCType:$source); let results = (outs EmitCType:$dest); let assemblyFormat = "$source attr-dict `:` type($source) `to` type($dest)"; + + let extraClassDeclaration = [{ + bool hasSideEffects() { + return false; + } + }]; } -def EmitC_CmpOp : EmitC_BinaryOp<"cmp", [CExpression]> { +def EmitC_CmpOp : EmitC_BinaryOp<"cmp", []> { let summary = "Comparison operation"; let description = [{ With the `emitc.cmp` operation the comparison operators ==, !=, <, <=, >, >=, <=> @@ -407,7 +428,7 @@ def EmitC_ConstantOp : EmitC_Op<"constant", [ConstantLike]> { let hasVerifier = 1; } -def EmitC_DivOp : EmitC_BinaryOp<"div", [CExpression]> { +def EmitC_DivOp : EmitC_BinaryOp<"div", []> { let summary = "Division operation"; let description = [{ With the `emitc.div` operation the arithmetic operator / (division) can @@ -462,7 +483,7 @@ def EmitC_ExpressionOp : EmitC_Op<"expression", ``` The operations allowed within expression body are EmitC operations with the - CExpression trait. + CExpressionInterface interface. When specified, the optional `do_not_inline` indicates that the expression is to be emitted as seen above, i.e. as the rhs of an EmitC SSA value @@ -480,18 +501,8 @@ def EmitC_ExpressionOp : EmitC_Op<"expression", let extraClassDeclaration = [{ bool hasSideEffects() { auto predicate = [](Operation &op) { - assert(op.hasTrait() && "Expected a C expression"); - // Conservatively assume calls to read and write memory. - if (isa(op)) - return true; - // De-referencing reads modifiable memory, address-taking has no - // side-effect. 
- auto applyOp = dyn_cast(op); - if (applyOp) - return applyOp.getApplicableOperator() == "*"; - // Any load operation is assumed to read from memory and thus perform - // a side effect. - return isa(op); + assert(isa(op) && "Expected a C expression"); + return cast(op).hasSideEffects(); }; return llvm::any_of(getRegion().front().without_terminator(), predicate); }; @@ -579,7 +590,7 @@ def EmitC_ForOp : EmitC_Op<"for", } def EmitC_CallOp : EmitC_Op<"call", - [CallOpInterface, CExpression, + [CallOpInterface, CExpressionInterface, DeclareOpInterfaceMethods]> { let summary = "Call operation"; let description = [{ @@ -649,6 +660,10 @@ def EmitC_CallOp : EmitC_Op<"call", void setCalleeFromCallable(CallInterfaceCallable callee) { (*this)->setAttr("callee", cast(callee)); } + + bool hasSideEffects() { + return false; + } }]; let assemblyFormat = [{ @@ -861,7 +876,7 @@ def EmitC_LiteralOp : EmitC_Op<"literal", [Pure]> { let assemblyFormat = "$value attr-dict `:` type($result)"; } -def EmitC_LogicalAndOp : EmitC_BinaryOp<"logical_and", [CExpression]> { +def EmitC_LogicalAndOp : EmitC_BinaryOp<"logical_and", []> { let summary = "Logical and operation"; let description = [{ With the `emitc.logical_and` operation the logical operator && (and) can @@ -882,7 +897,7 @@ def EmitC_LogicalAndOp : EmitC_BinaryOp<"logical_and", [CExpression]> { let assemblyFormat = "operands attr-dict `:` type(operands)"; } -def EmitC_LogicalNotOp : EmitC_UnaryOp<"logical_not", [CExpression]> { +def EmitC_LogicalNotOp : EmitC_UnaryOp<"logical_not", []> { let summary = "Logical not operation"; let description = [{ With the `emitc.logical_not` operation the logical operator ! 
(negation) can @@ -903,7 +918,7 @@ def EmitC_LogicalNotOp : EmitC_UnaryOp<"logical_not", [CExpression]> { let assemblyFormat = "operands attr-dict `:` type(operands)"; } -def EmitC_LogicalOrOp : EmitC_BinaryOp<"logical_or", [CExpression]> { +def EmitC_LogicalOrOp : EmitC_BinaryOp<"logical_or", []> { let summary = "Logical or operation"; let description = [{ With the `emitc.logical_or` operation the logical operator || (inclusive or) @@ -924,7 +939,7 @@ def EmitC_LogicalOrOp : EmitC_BinaryOp<"logical_or", [CExpression]> { let assemblyFormat = "operands attr-dict `:` type(operands)"; } -def EmitC_LoadOp : EmitC_Op<"load", [CExpression, +def EmitC_LoadOp : EmitC_Op<"load", [CExpressionInterface, TypesMatchWith<"result type matches value type of 'operand'", "operand", "result", "::llvm::cast($_self).getValueType()"> @@ -953,7 +968,7 @@ def EmitC_LoadOp : EmitC_Op<"load", [CExpression, let assemblyFormat = "$operand attr-dict `:` type($operand)"; } -def EmitC_MulOp : EmitC_BinaryOp<"mul", [CExpression]> { +def EmitC_MulOp : EmitC_BinaryOp<"mul", []> { let summary = "Multiplication operation"; let description = [{ With the `emitc.mul` operation the arithmetic operator * (multiplication) can @@ -977,7 +992,7 @@ def EmitC_MulOp : EmitC_BinaryOp<"mul", [CExpression]> { let results = (outs FloatIntegerIndexOrOpaqueType); } -def EmitC_RemOp : EmitC_BinaryOp<"rem", [CExpression]> { +def EmitC_RemOp : EmitC_BinaryOp<"rem", []> { let summary = "Remainder operation"; let description = [{ With the `emitc.rem` operation the arithmetic operator % (remainder) can @@ -999,7 +1014,7 @@ def EmitC_RemOp : EmitC_BinaryOp<"rem", [CExpression]> { let results = (outs IntegerIndexOrOpaqueType); } -def EmitC_SubOp : EmitC_BinaryOp<"sub", [CExpression]> { +def EmitC_SubOp : EmitC_BinaryOp<"sub", []> { let summary = "Subtraction operation"; let description = [{ With the `emitc.sub` operation the arithmetic operator - (subtraction) can @@ -1069,7 +1084,7 @@ def EmitC_MemberOfPtrOp : 
EmitC_Op<"member_of_ptr"> { } def EmitC_ConditionalOp : EmitC_Op<"conditional", - [AllTypesMatch<["true_value", "false_value", "result"]>, CExpression]> { + [AllTypesMatch<["true_value", "false_value", "result"]>, CExpressionInterface]> { let summary = "Conditional (ternary) operation"; let description = [{ With the `emitc.conditional` operation the ternary conditional operator can @@ -1096,9 +1111,15 @@ def EmitC_ConditionalOp : EmitC_Op<"conditional", let arguments = (ins I1:$condition, EmitCType:$true_value, EmitCType:$false_value); let results = (outs EmitCType:$result); let assemblyFormat = "operands attr-dict `:` type($result)"; + + let extraClassDeclaration = [{ + bool hasSideEffects() { + return false; + } + }]; } -def EmitC_UnaryMinusOp : EmitC_UnaryOp<"unary_minus", [CExpression]> { +def EmitC_UnaryMinusOp : EmitC_UnaryOp<"unary_minus", []> { let summary = "Unary minus operation"; let description = [{ With the `emitc.unary_minus` operation the unary operator - (minus) can be @@ -1116,7 +1137,7 @@ def EmitC_UnaryMinusOp : EmitC_UnaryOp<"unary_minus", [CExpression]> { }]; } -def EmitC_UnaryPlusOp : EmitC_UnaryOp<"unary_plus", [CExpression]> { +def EmitC_UnaryPlusOp : EmitC_UnaryOp<"unary_plus", []> { let summary = "Unary plus operation"; let description = [{ With the `emitc.unary_plus` operation the unary operator + (plus) can be diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitCInterfaces.h b/mlir/include/mlir/Dialect/EmitC/IR/EmitCInterfaces.h new file mode 100644 index 000000000000..51efe76aceb5 --- /dev/null +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitCInterfaces.h @@ -0,0 +1,31 @@ +//===- EmitCInterfaces.h - EmitC interfaces definitions ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares C++ classes for some of the interfaces used in the EmitC +// dialect. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_EMITC_IR_EMITCINTERFACES_H +#define MLIR_DIALECT_EMITC_IR_EMITCINTERFACES_H + +#include "mlir/IR/OpDefinition.h" + +namespace mlir { +namespace emitc { +// +} // namespace emitc +} // namespace mlir + +//===----------------------------------------------------------------------===// +// EmitC Dialect Interfaces +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/EmitC/IR/EmitCInterfaces.h.inc" + +#endif // MLIR_DIALECT_EMITC_IR_EMITCINTERFACES_H diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitCInterfaces.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitCInterfaces.td new file mode 100644 index 000000000000..777784e56202 --- /dev/null +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitCInterfaces.td @@ -0,0 +1,48 @@ +//===- EmitCInterfaces.td - EmitC Interfaces ---------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the interfaces used by EmitC. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_EMITC_IR_EMITCINTERFACES +#define MLIR_DIALECT_EMITC_IR_EMITCINTERFACES + +include "mlir/IR/OpBase.td" + +def CExpressionInterface : OpInterface<"CExpressionInterface"> { + let description = [{ + Interface to mark operations that can be part of the CExpression. 
+ }]; + + let cppNamespace = "::mlir::emitc"; + let methods = [ + InterfaceMethod<[{ + Check whether operation has side effects that may affect the expression + evaluation. + + By default operation is marked as having side effects. + + ```c++ + class ConcreteOp ... { + public: + bool hasSideEffects() { + // That way we can override the default implementation. + return false; + } + }; + ``` + }], + "bool", "hasSideEffects", (ins), /*methodBody=*/[{}], + /*defaultImplementation=*/[{ + return true; + }]>, + ]; +} + +#endif // MLIR_DIALECT_EMITC_IR_EMITCINTERFACES diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitCTraits.h b/mlir/include/mlir/Dialect/EmitC/IR/EmitCTraits.h deleted file mode 100644 index c1602dfce4b4..000000000000 --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitCTraits.h +++ /dev/null @@ -1,30 +0,0 @@ -//===- EmitCTraits.h - EmitC trait definitions ------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file declares C++ classes for some of the traits used in the EmitC -// dialect. 
-// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_DIALECT_EMITC_IR_EMITCTRAITS_H -#define MLIR_DIALECT_EMITC_IR_EMITCTRAITS_H - -#include "mlir/IR/OpDefinition.h" - -namespace mlir { -namespace OpTrait { -namespace emitc { - -template -class CExpression : public TraitBase {}; - -} // namespace emitc -} // namespace OpTrait -} // namespace mlir - -#endif // MLIR_DIALECT_EMITC_IR_EMITCTRAITS_H diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp index f82b20712b8c..e602210c2dc6 100644 --- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp +++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/EmitC/IR/EmitC.h" -#include "mlir/Dialect/EmitC/IR/EmitCTraits.h" +#include "mlir/Dialect/EmitC/IR/EmitCInterfaces.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypes.h" @@ -418,7 +418,7 @@ LogicalResult ExpressionOp::verify() { return emitOpError("requires yielded type to match return type"); for (Operation &op : region.front().without_terminator()) { - if (!op.hasTrait()) + if (!isa(op)) return emitOpError("contains an unsupported operation"); if (op.getNumResults() != 1) return emitOpError("requires exactly one result for each operation"); @@ -1404,5 +1404,7 @@ void FileOp::build(OpBuilder &builder, OperationState &state, StringRef id) { // TableGen'd op method definitions //===----------------------------------------------------------------------===// +#include "mlir/Dialect/EmitC/IR/EmitCInterfaces.cpp.inc" + #define GET_OP_CLASSES #include "mlir/Dialect/EmitC/IR/EmitC.cpp.inc" diff --git a/mlir/lib/Dialect/EmitC/Transforms/FormExpressions.cpp b/mlir/lib/Dialect/EmitC/Transforms/FormExpressions.cpp index 224d68ab8b4a..2f3e2618f4d7 100644 --- a/mlir/lib/Dialect/EmitC/Transforms/FormExpressions.cpp +++ 
b/mlir/lib/Dialect/EmitC/Transforms/FormExpressions.cpp @@ -36,7 +36,7 @@ struct FormExpressionsPass // Wrap each C operator op with an expression op. OpBuilder builder(context); auto matchFun = [&](Operation *op) { - if (op->hasTrait() && + if (isa(*op) && !op->getParentOfType() && op->getNumResults() == 1) createExpression(op, builder); diff --git a/mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp b/mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp index 87350ecdceaa..a578a86b499a 100644 --- a/mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp @@ -16,8 +16,7 @@ namespace mlir { namespace emitc { ExpressionOp createExpression(Operation *op, OpBuilder &builder) { - assert(op->hasTrait() && - "Expected a C expression"); + assert(isa(op) && "Expected a C expression"); // Create an expression yielding the value returned by op. assert(op->getNumResults() == 1 && "Expected exactly one result"); diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index 5abc112ab8c7..067a0470b14e 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -329,9 +329,9 @@ static bool shouldBeInlined(ExpressionOp expressionOp) { if (hasDeferredEmission(user)) return false; - // Do not inline expressions used by ops with the CExpression trait. If this - // was intended, the user could have been merged into the expression op. - return !user->hasTrait(); + // Do not inline expressions used by ops with the CExpressionInterface. If + // this was intended, the user could have been merged into the expression op. 
+ return !isa(*user); } static LogicalResult printConstantOp(CppEmitter &emitter, Operation *operation, -- cgit v1.2.3 From 10f29a607205c0c17ee9249a66feb63f0fdae182 Mon Sep 17 00:00:00 2001 From: Kunqiu Chen Date: Wed, 18 Jun 2025 14:53:33 +0800 Subject: [MSan] Fix wrong unpoison size in SignalAction (#144071) MSan should unpoison the parameters of extended signal handlers. However, MSan unpoisoned the second parameter with the wrong size `sizeof(__sanitizer_sigaction)`, inconsistent with its real type `siginfo_t`. This commit fixes this issue by correcting the size to `sizeof(__sanitizer_siginfo)`. --- compiler-rt/lib/msan/msan_interceptors.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp index 76255cdb742a..f94d3cb79aa0 100644 --- a/compiler-rt/lib/msan/msan_interceptors.cpp +++ b/compiler-rt/lib/msan/msan_interceptors.cpp @@ -1127,7 +1127,7 @@ static void SignalAction(int signo, void *si, void *uc) { SignalHandlerScope signal_handler_scope; ScopedThreadLocalStateBackup stlsb; UnpoisonParam(3); - __msan_unpoison(si, sizeof(__sanitizer_sigaction)); + __msan_unpoison(si, sizeof(__sanitizer_siginfo)); __msan_unpoison(uc, ucontext_t_sz(uc)); typedef void (*sigaction_cb)(int, void *, void *); -- cgit v1.2.3 From 4d71f20b287e398f10bbff55d52bec9683ef89d2 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Wed, 18 Jun 2025 09:07:08 +0200 Subject: [GlobalISel] prevent G_UNMERGE_VALUES for vectors with different elements (#133335) This commit prevents building a G_UNMERGE_VALUES instruction with different source and destination vector elements in `LegalizationArtifactCombiner::ArtifactValueFinder::tryCombineMergeLike()`, e.g.: `%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)` This LLVM defect was identified via the AMD Fuzzing project. 
--- .../GlobalISel/LegalizationArtifactCombiner.h | 5 +- .../CodeGen/AMDGPU/GlobalISel/insertelement.ll | 55 ++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 22f6a5fde546..8f560c42082f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -997,6 +997,7 @@ public: // Recognize UnmergeSrc that can be unmerged to DstTy directly. // Types have to be either both vector or both non-vector types. + // In case of vector types, the scalar elements need to match. // Merge-like opcodes are combined one at the time. First one creates new // unmerge, following should use the same unmerge (builder performs CSE). // @@ -1005,7 +1006,9 @@ public: // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3 // // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc - if ((DstTy.isVector() == UnmergeSrcTy.isVector()) && + if (((!DstTy.isVector() && !UnmergeSrcTy.isVector()) || + (DstTy.isVector() && UnmergeSrcTy.isVector() && + DstTy.getScalarType() == UnmergeSrcTy.getScalarType())) && (Elt0UnmergeIdx % NumMIElts == 0) && getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) { if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index 8134eb3ca2af..132a89478c5f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -6506,3 +6506,58 @@ entry: %insert = insertelement <5 x double> %vec, double %val, i32 %idx ret <5 x double> %insert } + +; Found by fuzzer, reduced with llvm-reduce. 
+define amdgpu_kernel void @insert_very_small_from_very_large(<32 x i16> %L3, ptr %ptr) { +; GPRIDX-LABEL: insert_very_small_from_very_large: +; GPRIDX: ; %bb.0: ; %bb +; GPRIDX-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0 +; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40 +; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) +; GPRIDX-NEXT: s_lshr_b32 s2, s12, 1 +; GPRIDX-NEXT: s_and_b32 s2, s2, 1 +; GPRIDX-NEXT: s_lshl_b32 s2, s2, 1 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 +; GPRIDX-NEXT: flat_store_byte v[0:1], v2 +; GPRIDX-NEXT: s_endpgm +; +; GFX10-LABEL: insert_very_small_from_very_large: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0 +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_lshr_b32 s2, s12, 1 +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: s_and_b32 s2, s2, 1 +; GFX10-NEXT: v_mov_b32_e32 v1, s1 +; GFX10-NEXT: s_lshl_b32 s2, s2, 1 +; GFX10-NEXT: v_mov_b32_e32 v2, s2 +; GFX10-NEXT: flat_store_byte v[0:1], v2 +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: insert_very_small_from_very_large: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b512 s[8:23], s[4:5], 0x0 +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x40 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_lshr_b32 s2, s8, 1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_and_b32 s2, s2, 1 +; GFX11-NEXT: v_mov_b32_e32 v1, s1 +; GFX11-NEXT: s_lshl_b32 s2, s2, 1 +; GFX11-NEXT: v_mov_b32_e32 v2, s2 +; GFX11-NEXT: flat_store_b8 v[0:1], v2 +; GFX11-NEXT: s_endpgm +bb: + %a = bitcast <32 x i16> %L3 to i512 + %b = trunc i512 %a to i8 + %c = trunc i8 %b to i2 + %d = bitcast i2 %c to <2 x i1> + %insert = insertelement <2 x i1> %d, i1 false, i32 0 + store <2 x i1> %insert, ptr %ptr, align 1 + ret void +} -- cgit v1.2.3 From 896e187a6e923b8441428f9db63c412d989fc51d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: 
Tue, 17 Jun 2025 17:00:21 +0100 Subject: [X86] combineAndMaskToShift - pull out repeated SDLoc(). NFC. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7f425b3d479d..335481b97d47 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51225,7 +51225,8 @@ static SDValue combineBitOpWithPACK(unsigned Opc, const SDLoc &DL, EVT VT, /// If this is a zero/all-bits result that is bitwise-anded with a low bits /// mask. (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and' /// with a shift-right to eliminate loading the vector constant mask value. -static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, +static SDValue combineAndMaskToShift(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue Op0 = peekThroughBitcasts(N->getOperand(0)); SDValue Op1 = peekThroughBitcasts(N->getOperand(1)); @@ -51255,7 +51256,6 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, Y = Op1; } if (X && Y) { - SDLoc DL(N); SDValue Sra = getTargetVShiftByConstNode(X86ISD::VSRAI, DL, VT.getSimpleVT(), X, VT.getScalarSizeInBits() - 1, DAG); @@ -51278,7 +51278,6 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, if (EltBitWidth != DAG.ComputeNumSignBits(Op0)) return SDValue(); - SDLoc DL(N); unsigned ShiftVal = SplatVal.countr_one(); SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8); SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT, Op0, ShAmt); @@ -51845,7 +51844,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (SDValue R = combineAndNotIntoANDNP(N, DAG)) return R; - if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget)) + if (SDValue ShiftRight = combineAndMaskToShift(N, dl, DAG, Subtarget)) return ShiftRight; if (SDValue R = 
combineAndLoadToBZHI(N, DAG, Subtarget)) -- cgit v1.2.3 From dac94f28e696e8234ec69bbed549533ea6b00227 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 17 Jun 2025 17:06:02 +0100 Subject: [X86] combineAndNotOrIntoAndNotAnd - pull out repeated SDLoc(). NFC. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 335481b97d47..62912f1c8d12 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51308,13 +51308,11 @@ static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) { /// Folds (and X, (or Y, ~Z)) --> (and X, ~(and ~Y, Z)) /// This undoes the inverse fold performed in InstCombine -static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, SelectionDAG &DAG) { - +static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG) { using namespace llvm::SDPatternMatch; MVT VT = N->getSimpleValueType(0); - SDLoc DL(N); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!TLI.hasAndNot(SDValue(N, 0))) + if (!DAG.getTargetLoweringInfo().hasAndNot(SDValue(N, 0))) return SDValue(); SDValue X, Y, Z; @@ -51850,7 +51848,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget)) return R; - if (SDValue R = combineAndNotOrIntoAndNotAnd(N, DAG)) + if (SDValue R = combineAndNotOrIntoAndNotAnd(N, dl, DAG)) return R; // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2)) -- cgit v1.2.3 From 0875bee2b10185eca40aea3b3f49eb8462522eda Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 17 Jun 2025 17:14:13 +0100 Subject: [X86] combineAndNotIntoANDNP - pull out repeated SDLoc(). NFC. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 62912f1c8d12..1ca5fc5376f0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -50839,7 +50839,8 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, } /// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y). -static SDValue combineAndNotIntoANDNP(SDNode *N, SelectionDAG &DAG) { +static SDValue combineAndNotIntoANDNP(SDNode *N, const SDLoc &DL, + SelectionDAG &DAG) { assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDNP"); MVT VT = N->getSimpleValueType(0); @@ -50861,7 +50862,7 @@ static SDValue combineAndNotIntoANDNP(SDNode *N, SelectionDAG &DAG) { X = DAG.getBitcast(VT, X); Y = DAG.getBitcast(VT, Y); - return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y); + return DAG.getNode(X86ISD::ANDNP, DL, VT, X, Y); } /// Try to fold: @@ -51839,7 +51840,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget)) return R; - if (SDValue R = combineAndNotIntoANDNP(N, DAG)) + if (SDValue R = combineAndNotIntoANDNP(N, dl ,DAG)) return R; if (SDValue ShiftRight = combineAndMaskToShift(N, dl, DAG, Subtarget)) -- cgit v1.2.3 From 44b715293fcad79ef4a54474627ac574a759fa5a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 17 Jun 2025 18:38:40 +0100 Subject: [PhaseOrdering][X86] Copy FMUL+ADDSUB/FMADDSUB build vector patterns from codegen tests As detailed on #144489 - confirm the vectorisation of scalar FMUL+ADDSUB/FMADDSUB on various targets --- llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll | 720 +++++++++++++++++++++ 1 file changed, 720 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll diff --git a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll 
b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll new file mode 100644 index 000000000000..ad4452431a48 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll @@ -0,0 +1,720 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4 +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=bdver2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX_FMA4 +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX_FMA3 +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 +; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 +; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4 +; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=bdver2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX_FMA4 +; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX_FMA3 +; RUN: opt < %s -passes="default<O3>" -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 + +; This test checks the vectorisation of FMUL+ADDSUB/FMADDSUB patterns, including cases with undef elements. + +; Ideally, this should reach the backend with 1 fmul, 1 fsub, 1 fadd, and 1 shuffle. +; That may require some coordination between VectorCombine, SLP, and other passes.
+ +define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) #0 { +; CHECK-LABEL: @buildvector_mul_addsub_ps128( +; CHECK-NEXT: [[A:%.*]] = fmul <4 x float> [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = fsub <4 x float> [[A]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-NEXT: ret <4 x float> [[TMP2]] +; + %A = fmul <4 x float> %C, %D + %A0 = extractelement <4 x float> %A, i32 0 + %B0 = extractelement <4 x float> %B, i32 0 + %sub0 = fsub float %A0, %B0 + %A2 = extractelement <4 x float> %A, i32 2 + %B2 = extractelement <4 x float> %B, i32 2 + %sub2 = fsub float %A2, %B2 + %A1 = extractelement <4 x float> %A, i32 1 + %B1 = extractelement <4 x float> %B, i32 1 + %add1 = fadd float %A1, %B1 + %A3 = extractelement <4 x float> %A, i32 3 + %B3 = extractelement <4 x float> %B, i32 3 + %add3 = fadd float %A3, %B3 + %vecinsert1 = insertelement <4 x float> undef, float %sub0, i32 0 + %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add1, i32 1 + %vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub2, i32 2 + %vecinsert4 = insertelement <4 x float> %vecinsert3, float %add3, i32 3 + ret <4 x float> %vecinsert4 +} + +define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) #0 { +; CHECK-LABEL: @buildvector_mul_addsub_pd128( +; CHECK-NEXT: [[A:%.*]] = fmul <2 x double> [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = fsub <2 x double> [[A]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[A]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3> +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %A = fmul <2 x double> %C, %D + %A0 = extractelement <2 x double> %A, i32 0 + %B0 = extractelement <2 x double> %B, i32 0 + %sub0 = fsub double %A0, %B0 + %A1 = extractelement <2 x double>
%A, i32 1 + %B1 = extractelement <2 x double> %B, i32 1 + %add1 = fadd double %A1, %B1 + %vecinsert1 = insertelement <2 x double> undef, double %sub0, i32 0 + %vecinsert2 = insertelement <2 x double> %vecinsert1, double %add1, i32 1 + ret <2 x double> %vecinsert2 +} + +define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) #0 { +; SSE2-LABEL: @buildvector_mul_addsub_ps256( +; SSE2-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] +; SSE2-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]] +; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> +; SSE2-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <4 x i32> +; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> +; SSE2-NEXT: ret <8 x float> [[TMP4]] +; +; SSE4-LABEL: @buildvector_mul_addsub_ps256( +; SSE4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] +; SSE4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]] +; SSE4-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A]], [[B]] +; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> [[TMP1]], <8 x i32> +; SSE4-NEXT: ret <8 x float> [[TMP2]] +; +; AVX-LABEL: @buildvector_mul_addsub_ps256( +; AVX-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] +; AVX-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]] +; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A]], [[B]] +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> [[TMP1]], <8 x i32> +; AVX-NEXT: ret <8 x float> [[TMP2]] +; + %A = fmul <8 x float> %C, %D + %A0 = extractelement <8 x float> %A, i32 0 + %B0 = extractelement <8 x float> %B, i32 0 + %sub0 = fsub float %A0, %B0 + %A2 = extractelement <8 x float> %A, i32 2 + %B2 = extractelement <8 x float> %B, i32 2 + %sub2 = fsub float %A2, %B2 + %A4 = extractelement <8 x float> %A, i32 4 + %B4 = 
extractelement <8 x float> %B, i32 4 + %sub4 = fsub float %A4, %B4 + %A6 = extractelement <8 x float> %A, i32 6 + %B6 = extractelement <8 x float> %B, i32 6 + %sub6 = fsub float %A6, %B6 + %A1 = extractelement <8 x float> %A, i32 1 + %B1 = extractelement <8 x float> %B, i32 1 + %add1 = fadd float %A1, %B1 + %A3 = extractelement <8 x float> %A, i32 3 + %B3 = extractelement <8 x float> %B, i32 3 + %add3 = fadd float %A3, %B3 + %A5 = extractelement <8 x float> %A, i32 5 + %B5 = extractelement <8 x float> %B, i32 5 + %add5 = fadd float %A5, %B5 + %A7 = extractelement <8 x float> %A, i32 7 + %B7 = extractelement <8 x float> %B, i32 7 + %add7 = fadd float %A7, %B7 + %vecinsert1 = insertelement <8 x float> undef, float %sub0, i32 0 + %vecinsert2 = insertelement <8 x float> %vecinsert1, float %add1, i32 1 + %vecinsert3 = insertelement <8 x float> %vecinsert2, float %sub2, i32 2 + %vecinsert4 = insertelement <8 x float> %vecinsert3, float %add3, i32 3 + %vecinsert5 = insertelement <8 x float> %vecinsert4, float %sub4, i32 4 + %vecinsert6 = insertelement <8 x float> %vecinsert5, float %add5, i32 5 + %vecinsert7 = insertelement <8 x float> %vecinsert6, float %sub6, i32 6 + %vecinsert8 = insertelement <8 x float> %vecinsert7, float %add7, i32 7 + ret <8 x float> %vecinsert8 +} + +define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) #0 { +; CHECK-LABEL: @buildvector_mul_addsub_pd256( +; CHECK-NEXT: [[A:%.*]] = fmul <4 x double> [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = fsub <4 x double> [[A]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x double> [[A]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <4 x i32> <i32 0, i32 5, i32 2, i32 7> +; CHECK-NEXT: ret <4 x double> [[TMP2]] +; + %A = fmul <4 x double> %C, %D + %A0 = extractelement <4 x double> %A, i32 0 + %B0 = extractelement <4 x double> %B, i32 0 + %sub0 = fsub double %A0, %B0 + %A2 = extractelement <4 x double> %A, i32 2 + %B2 =
extractelement <4 x double> %B, i32 2 + %sub2 = fsub double %A2, %B2 + %A1 = extractelement <4 x double> %A, i32 1 + %B1 = extractelement <4 x double> %B, i32 1 + %add1 = fadd double %A1, %B1 + %A3 = extractelement <4 x double> %A, i32 3 + %B3 = extractelement <4 x double> %B, i32 3 + %add3 = fadd double %A3, %B3 + %vecinsert1 = insertelement <4 x double> undef, double %sub0, i32 0 + %vecinsert2 = insertelement <4 x double> %vecinsert1, double %add1, i32 1 + %vecinsert3 = insertelement <4 x double> %vecinsert2, double %sub2, i32 2 + %vecinsert4 = insertelement <4 x double> %vecinsert3, double %add3, i32 3 + ret <4 x double> %vecinsert4 +} + +define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) #0 { +; SSE-LABEL: @buildvector_mul_addsub_ps512( +; SSE-NEXT: [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]] +; SSE-NEXT: [[TMP0:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <12 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[B:%.*]], <16 x float> poison, <12 x i32> +; SSE-NEXT: [[TMP2:%.*]] = fsub <12 x float> [[TMP0]], [[TMP1]] +; SSE-NEXT: [[TMP3:%.*]] = fadd <12 x float> [[TMP0]], [[TMP1]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <12 x float> [[TMP2]], <12 x float> [[TMP3]], <12 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <2 x i32> +; SSE-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <2 x i32> +; SSE-NEXT: [[TMP7:%.*]] = fsub <2 x float> [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <12 x float> [[TMP4]], <12 x float> , <16 x i32> +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP8]], <16 x i32> +; SSE-NEXT: [[VECINSERT161:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP10]], <16 x i32> +; SSE-NEXT: ret <16 x float> [[VECINSERT161]] +; +; AVX-LABEL: @buildvector_mul_addsub_ps512( +; 
AVX-NEXT: [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]] +; AVX-NEXT: [[TMP0:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[B:%.*]], <16 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[TMP0]], [[TMP1]] +; AVX-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP0]], [[TMP1]] +; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP3]], <8 x i32> +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <4 x i32> +; AVX-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <4 x i32> +; AVX-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP5]], [[TMP6]] +; AVX-NEXT: [[TMP8:%.*]] = fsub <4 x float> [[TMP5]], [[TMP6]] +; AVX-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP10:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP11:%.*]] = fsub <2 x float> [[TMP9]], [[TMP10]] +; AVX-NEXT: [[TMP12:%.*]] = fadd <2 x float> [[TMP9]], [[TMP10]] +; AVX-NEXT: [[TMP13:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> , <16 x i32> +; AVX-NEXT: [[TMP14:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <16 x i32> +; AVX-NEXT: [[VECINSERT141:%.*]] = shufflevector <16 x float> [[TMP13]], <16 x float> [[TMP14]], <16 x i32> +; AVX-NEXT: [[TMP15:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP12]], <16 x i32> +; AVX-NEXT: [[VECINSERT162:%.*]] = shufflevector <16 x float> [[VECINSERT141]], <16 x float> [[TMP15]], <16 x i32> +; AVX-NEXT: ret <16 x float> [[VECINSERT162]] +; + %A = fmul <16 x float> %C, %D + %A0 = extractelement <16 x float> %A, i32 0 + %B0 = extractelement <16 x float> %B, i32 0 + %sub0 = fsub float %A0, %B0 + %A2 = extractelement <16 x float> %A, i32 2 + %B2 = extractelement <16 x float> %B, i32 2 + %sub2 = fsub float %A2, %B2 + %A4 = extractelement <16 x float> %A, i32 4 + %B4 
= extractelement <16 x float> %B, i32 4 + %sub4 = fsub float %A4, %B4 + %A6 = extractelement <16 x float> %A, i32 6 + %B6 = extractelement <16 x float> %B, i32 6 + %sub6 = fsub float %A6, %B6 + %A8 = extractelement <16 x float> %A, i32 8 + %B8 = extractelement <16 x float> %B, i32 8 + %sub8 = fsub float %A8, %B8 + %A10 = extractelement <16 x float> %A, i32 10 + %B10 = extractelement <16 x float> %B, i32 10 + %sub10 = fsub float %A10, %B10 + %A12 = extractelement <16 x float> %A, i32 12 + %B12 = extractelement <16 x float> %B, i32 12 + %sub12 = fsub float %A12, %B12 + %A14 = extractelement <16 x float> %A, i32 14 + %B14 = extractelement <16 x float> %B, i32 14 + %sub14 = fsub float %A14, %B14 + %A1 = extractelement <16 x float> %A, i32 1 + %B1 = extractelement <16 x float> %B, i32 1 + %add1 = fadd float %A1, %B1 + %A3 = extractelement <16 x float> %A, i32 3 + %B3 = extractelement <16 x float> %B, i32 3 + %add3 = fadd float %A3, %B3 + %A5 = extractelement <16 x float> %A, i32 5 + %B5 = extractelement <16 x float> %B, i32 5 + %add5 = fadd float %A5, %B5 + %A7 = extractelement <16 x float> %A, i32 7 + %B7 = extractelement <16 x float> %B, i32 7 + %add7 = fadd float %A7, %B7 + %A9 = extractelement <16 x float> %A, i32 9 + %B9 = extractelement <16 x float> %B, i32 9 + %add9 = fadd float %A9, %B9 + %A11 = extractelement <16 x float> %A, i32 11 + %B11 = extractelement <16 x float> %B, i32 11 + %add11 = fadd float %A11, %B11 + %A13 = extractelement <16 x float> %A, i32 13 + %B13 = extractelement <16 x float> %B, i32 13 + %add13 = fadd float %A13, %B13 + %A15 = extractelement <16 x float> %A, i32 15 + %B15 = extractelement <16 x float> %B, i32 15 + %add15 = fadd float %A15, %B15 + %vecinsert1 = insertelement <16 x float> undef, float %sub0, i32 0 + %vecinsert2 = insertelement <16 x float> %vecinsert1, float %add1, i32 1 + %vecinsert3 = insertelement <16 x float> %vecinsert2, float %sub2, i32 2 + %vecinsert4 = insertelement <16 x float> %vecinsert3, float %add3, i32 3 + 
%vecinsert5 = insertelement <16 x float> %vecinsert4, float %sub4, i32 4 + ; element 5 is undef + %vecinsert7 = insertelement <16 x float> %vecinsert5, float %sub6, i32 6 + %vecinsert8 = insertelement <16 x float> %vecinsert7, float %add7, i32 7 + %vecinsert9 = insertelement <16 x float> %vecinsert8, float %sub8, i32 8 + %vecinsert10 = insertelement <16 x float> %vecinsert9, float %add9, i32 9 + %vecinsert11 = insertelement <16 x float> %vecinsert10, float %sub10, i32 10 + %vecinsert12 = insertelement <16 x float> %vecinsert11, float %add11, i32 11 + ; element 12 is undef + %vecinsert14 = insertelement <16 x float> %vecinsert12, float %add13, i32 13 + %vecinsert15 = insertelement <16 x float> %vecinsert14, float %sub14, i32 14 + %vecinsert16 = insertelement <16 x float> %vecinsert15, float %add15, i32 15 + ret <16 x float> %vecinsert16 +} + +define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) #0 { +; SSE-LABEL: @buildvector_mul_addsub_pd512( +; SSE-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]] +; SSE-NEXT: [[TMP0:%.*]] = fsub <8 x double> [[A]], [[B:%.*]] +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[TMP0]], <8 x double> poison, <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = fadd <8 x double> [[A]], [[B]] +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <6 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <6 x i32> +; SSE-NEXT: [[TMP6:%.*]] = shufflevector <6 x double> [[TMP4]], <6 x double> [[TMP5]], <6 x i32> +; SSE-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i64 7 +; SSE-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i64 7 +; SSE-NEXT: [[ADD7:%.*]] = fadd double [[A7]], [[B7]] +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <6 x double> [[TMP6]], <6 x double> , <8 x i32> +; SSE-NEXT: [[VECINSERT8:%.*]] = insertelement 
<8 x double> [[TMP7]], double [[ADD7]], i64 7 +; SSE-NEXT: ret <8 x double> [[VECINSERT8]] +; +; AVX_FMA4-LABEL: @buildvector_mul_addsub_pd512( +; AVX_FMA4-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]] +; AVX_FMA4-NEXT: [[TMP0:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <4 x i32> +; AVX_FMA4-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> poison, <4 x i32> +; AVX_FMA4-NEXT: [[TMP2:%.*]] = fsub <4 x double> [[TMP0]], [[TMP1]] +; AVX_FMA4-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP0]], [[TMP1]] +; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> +; AVX_FMA4-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i64 7 +; AVX_FMA4-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i64 7 +; AVX_FMA4-NEXT: [[ADD7:%.*]] = fadd double [[A7]], [[B7]] +; AVX_FMA4-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> , <8 x i32> +; AVX_FMA4-NEXT: [[TMP6:%.*]] = fsub <8 x double> [[A]], [[B]] +; AVX_FMA4-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <8 x i32> +; AVX_FMA4-NEXT: [[VECINSERT71:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> [[TMP7]], <8 x i32> +; AVX_FMA4-NEXT: [[VECINSERT8:%.*]] = insertelement <8 x double> [[VECINSERT71]], double [[ADD7]], i64 7 +; AVX_FMA4-NEXT: ret <8 x double> [[VECINSERT8]] +; +; AVX_FMA3-LABEL: @buildvector_mul_addsub_pd512( +; AVX_FMA3-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]] +; AVX_FMA3-NEXT: [[TMP0:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <4 x i32> +; AVX_FMA3-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> poison, <4 x i32> +; AVX_FMA3-NEXT: [[TMP2:%.*]] = fsub <4 x double> [[TMP0]], [[TMP1]] +; AVX_FMA3-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP0]], [[TMP1]] +; AVX_FMA3-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> +; AVX_FMA3-NEXT: [[TMP5:%.*]] = fadd <8 x 
double> [[A]], [[B]] +; AVX_FMA3-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> , <8 x i32> +; AVX_FMA3-NEXT: [[TMP7:%.*]] = fsub <8 x double> [[A]], [[B]] +; AVX_FMA3-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP7]], <8 x double> poison, <8 x i32> +; AVX_FMA3-NEXT: [[VECINSERT71:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> [[TMP8]], <8 x i32> +; AVX_FMA3-NEXT: [[VECINSERT8:%.*]] = shufflevector <8 x double> [[VECINSERT71]], <8 x double> [[TMP5]], <8 x i32> +; AVX_FMA3-NEXT: ret <8 x double> [[VECINSERT8]] +; +; AVX512-LABEL: @buildvector_mul_addsub_pd512( +; AVX512-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]] +; AVX512-NEXT: [[TMP0:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <4 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> poison, <4 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = fsub <4 x double> [[TMP0]], [[TMP1]] +; AVX512-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP0]], [[TMP1]] +; AVX512-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> +; AVX512-NEXT: [[TMP5:%.*]] = fadd <8 x double> [[A]], [[B]] +; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> , <8 x i32> +; AVX512-NEXT: [[TMP7:%.*]] = fsub <8 x double> [[A]], [[B]] +; AVX512-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP7]], <8 x double> poison, <8 x i32> +; AVX512-NEXT: [[VECINSERT71:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> [[TMP8]], <8 x i32> +; AVX512-NEXT: [[VECINSERT8:%.*]] = shufflevector <8 x double> [[VECINSERT71]], <8 x double> [[TMP5]], <8 x i32> +; AVX512-NEXT: ret <8 x double> [[VECINSERT8]] +; + %A = fmul <8 x double> %C, %D + %A0 = extractelement <8 x double> %A, i32 0 + %B0 = extractelement <8 x double> %B, i32 0 + %sub0 = fsub double %A0, %B0 + %A2 = extractelement <8 x double> %A, i32 2 + %B2 = extractelement <8 x double> %B, i32 2 + %sub2 = fsub double %A2, %B2 + %A4 = extractelement 
<8 x double> %A, i32 4 + %B4 = extractelement <8 x double> %B, i32 4 + %sub4 = fsub double %A4, %B4 + %A6 = extractelement <8 x double> %A, i32 6 + %B6 = extractelement <8 x double> %B, i32 6 + %sub6 = fsub double %A6, %B6 + %A1 = extractelement <8 x double> %A, i32 1 + %B1 = extractelement <8 x double> %B, i32 1 + %add1 = fadd double %A1, %B1 + %A3 = extractelement <8 x double> %A, i32 3 + %B3 = extractelement <8 x double> %B, i32 3 + %add3 = fadd double %A3, %B3 + %A7 = extractelement <8 x double> %A, i32 7 + %B7 = extractelement <8 x double> %B, i32 7 + %add7 = fadd double %A7, %B7 + %vecinsert1 = insertelement <8 x double> undef, double %sub0, i32 0 + %vecinsert2 = insertelement <8 x double> %vecinsert1, double %add1, i32 1 + %vecinsert3 = insertelement <8 x double> %vecinsert2, double %sub2, i32 2 + %vecinsert4 = insertelement <8 x double> %vecinsert3, double %add3, i32 3 + %vecinsert5 = insertelement <8 x double> %vecinsert4, double %sub4, i32 4 + ; element 5 is undef + %vecinsert7 = insertelement <8 x double> %vecinsert5, double %sub6, i32 6 + %vecinsert8 = insertelement <8 x double> %vecinsert7, double %add7, i32 7 + ret <8 x double> %vecinsert8 +} + +define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) #0 { +; CHECK-LABEL: @buildvector_mul_subadd_ps128( +; CHECK-NEXT: [[A:%.*]] = fmul <4 x float> [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[A]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[A]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP2]] +; + %A = fmul <4 x float> %C, %D + %A0 = extractelement <4 x float> %A, i32 0 + %B0 = extractelement <4 x float> %B, i32 0 + %sub0 = fadd float %A0, %B0 + %A2 = extractelement <4 x float> %A, i32 2 + %B2 = extractelement <4 x float> %B, i32 2 + %sub2 = fadd float %A2, %B2 + %A1 = extractelement <4 x float> %A, i32 1 + %B1 = extractelement <4 
x float> %B, i32 1 + %add1 = fsub float %A1, %B1 + %A3 = extractelement <4 x float> %A, i32 3 + %B3 = extractelement <4 x float> %B, i32 3 + %add3 = fsub float %A3, %B3 + %vecinsert1 = insertelement <4 x float> undef, float %sub0, i32 0 + %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add1, i32 1 + %vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub2, i32 2 + %vecinsert4 = insertelement <4 x float> %vecinsert3, float %add3, i32 3 + ret <4 x float> %vecinsert4 +} + +define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) #0 { +; CHECK-LABEL: @buildvector_mul_subadd_pd128( +; CHECK-NEXT: [[A:%.*]] = fmul <2 x double> [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[A]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x double> [[A]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %A = fmul <2 x double> %C, %D + %A0 = extractelement <2 x double> %A, i32 0 + %B0 = extractelement <2 x double> %B, i32 0 + %sub0 = fadd double %A0, %B0 + %A1 = extractelement <2 x double> %A, i32 1 + %B1 = extractelement <2 x double> %B, i32 1 + %add1 = fsub double %A1, %B1 + %vecinsert1 = insertelement <2 x double> undef, double %sub0, i32 0 + %vecinsert2 = insertelement <2 x double> %vecinsert1, double %add1, i32 1 + ret <2 x double> %vecinsert2 +} + +define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) #0 { +; SSE2-LABEL: @buildvector_mul_subadd_ps256( +; SSE2-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] +; SSE2-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A]], [[B:%.*]] +; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> +; SSE2-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]] +; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <4 x i32> +; SSE2-NEXT: [[TMP4:%.*]] = 
shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> +; SSE2-NEXT: ret <8 x float> [[TMP4]] +; +; SSE4-LABEL: @buildvector_mul_subadd_ps256( +; SSE4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] +; SSE4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]] +; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> +; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> +; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> +; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> +; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> +; SSE4-NEXT: ret <8 x float> [[TMP6]] +; +; AVX_FMA4-LABEL: @buildvector_mul_subadd_ps256( +; AVX_FMA4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] +; AVX_FMA4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]] +; AVX_FMA4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> +; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; AVX_FMA4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> +; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> +; AVX_FMA4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> +; AVX_FMA4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> +; AVX_FMA4-NEXT: ret <8 x float> [[TMP6]] +; +; AVX_FMA3-LABEL: @buildvector_mul_subadd_ps256( +; AVX_FMA3-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] +; AVX_FMA3-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A]], [[B:%.*]] +; AVX_FMA3-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A]], [[B]] +; AVX_FMA3-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> [[TMP1]], <8 x 
i32> +; AVX_FMA3-NEXT: ret <8 x float> [[TMP2]] +; +; AVX512-LABEL: @buildvector_mul_subadd_ps256( +; AVX512-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] +; AVX512-NEXT: [[TMP0:%.*]] = fadd <8 x float> [[A]], [[B:%.*]] +; AVX512-NEXT: [[TMP1:%.*]] = fsub <8 x float> [[A]], [[B]] +; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> [[TMP1]], <8 x i32> +; AVX512-NEXT: ret <8 x float> [[TMP2]] +; + %A = fmul <8 x float> %C, %D + %A0 = extractelement <8 x float> %A, i32 0 + %B0 = extractelement <8 x float> %B, i32 0 + %sub0 = fadd float %A0, %B0 + %A2 = extractelement <8 x float> %A, i32 2 + %B2 = extractelement <8 x float> %B, i32 2 + %sub2 = fadd float %A2, %B2 + %A4 = extractelement <8 x float> %A, i32 4 + %B4 = extractelement <8 x float> %B, i32 4 + %sub4 = fadd float %A4, %B4 + %A6 = extractelement <8 x float> %A, i32 6 + %B6 = extractelement <8 x float> %B, i32 6 + %sub6 = fadd float %A6, %B6 + %A1 = extractelement <8 x float> %A, i32 1 + %B1 = extractelement <8 x float> %B, i32 1 + %add1 = fsub float %A1, %B1 + %A3 = extractelement <8 x float> %A, i32 3 + %B3 = extractelement <8 x float> %B, i32 3 + %add3 = fsub float %A3, %B3 + %A5 = extractelement <8 x float> %A, i32 5 + %B5 = extractelement <8 x float> %B, i32 5 + %add5 = fsub float %A5, %B5 + %A7 = extractelement <8 x float> %A, i32 7 + %B7 = extractelement <8 x float> %B, i32 7 + %add7 = fsub float %A7, %B7 + %vecinsert1 = insertelement <8 x float> undef, float %sub0, i32 0 + %vecinsert2 = insertelement <8 x float> %vecinsert1, float %add1, i32 1 + %vecinsert3 = insertelement <8 x float> %vecinsert2, float %sub2, i32 2 + %vecinsert4 = insertelement <8 x float> %vecinsert3, float %add3, i32 3 + %vecinsert5 = insertelement <8 x float> %vecinsert4, float %sub4, i32 4 + %vecinsert6 = insertelement <8 x float> %vecinsert5, float %add5, i32 5 + %vecinsert7 = insertelement <8 x float> %vecinsert6, float %sub6, i32 6 + %vecinsert8 = insertelement <8 x float> %vecinsert7, float 
%add7, i32 7 + ret <8 x float> %vecinsert8 +} + +define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) #0 { +; CHECK-LABEL: @buildvector_mul_subadd_pd256( +; CHECK-NEXT: [[A:%.*]] = fmul <4 x double> [[C:%.*]], [[D:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x double> [[A]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x double> [[A]], [[B]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP2]] +; + %A = fmul <4 x double> %C, %D + %A0 = extractelement <4 x double> %A, i32 0 + %B0 = extractelement <4 x double> %B, i32 0 + %sub0 = fadd double %A0, %B0 + %A2 = extractelement <4 x double> %A, i32 2 + %B2 = extractelement <4 x double> %B, i32 2 + %sub2 = fadd double %A2, %B2 + %A1 = extractelement <4 x double> %A, i32 1 + %B1 = extractelement <4 x double> %B, i32 1 + %add1 = fsub double %A1, %B1 + %A3 = extractelement <4 x double> %A, i32 3 + %B3 = extractelement <4 x double> %B, i32 3 + %add3 = fsub double %A3, %B3 + %vecinsert1 = insertelement <4 x double> undef, double %sub0, i32 0 + %vecinsert2 = insertelement <4 x double> %vecinsert1, double %add1, i32 1 + %vecinsert3 = insertelement <4 x double> %vecinsert2, double %sub2, i32 2 + %vecinsert4 = insertelement <4 x double> %vecinsert3, double %add3, i32 3 + ret <4 x double> %vecinsert4 +} + +define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) #0 { +; SSE-LABEL: @buildvector_mul_subadd_ps512( +; SSE-NEXT: [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]] +; SSE-NEXT: [[TMP0:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <12 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[B:%.*]], <16 x float> poison, <12 x i32> +; SSE-NEXT: [[TMP2:%.*]] = fadd <12 x float> [[TMP0]], [[TMP1]] +; SSE-NEXT: [[TMP3:%.*]] = fsub <12 x float> [[TMP0]], [[TMP1]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <12 x float> [[TMP2]], 
<12 x float> [[TMP3]], <12 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <2 x i32> +; SSE-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <2 x i32> +; SSE-NEXT: [[TMP7:%.*]] = fadd <2 x float> [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP8:%.*]] = fsub <2 x float> [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <12 x float> [[TMP4]], <12 x float> , <16 x i32> +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP8]], <16 x i32> +; SSE-NEXT: [[VECINSERT161:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP10]], <16 x i32> +; SSE-NEXT: ret <16 x float> [[VECINSERT161]] +; +; AVX-LABEL: @buildvector_mul_subadd_ps512( +; AVX-NEXT: [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]] +; AVX-NEXT: [[TMP0:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[B:%.*]], <16 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[TMP0]], [[TMP1]] +; AVX-NEXT: [[TMP3:%.*]] = fsub <8 x float> [[TMP0]], [[TMP1]] +; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP3]], <8 x i32> +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <4 x i32> +; AVX-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <4 x i32> +; AVX-NEXT: [[TMP7:%.*]] = fsub <4 x float> [[TMP5]], [[TMP6]] +; AVX-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP5]], [[TMP6]] +; AVX-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[A]], <16 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP10:%.*]] = shufflevector <16 x float> [[B]], <16 x float> poison, <2 x i32> +; AVX-NEXT: [[TMP11:%.*]] = fadd <2 x float> [[TMP9]], [[TMP10]] +; AVX-NEXT: [[TMP12:%.*]] = fsub <2 x float> [[TMP9]], [[TMP10]] +; AVX-NEXT: [[TMP13:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> , <16 x i32> +; AVX-NEXT: [[TMP14:%.*]] = shufflevector <4 x float> 
[[TMP7]], <4 x float> [[TMP8]], <16 x i32> +; AVX-NEXT: [[VECINSERT141:%.*]] = shufflevector <16 x float> [[TMP13]], <16 x float> [[TMP14]], <16 x i32> +; AVX-NEXT: [[TMP15:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP12]], <16 x i32> +; AVX-NEXT: [[VECINSERT162:%.*]] = shufflevector <16 x float> [[VECINSERT141]], <16 x float> [[TMP15]], <16 x i32> +; AVX-NEXT: ret <16 x float> [[VECINSERT162]] +; + %A = fmul <16 x float> %C, %D + %A0 = extractelement <16 x float> %A, i32 0 + %B0 = extractelement <16 x float> %B, i32 0 + %sub0 = fadd float %A0, %B0 + %A2 = extractelement <16 x float> %A, i32 2 + %B2 = extractelement <16 x float> %B, i32 2 + %sub2 = fadd float %A2, %B2 + %A4 = extractelement <16 x float> %A, i32 4 + %B4 = extractelement <16 x float> %B, i32 4 + %sub4 = fadd float %A4, %B4 + %A6 = extractelement <16 x float> %A, i32 6 + %B6 = extractelement <16 x float> %B, i32 6 + %sub6 = fadd float %A6, %B6 + %A8 = extractelement <16 x float> %A, i32 8 + %B8 = extractelement <16 x float> %B, i32 8 + %sub8 = fadd float %A8, %B8 + %A10 = extractelement <16 x float> %A, i32 10 + %B10 = extractelement <16 x float> %B, i32 10 + %sub10 = fadd float %A10, %B10 + %A12 = extractelement <16 x float> %A, i32 12 + %B12 = extractelement <16 x float> %B, i32 12 + %sub12 = fadd float %A12, %B12 + %A14 = extractelement <16 x float> %A, i32 14 + %B14 = extractelement <16 x float> %B, i32 14 + %sub14 = fadd float %A14, %B14 + %A1 = extractelement <16 x float> %A, i32 1 + %B1 = extractelement <16 x float> %B, i32 1 + %add1 = fsub float %A1, %B1 + %A3 = extractelement <16 x float> %A, i32 3 + %B3 = extractelement <16 x float> %B, i32 3 + %add3 = fsub float %A3, %B3 + %A5 = extractelement <16 x float> %A, i32 5 + %B5 = extractelement <16 x float> %B, i32 5 + %add5 = fsub float %A5, %B5 + %A7 = extractelement <16 x float> %A, i32 7 + %B7 = extractelement <16 x float> %B, i32 7 + %add7 = fsub float %A7, %B7 + %A9 = extractelement <16 x float> %A, i32 9 + %B9 = 
extractelement <16 x float> %B, i32 9 + %add9 = fsub float %A9, %B9 + %A11 = extractelement <16 x float> %A, i32 11 + %B11 = extractelement <16 x float> %B, i32 11 + %add11 = fsub float %A11, %B11 + %A13 = extractelement <16 x float> %A, i32 13 + %B13 = extractelement <16 x float> %B, i32 13 + %add13 = fsub float %A13, %B13 + %A15 = extractelement <16 x float> %A, i32 15 + %B15 = extractelement <16 x float> %B, i32 15 + %add15 = fsub float %A15, %B15 + %vecinsert1 = insertelement <16 x float> undef, float %sub0, i32 0 + %vecinsert2 = insertelement <16 x float> %vecinsert1, float %add1, i32 1 + %vecinsert3 = insertelement <16 x float> %vecinsert2, float %sub2, i32 2 + %vecinsert4 = insertelement <16 x float> %vecinsert3, float %add3, i32 3 + %vecinsert5 = insertelement <16 x float> %vecinsert4, float %sub4, i32 4 + ; element 5 is undef + %vecinsert7 = insertelement <16 x float> %vecinsert5, float %sub6, i32 6 + %vecinsert8 = insertelement <16 x float> %vecinsert7, float %add7, i32 7 + %vecinsert9 = insertelement <16 x float> %vecinsert8, float %sub8, i32 8 + %vecinsert10 = insertelement <16 x float> %vecinsert9, float %add9, i32 9 + %vecinsert11 = insertelement <16 x float> %vecinsert10, float %sub10, i32 10 + %vecinsert12 = insertelement <16 x float> %vecinsert11, float %add11, i32 11 + ; element 12 is undef + %vecinsert14 = insertelement <16 x float> %vecinsert12, float %add13, i32 13 + %vecinsert15 = insertelement <16 x float> %vecinsert14, float %sub14, i32 14 + %vecinsert16 = insertelement <16 x float> %vecinsert15, float %add15, i32 15 + ret <16 x float> %vecinsert16 +} + +define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) #0 { +; SSE-LABEL: @buildvector_mul_subadd_pd512( +; SSE-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]] +; SSE-NEXT: [[TMP0:%.*]] = fadd <8 x double> [[A]], [[B:%.*]] +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[TMP0]], <8 x double> poison, <4 x i32> +; SSE-NEXT: 
[[TMP2:%.*]] = fsub <8 x double> [[A]], [[B]] +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <2 x i32> +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <6 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <6 x i32> +; SSE-NEXT: [[TMP6:%.*]] = shufflevector <6 x double> [[TMP4]], <6 x double> [[TMP5]], <6 x i32> +; SSE-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i64 7 +; SSE-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i64 7 +; SSE-NEXT: [[ADD7:%.*]] = fsub double [[A7]], [[B7]] +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <6 x double> [[TMP6]], <6 x double> , <8 x i32> +; SSE-NEXT: [[VECINSERT8:%.*]] = insertelement <8 x double> [[TMP7]], double [[ADD7]], i64 7 +; SSE-NEXT: ret <8 x double> [[VECINSERT8]] +; +; AVX_FMA4-LABEL: @buildvector_mul_subadd_pd512( +; AVX_FMA4-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]] +; AVX_FMA4-NEXT: [[TMP0:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <4 x i32> +; AVX_FMA4-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> poison, <4 x i32> +; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <4 x double> [[TMP0]], [[TMP1]] +; AVX_FMA4-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP0]], [[TMP1]] +; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> +; AVX_FMA4-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i64 7 +; AVX_FMA4-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i64 7 +; AVX_FMA4-NEXT: [[ADD7:%.*]] = fsub double [[A7]], [[B7]] +; AVX_FMA4-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> , <8 x i32> +; AVX_FMA4-NEXT: [[TMP6:%.*]] = fadd <8 x double> [[A]], [[B]] +; AVX_FMA4-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <8 x i32> +; AVX_FMA4-NEXT: [[VECINSERT71:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> [[TMP7]], <8 x i32> +; 
AVX_FMA4-NEXT: [[VECINSERT8:%.*]] = insertelement <8 x double> [[VECINSERT71]], double [[ADD7]], i64 7 +; AVX_FMA4-NEXT: ret <8 x double> [[VECINSERT8]] +; +; AVX_FMA3-LABEL: @buildvector_mul_subadd_pd512( +; AVX_FMA3-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]] +; AVX_FMA3-NEXT: [[TMP0:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <4 x i32> +; AVX_FMA3-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> poison, <4 x i32> +; AVX_FMA3-NEXT: [[TMP2:%.*]] = fadd <4 x double> [[TMP0]], [[TMP1]] +; AVX_FMA3-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP0]], [[TMP1]] +; AVX_FMA3-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> +; AVX_FMA3-NEXT: [[TMP5:%.*]] = fsub <8 x double> [[A]], [[B]] +; AVX_FMA3-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> , <8 x i32> +; AVX_FMA3-NEXT: [[TMP7:%.*]] = fadd <8 x double> [[A]], [[B]] +; AVX_FMA3-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP7]], <8 x double> poison, <8 x i32> +; AVX_FMA3-NEXT: [[VECINSERT71:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> [[TMP8]], <8 x i32> +; AVX_FMA3-NEXT: [[VECINSERT8:%.*]] = shufflevector <8 x double> [[VECINSERT71]], <8 x double> [[TMP5]], <8 x i32> +; AVX_FMA3-NEXT: ret <8 x double> [[VECINSERT8]] +; +; AVX512-LABEL: @buildvector_mul_subadd_pd512( +; AVX512-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]] +; AVX512-NEXT: [[TMP0:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <4 x i32> +; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[B:%.*]], <8 x double> poison, <4 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = fadd <4 x double> [[TMP0]], [[TMP1]] +; AVX512-NEXT: [[TMP3:%.*]] = fsub <4 x double> [[TMP0]], [[TMP1]] +; AVX512-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> +; AVX512-NEXT: [[TMP5:%.*]] = fsub <8 x double> [[A]], [[B]] +; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP4]], 
<4 x double> , <8 x i32> +; AVX512-NEXT: [[TMP7:%.*]] = fadd <8 x double> [[A]], [[B]] +; AVX512-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[TMP7]], <8 x double> poison, <8 x i32> +; AVX512-NEXT: [[VECINSERT71:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> [[TMP8]], <8 x i32> +; AVX512-NEXT: [[VECINSERT8:%.*]] = shufflevector <8 x double> [[VECINSERT71]], <8 x double> [[TMP5]], <8 x i32> +; AVX512-NEXT: ret <8 x double> [[VECINSERT8]] +; + %A = fmul <8 x double> %C, %D + %A0 = extractelement <8 x double> %A, i32 0 + %B0 = extractelement <8 x double> %B, i32 0 + %sub0 = fadd double %A0, %B0 + %A2 = extractelement <8 x double> %A, i32 2 + %B2 = extractelement <8 x double> %B, i32 2 + %sub2 = fadd double %A2, %B2 + %A4 = extractelement <8 x double> %A, i32 4 + %B4 = extractelement <8 x double> %B, i32 4 + %sub4 = fadd double %A4, %B4 + %A6 = extractelement <8 x double> %A, i32 6 + %B6 = extractelement <8 x double> %B, i32 6 + %sub6 = fadd double %A6, %B6 + %A1 = extractelement <8 x double> %A, i32 1 + %B1 = extractelement <8 x double> %B, i32 1 + %add1 = fsub double %A1, %B1 + %A3 = extractelement <8 x double> %A, i32 3 + %B3 = extractelement <8 x double> %B, i32 3 + %add3 = fsub double %A3, %B3 + %A7 = extractelement <8 x double> %A, i32 7 + %B7 = extractelement <8 x double> %B, i32 7 + %add7 = fsub double %A7, %B7 + %vecinsert1 = insertelement <8 x double> undef, double %sub0, i32 0 + %vecinsert2 = insertelement <8 x double> %vecinsert1, double %add1, i32 1 + %vecinsert3 = insertelement <8 x double> %vecinsert2, double %sub2, i32 2 + %vecinsert4 = insertelement <8 x double> %vecinsert3, double %add3, i32 3 + %vecinsert5 = insertelement <8 x double> %vecinsert4, double %sub4, i32 4 + ; element 5 is undef + %vecinsert7 = insertelement <8 x double> %vecinsert5, double %sub6, i32 6 + %vecinsert8 = insertelement <8 x double> %vecinsert7, double %add7, i32 7 + ret <8 x double> %vecinsert8 +} + +attributes #0 = { nounwind "unsafe-fp-math"="true" } -- cgit 
v1.2.3 From 45ea46c44636094e9fcdbbeabfd11f9d0fad5e38 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Wed, 18 Jun 2025 12:50:48 +0530 Subject: Reland [Driver] Add support for GCC installation detection in Baremetal toolchain (#144640) This patch introduces enhancements to the Baremetal toolchain to support GCC toolchain detection. - If the --gcc-install-dir or --gcc-toolchain options are provided and point to valid paths, the sysroot is derived from those locations. - If not, the logic falls back to the existing sysroot inference mechanism already present in the Baremetal toolchain. - Support for adding include paths for the libstdc++ library has also been added. Additionally, the restriction to always use the integrated assembler has been removed. With a valid GCC installation, the GNU assembler can now be used as well. This patch currently updates and adds tests for the ARM target only. RISC-V-specific tests will be introduced in a later patch, once the RISCVToolChain is fully merged into the Baremetal toolchain. At this stage, there is no way to test the RISC-V target within this PR. 
RFC: https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524 --- clang/docs/Toolchain.rst | 5 + clang/include/clang/Basic/DiagnosticDriverKinds.td | 3 + clang/lib/Driver/ToolChains/BareMetal.cpp | 235 +++++++++++++++------ clang/lib/Driver/ToolChains/BareMetal.h | 19 +- .../aarch64-none-elf/include/c++/8.2.1/.keep | 0 .../aarch64-none-elf/lib/.keep | 0 .../aarch64-none-elf/lib/crt0.o | 0 .../basic_aarch64_gcc_tree/bin/aarch64-none-elf-ld | 1 + .../lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o | 0 .../lib/gcc/aarch64-none-elf/8.2.1/crtend.o | 0 .../aarch64-none-elf/lib/crt0.o | 0 .../aarch64-none-elf/lib/crtbegin.o | 0 .../aarch64-none-elf/lib/crtend.o | 0 .../bin/aarch64-none-elf-ld | 1 + .../armv6m-none-eabi/include/c++/8.2.1/.keep | 0 .../basic_arm_gcc_tree/armv6m-none-eabi/lib/.keep | 0 .../basic_arm_gcc_tree/armv6m-none-eabi/lib/crt0.o | 0 .../basic_arm_gcc_tree/bin/armv6m-none-eabi-ld | 1 + .../lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o | 0 .../lib/gcc/armv6m-none-eabi/8.2.1/crtend.o | 0 .../armv6m-none-eabi/lib/crt0.o | 0 .../armv6m-none-eabi/lib/crtbegin.o | 0 .../armv6m-none-eabi/lib/crtend.o | 0 .../basic_arm_nogcc_tree/bin/armv6m-none-eabi-ld | 1 + clang/test/Driver/aarch64-gnutools.c | 4 + clang/test/Driver/aarch64-toolchain-extra.c | 28 +++ clang/test/Driver/aarch64-toolchain.c | 61 ++++++ clang/test/Driver/arm-gnutools.c | 6 + clang/test/Driver/arm-toolchain-extra.c | 29 +++ clang/test/Driver/arm-toolchain.c | 62 ++++++ clang/test/Driver/baremetal.cpp | 16 ++ clang/test/Driver/check-no-multlib-warning.c | 10 + 32 files changed, 418 insertions(+), 64 deletions(-) create mode 100644 clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1/.keep create mode 100644 clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/.keep create mode 100644 clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o create mode 100755 
clang/test/Driver/Inputs/basic_aarch64_gcc_tree/bin/aarch64-none-elf-ld create mode 100644 clang/test/Driver/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o create mode 100644 clang/test/Driver/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtend.o create mode 100644 clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crt0.o create mode 100644 clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crtbegin.o create mode 100644 clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crtend.o create mode 100755 clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/bin/aarch64-none-elf-ld create mode 100644 clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/8.2.1/.keep create mode 100644 clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/.keep create mode 100644 clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/crt0.o create mode 100755 clang/test/Driver/Inputs/basic_arm_gcc_tree/bin/armv6m-none-eabi-ld create mode 100644 clang/test/Driver/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o create mode 100644 clang/test/Driver/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtend.o create mode 100644 clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crt0.o create mode 100644 clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crtbegin.o create mode 100644 clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crtend.o create mode 100755 clang/test/Driver/Inputs/basic_arm_nogcc_tree/bin/armv6m-none-eabi-ld create mode 100644 clang/test/Driver/aarch64-gnutools.c create mode 100644 clang/test/Driver/aarch64-toolchain-extra.c create mode 100644 clang/test/Driver/aarch64-toolchain.c create mode 100644 clang/test/Driver/arm-gnutools.c create mode 100644 clang/test/Driver/arm-toolchain-extra.c create mode 100644 clang/test/Driver/arm-toolchain.c create mode 100644 
clang/test/Driver/check-no-multlib-warning.c diff --git a/clang/docs/Toolchain.rst b/clang/docs/Toolchain.rst index 958199eb7a2e..d56b21d74c7e 100644 --- a/clang/docs/Toolchain.rst +++ b/clang/docs/Toolchain.rst @@ -347,3 +347,8 @@ workarounds for issues discovered in libstdc++, and these are removed as fixed libstdc++ becomes sufficiently old. You can instruct Clang to use libstdc++ with the ``-stdlib=libstdc++`` flag. + +GCC Installation +================= +Users can point to their GCC installation by using either the +``--gcc-toolchain`` or the ``--gcc-install-dir`` flag. diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 29f6480ba935..94224e103875 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -847,6 +847,9 @@ def note_drv_available_multilibs : Note< "available multilibs are:%0">; def err_drv_multilib_custom_error : Error< "multilib configuration error: %0">; +def warn_drv_multilib_not_available_for_target: Warning< + "no multilib structure encoded for Arm, Aarch64 and PPC targets">, + InGroup>; def err_drv_experimental_crel : Error< "-Wa,--allow-experimental-crel must be specified to use -Wa,--crel. " diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index d8168ed15feb..0fbfe6c77f34 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -31,6 +31,40 @@ using namespace clang::driver; using namespace clang::driver::tools; using namespace clang::driver::toolchains; +/// Is the triple {aarch64.aarch64_be}-none-elf?
+static bool isAArch64BareMetal(const llvm::Triple &Triple) { + if (Triple.getArch() != llvm::Triple::aarch64 && + Triple.getArch() != llvm::Triple::aarch64_be) + return false; + + if (Triple.getVendor() != llvm::Triple::UnknownVendor) + return false; + + if (Triple.getOS() != llvm::Triple::UnknownOS) + return false; + + return Triple.getEnvironmentName() == "elf"; +} + +static bool isRISCVBareMetal(const llvm::Triple &Triple) { + if (!Triple.isRISCV()) + return false; + + if (Triple.getVendor() != llvm::Triple::UnknownVendor) + return false; + + if (Triple.getOS() != llvm::Triple::UnknownOS) + return false; + + return Triple.getEnvironmentName() == "elf"; +} + +/// Is the triple powerpc[64][le]-*-none-eabi? +static bool isPPCBareMetal(const llvm::Triple &Triple) { + return Triple.isPPC() && Triple.getOS() == llvm::Triple::UnknownOS && + Triple.getEnvironment() == llvm::Triple::EABI; +} + static bool findRISCVMultilibs(const Driver &D, const llvm::Triple &TargetTriple, const ArgList &Args, DetectedMultilibs &Result) { @@ -95,7 +129,8 @@ static bool findRISCVMultilibs(const Driver &D, return false; } -static std::string computeBaseSysRoot(const Driver &D, bool IncludeTriple) { +static std::string computeClangRuntimesSysRoot(const Driver &D, + bool IncludeTriple) { if (!D.SysRoot.empty()) return D.SysRoot; @@ -108,56 +143,123 @@ static std::string computeBaseSysRoot(const Driver &D, bool IncludeTriple) { return std::string(SysRootDir); } -BareMetal::BareMetal(const Driver &D, const llvm::Triple &Triple, - const ArgList &Args) - : ToolChain(D, Triple, Args), - SysRoot(computeBaseSysRoot(D, /*IncludeTriple=*/true)) { - getProgramPaths().push_back(getDriver().Dir); - - findMultilibs(D, Triple, Args); - SmallString<128> SysRoot(computeSysRoot()); - if (!SysRoot.empty()) { - for (const Multilib &M : getOrderedMultilibs()) { - SmallString<128> Dir(SysRoot); - llvm::sys::path::append(Dir, M.osSuffix(), "lib"); - getFilePaths().push_back(std::string(Dir)); - 
getLibraryPaths().push_back(std::string(Dir)); - } +// Only consider the GCC toolchain based on the values provided through the +// `--gcc-toolchain` and `--gcc-install-dir` flags. The function below returns +// whether the GCC toolchain was initialized successfully. +bool BareMetal::initGCCInstallation(const llvm::Triple &Triple, + const llvm::opt::ArgList &Args) { + if (Args.getLastArg(options::OPT_gcc_toolchain) || + Args.getLastArg(clang::driver::options::OPT_gcc_install_dir_EQ)) { + GCCInstallation.init(Triple, Args); + return GCCInstallation.isValid(); } + return false; } -/// Is the triple {aarch64.aarch64_be}-none-elf? -static bool isAArch64BareMetal(const llvm::Triple &Triple) { - if (Triple.getArch() != llvm::Triple::aarch64 && - Triple.getArch() != llvm::Triple::aarch64_be) - return false; - - if (Triple.getVendor() != llvm::Triple::UnknownVendor) - return false; - - if (Triple.getOS() != llvm::Triple::UnknownOS) - return false; - - return Triple.getEnvironmentName() == "elf"; +// This logic is adapted from RISCVToolChain.cpp as part of the ongoing effort +// to merge RISCVToolChain into the Baremetal toolchain. It infers the presence +// of a valid GCC toolchain by checking whether the `crt0.o` file exists in the +// `bin/../<target-triple>/lib` directory. +static bool detectGCCToolchainAdjacent(const Driver &D) { + SmallString<128> GCCDir; + llvm::sys::path::append(GCCDir, D.Dir, "..", D.getTargetTriple(), + "lib/crt0.o"); + return llvm::sys::fs::exists(GCCDir); } -static bool isRISCVBareMetal(const llvm::Triple &Triple) { - if (!Triple.isRISCV()) - return false; +// If no sysroot is provided the driver will first attempt to infer it from the +// values of `--gcc-install-dir` or `--gcc-toolchain`, which specify the +// location of a GCC toolchain.
+// If neither flag is used, the sysroot defaults to either: +//    - `bin/../` +//    - `bin/../lib/clang-runtimes/` +// +// To use the `clang-runtimes` path, ensure that `..//lib/crt0.o` +// does not exist relative to the driver. +std::string BareMetal::computeSysRoot() const { + // Use Baremetal::sysroot if it has already been set. + if (!SysRoot.empty()) + return SysRoot; + + // Use the sysroot specified via the `--sysroot` command-line flag, if + // provided. + const Driver &D = getDriver(); + if (!D.SysRoot.empty()) + return D.SysRoot; - if (Triple.getVendor() != llvm::Triple::UnknownVendor) - return false; + // Attempt to infer sysroot from a valid GCC installation. + // If no valid GCC installation, check for a GCC toolchain alongside Clang. + SmallString<128> inferredSysRoot; + if (IsGCCInstallationValid) { + llvm::sys::path::append(inferredSysRoot, GCCInstallation.getParentLibPath(), + "..", GCCInstallation.getTriple().str()); + } else if (detectGCCToolchainAdjacent(D)) { + // Use the triple as provided to the driver. Unlike the parsed triple + // this has not been normalized to always contain every field. + llvm::sys::path::append(inferredSysRoot, D.Dir, "..", D.getTargetTriple()); + } + // If a valid sysroot was inferred and exists, use it + if (!inferredSysRoot.empty() && llvm::sys::fs::exists(inferredSysRoot)) + return std::string(inferredSysRoot); - if (Triple.getOS() != llvm::Triple::UnknownOS) - return false; + // Use the clang-runtimes path. + return computeClangRuntimesSysRoot(D, /*IncludeTriple*/ true); +} - return Triple.getEnvironmentName() == "elf"; +static void addMultilibsFilePaths(const Driver &D, const MultilibSet &Multilibs, + const Multilib &Multilib, + StringRef InstallPath, + ToolChain::path_list &Paths) { + if (const auto &PathsCallback = Multilibs.filePathsCallback()) + for (const auto &Path : PathsCallback(Multilib)) + addPathIfExists(D, InstallPath + Path, Paths); } -/// Is the triple powerpc[64][le]-*-none-eabi? 
-static bool isPPCBareMetal(const llvm::Triple &Triple) { - return Triple.isPPC() && Triple.getOS() == llvm::Triple::UnknownOS && - Triple.getEnvironment() == llvm::Triple::EABI; +// GCC multilibs will only work for those targets that have their multilib +// structure encoded into GCCInstallation. Baremetal toolchain supports ARM, +// AArch64, RISCV and PPC and of these only RISCV has GCC multilibs hardcoded +// in GCCInstallation. +BareMetal::BareMetal(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args) + : Generic_ELF(D, Triple, Args) { + IsGCCInstallationValid = initGCCInstallation(Triple, Args); + std::string ComputedSysRoot = computeSysRoot(); + if (IsGCCInstallationValid) { + if (!isRISCVBareMetal(Triple)) + D.Diag(clang::diag::warn_drv_multilib_not_available_for_target); + + Multilibs = GCCInstallation.getMultilibs(); + SelectedMultilibs.assign({GCCInstallation.getMultilib()}); + + path_list &Paths = getFilePaths(); + // Add toolchain/multilib specific file paths. + addMultilibsFilePaths(D, Multilibs, SelectedMultilibs.back(), + GCCInstallation.getInstallPath(), Paths); + // Adding filepath for locating crt{begin,end}.o files. + Paths.push_back(GCCInstallation.getInstallPath().str()); + // Adding filepath for locating crt0.o file. + Paths.push_back(ComputedSysRoot + "/lib"); + + ToolChain::path_list &PPaths = getProgramPaths(); + // Multilib cross-compiler GCC installations put ld in a triple-prefixed + // directory off of the parent of the GCC installation.
+ PPaths.push_back(Twine(GCCInstallation.getParentLibPath() + "/../" + + GCCInstallation.getTriple().str() + "/bin") + .str()); + PPaths.push_back((GCCInstallation.getParentLibPath() + "/../bin").str()); + } else { + getProgramPaths().push_back(getDriver().Dir); + findMultilibs(D, Triple, Args); + const SmallString<128> SysRootDir(computeSysRoot()); + if (!SysRootDir.empty()) { + for (const Multilib &M : getOrderedMultilibs()) { + SmallString<128> Dir(SysRootDir); + llvm::sys::path::append(Dir, M.osSuffix(), "lib"); + getFilePaths().push_back(std::string(Dir)); + getLibraryPaths().push_back(std::string(Dir)); + } + } + } } static void @@ -216,7 +318,7 @@ getMultilibConfigPath(const Driver &D, const llvm::Triple &Triple, return {}; } } else { - MultilibPath = computeBaseSysRoot(D, /*IncludeTriple=*/false); + MultilibPath = computeClangRuntimesSysRoot(D, /*IncludeTriple=*/false); llvm::sys::path::append(MultilibPath, MultilibFilename); } return MultilibPath; @@ -234,7 +336,7 @@ void BareMetal::findMultilibs(const Driver &D, const llvm::Triple &Triple, if (D.getVFS().exists(*MultilibPath)) { // If multilib.yaml is found, update sysroot so it doesn't use a target // specific suffix - SysRoot = computeBaseSysRoot(D, /*IncludeTriple=*/false); + SysRoot = computeClangRuntimesSysRoot(D, /*IncludeTriple=*/false); SmallVector CustomFlagMacroDefines; findMultilibsFromYAML(*this, D, *MultilibPath, Args, Result, CustomFlagMacroDefines); @@ -242,7 +344,7 @@ void BareMetal::findMultilibs(const Driver &D, const llvm::Triple &Triple, Multilibs = Result.Multilibs; MultilibMacroDefines.append(CustomFlagMacroDefines.begin(), CustomFlagMacroDefines.end()); - } else if (isRISCVBareMetal(Triple)) { + } else if (isRISCVBareMetal(Triple) && !detectGCCToolchainAdjacent(D)) { if (findRISCVMultilibs(D, Triple, Args, Result)) { SelectedMultilibs = Result.SelectedMultilibs; Multilibs = Result.Multilibs; @@ -263,8 +365,6 @@ Tool *BareMetal::buildStaticLibTool() const { return new 
tools::baremetal::StaticLibTool(*this); } -std::string BareMetal::computeSysRoot() const { return SysRoot; } - BareMetal::OrderedMultilibs BareMetal::getOrderedMultilibs() const { // Get multilibs in reverse order because they're ordered most-specific last. if (!SelectedMultilibs.empty()) @@ -292,10 +392,10 @@ void BareMetal::AddClangSystemIncludeArgs(const ArgList &DriverArgs, if (std::optional Path = getStdlibIncludePath()) addSystemInclude(DriverArgs, CC1Args, *Path); - const SmallString<128> SysRoot(computeSysRoot()); - if (!SysRoot.empty()) { + const SmallString<128> SysRootDir(computeSysRoot()); + if (!SysRootDir.empty()) { for (const Multilib &M : getOrderedMultilibs()) { - SmallString<128> Dir(SysRoot); + SmallString<128> Dir(SysRootDir); llvm::sys::path::append(Dir, M.includeSuffix()); llvm::sys::path::append(Dir, "include"); addSystemInclude(DriverArgs, CC1Args, Dir.str()); @@ -309,6 +409,19 @@ void BareMetal::addClangTargetOptions(const ArgList &DriverArgs, CC1Args.push_back("-nostdsysteminc"); } +void BareMetal::addLibStdCxxIncludePaths( + const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const { + if (!IsGCCInstallationValid) + return; + const GCCVersion &Version = GCCInstallation.getVersion(); + StringRef TripleStr = GCCInstallation.getTriple().str(); + const Multilib &Multilib = GCCInstallation.getMultilib(); + addLibStdCXXIncludePaths(computeSysRoot() + "/include/c++/" + Version.Text, + TripleStr, Multilib.includeSuffix(), DriverArgs, + CC1Args); +} + void BareMetal::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { if (DriverArgs.hasArg(options::OPT_nostdinc, options::OPT_nostdlibinc, @@ -339,23 +452,23 @@ void BareMetal::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, }; switch (GetCXXStdlibType(DriverArgs)) { - case ToolChain::CST_Libcxx: { - SmallString<128> P(D.Dir); - llvm::sys::path::append(P, "..", "include"); - AddCXXIncludePath(P); - break; - } - case 
ToolChain::CST_Libstdcxx: - // We only support libc++ toolchain installation. - break; + case ToolChain::CST_Libcxx: { + SmallString<128> P(D.Dir); + llvm::sys::path::append(P, "..", "include"); + AddCXXIncludePath(P); + break; + } + case ToolChain::CST_Libstdcxx: + addLibStdCxxIncludePaths(DriverArgs, CC1Args); + break; } - std::string SysRoot(computeSysRoot()); - if (SysRoot.empty()) + std::string SysRootDir(computeSysRoot()); + if (SysRootDir.empty()) return; for (const Multilib &M : getOrderedMultilibs()) { - SmallString<128> Dir(SysRoot); + SmallString<128> Dir(SysRootDir); llvm::sys::path::append(Dir, M.gccSuffix()); switch (GetCXXStdlibType(DriverArgs)) { case ToolChain::CST_Libcxx: { diff --git a/clang/lib/Driver/ToolChains/BareMetal.h b/clang/lib/Driver/ToolChains/BareMetal.h index f6295bda0a6a..930f8584e643 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.h +++ b/clang/lib/Driver/ToolChains/BareMetal.h @@ -9,6 +9,7 @@ #ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_BAREMETAL_H #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_BAREMETAL_H +#include "ToolChains/Gnu.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" @@ -19,7 +20,7 @@ namespace driver { namespace toolchains { -class LLVM_LIBRARY_VISIBILITY BareMetal : public ToolChain { +class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF { public: BareMetal(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args); @@ -35,7 +36,8 @@ protected: Tool *buildStaticLibTool() const override; public: - bool useIntegratedAs() const override { return true; } + bool initGCCInstallation(const llvm::Triple &Triple, + const llvm::opt::ArgList &Args); bool isBareMetal() const override { return true; } bool isCrossCompiling() const override { return true; } bool HasNativeLLVMSupport() const override { return true; } @@ -48,9 +50,15 @@ public: StringRef getOSLibName() const override { return "baremetal"; } + UnwindTableLevel + getDefaultUnwindTableLevel(const llvm::opt::ArgList &Args) const 
override { + return UnwindTableLevel::None; + } + RuntimeLibType GetDefaultRuntimeLibType() const override { return ToolChain::RLT_CompilerRT; } + CXXStdlibType GetDefaultCXXStdlibType() const override { return ToolChain::CST_Libcxx; } @@ -67,6 +75,9 @@ public: void AddClangCXXStdlibIncludeArgs( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; + void + addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; std::string computeSysRoot() const override; SanitizerMask getSupportedSanitizers() const override; @@ -80,6 +91,8 @@ private: std::string SysRoot; + bool IsGCCInstallationValid; + SmallVector MultilibMacroDefines; }; @@ -104,7 +117,7 @@ public: class LLVM_LIBRARY_VISIBILITY Linker final : public Tool { public: - Linker(const ToolChain &TC) : Tool("baremetal::Linker", "ld.lld", TC) {} + Linker(const ToolChain &TC) : Tool("baremetal::Linker", "linker", TC) {} bool isLinkJob() const override { return true; } bool hasIntegratedCPP() const override { return false; } void ConstructJob(Compilation &C, const JobAction &JA, diff --git a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1/.keep b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1/.keep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/.keep b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/.keep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/bin/aarch64-none-elf-ld b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/bin/aarch64-none-elf-ld new file mode 
100755 index 000000000000..b23e55619b2f --- /dev/null +++ b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/bin/aarch64-none-elf-ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtend.o b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtend.o new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crt0.o b/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crt0.o new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crtbegin.o b/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crtbegin.o new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crtend.o b/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crtend.o new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/bin/aarch64-none-elf-ld b/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/bin/aarch64-none-elf-ld new file mode 100755 index 000000000000..b23e55619b2f --- /dev/null +++ b/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/bin/aarch64-none-elf-ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/8.2.1/.keep b/clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/8.2.1/.keep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git 
a/clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/.keep b/clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/.keep new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/crt0.o b/clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/crt0.o new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_arm_gcc_tree/bin/armv6m-none-eabi-ld b/clang/test/Driver/Inputs/basic_arm_gcc_tree/bin/armv6m-none-eabi-ld new file mode 100755 index 000000000000..b23e55619b2f --- /dev/null +++ b/clang/test/Driver/Inputs/basic_arm_gcc_tree/bin/armv6m-none-eabi-ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/clang/test/Driver/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o b/clang/test/Driver/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtend.o b/clang/test/Driver/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtend.o new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crt0.o b/clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crt0.o new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crtbegin.o b/clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crtbegin.o new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crtend.o b/clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crtend.o new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/clang/test/Driver/Inputs/basic_arm_nogcc_tree/bin/armv6m-none-eabi-ld 
b/clang/test/Driver/Inputs/basic_arm_nogcc_tree/bin/armv6m-none-eabi-ld new file mode 100755 index 000000000000..b23e55619b2f --- /dev/null +++ b/clang/test/Driver/Inputs/basic_arm_nogcc_tree/bin/armv6m-none-eabi-ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/clang/test/Driver/aarch64-gnutools.c b/clang/test/Driver/aarch64-gnutools.c new file mode 100644 index 000000000000..0214639ed380 --- /dev/null +++ b/clang/test/Driver/aarch64-gnutools.c @@ -0,0 +1,4 @@ +// RUN: %clang --target=aarch64-none-elf --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree -fno-integrated-as %s -### -c \ +// RUN: 2>&1 | FileCheck %s + +// CHECK: "{{.*}}as{{(.exe)?}}" diff --git a/clang/test/Driver/aarch64-toolchain-extra.c b/clang/test/Driver/aarch64-toolchain-extra.c new file mode 100644 index 000000000000..2610e962bd69 --- /dev/null +++ b/clang/test/Driver/aarch64-toolchain-extra.c @@ -0,0 +1,28 @@ +// A basic clang -cc1 command-line, and simple environment check. + +// The tests here are similar to those in aarch64-toolchain.c, however +// these tests need to create symlinks to test directory trees in order to +// set up the environment and therefore shell support is required. +// REQUIRES: shell +// UNSUPPORTED: system-windows + +// If there is no GCC install detected then the driver searches for executables +// and runtime starting from the directory tree above the driver itself. +// The test below checks that the driver correctly finds the linker and +// runtime if and only if they exist. 
+// +// RUN: rm -rf %t +// RUN: mkdir -p %t/aarch64-nogcc/bin +// RUN: ln -s %clang %t/aarch64-nogcc/bin/clang +// RUN: ln -s %S/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf %t/aarch64-nogcc/aarch64-none-elf +// RUN: %t/aarch64-nogcc/bin/clang %s -### -no-canonical-prefixes \ +// RUN: --gcc-toolchain=%t/aarch64-nogcc/invalid \ +// RUN: --target=aarch64-none-elf --rtlib=libgcc -fuse-ld=ld 2>&1 \ +// RUN: | FileCheck -check-prefix=C-ARM-BAREMETAL-NOGCC %s + +// RUN: %t/aarch64-nogcc/bin/clang %s -### -no-canonical-prefixes \ +// RUN: --sysroot=%t/aarch64-nogcc/bin/../aarch64-none-elf \ +// RUN: --target=aarch64-none-elf --rtlib=libgcc -fuse-ld=ld 2>&1 \ +// RUN: | FileCheck -check-prefix=C-ARM-BAREMETAL-NOGCC %s + +// C-ARM-BAREMETAL-NOGCC: "-internal-isystem" "{{.*}}/aarch64-nogcc/bin/../aarch64-none-elf/include" diff --git a/clang/test/Driver/aarch64-toolchain.c b/clang/test/Driver/aarch64-toolchain.c new file mode 100644 index 000000000000..7f2c01d928e4 --- /dev/null +++ b/clang/test/Driver/aarch64-toolchain.c @@ -0,0 +1,61 @@ +// UNSUPPORTED: system-windows + +// RUN: %clang -### %s -fuse-ld= \ +// RUN: --target=aarch64-none-elf --rtlib=libgcc \ +// RUN: --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree \ +// RUN: --sysroot=%S/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=C-AARCH64-BAREMETAL %s + +// C-AARCH64-BAREMETAL: "-cc1" "-triple" "aarch64-unknown-none-elf" +// C-AARCH64-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" +// C-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" + +// RUN: %clang -### %s -fuse-ld= \ +// RUN: --target=aarch64-none-elf --rtlib=libgcc \ +// RUN: --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree \ +// RUN: --sysroot= 2>&1 \ +// RUN: | FileCheck -check-prefix=C-AARCH64-BAREMETAL-NOSYSROOT %s + +// C-AARCH64-BAREMETAL-NOSYSROOT: "-cc1" "-triple" "aarch64-unknown-none-elf" +// C-AARCH64-BAREMETAL-NOSYSROOT: 
"-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include" + +// RUN: %clangxx -### %s -fuse-ld= \ +// RUN: --target=aarch64-none-elf -stdlib=libstdc++ --rtlib=libgcc \ +// RUN: --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree \ +// RUN: --sysroot=%S/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CXX-AARCH64-BAREMETAL %s + +// CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1/aarch64-none-elf" +// CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1/backward" +// CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1" +// CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" + +// RUN: %clangxx -### %s -fuse-ld= \ +// RUN: --target=aarch64-none-elf -stdlib=libstdc++ --rtlib=libgcc \ +// RUN: --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree \ +// RUN: --sysroot= 2>&1 \ +// RUN: | FileCheck -check-prefix=CXX-AARCH64-BAREMETAL-NOSYSROOT %s + +// CXX-AARCH64-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include/c++/8.2.1/aarch64-none-elf" +// CXX-AARCH64-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include/c++/8.2.1/backward" +// CXX-AARCH64-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include/c++/8.2.1" +// CXX-AARCH64-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include" + +// RUN: %clangxx -### %s -fuse-ld= \ +// RUN: --target=aarch64-none-elf 
-stdlib=libc++ --rtlib=libgcc \ +// RUN: --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree \ +// RUN: --sysroot=%S/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf 2>&1 \ +// RUN: | FileCheck -check-prefix=CXX-AARCH64-BAREMETAL-LIBCXX %s + +// CXX-AARCH64-BAREMETAL-LIBCXX: "-isysroot" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" +// CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/v1" +// CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" + +// RUN: %clangxx -### %s -fuse-ld= \ +// RUN: --target=aarch64-none-elf -stdlib=libc++ --rtlib=libgcc \ +// RUN: --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree \ +// RUN: --sysroot= 2>&1 \ +// RUN: | FileCheck -check-prefix=CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX %s + +// CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include/c++/v1" +// CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include" diff --git a/clang/test/Driver/arm-gnutools.c b/clang/test/Driver/arm-gnutools.c new file mode 100644 index 000000000000..6e107f19dabc --- /dev/null +++ b/clang/test/Driver/arm-gnutools.c @@ -0,0 +1,6 @@ +// check that gnu assembler is invoked with arm baremetal as well + +// RUN: %clang --target=armv6m-none-eabi --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree -fno-integrated-as %s -### -c \ +// RUN: 2>&1 | FileCheck %s + +// CHECK: "{{.*}}as{{(.exe)?}}" diff --git a/clang/test/Driver/arm-toolchain-extra.c b/clang/test/Driver/arm-toolchain-extra.c new file mode 100644 index 000000000000..114de0a8154a --- /dev/null +++ b/clang/test/Driver/arm-toolchain-extra.c @@ -0,0 +1,29 @@ +// A basic clang -cc1 command-line, and simple environment check. 
+ +// The tests here are similar to those in arm-toolchain.c, however +// these tests need to create symlinks to test directory trees in order to +// set up the environment and therefore shell support is required. +// REQUIRES: shell +// UNSUPPORTED: system-windows + +// If there is no GCC install detected then the driver searches for executables +// and runtime starting from the directory tree above the driver itself. +// The test below checks that the driver correctly finds the linker and +// runtime if and only if they exist. +// +// RUN: rm -rf %t +// RUN: mkdir -p %t/arm-nogcc/bin +// RUN: ln -s %clang %t/arm-nogcc/bin/clang +// RUN: ln -s %S/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi %t/arm-nogcc/armv6m-none-eabi +// RUN: %t/arm-nogcc/bin/clang %s -### -no-canonical-prefixes \ +// RUN: --gcc-toolchain=%t/arm-nogcc/invalid \ +// RUN: --target=armv6m-none-eabi --rtlib=libgcc -fuse-ld=ld 2>&1 \ +// RUN: | FileCheck -check-prefix=C-ARM-BAREMETAL-NOGCC %s + +// RUN: %t/arm-nogcc/bin/clang %s -### -no-canonical-prefixes \ +// RUN: --sysroot=%t/arm-nogcc/bin/../armv6m-none-eabi \ +// RUN: --target=armv6m-none-eabi --rtlib=libgcc -fuse-ld=ld 2>&1 \ +// RUN: | FileCheck -check-prefix=C-ARM-BAREMETAL-NOGCC %s + +// C-ARM-BAREMETAL-NOGCC: "-internal-isystem" "{{.*}}/arm-nogcc/bin/../armv6m-none-eabi/include" + diff --git a/clang/test/Driver/arm-toolchain.c b/clang/test/Driver/arm-toolchain.c new file mode 100644 index 000000000000..2e38461fb7a3 --- /dev/null +++ b/clang/test/Driver/arm-toolchain.c @@ -0,0 +1,62 @@ +// UNSUPPORTED: system-windows + +// RUN: %clang -### %s -fuse-ld= \ +// RUN: --target=armv6m-none-eabi --rtlib=libgcc \ +// RUN: --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree \ +// RUN: --sysroot=%S/Inputs/basic_arm_gcc_tree/armv6m-none-eabi 2>&1 \ +// RUN: | FileCheck -check-prefix=C-ARM-BAREMETAL %s + +// C-ARM-BAREMETAL: "-cc1" "-triple" "thumbv6m-unknown-none-eabi" +// C-ARM-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi" +// 
C-ARM-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include" + +// RUN: %clang -### %s -fuse-ld= \ +// RUN: --target=armv6m-none-eabi --rtlib=libgcc \ +// RUN: --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree \ +// RUN: --sysroot= 2>&1 \ +// RUN: | FileCheck -check-prefix=C-ARM-BAREMETAL-NOSYSROOT %s + +// C-ARM-BAREMETAL-NOSYSROOT: "-cc1" "-triple" "thumbv6m-unknown-none-eabi" +// C-ARM-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include" + +// RUN: %clangxx -### %s -fuse-ld= \ +// RUN: --target=armv6m-none-eabi -stdlib=libstdc++ --rtlib=libgcc \ +// RUN: --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree \ +// RUN: --sysroot=%S/Inputs/basic_arm_gcc_tree/armv6m-none-eabi 2>&1 \ +// RUN: | FileCheck -check-prefix=CXX-ARM-BAREMETAL %s + +// CXX-ARM-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi" +// CXX-ARM-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/8.2.1/armv6m-none-eabi" +// CXX-ARM-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/8.2.1/backward" +// CXX-ARM-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/8.2.1" +// CXX-ARM-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include" + +// RUN: %clangxx -### %s -fuse-ld= \ +// RUN: --target=armv6m-none-eabi -stdlib=libstdc++ --rtlib=libgcc \ +// RUN: --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree \ +// RUN: --sysroot= 2>&1 \ +// RUN: | FileCheck -check-prefix=CXX-ARM-BAREMETAL-NOSYSROOT %s + +// CXX-ARM-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include/c++/8.2.1/armv6m-none-eabi" +// CXX-ARM-BAREMETAL-NOSYSROOT: "-internal-isystem" 
"{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include/c++/8.2.1/backward" +// CXX-ARM-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include/c++/8.2.1" +// CXX-ARM-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include" + +// RUN: %clangxx -### %s -fuse-ld= \ +// RUN: --target=armv6m-none-eabi -stdlib=libc++ --rtlib=libgcc \ +// RUN: --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree \ +// RUN: --sysroot=%S/Inputs/basic_arm_gcc_tree/armv6m-none-eabi 2>&1 \ +// RUN: | FileCheck -check-prefix=CXX-ARM-BAREMETAL-LIBCXX %s + +// CXX-ARM-BAREMETAL-LIBCXX: "-isysroot" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi" +// CXX-ARM-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/v1" +// CXX-ARM-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include" + +// RUN: %clangxx -### %s -fuse-ld= \ +// RUN: --target=armv6m-none-eabi -stdlib=libc++ --rtlib=libgcc \ +// RUN: --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree \ +// RUN: --sysroot= 2>&1 \ +// RUN: | FileCheck -check-prefix=CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX %s + +// CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include/c++/v1" +// CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include diff --git a/clang/test/Driver/baremetal.cpp b/clang/test/Driver/baremetal.cpp index a80aa9b43711..2ac83402dda3 100644 --- a/clang/test/Driver/baremetal.cpp +++ b/clang/test/Driver/baremetal.cpp @@ -196,6 +196,22 @@ // CHECK-AARCH64-NO-HOST-INC-SAME: "-internal-isystem" "[[RESOURCE]]{{[/\\]+}}include" // CHECK-AARCH64-NO-HOST-INC-SAME: 
"-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include" +// RUN: %clang -no-canonical-prefixes %s -### --target=riscv32-unknown-elf 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-RISCV32-NO-HOST-INC %s +// CHECK-RISCV32-NO-HOST-INC: InstalledDir: [[INSTALLEDDIR:.+]] +// CHECK-RISCV32-NO-HOST-INC: "-resource-dir" "[[RESOURCE:[^"]+]]" +// CHECK-RISCV32-NO-HOST-INC-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include{{[/\\]+}}c++{{[/\\]+}}v1" +// CHECK-RISCV32-NO-HOST-INC-SAME: "-internal-isystem" "[[RESOURCE]]{{[/\\]+}}include" +// CHECK-RISCV32-NO-HOST-INC-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include" + +// RUN: %clang -no-canonical-prefixes %s -### --target=riscv64-unknown-elf 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-RISCV64-NO-HOST-INC %s +// CHECK-RISCV64-NO-HOST-INC: InstalledDir: [[INSTALLEDDIR:.+]] +// CHECK-RISCV64-NO-HOST-INC: "-resource-dir" "[[RESOURCE:[^"]+]]" +// CHECK-RISCV64-NO-HOST-INC-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include{{[/\\]+}}c++{{[/\\]+}}v1" +// CHECK-RISCV64-NO-HOST-INC-SAME: "-internal-isystem" "[[RESOURCE]]{{[/\\]+}}include" +// CHECK-RISCV64-NO-HOST-INC-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include" + // RUN: %clang %s -### --target=riscv64-unknown-elf -o %t.out -L some/directory/user/asked/for \ // RUN: --sysroot=%S/Inputs/basic_riscv64_tree/riscv64-unknown-elf 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/Driver/check-no-multlib-warning.c b/clang/test/Driver/check-no-multlib-warning.c new file mode 100644 index 000000000000..9a0d7cee450a --- /dev/null +++ b/clang/test/Driver/check-no-multlib-warning.c @@ -0,0 +1,10 @@ +// UNSUPPORTED: system-windows + + +// RUN: %clang 
--target=armv6m-none-eabi --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree -### 2>&1 | FileCheck %s +// RUN: %clang --target=aarch64-none-elf --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree -### 2>&1 | FileCheck %s +// RUN: %clang --target=riscv32-unknown-elf --gcc-toolchain=%S/Inputs/basic_riscv32_tree -### 2>&1 | FileCheck --check-prefix=NOCHECK %s +// RUN: %clang --target=riscv64-unknown-elf --gcc-toolchain=%S/Inputs/basic_riscv64_tree -### 2>&1 | FileCheck --check-prefix=NOCHECK %s + +// CHECK: warning: no multilib structure encoded for Arm, Aarch64 and PPC targets +// NOCHECK-NOT: warning: no multilib structure encoded for Arm, Aarch64 and PPC targets -- cgit v1.2.3 From e07b1b26c38ba48af247b370a29eeb9879cefc97 Mon Sep 17 00:00:00 2001 From: Rajveer Singh Bharadwaj Date: Wed, 18 Jun 2025 12:59:27 +0530 Subject: [DAG] Implement SDPatternMatch `m_Abs()` matcher (#144512) --- llvm/include/llvm/CodeGen/SDPatternMatch.h | 4 ++++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 +++--------- llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp | 4 ++++ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 2e3807a2dfff..d413227c4d96 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -938,6 +938,10 @@ template inline UnaryOpc_match m_Trunc(const Opnd &Op) { return UnaryOpc_match(ISD::TRUNCATE, Op); } +template inline UnaryOpc_match m_Abs(const Opnd &Op) { + return UnaryOpc_match(ISD::ABS, Op); +} + /// Match a zext or identity /// Allows to peek through optional extensions template inline auto m_ZExtOrSelf(const Opnd &Op) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d14615dcbc5e..934199e414c7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11260,19 +11260,13 @@ SDValue 
DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) { if (N->getOpcode() == ISD::TRUNCATE) N = N->getOperand(0).getNode(); - if (N->getOpcode() != ISD::ABS) - return SDValue(); - EVT VT = N->getValueType(0); - SDValue AbsOp1 = N->getOperand(0); SDValue Op0, Op1; - if (AbsOp1.getOpcode() != ISD::SUB) + if (!sd_match(N, m_Abs(m_Sub(m_Value(Op0), m_Value(Op1))))) return SDValue(); - Op0 = AbsOp1.getOperand(0); - Op1 = AbsOp1.getOperand(1); - + SDValue AbsOp0 = N->getOperand(0); unsigned Opc0 = Op0.getOpcode(); // Check if the operands of the sub are (zero|sign)-extended. @@ -11282,7 +11276,7 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) { Opc0 != ISD::SIGN_EXTEND_INREG)) { // fold (abs (sub nsw x, y)) -> abds(x, y) // Don't fold this for unsupported types as we lose the NSW handling. - if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) && + if (AbsOp0->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT)) { SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1); return DAG.getZExtOrTrunc(ABD, DL, SrcVT); diff --git a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp index 1b590aa33bd8..2162588aadfd 100644 --- a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp @@ -388,6 +388,8 @@ TEST_F(SelectionDAGPatternMatchTest, matchUnaryOp) { SDValue SExt = DAG->getNode(ISD::SIGN_EXTEND, DL, Int64VT, Op0); SDValue Trunc = DAG->getNode(ISD::TRUNCATE, DL, Int32VT, Op1); + SDValue Abs = DAG->getNode(ISD::ABS, DL, Int32VT, Op0); + SDValue Sub = DAG->getNode(ISD::SUB, DL, Int32VT, Trunc, Op0); SDValue Neg = DAG->getNegative(Op0, DL, Int32VT); SDValue Not = DAG->getNOT(DL, Op0, Int32VT); @@ -417,6 +419,8 @@ TEST_F(SelectionDAGPatternMatchTest, matchUnaryOp) { EXPECT_FALSE(sd_match(ZExt, m_SExtLike(m_Value()))); EXPECT_TRUE(sd_match(Trunc, m_Trunc(m_Specific(Op1)))); + 
EXPECT_TRUE(sd_match(Abs, m_Abs(m_Specific(Op0)))); + EXPECT_TRUE(sd_match(Neg, m_Neg(m_Value()))); EXPECT_TRUE(sd_match(Not, m_Not(m_Value()))); EXPECT_FALSE(sd_match(ZExt, m_Neg(m_Value()))); -- cgit v1.2.3 From a38932ac3c0a16226e3dde7f1532f117959c58df Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Wed, 18 Jun 2025 09:49:32 +0200 Subject: Revert "[GlobalISel] prevent G_UNMERGE_VALUES for vectors with different elements" (#144650) Reverts llvm/llvm-project#133335 --- .../GlobalISel/LegalizationArtifactCombiner.h | 5 +- .../CodeGen/AMDGPU/GlobalISel/insertelement.ll | 55 ---------------------- 2 files changed, 1 insertion(+), 59 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 8f560c42082f..22f6a5fde546 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -997,7 +997,6 @@ public: // Recognize UnmergeSrc that can be unmerged to DstTy directly. // Types have to be either both vector or both non-vector types. - // In case of vector types, the scalar elements need to match. // Merge-like opcodes are combined one at the time. First one creates new // unmerge, following should use the same unmerge (builder performs CSE). 
// @@ -1006,9 +1005,7 @@ public: // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3 // // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc - if (((!DstTy.isVector() && !UnmergeSrcTy.isVector()) || - (DstTy.isVector() && UnmergeSrcTy.isVector() && - DstTy.getScalarType() == UnmergeSrcTy.getScalarType())) && + if ((DstTy.isVector() == UnmergeSrcTy.isVector()) && (Elt0UnmergeIdx % NumMIElts == 0) && getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) { if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index 132a89478c5f..8134eb3ca2af 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -6506,58 +6506,3 @@ entry: %insert = insertelement <5 x double> %vec, double %val, i32 %idx ret <5 x double> %insert } - -; Found by fuzzer, reduced with llvm-reduce. -define amdgpu_kernel void @insert_very_small_from_very_large(<32 x i16> %L3, ptr %ptr) { -; GPRIDX-LABEL: insert_very_small_from_very_large: -; GPRIDX: ; %bb.0: ; %bb -; GPRIDX-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0 -; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40 -; GPRIDX-NEXT: s_waitcnt lgkmcnt(0) -; GPRIDX-NEXT: s_lshr_b32 s2, s12, 1 -; GPRIDX-NEXT: s_and_b32 s2, s2, 1 -; GPRIDX-NEXT: s_lshl_b32 s2, s2, 1 -; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 -; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 -; GPRIDX-NEXT: flat_store_byte v[0:1], v2 -; GPRIDX-NEXT: s_endpgm -; -; GFX10-LABEL: insert_very_small_from_very_large: -; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0 -; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_lshr_b32 s2, s12, 1 -; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: s_and_b32 s2, s2, 1 -; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: 
s_lshl_b32 s2, s2, 1 -; GFX10-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-NEXT: flat_store_byte v[0:1], v2 -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: insert_very_small_from_very_large: -; GFX11: ; %bb.0: ; %bb -; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: s_load_b512 s[8:23], s[4:5], 0x0 -; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x40 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_lshr_b32 s2, s8, 1 -; GFX11-NEXT: v_mov_b32_e32 v0, s0 -; GFX11-NEXT: s_and_b32 s2, s2, 1 -; GFX11-NEXT: v_mov_b32_e32 v1, s1 -; GFX11-NEXT: s_lshl_b32 s2, s2, 1 -; GFX11-NEXT: v_mov_b32_e32 v2, s2 -; GFX11-NEXT: flat_store_b8 v[0:1], v2 -; GFX11-NEXT: s_endpgm -bb: - %a = bitcast <32 x i16> %L3 to i512 - %b = trunc i512 %a to i8 - %c = trunc i8 %b to i2 - %d = bitcast i2 %c to <2 x i1> - %insert = insertelement <2 x i1> %d, i1 false, i32 0 - store <2 x i1> %insert, ptr %ptr, align 1 - ret void -} -- cgit v1.2.3 From 49df87e71b73b230ecb21335dcb5f5390eebdab3 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Wed, 18 Jun 2025 08:57:51 +0100 Subject: [libc][printf] Fix out-of-range shift in float320 printf (#144542) If you enable `LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_FLOAT320` and use a `%f` style printf format directive to print a nonzero number too small to show up in the output digits, e.g. `printf("%.2f", 0.001)`, then the output would be intermittently incorrect, because `DyadicFloat::as_mantissa_type_rounded` would try to shift the 320-bit mantissa right by more than 320 bits, invoking the 'undefined behavior' clause commented in the `shift()` function in `big_int.h`. There were already tests in the libc test suite exercising this case, e.g. the subnormal tests in `LlvmLibcSPrintfTest.FloatDecimalConv` use `%f` at the default precision of 6 decimal places on tiny numbers such as 2^-1027. But because the behavior is undefined, they don't visibly fail all the time, and in all previous test runs we'd tried with USE_FLOAT320, they had got lucky. 
The fix is simply to detect an out-of-range right shift before doing it, and instead just set the output value to zero. --- libc/src/__support/FPUtil/dyadic_float.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h index 6c3e1520e5af..4c77d3c541cd 100644 --- a/libc/src/__support/FPUtil/dyadic_float.h +++ b/libc/src/__support/FPUtil/dyadic_float.h @@ -465,7 +465,10 @@ template struct DyadicFloat { // exponents coming in to this function _shouldn't_ be that large). The // result should always end up as a positive size_t. size_t shift = -static_cast(exponent); - new_mant >>= shift; + if (shift >= Bits) + new_mant = 0; + else + new_mant >>= shift; round_dir = rounding_direction(mantissa, shift, sign); if (round_dir > 0) ++new_mant; -- cgit v1.2.3 From ba40a7bc2e65be86ac23c9cf6038ac085dda77eb Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Wed, 18 Jun 2025 16:03:20 +0800 Subject: [LoopVectorize] Vectorize fixed-order recurrence with vscale x 1. (#142772) When the fixed-order recurrence phi is live-out from the loop, the vectorizer uses VPInstruction::ExtractPenultimateElement to extract the penultimate element from the recurrence vector. However, this is not feasible when the VF is vscale x 1, since vscale could be 1, making the vector contain only one element. This patch changes the behavior for vscale x 1 by extracting the last element from the vector produced by splicing the recurrence phi and the previous value. This ensures we can still determine the correct live-out value of the recurrence phi. 
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 22 ++++-- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 3 - .../RISCV/first-order-recurrence-scalable-vf1.ll | 57 ++++++++------ .../first-order-recurrence-scalable-vf1.ll | 90 ++++++++++++++++++++-- 4 files changed, 130 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f887b34e7642..16d48b06dce4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6163,11 +6163,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, // First-order recurrences are replaced by vector shuffles inside the loop. if (VF.isVector() && Legal->isFixedOrderRecurrence(Phi)) { - // For , if vscale = 1 we are unable to extract the - // penultimate value of the recurrence. - // TODO: Consider vscale_range info. - if (VF.isScalable() && VF.getKnownMinValue() == 1) - return InstructionCost::getInvalid(); SmallVector Mask(VF.getKnownMinValue()); std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1); return TTI.getShuffleCost(TargetTransformInfo::SK_Splice, @@ -8556,13 +8551,17 @@ addUsersInExitBlocks(VPlan &Plan, /// users in the original exit block using the VPIRInstruction wrapping to the /// LCSSA phi. 
static void addExitUsersForFirstOrderRecurrences( - VPlan &Plan, SetVector &ExitUsersToFix) { + VPlan &Plan, SetVector &ExitUsersToFix, VFRange &Range) { VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); auto *ScalarPHVPBB = Plan.getScalarPreheader(); auto *MiddleVPBB = Plan.getMiddleBlock(); VPBuilder ScalarPHBuilder(ScalarPHVPBB); VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi()); + auto IsScalableOne = [](ElementCount VF) -> bool { + return VF == ElementCount::getScalable(1); + }; + for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock()->phis()) { auto *FOR = dyn_cast(&HeaderPhi); if (!FOR) @@ -8644,6 +8643,15 @@ static void addExitUsersForFirstOrderRecurrences( for (VPIRInstruction *ExitIRI : ExitUsersToFix) { if (ExitIRI->getOperand(0) != FOR) continue; + // For VF vscale x 1, if vscale = 1, we are unable to extract the + // penultimate value of the recurrence. Instead, we rely on function + // addUsersInExitBlocks to extract the last element from the result of + // VPInstruction::FirstOrderRecurrenceSplice by leaving the user of the + // recurrence phi in ExitUsersToFix. + // TODO: Consider vscale_range info and UF. 
+ if (LoopVectorizationPlanner::getDecisionAndClampRange(IsScalableOne, + Range)) + return; VPValue *PenultimateElement = MiddleBuilder.createNaryOp( VPInstruction::ExtractPenultimateElement, {FOR->getBackedgeValue()}, {}, "vector.recur.extract.for.phi"); @@ -8858,7 +8866,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues); SetVector ExitUsersToFix = collectUsersInLatchExitBlock(*Plan); - addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix); + addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix, Range); addUsersInExitBlocks(*Plan, ExitUsersToFix); // --------------------------------------------------------------------------- diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1ed0b97849a8..f3b5c8cfa988 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3680,9 +3680,6 @@ VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF, if (VF.isScalar()) return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind); - if (VF == ElementCount::getScalable(1)) - return InstructionCost::getInvalid(); - return 0; } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll index d34098545716..e3f9540ff3df 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll @@ -1,56 +1,63 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -p loop-vectorize -S %s | FileCheck %s +; RUN: opt -p loop-vectorize -scalable-vectorization=on -S %s | FileCheck %s target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" target triple = "riscv64-unknown-linux-gnu" -; Make sure we do not 
pick as VF for a loop with a -; first-order recurrence. define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 { ; CHECK-LABEL: define i64 @pr97452_scalable_vf1_for( ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i64 0, i32 [[TMP4]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 -; CHECK-NEXT: [[WIDE_LOAD1]] = load <4 x i64>, ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> [[WIDE_LOAD1]], <4 x i32> +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ 
[[WIDE_LOAD:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.vector.splice.nxv1i64( [[VECTOR_RECUR]], [[WIDE_LOAD]], i32 -1) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 4 -; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP9]], align 8 -; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP7]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: store [[TMP7]], ptr [[TMP9]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[WIDE_LOAD1]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[WIDE_LOAD1]], i32 3 -; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP12:%.*]] = sub i32 [[TMP11]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement [[TMP7]], i32 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP15:%.*]] = sub i32 [[TMP14]], 1 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement [[WIDE_LOAD]], i32 [[TMP15]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 23, [[N_VEC]] +; 
CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L]] = load i64, ptr [[GEP]], align 8 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L]] = load i64, ptr [[GEP_SRC]], align 8 ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store i64 [[FOR]], ptr [[GEP_DST]], align 8 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 22 ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll index 98a942a50107..b20d59bd5760 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll @@ -8,17 +8,51 @@ define i64 @pr97452_scalable_vf1_for_live_out(ptr %src) { ; CHECK-LABEL: 
define i64 @pr97452_scalable_vf1_for_live_out( ; CHECK-SAME: ptr [[SRC:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i64 0, i32 [[TMP4]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.vector.splice.nxv1i64( [[VECTOR_RECUR]], [[WIDE_LOAD]], i32 -1) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement [[TMP7]], i32 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 1 +; CHECK-NEXT: 
[[VECTOR_RECUR_EXTRACT:%.*]] = extractelement [[WIDE_LOAD]], i32 [[TMP13]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 23, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[L:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 22 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -43,17 +77,51 @@ define void @pr97452_scalable_vf1_for_no_live_out(ptr %src, ptr noalias %dst) { ; CHECK-LABEL: define void @pr97452_scalable_vf1_for_no_live_out( ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP1:%.*]] 
= call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i64 0, i32 [[TMP4]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.vector.splice.nxv1i64( [[VECTOR_RECUR]], [[WIDE_LOAD]], i32 -1) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: store [[TMP7]], ptr [[TMP9]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP12:%.*]] = sub i32 [[TMP11]], 1 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement [[WIDE_LOAD]], i32 [[TMP12]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 23, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[L:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store i64 [[FOR]], ptr [[GEP_DST]], align 8 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 22 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -74,3 +142,11 @@ loop: exit: ret void } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +;. -- cgit v1.2.3 From ca29c632f06fc0e02ebbbb9fbdc73e3abd6b096b Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Wed, 18 Jun 2025 16:11:18 +0800 Subject: [RISCV] Support non-power-of-2 types when expanding memcmp We can convert non-power-of-2 types into extended value types and then they will be widen. 
Reviewers: lukel97 Reviewed By: lukel97 Pull Request: https://github.com/llvm/llvm-project/pull/114971 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 23 +- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 21 +- llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll | 41 ++ llvm/test/CodeGen/RISCV/memcmp-optsize.ll | 800 +++++++++------------ llvm/test/CodeGen/RISCV/memcmp.ll | 800 +++++++++------------ 5 files changed, 709 insertions(+), 976 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index e670567bd184..b8ef221742a2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16190,10 +16190,6 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, return SDValue(); unsigned OpSize = OpVT.getSizeInBits(); - // TODO: Support non-power-of-2 types. - if (!isPowerOf2_32(OpSize)) - return SDValue(); - // The size should be larger than XLen and smaller than the maximum vector // size. if (OpSize <= Subtarget.getXLen() || @@ -16214,14 +16210,25 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, Attribute::NoImplicitFloat)) return SDValue(); + // Bail out for non-byte-sized types. 
+ if (!OpVT.isByteSized()) + return SDValue(); + unsigned VecSize = OpSize / 8; - EVT VecVT = MVT::getVectorVT(MVT::i8, VecSize); - EVT CmpVT = MVT::getVectorVT(MVT::i1, VecSize); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize); + EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize); SDValue VecX = DAG.getBitcast(VecVT, X); SDValue VecY = DAG.getBitcast(VecVT, Y); - SDValue Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE); - return DAG.getSetCC(DL, VT, DAG.getNode(ISD::VECREDUCE_OR, DL, XLenVT, Cmp), + SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT); + SDValue VL = DAG.getConstant(VecSize, DL, XLenVT); + + SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY, + DAG.getCondCode(ISD::SETNE), Mask, VL); + return DAG.getSetCC(DL, VT, + DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT, + DAG.getConstant(0, DL, XLenVT), Cmp, Mask, + VL), DAG.getConstant(0, DL, XLenVT), CC); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index aadda2ce8552..46e30ce4c18a 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2985,20 +2985,13 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { } if (IsZeroCmp && ST->hasVInstructions()) { - unsigned RealMinVLen = ST->getRealMinVLen(); - // Support Fractional LMULs if the lengths are larger than XLen. - // TODO: Support non-power-of-2 types. 
- for (unsigned FLMUL = 8; FLMUL >= 2; FLMUL /= 2) { - unsigned Len = RealMinVLen / FLMUL; - if (Len > ST->getXLen()) - Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8); - } - for (unsigned LMUL = 1; LMUL <= ST->getMaxLMULForFixedLengthVectors(); - LMUL *= 2) { - unsigned Len = RealMinVLen * LMUL; - if (Len > ST->getXLen()) - Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8); - } + unsigned VLenB = ST->getRealMinVLen() / 8; + // The minimum size should be `XLen / 8 + 1`, and the maximum size should be + // `VLenB * MaxLMUL` so that it fits in a single register group. + unsigned MinSize = ST->getXLen() / 8 + 1; + unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors(); + for (unsigned Size = MinSize; Size <= MaxSize; Size++) + Options.LoadSizes.insert(Options.LoadSizes.begin(), Size); } return Options; } diff --git a/llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll b/llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll new file mode 100644 index 000000000000..fca6238548aa --- /dev/null +++ b/llvm/test/CodeGen/RISCV/icmp-non-byte-sized.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+v -O2 < %s | FileCheck %s --check-prefix=CHECK-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -O2 < %s | FileCheck %s --check-prefix=CHECK-RV64 + +define i1 @icmp_non_byte_type(ptr %p1, ptr %p2) nounwind { +; CHECK-RV32-LABEL: icmp_non_byte_type: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: lw a2, 0(a0) +; CHECK-RV32-NEXT: lw a3, 4(a0) +; CHECK-RV32-NEXT: lw a4, 8(a0) +; CHECK-RV32-NEXT: lw a0, 12(a0) +; CHECK-RV32-NEXT: lw a5, 12(a1) +; CHECK-RV32-NEXT: lw a6, 4(a1) +; CHECK-RV32-NEXT: lw a7, 8(a1) +; CHECK-RV32-NEXT: lw a1, 0(a1) +; CHECK-RV32-NEXT: xor a0, a0, a5 +; CHECK-RV32-NEXT: xor a3, a3, a6 +; CHECK-RV32-NEXT: xor a4, a4, a7 +; CHECK-RV32-NEXT: xor a1, a2, a1 +; CHECK-RV32-NEXT: or a0, a3, a0 +; CHECK-RV32-NEXT: or a1, a1, a4 +; CHECK-RV32-NEXT: or
a0, a1, a0 +; CHECK-RV32-NEXT: seqz a0, a0 +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: icmp_non_byte_type: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: ld a2, 0(a0) +; CHECK-RV64-NEXT: ld a0, 8(a0) +; CHECK-RV64-NEXT: ld a3, 8(a1) +; CHECK-RV64-NEXT: ld a1, 0(a1) +; CHECK-RV64-NEXT: xor a0, a0, a3 +; CHECK-RV64-NEXT: xor a1, a2, a1 +; CHECK-RV64-NEXT: or a0, a1, a0 +; CHECK-RV64-NEXT: seqz a0, a0 +; CHECK-RV64-NEXT: ret + %v1 = load i127, ptr %p1 + %v2 = load i127, ptr %p2 + %ret = icmp eq i127 %v1, %v2 + ret i1 %ret +} diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll index 3742383675b9..0d57e4201512 100644 --- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll +++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll @@ -517,17 +517,99 @@ define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-ALIGNED-RV64-V-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; -; CHECK-UNALIGNED-LABEL: bcmp_size_5: -; CHECK-UNALIGNED: # %bb.0: # %entry -; CHECK-UNALIGNED-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-NEXT: lbu a0, 4(a0) -; CHECK-UNALIGNED-NEXT: lw a3, 0(a1) -; CHECK-UNALIGNED-NEXT: lbu a1, 4(a1) -; CHECK-UNALIGNED-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-NEXT: snez a0, a0 -; CHECK-UNALIGNED-NEXT: ret +; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: lbu a0, 4(a0) +; 
CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw 
a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: ret +; +; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 5) ret i32 %bcmp @@ -614,17 +696,99 @@ define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-ALIGNED-RV64-V-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; -; CHECK-UNALIGNED-LABEL: bcmp_size_6: -; CHECK-UNALIGNED: # %bb.0: # %entry -; CHECK-UNALIGNED-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-NEXT: lhu a0, 4(a0) -; CHECK-UNALIGNED-NEXT: lw a3, 0(a1) -; CHECK-UNALIGNED-NEXT: lhu a1, 4(a1) -; CHECK-UNALIGNED-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-NEXT: snez a0, a0 -; CHECK-UNALIGNED-NEXT: ret +; 
CHECK-UNALIGNED-RV32-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret +; +; 
CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 6, e8, mf2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: ret +; +; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = 
call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 6) ret i32 %bcmp @@ -711,17 +875,99 @@ define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-ALIGNED-RV64-V-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; -; CHECK-UNALIGNED-LABEL: bcmp_size_7: -; CHECK-UNALIGNED: # %bb.0: # %entry -; CHECK-UNALIGNED-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-NEXT: lw a0, 3(a0) -; CHECK-UNALIGNED-NEXT: lw a3, 0(a1) -; CHECK-UNALIGNED-NEXT: lw a1, 3(a1) -; CHECK-UNALIGNED-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-NEXT: snez a0, a0 -; CHECK-UNALIGNED-NEXT: ret +; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: 
or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 7, e8, mf2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; 
CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: ret +; +; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 7) ret i32 %bcmp @@ -1069,33 +1315,21 @@ define i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_15: ; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a3, 4(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a4, 7(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a0, 11(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 0(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a6, 4(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 7(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a1, 11(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a2, a5 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a3, a3, a6 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a4, a4, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a3, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a2, a4 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 15, e8, m1, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 ; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 ; CHECK-UNALIGNED-RV32-V-NEXT: ret ; ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_15: ; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry -; 
CHECK-UNALIGNED-RV64-V-NEXT: ld a2, 0(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a0, 7(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a3, 0(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a1, 7(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: vsetivli zero, 15, e8, m1, ta, ma +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV64-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV64-V-NEXT: vcpop.m a0, v8 ; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 ; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: @@ -1477,57 +1711,21 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_31: ; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a3, 4(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a4, 8(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 12(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a6, 0(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 4(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t0, 8(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t1, 12(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t2, 15(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t3, 19(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t4, 23(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a0, 27(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a3, a3, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a5, a5, t1 -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 15(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t1, 19(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t5, 23(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a1, 27(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a2, a6 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a4, a4, t0 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a6, t3, t1 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a1, t2, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a7, t4, t5 -; CHECK-UNALIGNED-RV32-V-NEXT: or a4, a4, a7 -; 
CHECK-UNALIGNED-RV32-V-NEXT: or a1, a2, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a5, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a3, a6 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, a1, a4 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a1, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 31, e8, m2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v10, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v12, v8, v10 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v12 ; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 ; CHECK-UNALIGNED-RV32-V-NEXT: ret ; ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_31: ; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-V-NEXT: ld a2, 0(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a3, 8(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a4, 15(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a0, 23(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a5, 0(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a6, 8(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a7, 15(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a1, 23(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a5 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a3, a3, a6 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a4, a4, a7 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a3, a0 -; CHECK-UNALIGNED-RV64-V-NEXT: or a2, a2, a4 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: vsetivli zero, 31, e8, m2, ta, ma +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v10, (a1) +; CHECK-UNALIGNED-RV64-V-NEXT: vmsne.vv v12, v8, v10 +; CHECK-UNALIGNED-RV64-V-NEXT: vcpop.m a0, v12 ; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 ; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: @@ -1875,129 +2073,23 @@ define i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_63: ; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-V-NEXT: addi sp, sp, -48 -; 
CHECK-UNALIGNED-RV32-V-NEXT: sw s0, 44(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s1, 40(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s2, 36(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s3, 32(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s4, 28(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s6, 20(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s7, 16(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s8, 12(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s9, 8(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s10, 4(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 16(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a3, 20(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a4, 24(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 28(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a6, 0(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 4(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t0, 8(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t1, 12(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t2, 16(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t3, 20(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t4, 24(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t5, 28(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t6, 0(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s0, 4(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s1, 8(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s2, 12(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s3, 47(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s4, 51(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s5, 55(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s6, 59(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s7, 31(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 35(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 39(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a0, 43(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t1, t1, s2 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a5, a5, t5 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a7, a7, s0 -; 
CHECK-UNALIGNED-RV32-V-NEXT: lw t5, 31(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s0, 35(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s2, 39(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s10, 43(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a3, a3, t3 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t0, t0, s1 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a4, a4, t4 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t3, 47(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t4, 51(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s1, 55(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a1, 59(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a6, a6, t6 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a2, t2 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a0, a0, s10 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a1, s6, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t2, s8, s0 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t4, s4, t4 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t6, s9, s2 -; CHECK-UNALIGNED-RV32-V-NEXT: xor s0, s5, s1 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t5, s7, t5 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t3, s3, t3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a2, t3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a6, a6, t5 -; CHECK-UNALIGNED-RV32-V-NEXT: or a4, a4, s0 -; CHECK-UNALIGNED-RV32-V-NEXT: or t0, t0, t6 -; CHECK-UNALIGNED-RV32-V-NEXT: or a3, a3, t4 -; CHECK-UNALIGNED-RV32-V-NEXT: or a7, a7, t2 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, a5, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, t1, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a0, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, a7, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a3, t0, a4 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a6, a2 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a1, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a2, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: li a2, 63 +; CHECK-UNALIGNED-RV32-V-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v12, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v16, v8, v12 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v16 ; 
CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s0, 44(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s1, 40(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s2, 36(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s3, 32(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s4, 28(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s5, 24(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s6, 20(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s7, 16(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 12(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 8(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s10, 4(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: addi sp, sp, 48 ; CHECK-UNALIGNED-RV32-V-NEXT: ret ; ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_63: ; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-V-NEXT: ld a2, 0(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a3, 8(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a4, 16(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a5, 24(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a6, 0(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a7, 8(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t0, 16(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t1, 24(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t2, 31(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t3, 39(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t4, 47(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a0, 55(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a3, a3, a7 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a5, a5, t1 -; CHECK-UNALIGNED-RV64-V-NEXT: ld a7, 31(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t1, 39(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t5, 47(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a1, 55(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a6 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a4, a4, t0 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a6, t3, t1 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 -; 
CHECK-UNALIGNED-RV64-V-NEXT: xor a1, t2, a7 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a7, t4, t5 -; CHECK-UNALIGNED-RV64-V-NEXT: or a4, a4, a7 -; CHECK-UNALIGNED-RV64-V-NEXT: or a1, a2, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a5, a0 -; CHECK-UNALIGNED-RV64-V-NEXT: or a2, a3, a6 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-RV64-V-NEXT: or a1, a1, a4 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a1, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: li a2, 63 +; CHECK-UNALIGNED-RV64-V-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v12, (a1) +; CHECK-UNALIGNED-RV64-V-NEXT: vmsne.vv v16, v8, v12 +; CHECK-UNALIGNED-RV64-V-NEXT: vcpop.m a0, v16 ; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 ; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: @@ -2315,270 +2407,24 @@ define i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_127: ; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-V-NEXT: addi sp, sp, -96 -; CHECK-UNALIGNED-RV32-V-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s4, 72(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s11, 44(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 32(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: 
lw t5, 36(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t4, 40(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a6, 44(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t2, 0(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t1, 4(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s0, 8(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a4, 12(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t3, 48(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t0, 52(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s2, 56(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a3, 60(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s1, 16(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 20(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t6, 24(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 28(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s4, 12(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s5, 60(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s6, 16(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s7, 20(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 24(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 28(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s3, 32(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s10, 36(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s11, 40(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 44(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a2, s9 -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 40(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a3, s5 -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 36(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a4, s4 -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 32(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw s4, 56(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s5, 48(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 52(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a6, ra -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 28(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 4(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a7, s7 -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 24(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, t0, s9 -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 20(sp) # 4-byte Folded Spill -; 
CHECK-UNALIGNED-RV32-V-NEXT: lw s7, 0(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 8(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, t1, ra -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 16(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 107(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, t5, s10 -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 12(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw s10, 75(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t6, t6, s8 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 123(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s2, s2, s4 -; CHECK-UNALIGNED-RV32-V-NEXT: xor s0, s0, s9 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t5, t4, s11 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s4, 83(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 87(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s11, 91(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s1, s1, s6 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s6, 107(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t4, t3, s5 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s5, 91(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t3, t2, s7 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s7, 123(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t2, a5, s3 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s3, 75(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s5, s11, s5 -; CHECK-UNALIGNED-RV32-V-NEXT: xor s7, s8, s7 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 87(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s11, 83(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s3, s10, s3 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s10, 115(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s6, ra, s6 -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 115(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s4, s4, s11 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s11, 119(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 119(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s10, s10, ra -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 71(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 67(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 67(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a6, 71(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a4, 99(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a3, 
99(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t1, a5, a2 -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 103(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 103(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t0, a4, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a4, s9, s8 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a7, s11, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a6, ra, a6 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a3, a5, a2 -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 95(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 63(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 111(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 79(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s11, 79(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 111(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a0, 63(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a1, 95(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a2, s11 -; CHECK-UNALIGNED-RV32-V-NEXT: xor s9, s9, ra -; CHECK-UNALIGNED-RV32-V-NEXT: xor a0, s8, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a1, a5, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, t2, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, t3, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a5, t4, s9 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, s1, a2 -; CHECK-UNALIGNED-RV32-V-NEXT: or a3, t5, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a6, s0, a6 -; CHECK-UNALIGNED-RV32-V-NEXT: or a7, s2, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: or a4, t6, a4 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t2, 12(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t0, t2, t0 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t2, 16(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t1, t2, t1 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t2, 20(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t2, t2, s10 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t3, 24(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t3, t3, s4 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t4, 28(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t4, t4, s6 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t5, 32(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t5, t5, 
s3 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t6, 36(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t6, t6, s7 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or s0, s0, s5 -; CHECK-UNALIGNED-RV32-V-NEXT: or t6, s0, t6 -; CHECK-UNALIGNED-RV32-V-NEXT: or t4, t5, t4 -; CHECK-UNALIGNED-RV32-V-NEXT: or t2, t3, t2 -; CHECK-UNALIGNED-RV32-V-NEXT: or t0, t1, t0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a4, a4, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: or a3, a6, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a2, a5 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a0, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, t4, t6 -; CHECK-UNALIGNED-RV32-V-NEXT: or a5, t0, t2 -; CHECK-UNALIGNED-RV32-V-NEXT: or a3, a3, a4 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a0, a2 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, a5, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a0, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a0, a1 +; CHECK-UNALIGNED-RV32-V-NEXT: li a2, 127 +; CHECK-UNALIGNED-RV32-V-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v16, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v24, v8, v16 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v24 ; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw 
s9, 52(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: addi sp, sp, 96 ; CHECK-UNALIGNED-RV32-V-NEXT: ret ; ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_127: ; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-V-NEXT: addi sp, sp, -96 -; CHECK-UNALIGNED-RV64-V-NEXT: sd s0, 88(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s1, 80(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s2, 72(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s3, 64(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s4, 56(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s5, 48(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s6, 40(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s7, 32(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s8, 24(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s9, 16(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s10, 8(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: ld a2, 32(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a3, 40(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a4, 48(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a5, 56(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a6, 0(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a7, 8(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t0, 16(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t1, 24(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t2, 32(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t3, 40(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t4, 48(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t5, 56(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t6, 0(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s0, 8(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s1, 16(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s2, 24(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s3, 95(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s4, 103(a0) -; 
CHECK-UNALIGNED-RV64-V-NEXT: ld s5, 111(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s6, 119(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s7, 63(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s8, 71(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s9, 79(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a0, 87(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: xor t1, t1, s2 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a5, a5, t5 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a7, a7, s0 -; CHECK-UNALIGNED-RV64-V-NEXT: ld t5, 63(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s0, 71(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s2, 79(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s10, 87(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a3, a3, t3 -; CHECK-UNALIGNED-RV64-V-NEXT: xor t0, t0, s1 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a4, a4, t4 -; CHECK-UNALIGNED-RV64-V-NEXT: ld t3, 95(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t4, 103(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s1, 111(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a1, 119(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a6, a6, t6 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, t2 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, s10 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a1, s6, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: xor t2, s8, s0 -; CHECK-UNALIGNED-RV64-V-NEXT: xor t4, s4, t4 -; CHECK-UNALIGNED-RV64-V-NEXT: xor t6, s9, s2 -; CHECK-UNALIGNED-RV64-V-NEXT: xor s0, s5, s1 -; CHECK-UNALIGNED-RV64-V-NEXT: xor t5, s7, t5 -; CHECK-UNALIGNED-RV64-V-NEXT: xor t3, s3, t3 -; CHECK-UNALIGNED-RV64-V-NEXT: or a2, a2, t3 -; CHECK-UNALIGNED-RV64-V-NEXT: or a6, a6, t5 -; CHECK-UNALIGNED-RV64-V-NEXT: or a4, a4, s0 -; CHECK-UNALIGNED-RV64-V-NEXT: or t0, t0, t6 -; CHECK-UNALIGNED-RV64-V-NEXT: or a3, a3, t4 -; CHECK-UNALIGNED-RV64-V-NEXT: or a7, a7, t2 -; CHECK-UNALIGNED-RV64-V-NEXT: or a1, a5, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, t1, a0 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a0, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a1, a7, a3 -; CHECK-UNALIGNED-RV64-V-NEXT: or a3, t0, a4 -; CHECK-UNALIGNED-RV64-V-NEXT: or a2, a6, a2 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a1, a0 
-; CHECK-UNALIGNED-RV64-V-NEXT: or a2, a2, a3 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: li a2, 127 +; CHECK-UNALIGNED-RV64-V-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v16, (a1) +; CHECK-UNALIGNED-RV64-V-NEXT: vmsne.vv v24, v8, v16 +; CHECK-UNALIGNED-RV64-V-NEXT: vcpop.m a0, v24 ; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 -; CHECK-UNALIGNED-RV64-V-NEXT: ld s0, 88(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s1, 80(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s2, 72(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s3, 64(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s4, 56(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s6, 40(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s7, 32(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s8, 24(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s9, 16(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s10, 8(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: addi sp, sp, 96 ; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 127) diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll index f9a6dbba04fc..0caab1f5ce2f 100644 --- a/llvm/test/CodeGen/RISCV/memcmp.ll +++ b/llvm/test/CodeGen/RISCV/memcmp.ll @@ -517,17 +517,99 @@ define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind { ; CHECK-ALIGNED-RV64-V-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; -; CHECK-UNALIGNED-LABEL: bcmp_size_5: -; CHECK-UNALIGNED: # %bb.0: # %entry -; CHECK-UNALIGNED-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-NEXT: lbu a0, 4(a0) -; CHECK-UNALIGNED-NEXT: lw a3, 0(a1) -; CHECK-UNALIGNED-NEXT: lbu a1, 4(a1) -; CHECK-UNALIGNED-NEXT: xor a2, a2, a3 -; 
CHECK-UNALIGNED-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-NEXT: snez a0, a0 -; CHECK-UNALIGNED-NEXT: ret +; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 +; 
CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: ret +; +; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 +; 
CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 5) ret i32 %bcmp @@ -614,17 +696,99 @@ define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind { ; CHECK-ALIGNED-RV64-V-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; -; CHECK-UNALIGNED-LABEL: bcmp_size_6: -; CHECK-UNALIGNED: # %bb.0: # %entry -; CHECK-UNALIGNED-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-NEXT: lhu a0, 4(a0) -; CHECK-UNALIGNED-NEXT: lw a3, 0(a1) -; CHECK-UNALIGNED-NEXT: lhu a1, 4(a1) -; CHECK-UNALIGNED-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-NEXT: snez a0, a0 -; CHECK-UNALIGNED-NEXT: ret +; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a1, 
4(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 6, e8, mf2, ta, ma +; 
CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: ret +; +; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 6) ret i32 %bcmp @@ -711,17 +875,99 @@ define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind { ; CHECK-ALIGNED-RV64-V-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; -; CHECK-UNALIGNED-LABEL: bcmp_size_7: -; CHECK-UNALIGNED: # %bb.0: # %entry -; CHECK-UNALIGNED-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-NEXT: lw a0, 3(a0) -; CHECK-UNALIGNED-NEXT: lw a3, 0(a1) -; CHECK-UNALIGNED-NEXT: lw a1, 3(a1) -; CHECK-UNALIGNED-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-NEXT: snez a0, a0 -; CHECK-UNALIGNED-NEXT: ret +; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; 
CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_7: +; 
CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 7, e8, mf2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: ret +; +; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 7) ret i32 %bcmp @@ -1069,33 +1315,21 @@ define i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind { ; ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_15: ; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a3, 4(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a4, 7(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a0, 11(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 0(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a6, 4(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 7(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: 
lw a1, 11(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a2, a5 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a3, a3, a6 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a4, a4, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a3, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a2, a4 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 15, e8, m1, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 ; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 ; CHECK-UNALIGNED-RV32-V-NEXT: ret ; ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_15: ; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-V-NEXT: ld a2, 0(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a0, 7(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a3, 0(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a1, 7(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: vsetivli zero, 15, e8, m1, ta, ma +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV64-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV64-V-NEXT: vcpop.m a0, v8 ; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 ; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: @@ -1555,57 +1789,21 @@ define i32 @bcmp_size_31(ptr %s1, ptr %s2) nounwind { ; ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_31: ; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a3, 4(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a4, 8(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 12(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a6, 0(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 4(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t0, 8(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t1, 12(a1) -; 
CHECK-UNALIGNED-RV32-V-NEXT: lw t2, 15(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t3, 19(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t4, 23(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a0, 27(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a3, a3, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a5, a5, t1 -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 15(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t1, 19(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t5, 23(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a1, 27(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a2, a6 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a4, a4, t0 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a6, t3, t1 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a1, t2, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a7, t4, t5 -; CHECK-UNALIGNED-RV32-V-NEXT: or a4, a4, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, a2, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a5, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a3, a6 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, a1, a4 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a1, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 31, e8, m2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v10, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v12, v8, v10 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v12 ; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 ; CHECK-UNALIGNED-RV32-V-NEXT: ret ; ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_31: ; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-V-NEXT: ld a2, 0(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a3, 8(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a4, 15(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a0, 23(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a5, 0(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a6, 8(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a7, 15(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a1, 23(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a5 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a3, a3, a6 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a4, a4, 
a7 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a3, a0 -; CHECK-UNALIGNED-RV64-V-NEXT: or a2, a2, a4 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: vsetivli zero, 31, e8, m2, ta, ma +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v10, (a1) +; CHECK-UNALIGNED-RV64-V-NEXT: vmsne.vv v12, v8, v10 +; CHECK-UNALIGNED-RV64-V-NEXT: vcpop.m a0, v12 ; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 ; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: @@ -2109,129 +2307,23 @@ define i32 @bcmp_size_63(ptr %s1, ptr %s2) nounwind { ; ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_63: ; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-V-NEXT: addi sp, sp, -48 -; CHECK-UNALIGNED-RV32-V-NEXT: sw s0, 44(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s1, 40(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s2, 36(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s3, 32(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s4, 28(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s6, 20(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s7, 16(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s8, 12(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s9, 8(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s10, 4(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 16(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a3, 20(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a4, 24(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 28(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a6, 0(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 4(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t0, 8(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t1, 12(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t2, 16(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t3, 20(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw 
t4, 24(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t5, 28(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t6, 0(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s0, 4(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s1, 8(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s2, 12(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s3, 47(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s4, 51(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s5, 55(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s6, 59(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s7, 31(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 35(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 39(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a0, 43(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t1, t1, s2 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a5, a5, t5 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a7, a7, s0 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t5, 31(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s0, 35(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s2, 39(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s10, 43(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a3, a3, t3 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t0, t0, s1 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a4, a4, t4 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t3, 47(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t4, 51(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s1, 55(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a1, 59(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a6, a6, t6 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a2, t2 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a0, a0, s10 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a1, s6, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t2, s8, s0 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t4, s4, t4 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t6, s9, s2 -; CHECK-UNALIGNED-RV32-V-NEXT: xor s0, s5, s1 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t5, s7, t5 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t3, s3, t3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a2, t3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a6, a6, t5 -; CHECK-UNALIGNED-RV32-V-NEXT: or a4, a4, s0 -; CHECK-UNALIGNED-RV32-V-NEXT: or t0, t0, t6 -; CHECK-UNALIGNED-RV32-V-NEXT: or a3, a3, t4 -; CHECK-UNALIGNED-RV32-V-NEXT: or a7, 
a7, t2 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, a5, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, t1, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a0, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, a7, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a3, t0, a4 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a6, a2 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a1, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a2, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: li a2, 63 +; CHECK-UNALIGNED-RV32-V-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v12, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v16, v8, v12 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v16 ; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s0, 44(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s1, 40(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s2, 36(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s3, 32(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s4, 28(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s5, 24(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s6, 20(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s7, 16(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 12(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 8(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s10, 4(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: addi sp, sp, 48 ; CHECK-UNALIGNED-RV32-V-NEXT: ret ; ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_63: ; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-V-NEXT: ld a2, 0(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a3, 8(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a4, 16(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a5, 24(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a6, 0(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a7, 8(a1) -; 
CHECK-UNALIGNED-RV64-V-NEXT: ld t0, 16(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t1, 24(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t2, 31(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t3, 39(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t4, 47(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a0, 55(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a3, a3, a7 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a5, a5, t1 -; CHECK-UNALIGNED-RV64-V-NEXT: ld a7, 31(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t1, 39(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t5, 47(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a1, 55(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a6 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a4, a4, t0 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a6, t3, t1 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a1, t2, a7 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a7, t4, t5 -; CHECK-UNALIGNED-RV64-V-NEXT: or a4, a4, a7 -; CHECK-UNALIGNED-RV64-V-NEXT: or a1, a2, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a5, a0 -; CHECK-UNALIGNED-RV64-V-NEXT: or a2, a3, a6 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-RV64-V-NEXT: or a1, a1, a4 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a1, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: li a2, 63 +; CHECK-UNALIGNED-RV64-V-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v12, (a1) +; CHECK-UNALIGNED-RV64-V-NEXT: vmsne.vv v16, v8, v12 +; CHECK-UNALIGNED-RV64-V-NEXT: vcpop.m a0, v16 ; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 ; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: @@ -2627,270 +2719,24 @@ define i32 @bcmp_size_127(ptr %s1, ptr %s2) nounwind { ; ; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_127: ; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-V-NEXT: addi sp, sp, -96 -; CHECK-UNALIGNED-RV32-V-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s1, 84(sp) # 4-byte Folded Spill -; 
CHECK-UNALIGNED-RV32-V-NEXT: sw s2, 80(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s3, 76(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s4, 72(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s5, 68(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s6, 64(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s7, 60(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s8, 56(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s9, 52(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s10, 48(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: sw s11, 44(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 32(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t5, 36(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t4, 40(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a6, 44(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t2, 0(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t1, 4(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s0, 8(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a4, 12(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t3, 48(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t0, 52(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s2, 56(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a3, 60(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s1, 16(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 20(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw t6, 24(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 28(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s4, 12(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s5, 60(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s6, 16(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s7, 20(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 24(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 28(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s3, 32(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s10, 36(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s11, 40(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 44(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a2, s9 -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 40(sp) # 4-byte Folded Spill -; 
CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a3, s5 -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 36(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a4, s4 -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 32(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw s4, 56(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s5, 48(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 52(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a6, ra -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 28(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 4(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a7, s7 -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 24(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, t0, s9 -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 20(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw s7, 0(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 8(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, t1, ra -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 16(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 107(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, t5, s10 -; CHECK-UNALIGNED-RV32-V-NEXT: sw a2, 12(sp) # 4-byte Folded Spill -; CHECK-UNALIGNED-RV32-V-NEXT: lw s10, 75(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t6, t6, s8 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 123(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s2, s2, s4 -; CHECK-UNALIGNED-RV32-V-NEXT: xor s0, s0, s9 -; CHECK-UNALIGNED-RV32-V-NEXT: xor t5, t4, s11 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s4, 83(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 87(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s11, 91(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s1, s1, s6 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s6, 107(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t4, t3, s5 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s5, 91(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t3, t2, s7 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s7, 123(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t2, a5, s3 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s3, 75(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s5, s11, s5 -; CHECK-UNALIGNED-RV32-V-NEXT: 
xor s7, s8, s7 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 87(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s11, 83(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s3, s10, s3 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s10, 115(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s6, ra, s6 -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 115(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s4, s4, s11 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s11, 119(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a7, 119(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor s10, s10, ra -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 71(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 67(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 67(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a6, 71(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a4, 99(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a3, 99(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t1, a5, a2 -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 103(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 103(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor t0, a4, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a4, s9, s8 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a7, s11, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a6, ra, a6 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a3, a5, a2 -; CHECK-UNALIGNED-RV32-V-NEXT: lw a5, 95(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 63(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 111(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a2, 79(a0) -; CHECK-UNALIGNED-RV32-V-NEXT: lw s11, 79(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 111(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a0, 63(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: lw a1, 95(a1) -; CHECK-UNALIGNED-RV32-V-NEXT: xor a2, a2, s11 -; CHECK-UNALIGNED-RV32-V-NEXT: xor s9, s9, ra -; CHECK-UNALIGNED-RV32-V-NEXT: xor a0, s8, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: xor a1, a5, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, t2, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, t3, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a5, t4, s9 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, s1, a2 -; CHECK-UNALIGNED-RV32-V-NEXT: or a3, t5, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a6, s0, a6 -; 
CHECK-UNALIGNED-RV32-V-NEXT: or a7, s2, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: or a4, t6, a4 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t2, 12(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t0, t2, t0 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t2, 16(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t1, t2, t1 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t2, 20(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t2, t2, s10 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t3, 24(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t3, t3, s4 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t4, 28(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t4, t4, s6 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t5, 32(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t5, t5, s3 -; CHECK-UNALIGNED-RV32-V-NEXT: lw t6, 36(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or t6, t6, s7 -; CHECK-UNALIGNED-RV32-V-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: or s0, s0, s5 -; CHECK-UNALIGNED-RV32-V-NEXT: or t6, s0, t6 -; CHECK-UNALIGNED-RV32-V-NEXT: or t4, t5, t4 -; CHECK-UNALIGNED-RV32-V-NEXT: or t2, t3, t2 -; CHECK-UNALIGNED-RV32-V-NEXT: or t0, t1, t0 -; CHECK-UNALIGNED-RV32-V-NEXT: or a4, a4, a7 -; CHECK-UNALIGNED-RV32-V-NEXT: or a3, a6, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a2, a2, a5 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a0, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, t4, t6 -; CHECK-UNALIGNED-RV32-V-NEXT: or a5, t0, t2 -; CHECK-UNALIGNED-RV32-V-NEXT: or a3, a3, a4 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a0, a2 -; CHECK-UNALIGNED-RV32-V-NEXT: or a1, a5, a1 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a0, a3 -; CHECK-UNALIGNED-RV32-V-NEXT: or a0, a0, a1 +; CHECK-UNALIGNED-RV32-V-NEXT: li a2, 127 +; CHECK-UNALIGNED-RV32-V-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v16, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v24, v8, v16 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m 
a0, v24 ; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 -; CHECK-UNALIGNED-RV32-V-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s9, 52(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s10, 48(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: lw s11, 44(sp) # 4-byte Folded Reload -; CHECK-UNALIGNED-RV32-V-NEXT: addi sp, sp, 96 ; CHECK-UNALIGNED-RV32-V-NEXT: ret ; ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_127: ; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-V-NEXT: addi sp, sp, -96 -; CHECK-UNALIGNED-RV64-V-NEXT: sd s0, 88(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s1, 80(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s2, 72(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s3, 64(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s4, 56(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s5, 48(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s6, 40(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s7, 32(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s8, 24(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s9, 16(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: sd s10, 8(sp) # 8-byte Folded Spill -; CHECK-UNALIGNED-RV64-V-NEXT: ld a2, 32(a0) -; 
CHECK-UNALIGNED-RV64-V-NEXT: ld a3, 40(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a4, 48(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a5, 56(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a6, 0(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a7, 8(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t0, 16(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t1, 24(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t2, 32(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t3, 40(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t4, 48(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t5, 56(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t6, 0(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s0, 8(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s1, 16(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s2, 24(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s3, 95(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s4, 103(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s5, 111(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s6, 119(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s7, 63(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s8, 71(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s9, 79(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a0, 87(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: xor t1, t1, s2 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a5, a5, t5 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a7, a7, s0 -; CHECK-UNALIGNED-RV64-V-NEXT: ld t5, 63(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s0, 71(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s2, 79(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s10, 87(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a3, a3, t3 -; CHECK-UNALIGNED-RV64-V-NEXT: xor t0, t0, s1 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a4, a4, t4 -; CHECK-UNALIGNED-RV64-V-NEXT: ld t3, 95(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld t4, 103(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld s1, 111(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a1, 119(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a6, a6, t6 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, t2 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, s10 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a1, s6, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: xor t2, s8, s0 -; CHECK-UNALIGNED-RV64-V-NEXT: xor t4, s4, t4 -; 
CHECK-UNALIGNED-RV64-V-NEXT: xor t6, s9, s2 -; CHECK-UNALIGNED-RV64-V-NEXT: xor s0, s5, s1 -; CHECK-UNALIGNED-RV64-V-NEXT: xor t5, s7, t5 -; CHECK-UNALIGNED-RV64-V-NEXT: xor t3, s3, t3 -; CHECK-UNALIGNED-RV64-V-NEXT: or a2, a2, t3 -; CHECK-UNALIGNED-RV64-V-NEXT: or a6, a6, t5 -; CHECK-UNALIGNED-RV64-V-NEXT: or a4, a4, s0 -; CHECK-UNALIGNED-RV64-V-NEXT: or t0, t0, t6 -; CHECK-UNALIGNED-RV64-V-NEXT: or a3, a3, t4 -; CHECK-UNALIGNED-RV64-V-NEXT: or a7, a7, t2 -; CHECK-UNALIGNED-RV64-V-NEXT: or a1, a5, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, t1, a0 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a0, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a1, a7, a3 -; CHECK-UNALIGNED-RV64-V-NEXT: or a3, t0, a4 -; CHECK-UNALIGNED-RV64-V-NEXT: or a2, a6, a2 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a1, a0 -; CHECK-UNALIGNED-RV64-V-NEXT: or a2, a2, a3 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: li a2, 127 +; CHECK-UNALIGNED-RV64-V-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v16, (a1) +; CHECK-UNALIGNED-RV64-V-NEXT: vmsne.vv v24, v8, v16 +; CHECK-UNALIGNED-RV64-V-NEXT: vcpop.m a0, v24 ; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 -; CHECK-UNALIGNED-RV64-V-NEXT: ld s0, 88(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s1, 80(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s2, 72(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s3, 64(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s4, 56(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s5, 48(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s6, 40(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s7, 32(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s8, 24(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s9, 16(sp) # 8-byte Folded Reload -; CHECK-UNALIGNED-RV64-V-NEXT: ld s10, 8(sp) # 8-byte Folded Reload -; 
CHECK-UNALIGNED-RV64-V-NEXT: addi sp, sp, 96 ; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 127) -- cgit v1.2.3 From 59d6fbb8ffe03ceecfcc07ebe22e256c97ef70dd Mon Sep 17 00:00:00 2001 From: Kareem Ergawy Date: Wed, 18 Jun 2025 10:24:08 +0200 Subject: [flang][fir] Provide allocation block for `fir.local` when required (#144521) Extends `fir::FirOpBuilder::getAllocaBlock()` to support `fir.local`. This allows us to retrieve an allocation block when needed for `fir.local`. --- flang/lib/Optimizer/Builder/FIRBuilder.cpp | 3 +++ flang/test/HLFIR/fir-local-alloca-block.fir | 34 +++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 flang/test/HLFIR/fir-local-alloca-block.fir diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index 584f3c8ee310..6ac87067f651 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -283,6 +283,9 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() { if (auto doConcurentOp = getRegion().getParentOfType()) return doConcurentOp.getBody(); + if (auto firLocalOp = getRegion().getParentOfType()) + return &getRegion().front(); + return getEntryBlock(); } diff --git a/flang/test/HLFIR/fir-local-alloca-block.fir b/flang/test/HLFIR/fir-local-alloca-block.fir new file mode 100644 index 000000000000..9d76e86fec3d --- /dev/null +++ b/flang/test/HLFIR/fir-local-alloca-block.fir @@ -0,0 +1,34 @@ +// Tests that `fir.local` ops are able to provide an alloca block when required. 
+ +// RUN: fir-opt %s -convert-hlfir-to-fir | FileCheck %s + +fir.local {type = local_init} @localizer : !fir.box> copy { +^bb0(%arg0: !fir.ref>>, %arg1: !fir.ref>>): + %0 = fir.load %arg0 : !fir.ref>> + hlfir.assign %0 to %arg1 : !fir.box>, !fir.ref>> + fir.yield(%arg1 : !fir.ref>>) +} + +func.func @foo() { + %c1 = arith.constant 1 : index + %0 = fir.alloca !fir.box> + fir.do_concurrent { + fir.do_concurrent.loop (%arg0) = (%c1) to (%c1) step (%c1) local(@localizer %0 -> %arg1 : !fir.ref>>) { + } + } + return +} + +// CHECK: fir.local {type = local_init} @localizer : ![[TYPE:fir.box>]] copy { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref, %[[VAL_1:.*]]: !fir.ref): +// CHECK: %[[VAL_2:.*]] = fir.alloca ![[TYPE]] +// CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_0]] : !fir.ref +// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_3]], %[[VAL_4]] : (![[TYPE]], index) -> (index, index, index) +// CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_1]] : !fir.ref +// CHECK: fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref +// CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_2]] : (!fir.ref) -> !fir.ref> +// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_3]] : (![[TYPE]]) -> !fir.box +// CHECK: fir.call @_FortranAAssign(%[[VAL_10]], %[[VAL_11]], %{{.*}}, %{{.*}}) +// CHECK: fir.yield(%[[VAL_1]] : !fir.ref) +// CHECK: } -- cgit v1.2.3 From 255b55c602f73964262893859a543a115b278e21 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 18 Jun 2025 01:35:56 -0700 Subject: [GlobalOpt] Use cast instead of dyn_cast. NFC (#144634) The dyn_cast was not checked for null, and the cast is guaranteed to succeed by an earlier check. 
--- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 7db058638650..4a06e0fa619c 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2169,7 +2169,7 @@ static bool tryWidenGlobalArraysUsedByMemcpy( unsigned NumBytesToCopy = BytesToCopyOp->getZExtValue(); - auto *Alloca = dyn_cast<AllocaInst>(CI->getArgOperand(0)); + auto *Alloca = cast<AllocaInst>(CI->getArgOperand(0)); uint64_t DZSize = Alloca->getAllocatedType()->getArrayNumElements(); uint64_t SZSize = SourceDataArray->getType()->getNumElements(); unsigned ElementByteWidth = SourceDataArray->getElementByteSize(); -- cgit v1.2.3 From 7ea7ccd24d603ceec6eb5194d98911e6ab7c0717 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 18 Jun 2025 10:50:17 +0200 Subject: [PowerPC][AIX] Specify pointer info and alignment for stack store (#144526) When lowering call arguments to stack, specify a stack MPI, as well as the stack alignment, instead of using the defaults (which would be an unknown location with ABI alignment). I believe the asm diffs are just changes in scheduling. 
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 +- llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll | 366 +++++++++++---------- llvm/test/CodeGen/PowerPC/aix-cc-abi.ll | 176 +++++----- llvm/test/CodeGen/PowerPC/aix-cc-byval-mir.ll | 4 +- .../test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll | 34 +- llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll | 38 +-- .../CodeGen/PowerPC/aix-vector-vararg-caller.ll | 26 +- .../PowerPC/aix-vector-vararg-fixed-caller.ll | 8 +- 8 files changed, 341 insertions(+), 315 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 0f8e5e57c58b..f502d8570425 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7767,7 +7767,9 @@ SDValue PPCTargetLowering::LowerCall_AIX( DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); MemOpChains.push_back( - DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); + DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo::getStack(MF, VA.getLocMemOffset()), + Subtarget.getFrameLowering()->getStackAlign())); continue; } diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll index aead5762d092..9ffb4fd5eae4 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll @@ -474,14 +474,14 @@ define void @call_test_fpr_max() { ; 32BIT-NEXT: renamable $r3 = LWZtoc @d1, $r2 :: (load (s32) from got) ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load (s64) from @d1) ; 32BIT-NEXT: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1 - ; 32BIT-NEXT: STFD renamable $f1, 120, $r1 :: (store (s64)) - ; 32BIT-NEXT: STFD renamable $f1, 112, $r1 :: (store (s64)) - ; 32BIT-NEXT: STFD renamable $f1, 104, $r1 :: (store (s64)) - ; 32BIT-NEXT: STFD renamable $f1, 96, $r1 :: (store (s64)) - ; 
32BIT-NEXT: STFD renamable $f1, 88, $r1 :: (store (s64)) - ; 32BIT-NEXT: STFD renamable $f1, 80, $r1 :: (store (s64)) - ; 32BIT-NEXT: STFD renamable $f1, 72, $r1 :: (store (s64)) - ; 32BIT-NEXT: STFD renamable $f1, 64, $r1 :: (store (s64)) + ; 32BIT-NEXT: STFD renamable $f1, 120, $r1 :: (store (s64) into stack + 120, basealign 16) + ; 32BIT-NEXT: STFD renamable $f1, 112, $r1 :: (store (s64) into stack + 112, align 16) + ; 32BIT-NEXT: STFD renamable $f1, 104, $r1 :: (store (s64) into stack + 104, basealign 16) + ; 32BIT-NEXT: STFD renamable $f1, 96, $r1 :: (store (s64) into stack + 96, align 16) + ; 32BIT-NEXT: STFD renamable $f1, 88, $r1 :: (store (s64) into stack + 88, basealign 16) + ; 32BIT-NEXT: STFD renamable $f1, 80, $r1 :: (store (s64) into stack + 80, align 16) + ; 32BIT-NEXT: STFD renamable $f1, 72, $r1 :: (store (s64) into stack + 72, basealign 16) + ; 32BIT-NEXT: STFD renamable $f1, 64, $r1 :: (store (s64) into stack + 64, align 16) ; 32BIT-NEXT: $f2 = COPY renamable $f1 ; 32BIT-NEXT: $f3 = COPY renamable $f1 ; 32BIT-NEXT: $f4 = COPY renamable $f1 @@ -494,7 +494,7 @@ define void @call_test_fpr_max() { ; 32BIT-NEXT: $f11 = COPY renamable $f1 ; 32BIT-NEXT: $f12 = COPY renamable $f1 ; 32BIT-NEXT: $f13 = COPY renamable $f1 - ; 32BIT-NEXT: STFD renamable $f1, 56, $r1 :: (store (s64)) + ; 32BIT-NEXT: STFD renamable $f1, 56, $r1 :: (store (s64) into stack + 56, basealign 16) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $r2, implicit-def $r1, implicit-def dead $f1 ; 32BIT-NEXT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm @@ -504,10 +504,10 @@ define void @call_test_fpr_max() { ; 64BIT-NEXT: renamable $x3 
= LDtoc @d1, $x2 :: (load (s64) from got) ; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x3 :: (dereferenceable load (s64) from @d1) ; 64BIT-NEXT: ADJCALLSTACKDOWN 152, 0, implicit-def dead $r1, implicit $r1 - ; 64BIT-NEXT: STFD renamable $f1, 144, $x1 :: (store (s64)) - ; 64BIT-NEXT: STFD renamable $f1, 136, $x1 :: (store (s64)) - ; 64BIT-NEXT: STFD renamable $f1, 128, $x1 :: (store (s64)) - ; 64BIT-NEXT: STFD renamable $f1, 120, $x1 :: (store (s64)) + ; 64BIT-NEXT: STFD renamable $f1, 144, $x1 :: (store (s64) into stack + 144, align 16) + ; 64BIT-NEXT: STFD renamable $f1, 136, $x1 :: (store (s64) into stack + 136, basealign 16) + ; 64BIT-NEXT: STFD renamable $f1, 128, $x1 :: (store (s64) into stack + 128, align 16) + ; 64BIT-NEXT: STFD renamable $f1, 120, $x1 :: (store (s64) into stack + 120, basealign 16) ; 64BIT-NEXT: $f2 = COPY renamable $f1 ; 64BIT-NEXT: $f3 = COPY renamable $f1 ; 64BIT-NEXT: $f4 = COPY renamable $f1 @@ -520,7 +520,7 @@ define void @call_test_fpr_max() { ; 64BIT-NEXT: $f11 = COPY renamable $f1 ; 64BIT-NEXT: $f12 = COPY renamable $f1 ; 64BIT-NEXT: $f13 = COPY renamable $f1 - ; 64BIT-NEXT: STFD renamable $f1, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: STFD renamable $f1, 112, $x1 :: (store (s64) into stack + 112, align 16) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $x2, implicit-def $r1, implicit-def dead $f1 ; 64BIT-NEXT: ADJCALLSTACKUP 152, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm @@ -889,11 +889,11 @@ define void @call_test_stackarg_int() { ; 32BIT-NEXT: renamable $r6 = LWZ 0, renamable $r3 :: (dereferenceable load (s32) from @lli, align 8) ; 32BIT-NEXT: renamable $r3 = LWZ 4, killed 
renamable $r3 :: (dereferenceable load (s32) from @lli + 4, basealign 8) ; 32BIT-NEXT: ADJCALLSTACKDOWN 80, 0, implicit-def dead $r1, implicit $r1 - ; 32BIT-NEXT: STW renamable $r5, 76, $r1 :: (store (s32)) - ; 32BIT-NEXT: STW killed renamable $r3, 72, $r1 :: (store (s32)) - ; 32BIT-NEXT: STW killed renamable $r6, 68, $r1 :: (store (s32)) - ; 32BIT-NEXT: STW killed renamable $r5, 64, $r1 :: (store (s32)) - ; 32BIT-NEXT: STW killed renamable $r4, 60, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW renamable $r5, 76, $r1 :: (store (s32) into stack + 76, basealign 16) + ; 32BIT-NEXT: STW killed renamable $r3, 72, $r1 :: (store (s32) into stack + 72, align 8, basealign 16) + ; 32BIT-NEXT: STW killed renamable $r6, 68, $r1 :: (store (s32) into stack + 68, basealign 16) + ; 32BIT-NEXT: STW killed renamable $r5, 64, $r1 :: (store (s32) into stack + 64, align 16) + ; 32BIT-NEXT: STW killed renamable $r4, 60, $r1 :: (store (s32) into stack + 60, basealign 16) ; 32BIT-NEXT: $r3 = LI 1 ; 32BIT-NEXT: $r4 = LI 2 ; 32BIT-NEXT: $r5 = LI 3 @@ -902,7 +902,7 @@ define void @call_test_stackarg_int() { ; 32BIT-NEXT: $r8 = LI 6 ; 32BIT-NEXT: $r9 = LI 7 ; 32BIT-NEXT: $r10 = LI 8 - ; 32BIT-NEXT: STW killed renamable $r11, 56, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r11, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 80, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm @@ -926,11 +926,11 @@ define void @call_test_stackarg_int() { ; 64BIT-NEXT: $x8 = LI8 6 ; 64BIT-NEXT: $x9 = LI8 7 ; 64BIT-NEXT: $x10 = LI8 8 - ; 64BIT-NEXT: STD killed renamable $x31, 136, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD renamable $x0, 144, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x0, 128, $x1 
:: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x12, 120, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x11, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x31, 136, $x1 :: (store (s64) into stack + 136, basealign 16) + ; 64BIT-NEXT: STD renamable $x0, 144, $x1 :: (store (s64) into stack + 144, align 16) + ; 64BIT-NEXT: STD killed renamable $x0, 128, $x1 :: (store (s64) into stack + 128, align 16) + ; 64BIT-NEXT: STD killed renamable $x12, 120, $x1 :: (store (s64) into stack + 120, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x11, 112, $x1 :: (store (s64) into stack + 112, align 16) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 152, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm @@ -956,7 +956,11 @@ define void @call_test_stackarg_float() { ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f) ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r4 :: (dereferenceable load (s64) from @d) ; 32BIT-NEXT: ADJCALLSTACKDOWN 68, 0, implicit-def dead $r1, implicit $r1 - ; 32BIT-NEXT: STFD renamable $f2, 60, $r1 :: (store (s64)) + ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.0 :: (store (s64) into %stack.0) + ; 32BIT-NEXT: STFS renamable $f1, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) + ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) + ; 32BIT-NEXT: STW killed renamable $r3, 64, $r1 :: (store (s32) into stack + 64, align 16) + ; 32BIT-NEXT: renamable $r11 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) ; 32BIT-NEXT: $r3 = LI 1 ; 32BIT-NEXT: $r4 = LI 2 ; 32BIT-NEXT: $r5 = LI 3 @@ -965,8 +969,8 @@ define void @call_test_stackarg_float() { ; 32BIT-NEXT: $r8 = LI 6 ; 32BIT-NEXT: $r9 = LI 7 ; 
32BIT-NEXT: $r10 = LI 8 - ; 32BIT-NEXT: STFS renamable $f1, 56, $r1 :: (store (s32)) - ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $f1, implicit $f2, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: STW killed renamable $r11, 60, $r1 :: (store (s32) into stack + 60, basealign 16) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $f1, implicit $f2, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 68, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm ; @@ -977,7 +981,7 @@ define void @call_test_stackarg_float() { ; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load (s32) from @f) ; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x4 :: (dereferenceable load (s64) from @d) ; 64BIT-NEXT: ADJCALLSTACKDOWN 128, 0, implicit-def dead $r1, implicit $r1 - ; 64BIT-NEXT: STFD renamable $f2, 120, $x1 :: (store (s64)) + ; 64BIT-NEXT: STFD renamable $f2, 120, $x1 :: (store (s64) into stack + 120, basealign 16) ; 64BIT-NEXT: $x3 = LI8 1 ; 64BIT-NEXT: $x4 = LI8 2 ; 64BIT-NEXT: $x5 = LI8 3 @@ -986,7 +990,7 @@ define void @call_test_stackarg_float() { ; 64BIT-NEXT: $x8 = LI8 6 ; 64BIT-NEXT: $x9 = LI8 7 ; 64BIT-NEXT: $x10 = LI8 8 - ; 64BIT-NEXT: STFS renamable $f1, 112, $x1 :: (store (s32)) + ; 64BIT-NEXT: STFS renamable $f1, 112, $x1 :: (store (s32) into stack + 112, align 16) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $f1, implicit $f2, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: 
ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm @@ -1053,7 +1057,11 @@ define void @call_test_stackarg_float3() { ; 32BIT-NEXT: renamable $r10 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) ; 32BIT-NEXT: renamable $f2 = LFS 0, killed renamable $r3 :: (dereferenceable load (s32) from @f) ; 32BIT-NEXT: ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1 - ; 32BIT-NEXT: STFS renamable $f2, 60, $r1 :: (store (s32)) + ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.1 :: (store (s64) into %stack.1) + ; 32BIT-NEXT: STFS renamable $f2, 60, $r1 :: (store (s32) into stack + 60, basealign 16) + ; 32BIT-NEXT: renamable $r3 = LWZ 4, %stack.1 :: (load (s32) from %stack.1 + 4) + ; 32BIT-NEXT: STW killed renamable $r3, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) + ; 32BIT-NEXT: renamable $r11 = LWZ 0, %stack.1 :: (load (s32) from %stack.1, align 8) ; 32BIT-NEXT: $r3 = LI 1 ; 32BIT-NEXT: $r4 = LI 2 ; 32BIT-NEXT: $r5 = LI 3 @@ -1061,8 +1069,8 @@ define void @call_test_stackarg_float3() { ; 32BIT-NEXT: $r7 = LI 5 ; 32BIT-NEXT: $r8 = LI 6 ; 32BIT-NEXT: $r9 = LI 7 - ; 32BIT-NEXT: STFD renamable $f1, 52, $r1 :: (store (s64)) - ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit $f1, implicit $r10, implicit $f2, implicit $r2, implicit-def $r1 + ; 32BIT-NEXT: STW killed renamable $r11, 52, $r1 :: (store (s32) into stack + 52, basealign 16) + ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $f1, implicit $r10, implicit $f2, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm ; @@ -1082,7 +1090,7 @@ define void 
@call_test_stackarg_float3() { ; 64BIT-NEXT: $x7 = LI8 5 ; 64BIT-NEXT: $x8 = LI8 6 ; 64BIT-NEXT: $x9 = LI8 7 - ; 64BIT-NEXT: STFS renamable $f2, 112, $x1 :: (store (s32)) + ; 64BIT-NEXT: STFS renamable $f2, 112, $x1 :: (store (s32) into stack + 112, align 16) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit $f1, implicit $x10, implicit $f2, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 120, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm @@ -1225,15 +1233,15 @@ define void @caller_ints_stack() { ; 32BIT-NEXT: renamable $r9 = LBZ 0, killed renamable $r9 :: (dereferenceable load (s8) from @uc1) ; 32BIT-NEXT: renamable $r12 = LWZ 0, killed renamable $r12 :: (dereferenceable load (s32) from @i1) ; 32BIT-NEXT: ADJCALLSTACKDOWN 96, 0, implicit-def dead $r1, implicit $r1 - ; 32BIT-NEXT: STW killed renamable $r12, 92, $r1 :: (store (s32)) - ; 32BIT-NEXT: STW killed renamable $r9, 88, $r1 :: (store (s32)) - ; 32BIT-NEXT: STW killed renamable $r8, 84, $r1 :: (store (s32)) - ; 32BIT-NEXT: STW killed renamable $r10, 80, $r1 :: (store (s32)) - ; 32BIT-NEXT: STW killed renamable $r7, 76, $r1 :: (store (s32)) - ; 32BIT-NEXT: STW killed renamable $r6, 72, $r1 :: (store (s32)) - ; 32BIT-NEXT: STW killed renamable $r5, 68, $r1 :: (store (s32)) - ; 32BIT-NEXT: STW killed renamable $r4, 64, $r1 :: (store (s32)) - ; 32BIT-NEXT: STW killed renamable $r3, 60, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r12, 92, $r1 :: (store (s32) into stack + 92, basealign 16) + ; 32BIT-NEXT: STW killed renamable $r9, 88, $r1 :: (store (s32) into stack + 88, align 8, basealign 16) + ; 32BIT-NEXT: STW killed renamable $r8, 84, $r1 :: (store (s32) into stack + 84, basealign 16) + ; 32BIT-NEXT: STW killed renamable $r10, 80, $r1 :: (store (s32) into stack + 80, align 16) 
+ ; 32BIT-NEXT: STW killed renamable $r7, 76, $r1 :: (store (s32) into stack + 76, basealign 16) + ; 32BIT-NEXT: STW killed renamable $r6, 72, $r1 :: (store (s32) into stack + 72, align 8, basealign 16) + ; 32BIT-NEXT: STW killed renamable $r5, 68, $r1 :: (store (s32) into stack + 68, basealign 16) + ; 32BIT-NEXT: STW killed renamable $r4, 64, $r1 :: (store (s32) into stack + 64, align 16) + ; 32BIT-NEXT: STW killed renamable $r3, 60, $r1 :: (store (s32) into stack + 60, basealign 16) ; 32BIT-NEXT: $r3 = LI 1 ; 32BIT-NEXT: $r4 = LI 2 ; 32BIT-NEXT: $r5 = LI 3 @@ -1242,7 +1250,7 @@ define void @caller_ints_stack() { ; 32BIT-NEXT: $r8 = LI 6 ; 32BIT-NEXT: $r9 = LI 7 ; 32BIT-NEXT: $r10 = LI 8 - ; 32BIT-NEXT: STW killed renamable $r11, 56, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r11, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def dead $r3, implicit-def dead $r4 ; 32BIT-NEXT: ADJCALLSTACKUP 96, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm @@ -1274,14 +1282,14 @@ define void @caller_ints_stack() { ; 64BIT-NEXT: $x8 = LI8 6 ; 64BIT-NEXT: $x9 = LI8 7 ; 64BIT-NEXT: $x10 = LI8 8 - ; 64BIT-NEXT: STD killed renamable $x27, 168, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x30, 160, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x28, 152, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x31, 144, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x0, 136, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x12, 128, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x11, 120, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x29, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x27, 168, $x1 :: 
(store (s64) into stack + 168, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x30, 160, $x1 :: (store (s64) into stack + 160, align 16) + ; 64BIT-NEXT: STD killed renamable $x28, 152, $x1 :: (store (s64) into stack + 152, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x31, 144, $x1 :: (store (s64) into stack + 144, align 16) + ; 64BIT-NEXT: STD killed renamable $x0, 136, $x1 :: (store (s64) into stack + 136, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x12, 128, $x1 :: (store (s64) into stack + 128, align 16) + ; 64BIT-NEXT: STD killed renamable $x11, 120, $x1 :: (store (s64) into stack + 120, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x29, 112, $x1 :: (store (s64) into stack + 112, align 16) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1, implicit-def dead $x3 ; 64BIT-NEXT: ADJCALLSTACKUP 176, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm @@ -1333,7 +1341,7 @@ define void @call_test_i1_stack() { ; 32BIT-NEXT: $r8 = LI 6 ; 32BIT-NEXT: $r9 = LI 7 ; 32BIT-NEXT: $r10 = LI 8 - ; 32BIT-NEXT: STW killed renamable $r11, 56, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r11, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1 ; 32BIT-NEXT: ADJCALLSTACKUP 60, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm @@ -1350,7 +1358,7 @@ define void @call_test_i1_stack() { ; 64BIT-NEXT: $x8 = LI8 6 ; 64BIT-NEXT: $x9 = LI8 7 ; 64BIT-NEXT: $x10 = LI8 8 - ; 64BIT-NEXT: STD killed renamable $x11, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x11, 112, $x1 :: (store 
(s64) into stack + 112, align 16) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 120, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm @@ -1441,88 +1449,92 @@ define void @caller_fpr_stack() { ; 32BIT-NEXT: renamable $r3 = LWZtoc @d15, $r2 :: (load (s32) from got) ; 32BIT-NEXT: renamable $r4 = LWZtoc @f14, $r2 :: (load (s32) from got) ; 32BIT-NEXT: renamable $f0 = LFD 0, killed renamable $r3 :: (dereferenceable load (s64) from @d15) - ; 32BIT-NEXT: renamable $r5 = LWZtoc @f16, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $r4 :: (dereferenceable load (s32) from @f14) - ; 32BIT-NEXT: renamable $r4 = LWZ 0, killed renamable $r5 :: (dereferenceable load (s32) from @f16) + ; 32BIT-NEXT: renamable $r3 = LWZtoc @f16, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r4 = LWZ 0, killed renamable $r4 :: (dereferenceable load (s32) from @f14) + ; 32BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $r3 :: (dereferenceable load (s32) from @f16) ; 32BIT-NEXT: ADJCALLSTACKDOWN 144, 0, implicit-def dead $r1, implicit $r1 + ; 32BIT-NEXT: STFD killed renamable $f0, 0, %stack.0 :: (store (s64) into %stack.0) ; 32BIT-NEXT: renamable $r5 = LI 0 ; 32BIT-NEXT: renamable $r6 = LIS 16352 - ; 32BIT-NEXT: STW killed renamable $r5, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 8) - ; 32BIT-NEXT: renamable $r5 = LIS 13107 - ; 32BIT-NEXT: STW killed renamable $r6, 56, $r1 :: (store (s32), align 8) - ; 32BIT-NEXT: renamable $r6 = LIS 16355 - ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 13107 - ; 32BIT-NEXT: STW killed renamable $r5, 68, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW killed renamable $r3, 140, $r1 :: (store (s32) into stack + 140, basealign 16) + 
; 32BIT-NEXT: renamable $r3 = LIS 13107 + ; 32BIT-NEXT: STW killed renamable $r4, 128, $r1 :: (store (s32) into stack + 128, align 16) + ; 32BIT-NEXT: renamable $r4 = LIS 16355 + ; 32BIT-NEXT: STW killed renamable $r5, 60, $r1 :: (store (s32) into stack + 60, basealign 16) ; 32BIT-NEXT: renamable $r5 = LIS 26214 - ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 13107 - ; 32BIT-NEXT: STW killed renamable $r6, 64, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r6, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) ; 32BIT-NEXT: renamable $r6 = LIS 16358 + ; 32BIT-NEXT: renamable $r3 = ORI killed renamable $r3, 13107 + ; 32BIT-NEXT: STW killed renamable $r3, 68, $r1 :: (store (s32) into stack + 68, basealign 16) + ; 32BIT-NEXT: renamable $r3 = LIS 39321 + ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 13107 + ; 32BIT-NEXT: STW killed renamable $r4, 64, $r1 :: (store (s32) into stack + 64, align 16) + ; 32BIT-NEXT: renamable $r4 = LIS 16361 ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 26214 - ; 32BIT-NEXT: STW killed renamable $r5, 76, $r1 :: (store (s32) into unknown-address + 4, basealign 8) - ; 32BIT-NEXT: renamable $r5 = LIS 39321 + ; 32BIT-NEXT: STW killed renamable $r5, 76, $r1 :: (store (s32) into stack + 76, basealign 16) + ; 32BIT-NEXT: renamable $r5 = LIS 52428 ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 26214 - ; 32BIT-NEXT: STW killed renamable $r6, 72, $r1 :: (store (s32), align 8) - ; 32BIT-NEXT: renamable $r6 = LIS 16361 - ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 39321 - ; 32BIT-NEXT: STW killed renamable $r6, 80, $r1 :: (store (s32), align 8) - ; 32BIT-NEXT: renamable $r6 = LIS 52428 - ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 52429 - ; 32BIT-NEXT: STW killed renamable $r6, 92, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW killed renamable $r6, 72, $r1 :: (store (s32) into stack + 72, align 8, basealign 16) ; 
32BIT-NEXT: renamable $r6 = LIS 16364 - ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 39322 - ; 32BIT-NEXT: STW renamable $r5, 84, $r1 :: (store (s32) into unknown-address + 4, basealign 8) - ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 52428 - ; 32BIT-NEXT: STW killed renamable $r6, 88, $r1 :: (store (s32), align 8) - ; 32BIT-NEXT: renamable $r6 = LIS 16313 - ; 32BIT-NEXT: STW killed renamable $r5, 100, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 39321 + ; 32BIT-NEXT: STW killed renamable $r4, 80, $r1 :: (store (s32) into stack + 80, align 16) + ; 32BIT-NEXT: renamable $r4 = LIS 16313 + ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 52429 + ; 32BIT-NEXT: STW killed renamable $r5, 92, $r1 :: (store (s32) into stack + 92, basealign 16) ; 32BIT-NEXT: renamable $r5 = LIS 49807 - ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 39321 - ; 32BIT-NEXT: STW killed renamable $r6, 96, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r3 = ORI killed renamable $r3, 39322 + ; 32BIT-NEXT: STW renamable $r3, 84, $r1 :: (store (s32) into stack + 84, basealign 16) + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 52428 + ; 32BIT-NEXT: STW killed renamable $r6, 88, $r1 :: (store (s32) into stack + 88, align 8, basealign 16) ; 32BIT-NEXT: renamable $r6 = LIS 16316 + ; 32BIT-NEXT: STW killed renamable $r3, 100, $r1 :: (store (s32) into stack + 100, basealign 16) + ; 32BIT-NEXT: renamable $r3 = LIS 60293 + ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 39321 + ; 32BIT-NEXT: STW killed renamable $r4, 96, $r1 :: (store (s32) into stack + 96, align 16) + ; 32BIT-NEXT: renamable $r4 = LIS 16318 ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 23593 - ; 32BIT-NEXT: STW killed renamable $r5, 108, $r1 :: (store (s32) into unknown-address + 4, basealign 8) - ; 32BIT-NEXT: renamable $r5 = LIS 60293 - ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 
10485 - ; 32BIT-NEXT: STW killed renamable $r6, 104, $r1 :: (store (s32), align 8) - ; 32BIT-NEXT: renamable $r6 = LIS 16318 - ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 7864 - ; 32BIT-NEXT: STW killed renamable $r5, 116, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW killed renamable $r5, 108, $r1 :: (store (s32) into stack + 108, basealign 16) ; 32BIT-NEXT: renamable $r5 = LIS 2621 - ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 47185 - ; 32BIT-NEXT: STW killed renamable $r6, 112, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 10485 + ; 32BIT-NEXT: STW killed renamable $r6, 104, $r1 :: (store (s32) into stack + 104, align 8, basealign 16) ; 32BIT-NEXT: renamable $r6 = LIS 16320 - ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r5, 28836 - ; 32BIT-NEXT: STW killed renamable $r5, 124, $r1 :: (store (s32) into unknown-address + 4, basealign 8) - ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.0, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r6, 41943 - ; 32BIT-NEXT: STW killed renamable $r6, 120, $r1 :: (store (s32), align 8) - ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.1, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.2, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f3 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.3, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f4 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.4, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f6 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r3 = ORI killed renamable $r3, 7864 + ; 32BIT-NEXT: STW killed renamable $r3, 116, $r1 :: (store (s32) 
into stack + 116, basealign 16) + ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.0, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 47185 + ; 32BIT-NEXT: STW killed renamable $r4, 112, $r1 :: (store (s32) into stack + 112, align 16) + ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r5, 28836 + ; 32BIT-NEXT: STW killed renamable $r4, 124, $r1 :: (store (s32) into stack + 124, basealign 16) + ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r6, 41943 + ; 32BIT-NEXT: STW killed renamable $r4, 120, $r1 :: (store (s32) into stack + 120, align 8, basealign 16) + ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.1, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.0 :: (load (s32) from %stack.0 + 4) + ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.2, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.3, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f3 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.4, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f4 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.5, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f7 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.6, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f6 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.6, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f7 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.7, $r2 :: (load (s32) from got) ; 32BIT-NEXT: renamable $f8 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable 
$r6 = LWZtoc %const.7, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f9 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.8, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.9, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f11 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) - ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.10, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f12 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.8, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f9 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.9, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.10, $r2 :: (load (s32) from got) + ; 32BIT-NEXT: renamable $f11 = LFD 0, killed renamable $r6 :: (load (s64) from constant-pool) ; 32BIT-NEXT: renamable $r6 = LWZtoc %const.11, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: renamable $f13 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $f12 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) + ; 32BIT-NEXT: renamable $f13 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) ; 32BIT-NEXT: renamable $f5 = LFS 0, killed renamable $r6 :: (load (s32) from constant-pool) - ; 32BIT-NEXT: STW killed renamable $r4, 140, $r1 :: (store (s32)) - ; 32BIT-NEXT: STFD killed renamable $f0, 132, $r1 :: (store (s64)) + ; 32BIT-NEXT: STW killed renamable $r5, 136, $r1 :: (store (s32) into stack + 136, align 8, basealign 16) + ; 32BIT-NEXT: renamable $r3 = LWZ 0, %stack.0 :: (load (s32) from %stack.0, align 8) ; 32BIT-NEXT: $f10 
= COPY renamable $f1 - ; 32BIT-NEXT: STW killed renamable $r3, 128, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r3, 132, $r1 :: (store (s32) into stack + 132, basealign 16) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit killed $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, implicit-def dead $f1 ; 32BIT-NEXT: ADJCALLSTACKUP 144, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm @@ -1537,7 +1549,7 @@ define void @caller_fpr_stack() { ; 64BIT-NEXT: renamable $r5 = LWZ 0, killed renamable $x5 :: (dereferenceable load (s32) from @f16) ; 64BIT-NEXT: ADJCALLSTACKDOWN 176, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: renamable $x6 = LDtocCPT %const.0, $x2 :: (load (s64) from got) - ; 64BIT-NEXT: STW killed renamable $r5, 168, $x1 :: (store (s32)) + ; 64BIT-NEXT: STW killed renamable $r5, 168, $x1 :: (store (s32) into stack + 168, align 8, basealign 16) ; 64BIT-NEXT: renamable $x5 = LDtocCPT %const.1, $x2 :: (load (s64) from got) ; 64BIT-NEXT: renamable $x7 = LDtocCPT %const.2, $x2 :: (load (s64) from got) ; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x6 :: (load (s64) from constant-pool) @@ -1549,7 +1561,7 @@ define void @caller_fpr_stack() { ; 64BIT-NEXT: renamable $f6 = LFD 0, killed renamable $x6 :: (load (s64) from constant-pool) ; 64BIT-NEXT: renamable $x6 = LDtocCPT %const.6, $x2 :: (load (s64) from got) ; 64BIT-NEXT: renamable $f7 = LFD 0, killed renamable $x5 :: (load (s64) from constant-pool) - ; 64BIT-NEXT: STD killed renamable $x4, 160, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x4, 160, $x1 :: (store (s64) into stack + 160, align 16) ; 64BIT-NEXT: renamable $x4 = LDtocCPT %const.7, $x2 :: (load (s64) from got) ; 64BIT-NEXT: renamable $f8 = LFD 0, killed renamable $x7 :: (load (s64) from 
constant-pool) ; 64BIT-NEXT: renamable $x5 = LIS8 16320 @@ -1588,12 +1600,12 @@ define void @caller_fpr_stack() { ; 64BIT-NEXT: renamable $x8 = ORIS8 killed renamable $x8, 52428 ; 64BIT-NEXT: renamable $x8 = ORI8 killed renamable $x8, 52429 ; 64BIT-NEXT: $f10 = COPY renamable $f1 - ; 64BIT-NEXT: STW killed renamable $r3, 152, $x1 :: (store (s32)) - ; 64BIT-NEXT: STD killed renamable $x5, 144, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x6, 136, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x4, 128, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x7, 120, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x8, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: STW killed renamable $r3, 152, $x1 :: (store (s32) into stack + 152, align 8, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x5, 144, $x1 :: (store (s64) into stack + 144, align 16) + ; 64BIT-NEXT: STD killed renamable $x6, 136, $x1 :: (store (s64) into stack + 136, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x4, 128, $x1 :: (store (s64) into stack + 128, align 16) + ; 64BIT-NEXT: STD killed renamable $x7, 120, $x1 :: (store (s64) into stack + 120, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x8, 112, $x1 :: (store (s64) into stack + 112, align 16) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit killed $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, implicit-def $r1, implicit-def dead $f1 ; 64BIT-NEXT: ADJCALLSTACKUP 176, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm @@ -1688,16 +1700,16 @@ define void @caller_mix() { ; 32BIT: bb.0.entry: ; 32BIT-NEXT: ADJCALLSTACKDOWN 84, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: renamable $r3 = LI 60 - ; 32BIT-NEXT: STW killed renamable $r3, 80, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed 
renamable $r3, 80, $r1 :: (store (s32) into stack + 80, align 16) ; 32BIT-NEXT: renamable $r3 = LI 50 - ; 32BIT-NEXT: STW killed renamable $r3, 76, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r3, 76, $r1 :: (store (s32) into stack + 76, basealign 16) ; 32BIT-NEXT: renamable $r3 = LI 40 - ; 32BIT-NEXT: STW killed renamable $r3, 72, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r3, 72, $r1 :: (store (s32) into stack + 72, align 8, basealign 16) ; 32BIT-NEXT: renamable $r3 = LI 0 ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.0, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: STW killed renamable $r3, 64, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r3, 64, $r1 :: (store (s32) into stack + 64, align 16) ; 32BIT-NEXT: renamable $r3 = LI 2 - ; 32BIT-NEXT: STW killed renamable $r3, 60, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r3, 60, $r1 :: (store (s32) into stack + 60, basealign 16) ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.1, $r2 :: (load (s32) from got) ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.2, $r2 :: (load (s32) from got) @@ -1706,10 +1718,10 @@ define void @caller_mix() { ; 32BIT-NEXT: renamable $f3 = LFD 0, killed renamable $r4 :: (load (s64) from constant-pool) ; 32BIT-NEXT: renamable $f4 = LFD 0, killed renamable $r3 :: (load (s64) from constant-pool) ; 32BIT-NEXT: renamable $r3 = LI 1 - ; 32BIT-NEXT: STW killed renamable $r3, 56, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r3, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) ; 32BIT-NEXT: renamable $r3 = LIS 457 ; 32BIT-NEXT: renamable $r3 = ORI killed renamable $r3, 50048 - ; 32BIT-NEXT: STW killed renamable $r3, 68, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r3, 68, $r1 :: (store (s32) into stack + 68, basealign 16) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $f2, 
implicit $f3, implicit $f4, implicit $r2, implicit-def $r1, implicit-def dead $r3 ; 32BIT-NEXT: ADJCALLSTACKUP 84, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm @@ -1732,8 +1744,8 @@ define void @caller_mix() { ; 64BIT-NEXT: $x7 = LI8 1 ; 64BIT-NEXT: $x8 = LI8 2 ; 64BIT-NEXT: $x10 = LI8 40 - ; 64BIT-NEXT: STD killed renamable $x4, 120, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x5, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x4, 120, $x1 :: (store (s64) into stack + 120, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x5, 112, $x1 :: (store (s64) into stack + 112, align 16) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit killed $x7, implicit killed $x8, implicit $x9, implicit killed $x10, implicit $x2, implicit-def $r1, implicit-def dead $x3 ; 64BIT-NEXT: ADJCALLSTACKUP 128, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm @@ -1855,60 +1867,60 @@ define void @caller_mix() { ; 32BIT-NEXT: renamable $r8 = LIS 16329 ; 32BIT-NEXT: renamable $r9 = LIS 13107 ; 32BIT-NEXT: renamable $r10 = LIS 16339 - ; 32BIT-NEXT: STW renamable $r3, 92, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW renamable $r3, 92, $r1 :: (store (s32) into stack + 92, basealign 16) ; 32BIT-NEXT: renamable $r11 = LIS 16345 - ; 32BIT-NEXT: STW killed renamable $r4, 88, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r4, 88, $r1 :: (store (s32) into stack + 88, align 8, basealign 16) ; 32BIT-NEXT: renamable $r4 = LIS 16355 - ; 32BIT-NEXT: STW killed renamable $r3, 132, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW killed renamable $r3, 132, $r1 :: (store (s32) into stack + 132, basealign 16) ; 32BIT-NEXT: renamable $r3 = LIS 26214 - ; 32BIT-NEXT: STW killed renamable $r5, 128, $r1 :: (store (s32), align 8) + ; 
32BIT-NEXT: STW killed renamable $r5, 128, $r1 :: (store (s32) into stack + 128, align 16) ; 32BIT-NEXT: renamable $r5 = ORI killed renamable $r6, 39322 - ; 32BIT-NEXT: STW renamable $r5, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW renamable $r5, 60, $r1 :: (store (s32) into stack + 60, basealign 16) ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r7, 39321 - ; 32BIT-NEXT: STW killed renamable $r6, 56, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r6, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) ; 32BIT-NEXT: renamable $r6 = LIS 16358 - ; 32BIT-NEXT: STW renamable $r5, 68, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW renamable $r5, 68, $r1 :: (store (s32) into stack + 68, basealign 16) ; 32BIT-NEXT: renamable $r7 = ORI killed renamable $r8, 39321 - ; 32BIT-NEXT: STW killed renamable $r7, 64, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r7, 64, $r1 :: (store (s32) into stack + 64, align 16) ; 32BIT-NEXT: renamable $r7 = ORI killed renamable $r9, 13107 - ; 32BIT-NEXT: STW renamable $r7, 76, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW renamable $r7, 76, $r1 :: (store (s32) into stack + 76, basealign 16) ; 32BIT-NEXT: renamable $r8 = ORI killed renamable $r10, 13107 - ; 32BIT-NEXT: STW killed renamable $r8, 72, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r8, 72, $r1 :: (store (s32) into stack + 72, align 8, basealign 16) ; 32BIT-NEXT: renamable $r8 = LIS 16361 - ; 32BIT-NEXT: STW renamable $r5, 84, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW renamable $r5, 84, $r1 :: (store (s32) into stack + 84, basealign 16) ; 32BIT-NEXT: renamable $r9 = ORI killed renamable $r11, 39321 - ; 32BIT-NEXT: STW killed renamable $r9, 80, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r9, 80, $r1 :: (store (s32) into stack + 80, align 16) 
; 32BIT-NEXT: renamable $r9 = LIS 52428 - ; 32BIT-NEXT: STW renamable $r7, 100, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW renamable $r7, 100, $r1 :: (store (s32) into stack + 100, basealign 16) ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 13107 - ; 32BIT-NEXT: STW killed renamable $r4, 96, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r4, 96, $r1 :: (store (s32) into stack + 96, align 16) ; 32BIT-NEXT: renamable $r3 = ORI killed renamable $r3, 26214 - ; 32BIT-NEXT: STW renamable $r3, 108, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW renamable $r3, 108, $r1 :: (store (s32) into stack + 108, basealign 16) ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r6, 26214 - ; 32BIT-NEXT: STW killed renamable $r4, 104, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r4, 104, $r1 :: (store (s32) into stack + 104, align 8, basealign 16) ; 32BIT-NEXT: renamable $r4 = LIS 16364 - ; 32BIT-NEXT: STW renamable $r5, 116, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW renamable $r5, 116, $r1 :: (store (s32) into stack + 116, basealign 16) ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r8, 39321 - ; 32BIT-NEXT: STW killed renamable $r6, 112, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r6, 112, $r1 :: (store (s32) into stack + 112, align 16) ; 32BIT-NEXT: renamable $r6 = ORI killed renamable $r9, 52429 - ; 32BIT-NEXT: STW renamable $r6, 124, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW renamable $r6, 124, $r1 :: (store (s32) into stack + 124, basealign 16) ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 52428 - ; 32BIT-NEXT: STW killed renamable $r4, 120, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r4, 120, $r1 :: (store (s32) into stack + 120, align 8, basealign 16) ; 32BIT-NEXT: renamable $r4 = LIS 16369 - ; 32BIT-NEXT: STW killed 
renamable $r5, 140, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW killed renamable $r5, 140, $r1 :: (store (s32) into stack + 140, basealign 16) ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 39321 - ; 32BIT-NEXT: STW killed renamable $r4, 136, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r4, 136, $r1 :: (store (s32) into stack + 136, align 8, basealign 16) ; 32BIT-NEXT: renamable $r4 = LIS 16371 - ; 32BIT-NEXT: STW killed renamable $r7, 148, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW killed renamable $r7, 148, $r1 :: (store (s32) into stack + 148, basealign 16) ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 13107 - ; 32BIT-NEXT: STW killed renamable $r4, 144, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r4, 144, $r1 :: (store (s32) into stack + 144, align 16) ; 32BIT-NEXT: renamable $r4 = LIS 16372 ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.0, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: STW killed renamable $r6, 156, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW killed renamable $r6, 156, $r1 :: (store (s32) into stack + 156, basealign 16) ; 32BIT-NEXT: renamable $r4 = ORI killed renamable $r4, 52428 - ; 32BIT-NEXT: STW killed renamable $r4, 152, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r4, 152, $r1 :: (store (s32) into stack + 152, align 8, basealign 16) ; 32BIT-NEXT: renamable $r4 = LWZtoc %const.1, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: STW killed renamable $r3, 164, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW killed renamable $r3, 164, $r1 :: (store (s32) into stack + 164, basealign 16) ; 32BIT-NEXT: renamable $r3 = LWZtoc %const.2, $r2 :: (load (s32) from got) ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r5 :: (load (s64) from constant-pool) ; 32BIT-NEXT: renamable $r5 = LWZtoc %const.3, $r2 :: (load (s32) from got) 
@@ -1943,7 +1955,7 @@ define void @caller_mix() { ; 32BIT-NEXT: $r8 = LI 6 ; 32BIT-NEXT: $r9 = LI 7 ; 32BIT-NEXT: $r10 = LI 8 - ; 32BIT-NEXT: STW killed renamable $r11, 160, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed renamable $r11, 160, $r1 :: (store (s32) into stack + 160, align 16) ; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, implicit-def dead $r3 ; 32BIT-NEXT: ADJCALLSTACKUP 168, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm @@ -2044,20 +2056,20 @@ define void @caller_mix() { ; 64BIT-NEXT: $x8 = LI8 6 ; 64BIT-NEXT: $x9 = LI8 7 ; 64BIT-NEXT: $x10 = LI8 8 - ; 64BIT-NEXT: STD killed renamable $x29, 184, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x28, 144, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x11, 216, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x12, 200, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x0, 160, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x31, 152, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x30, 128, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x27, 208, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x25, 192, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x24, 176, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x23, 168, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x22, 136, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x21, 120, $x1 :: (store (s64)) - ; 64BIT-NEXT: STD killed renamable $x20, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x29, 184, $x1 :: (store 
(s64) into stack + 184, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x28, 144, $x1 :: (store (s64) into stack + 144, align 16) + ; 64BIT-NEXT: STD killed renamable $x11, 216, $x1 :: (store (s64) into stack + 216, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x12, 200, $x1 :: (store (s64) into stack + 200, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x0, 160, $x1 :: (store (s64) into stack + 160, align 16) + ; 64BIT-NEXT: STD killed renamable $x31, 152, $x1 :: (store (s64) into stack + 152, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x30, 128, $x1 :: (store (s64) into stack + 128, align 16) + ; 64BIT-NEXT: STD killed renamable $x27, 208, $x1 :: (store (s64) into stack + 208, align 16) + ; 64BIT-NEXT: STD killed renamable $x25, 192, $x1 :: (store (s64) into stack + 192, align 16) + ; 64BIT-NEXT: STD killed renamable $x24, 176, $x1 :: (store (s64) into stack + 176, align 16) + ; 64BIT-NEXT: STD killed renamable $x23, 168, $x1 :: (store (s64) into stack + 168, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x22, 136, $x1 :: (store (s64) into stack + 136, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x21, 120, $x1 :: (store (s64) into stack + 120, basealign 16) + ; 64BIT-NEXT: STD killed renamable $x20, 112, $x1 :: (store (s64) into stack + 112, align 16) ; 64BIT-NEXT: BL8_NOP , csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x9, implicit $x10, implicit $f1, implicit $f2, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, implicit-def $r1, implicit-def dead $x3 ; 64BIT-NEXT: ADJCALLSTACKUP 224, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index 8f33f5ef863e..03770d22d9f4 100644 --- 
a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -1012,18 +1012,22 @@ define void @call_test_stackarg_float() { ; ASM32PWR4-NEXT: lwz 3, L..C8(2) # @f ; ASM32PWR4-NEXT: stw 0, 88(1) ; ASM32PWR4-NEXT: li 4, 2 -; ASM32PWR4-NEXT: li 5, 3 ; ASM32PWR4-NEXT: li 6, 4 ; ASM32PWR4-NEXT: li 7, 5 +; ASM32PWR4-NEXT: li 8, 6 ; ASM32PWR4-NEXT: lfs 1, 0(3) ; ASM32PWR4-NEXT: lwz 3, L..C9(2) # @d -; ASM32PWR4-NEXT: li 8, 6 ; ASM32PWR4-NEXT: li 9, 7 +; ASM32PWR4-NEXT: li 10, 8 ; ASM32PWR4-NEXT: lfd 2, 0(3) ; ASM32PWR4-NEXT: li 3, 1 -; ASM32PWR4-NEXT: li 10, 8 -; ASM32PWR4-NEXT: stfd 2, 60(1) +; ASM32PWR4-NEXT: stfd 2, 72(1) +; ASM32PWR4-NEXT: lwz 5, 76(1) +; ASM32PWR4-NEXT: lwz 11, 72(1) +; ASM32PWR4-NEXT: stw 5, 64(1) +; ASM32PWR4-NEXT: li 5, 3 ; ASM32PWR4-NEXT: stfs 1, 56(1) +; ASM32PWR4-NEXT: stw 11, 60(1) ; ASM32PWR4-NEXT: bl .test_stackarg_float[PR] ; ASM32PWR4-NEXT: nop ; ASM32PWR4-NEXT: addi 1, 1, 80 @@ -1126,20 +1130,24 @@ define void @call_test_stackarg_float3() { ; ASM32PWR4-NEXT: stwu 1, -80(1) ; ASM32PWR4-NEXT: lwz 3, L..C9(2) # @d ; ASM32PWR4-NEXT: stw 0, 88(1) -; ASM32PWR4-NEXT: li 4, 2 ; ASM32PWR4-NEXT: li 5, 3 ; ASM32PWR4-NEXT: li 6, 4 ; ASM32PWR4-NEXT: li 7, 5 +; ASM32PWR4-NEXT: li 8, 6 ; ASM32PWR4-NEXT: lfd 1, 0(3) ; ASM32PWR4-NEXT: lwz 3, L..C8(2) # @f -; ASM32PWR4-NEXT: li 8, 6 ; ASM32PWR4-NEXT: li 9, 7 ; ASM32PWR4-NEXT: stfd 1, 72(1) -; ASM32PWR4-NEXT: lwz 10, 72(1) ; ASM32PWR4-NEXT: lfs 2, 0(3) ; ASM32PWR4-NEXT: li 3, 1 +; ASM32PWR4-NEXT: stfd 1, 64(1) +; ASM32PWR4-NEXT: lwz 4, 68(1) +; ASM32PWR4-NEXT: lwz 10, 72(1) +; ASM32PWR4-NEXT: lwz 11, 64(1) +; ASM32PWR4-NEXT: stw 4, 56(1) +; ASM32PWR4-NEXT: li 4, 2 ; ASM32PWR4-NEXT: stfs 2, 60(1) -; ASM32PWR4-NEXT: stfd 1, 52(1) +; ASM32PWR4-NEXT: stw 11, 52(1) ; ASM32PWR4-NEXT: bl .test_stackarg_float3[PR] ; ASM32PWR4-NEXT: nop ; ASM32PWR4-NEXT: addi 1, 1, 80 @@ -1562,95 +1570,99 @@ define void @caller_fpr_stack() { ; ASM32PWR4-LABEL: caller_fpr_stack: ; ASM32PWR4: # %bb.0: # 
%entry ; ASM32PWR4-NEXT: mflr 0 -; ASM32PWR4-NEXT: stwu 1, -144(1) +; ASM32PWR4-NEXT: stwu 1, -160(1) ; ASM32PWR4-NEXT: lwz 3, L..C19(2) # @d15 -; ASM32PWR4-NEXT: lwz 4, L..C20(2) # @f14 -; ASM32PWR4-NEXT: lwz 5, L..C21(2) # @f16 -; ASM32PWR4-NEXT: stw 0, 152(1) -; ASM32PWR4-NEXT: lis 6, 16361 -; ASM32PWR4-NEXT: ori 6, 6, 39321 +; ASM32PWR4-NEXT: stw 0, 168(1) +; ASM32PWR4-NEXT: lwz 5, L..C20(2) # %const.1 +; ASM32PWR4-NEXT: lwz 4, L..C21(2) # @f14 ; ASM32PWR4-NEXT: lfd 0, 0(3) -; ASM32PWR4-NEXT: lwz 3, 0(4) -; ASM32PWR4-NEXT: lwz 4, 0(5) -; ASM32PWR4-NEXT: li 5, 0 -; ASM32PWR4-NEXT: stw 5, 60(1) -; ASM32PWR4-NEXT: lis 5, 16352 -; ASM32PWR4-NEXT: stw 5, 56(1) -; ASM32PWR4-NEXT: lis 5, 13107 -; ASM32PWR4-NEXT: ori 5, 5, 13107 -; ASM32PWR4-NEXT: stw 5, 68(1) -; ASM32PWR4-NEXT: lis 5, 16355 -; ASM32PWR4-NEXT: ori 5, 5, 13107 -; ASM32PWR4-NEXT: stw 5, 64(1) -; ASM32PWR4-NEXT: lis 5, 26214 -; ASM32PWR4-NEXT: ori 5, 5, 26214 -; ASM32PWR4-NEXT: stw 5, 76(1) -; ASM32PWR4-NEXT: lis 5, 16358 -; ASM32PWR4-NEXT: ori 5, 5, 26214 -; ASM32PWR4-NEXT: stw 5, 72(1) -; ASM32PWR4-NEXT: lis 5, -26215 -; ASM32PWR4-NEXT: ori 5, 5, 39322 -; ASM32PWR4-NEXT: stw 5, 84(1) -; ASM32PWR4-NEXT: stw 5, 100(1) -; ASM32PWR4-NEXT: lis 5, 16313 -; ASM32PWR4-NEXT: ori 5, 5, 39321 -; ASM32PWR4-NEXT: stw 5, 96(1) -; ASM32PWR4-NEXT: lis 5, -15729 -; ASM32PWR4-NEXT: ori 5, 5, 23593 -; ASM32PWR4-NEXT: stw 5, 108(1) -; ASM32PWR4-NEXT: lis 5, 16316 -; ASM32PWR4-NEXT: ori 5, 5, 10485 -; ASM32PWR4-NEXT: stw 5, 104(1) -; ASM32PWR4-NEXT: lis 5, -5243 -; ASM32PWR4-NEXT: ori 5, 5, 7864 -; ASM32PWR4-NEXT: stw 5, 116(1) -; ASM32PWR4-NEXT: lis 5, 16318 -; ASM32PWR4-NEXT: ori 5, 5, 47185 -; ASM32PWR4-NEXT: stw 6, 80(1) -; ASM32PWR4-NEXT: lis 6, -13108 -; ASM32PWR4-NEXT: ori 6, 6, 52429 -; ASM32PWR4-NEXT: stw 5, 112(1) -; ASM32PWR4-NEXT: lis 5, 2621 -; ASM32PWR4-NEXT: ori 5, 5, 28836 -; ASM32PWR4-NEXT: stw 6, 92(1) -; ASM32PWR4-NEXT: lis 6, 16364 -; ASM32PWR4-NEXT: ori 6, 6, 52428 -; ASM32PWR4-NEXT: stw 5, 124(1) -; 
ASM32PWR4-NEXT: lis 5, 16320 -; ASM32PWR4-NEXT: ori 5, 5, 41943 -; ASM32PWR4-NEXT: stw 6, 88(1) -; ASM32PWR4-NEXT: lwz 6, L..C22(2) # %const.0 -; ASM32PWR4-NEXT: stw 5, 120(1) -; ASM32PWR4-NEXT: lwz 5, L..C23(2) # %const.1 -; ASM32PWR4-NEXT: lfd 2, 0(6) -; ASM32PWR4-NEXT: lwz 6, L..C24(2) # %const.2 +; ASM32PWR4-NEXT: lwz 3, L..C22(2) # @f16 +; ASM32PWR4-NEXT: lwz 3, 0(3) +; ASM32PWR4-NEXT: stw 3, 140(1) +; ASM32PWR4-NEXT: li 3, 0 +; ASM32PWR4-NEXT: stw 3, 60(1) +; ASM32PWR4-NEXT: lis 3, 16352 +; ASM32PWR4-NEXT: stw 3, 56(1) +; ASM32PWR4-NEXT: lis 3, 13107 +; ASM32PWR4-NEXT: ori 3, 3, 13107 +; ASM32PWR4-NEXT: stw 3, 68(1) +; ASM32PWR4-NEXT: lis 3, 16355 +; ASM32PWR4-NEXT: ori 3, 3, 13107 +; ASM32PWR4-NEXT: stw 3, 64(1) +; ASM32PWR4-NEXT: lis 3, 26214 +; ASM32PWR4-NEXT: ori 3, 3, 26214 +; ASM32PWR4-NEXT: stw 3, 76(1) +; ASM32PWR4-NEXT: lis 3, 16358 +; ASM32PWR4-NEXT: ori 3, 3, 26214 +; ASM32PWR4-NEXT: stw 3, 72(1) +; ASM32PWR4-NEXT: lis 3, -26215 +; ASM32PWR4-NEXT: ori 3, 3, 39322 +; ASM32PWR4-NEXT: stw 3, 84(1) +; ASM32PWR4-NEXT: stw 3, 100(1) +; ASM32PWR4-NEXT: lis 3, 16313 +; ASM32PWR4-NEXT: ori 3, 3, 39321 +; ASM32PWR4-NEXT: stw 3, 96(1) +; ASM32PWR4-NEXT: lis 3, -15729 +; ASM32PWR4-NEXT: ori 3, 3, 23593 +; ASM32PWR4-NEXT: stw 3, 108(1) +; ASM32PWR4-NEXT: lis 3, 16316 +; ASM32PWR4-NEXT: ori 3, 3, 10485 +; ASM32PWR4-NEXT: stw 3, 104(1) +; ASM32PWR4-NEXT: lis 3, -5243 +; ASM32PWR4-NEXT: ori 3, 3, 7864 +; ASM32PWR4-NEXT: stw 3, 116(1) +; ASM32PWR4-NEXT: lis 3, 16318 +; ASM32PWR4-NEXT: ori 3, 3, 47185 +; ASM32PWR4-NEXT: stw 3, 112(1) +; ASM32PWR4-NEXT: lis 3, 2621 +; ASM32PWR4-NEXT: ori 3, 3, 28836 +; ASM32PWR4-NEXT: stw 3, 124(1) +; ASM32PWR4-NEXT: lis 3, 16320 +; ASM32PWR4-NEXT: ori 3, 3, 41943 +; ASM32PWR4-NEXT: stw 3, 120(1) +; ASM32PWR4-NEXT: lwz 3, L..C23(2) # %const.0 +; ASM32PWR4-NEXT: lfd 2, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C24(2) # %const.2 ; ASM32PWR4-NEXT: lfd 3, 0(5) ; ASM32PWR4-NEXT: lwz 5, L..C25(2) # %const.3 -; ASM32PWR4-NEXT: lfd 4, 0(6) -; 
ASM32PWR4-NEXT: lwz 6, L..C26(2) # %const.4 +; ASM32PWR4-NEXT: lfd 4, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C26(2) # %const.4 ; ASM32PWR4-NEXT: lfd 6, 0(5) ; ASM32PWR4-NEXT: lwz 5, L..C27(2) # %const.5 -; ASM32PWR4-NEXT: lfd 7, 0(6) -; ASM32PWR4-NEXT: lwz 6, L..C28(2) # %const.6 +; ASM32PWR4-NEXT: lwz 4, 0(4) +; ASM32PWR4-NEXT: lfd 7, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C28(2) # %const.6 ; ASM32PWR4-NEXT: lfd 8, 0(5) ; ASM32PWR4-NEXT: lwz 5, L..C29(2) # %const.7 -; ASM32PWR4-NEXT: lfd 9, 0(6) -; ASM32PWR4-NEXT: lwz 6, L..C30(2) # %const.8 +; ASM32PWR4-NEXT: stw 4, 128(1) +; ASM32PWR4-NEXT: lis 4, 16361 +; ASM32PWR4-NEXT: ori 4, 4, 39321 +; ASM32PWR4-NEXT: lfd 9, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C30(2) # %const.8 ; ASM32PWR4-NEXT: lfd 1, 0(5) ; ASM32PWR4-NEXT: lwz 5, L..C31(2) # %const.9 -; ASM32PWR4-NEXT: lfd 11, 0(6) -; ASM32PWR4-NEXT: lwz 6, L..C32(2) # %const.10 +; ASM32PWR4-NEXT: stw 4, 80(1) +; ASM32PWR4-NEXT: lis 4, -13108 ; ASM32PWR4-NEXT: fmr 10, 1 +; ASM32PWR4-NEXT: ori 4, 4, 52429 +; ASM32PWR4-NEXT: lfd 11, 0(3) +; ASM32PWR4-NEXT: lwz 3, L..C32(2) # %const.10 ; ASM32PWR4-NEXT: lfd 12, 0(5) ; ASM32PWR4-NEXT: lwz 5, L..C33(2) # %const.11 -; ASM32PWR4-NEXT: lfd 13, 0(6) +; ASM32PWR4-NEXT: stw 4, 92(1) +; ASM32PWR4-NEXT: lis 4, 16364 +; ASM32PWR4-NEXT: ori 4, 4, 52428 +; ASM32PWR4-NEXT: stfd 0, 152(1) +; ASM32PWR4-NEXT: stw 4, 88(1) +; ASM32PWR4-NEXT: lwz 4, 156(1) +; ASM32PWR4-NEXT: lfd 13, 0(3) ; ASM32PWR4-NEXT: lfs 5, 0(5) -; ASM32PWR4-NEXT: stfd 0, 132(1) -; ASM32PWR4-NEXT: stw 4, 140(1) -; ASM32PWR4-NEXT: stw 3, 128(1) +; ASM32PWR4-NEXT: lwz 3, 152(1) +; ASM32PWR4-NEXT: stw 4, 136(1) +; ASM32PWR4-NEXT: stw 3, 132(1) ; ASM32PWR4-NEXT: bl .test_fpr_stack ; ASM32PWR4-NEXT: nop -; ASM32PWR4-NEXT: addi 1, 1, 144 +; ASM32PWR4-NEXT: addi 1, 1, 160 ; ASM32PWR4-NEXT: lwz 0, 8(1) ; ASM32PWR4-NEXT: mtlr 0 ; ASM32PWR4-NEXT: blr @@ -1667,7 +1679,6 @@ define void @caller_fpr_stack() { ; ASM64PWR4-NEXT: lis 7, 16313 ; ASM64PWR4-NEXT: lwz 3, 0(3) ; ASM64PWR4-NEXT: ld 4, 0(4) 
-; ASM64PWR4-NEXT: lwz 5, 0(5) ; ASM64PWR4-NEXT: stw 3, 152(1) ; ASM64PWR4-NEXT: ld 3, L..C22(2) # %const.0 ; ASM64PWR4-NEXT: std 4, 160(1) @@ -1686,6 +1697,7 @@ define void @caller_fpr_stack() { ; ASM64PWR4-NEXT: ld 4, L..C29(2) # %const.7 ; ASM64PWR4-NEXT: lfd 9, 0(3) ; ASM64PWR4-NEXT: ld 3, L..C30(2) # %const.8 +; ASM64PWR4-NEXT: lwz 5, 0(5) ; ASM64PWR4-NEXT: lfd 1, 0(4) ; ASM64PWR4-NEXT: lis 4, 16320 ; ASM64PWR4-NEXT: ori 4, 4, 41943 diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mir.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mir.ll index 67800df6ed4b..95fed680e696 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-mir.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-mir.ll @@ -134,7 +134,7 @@ define void @call_test_byval_3Byte() { ; 32BIT-NEXT: ADJCALLSTACKDOWN 60, 0, implicit-def dead $r1, implicit $r1 ; 32BIT-NEXT: renamable $r3 = LI 42 ; 32BIT-NEXT: renamable $r4 = LWZtoc @gS3, $r2 :: (load (s32) from got) - ; 32BIT-NEXT: STW killed renamable $r3, 56, $r1 :: (store (s32)) + ; 32BIT-NEXT: STW killed renamable $r3, 56, $r1 :: (store (s32) into stack + 56, align 8, basealign 16) ; 32BIT-NEXT: renamable $r3 = LBZ 2, renamable $r4 :: (load (s8)) ; 32BIT-NEXT: renamable $r4 = LHZ 0, killed renamable $r4 :: (load (s16)) ; 32BIT-NEXT: renamable $r10 = RLWINM killed renamable $r3, 8, 16, 23 @@ -155,7 +155,7 @@ define void @call_test_byval_3Byte() { ; 64BIT-NEXT: ADJCALLSTACKDOWN 120, 0, implicit-def dead $r1, implicit $r1 ; 64BIT-NEXT: renamable $x3 = LI8 42 ; 64BIT-NEXT: renamable $x4 = LDtoc @gS3, $x2 :: (load (s64) from got) - ; 64BIT-NEXT: STD killed renamable $x3, 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed renamable $x3, 112, $x1 :: (store (s64) into stack + 112, align 16) ; 64BIT-NEXT: renamable $x3 = LBZ8 2, renamable $x4 :: (load (s8)) ; 64BIT-NEXT: renamable $x4 = LHZ8 0, killed renamable $x4 :: (load (s16)) ; 64BIT-NEXT: renamable $x10 = RLDIC killed renamable $x3, 40, 16 diff --git a/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll 
b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll index 7c45958a1c2f..7ee854c2ae22 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll @@ -34,17 +34,17 @@ define double @caller() { ; MIR32-NEXT: STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store (s128), align 8) ; MIR32-NEXT: renamable $r3 = LI 104 ; MIR32-NEXT: STXVW4X renamable $vsl0, $r1, killed renamable $r4 :: (store (s128), align 8) - ; MIR32-NEXT: renamable $r4 = LI 88 ; MIR32-NEXT: STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store (s128), align 8) - ; MIR32-NEXT: STXVW4X renamable $vsl0, $r1, killed renamable $r4 :: (store (s128), align 8) - ; MIR32-NEXT: renamable $r3 = LI 72 - ; MIR32-NEXT: renamable $r4 = LWZtoc %const.0, $r2 :: (load (s32) from got) - ; MIR32-NEXT: STXVW4X killed renamable $vsl0, $r1, killed renamable $r3 :: (store (s128), align 8) + ; MIR32-NEXT: renamable $r3 = LI 88 + ; MIR32-NEXT: renamable $r4 = LI 72 + ; MIR32-NEXT: STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store (s128), align 8) ; MIR32-NEXT: renamable $r3 = LI 48 - ; MIR32-NEXT: renamable $vsl0 = LXVD2X $zero, killed renamable $r4 :: (load (s128) from constant-pool) + ; MIR32-NEXT: STXVW4X killed renamable $vsl0, $r1, killed renamable $r4 :: (store (s128), align 8) ; MIR32-NEXT: renamable $r4 = LI 512 - ; MIR32-NEXT: STXVD2X killed renamable $vsl0, $r1, killed renamable $r3 :: (store (s128)) - ; MIR32-NEXT: STW killed renamable $r4, 152, $r1 :: (store (s32)) + ; MIR32-NEXT: STW killed renamable $r4, 152, $r1 :: (store (s32) into stack + 152, align 8, basealign 16) + ; MIR32-NEXT: renamable $r4 = LWZtoc %const.0, $r2 :: (load (s32) from got) + ; MIR32-NEXT: renamable $vsl0 = LXVD2X $zero, killed renamable $r4 :: (load (s128) from constant-pool) + ; MIR32-NEXT: STXVD2X killed renamable $vsl0, $r1, killed renamable $r3 :: (store (s128) into stack + 48) ; MIR32-NEXT: $f1 = XXLXORdpz ; MIR32-NEXT: $f2 = XXLXORdpz ; MIR32-NEXT: 
$v2 = XXLXORz @@ -92,18 +92,18 @@ define double @caller() { ; MIR64-NEXT: ADJCALLSTACKDOWN 224, 0, implicit-def dead $r1, implicit $r1 ; MIR64-NEXT: renamable $vsl0 = XXLXORz ; MIR64-NEXT: renamable $x3 = LI8 160 - ; MIR64-NEXT: STXVW4X renamable $vsl0, $x1, killed renamable $x3 :: (store (s128), align 8) + ; MIR64-NEXT: STXVW4X renamable $vsl0, $x1, killed renamable $x3 :: (store (s128)) ; MIR64-NEXT: renamable $x3 = LI8 144 - ; MIR64-NEXT: STXVW4X renamable $vsl0, $x1, killed renamable $x3 :: (store (s128), align 8) + ; MIR64-NEXT: STXVW4X renamable $vsl0, $x1, killed renamable $x3 :: (store (s128)) ; MIR64-NEXT: renamable $x3 = LI8 128 - ; MIR64-NEXT: STXVW4X killed renamable $vsl0, $x1, killed renamable $x3 :: (store (s128), align 8) - ; MIR64-NEXT: renamable $x3 = LDtocCPT %const.0, $x2 :: (load (s64) from got) - ; MIR64-NEXT: renamable $vsl0 = LXVD2X $zero8, killed renamable $x3 :: (load (s128) from constant-pool) - ; MIR64-NEXT: renamable $x3 = LI8 80 - ; MIR64-NEXT: STXVD2X killed renamable $vsl0, $x1, killed renamable $x3 :: (store (s128)) + ; MIR64-NEXT: STXVW4X killed renamable $vsl0, $x1, killed renamable $x3 :: (store (s128)) ; MIR64-NEXT: renamable $x3 = LI8 512 - ; MIR64-NEXT: STD killed renamable $x3, 184, $x1 :: (store (s64)) - ; MIR64-NEXT: STD killed renamable $x4, 176, $x1 :: (store (s64)) + ; MIR64-NEXT: STD killed renamable $x3, 184, $x1 :: (store (s64) into stack + 184, basealign 16) + ; MIR64-NEXT: renamable $x3 = LI8 80 + ; MIR64-NEXT: STD killed renamable $x4, 176, $x1 :: (store (s64) into stack + 176, align 16) + ; MIR64-NEXT: renamable $x4 = LDtocCPT %const.0, $x2 :: (load (s64) from got) + ; MIR64-NEXT: renamable $vsl0 = LXVD2X $zero8, killed renamable $x4 :: (load (s128) from constant-pool) + ; MIR64-NEXT: STXVD2X killed renamable $vsl0, $x1, killed renamable $x3 :: (store (s128) into stack + 80) ; MIR64-NEXT: $f1 = XXLXORdpz ; MIR64-NEXT: $f2 = XXLXORdpz ; MIR64-NEXT: $v2 = XXLXORz diff --git 
a/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll index 66f88b4e3d5a..294f074807f1 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll @@ -25,42 +25,42 @@ define double @caller() { ; 32BIT-NEXT: stw 3, 184(1) ; 32BIT-NEXT: stw 3, 176(1) ; 32BIT-NEXT: stw 4, 172(1) -; 32BIT-NEXT: lis 4, 16368 ; 32BIT-NEXT: stw 3, 168(1) +; 32BIT-NEXT: lis 4, 16368 ; 32BIT-NEXT: stw 3, 160(1) -; 32BIT-NEXT: stw 4, 164(1) ; 32BIT-NEXT: stw 3, 156(1) ; 32BIT-NEXT: li 3, 136 +; 32BIT-NEXT: stw 4, 164(1) ; 32BIT-NEXT: li 4, 120 -; 32BIT-NEXT: xxlxor 2, 2, 2 ; 32BIT-NEXT: stxvw4x 0, 1, 3 ; 32BIT-NEXT: li 3, 104 ; 32BIT-NEXT: stxvw4x 0, 1, 4 -; 32BIT-NEXT: li 4, 88 +; 32BIT-NEXT: stxvw4x 0, 1, 3 +; 32BIT-NEXT: li 3, 88 +; 32BIT-NEXT: li 4, 72 +; 32BIT-NEXT: xxlxor 2, 2, 2 ; 32BIT-NEXT: stxvw4x 0, 1, 3 ; 32BIT-NEXT: stxvw4x 0, 1, 4 +; 32BIT-NEXT: li 4, 512 +; 32BIT-NEXT: stw 4, 152(1) ; 32BIT-NEXT: lwz 4, L..C0(2) # %const.0 -; 32BIT-NEXT: li 3, 72 -; 32BIT-NEXT: stxvw4x 0, 1, 3 ; 32BIT-NEXT: li 3, 48 ; 32BIT-NEXT: xxlxor 34, 34, 34 ; 32BIT-NEXT: xxlxor 35, 35, 35 -; 32BIT-NEXT: lxvd2x 0, 0, 4 -; 32BIT-NEXT: li 4, 512 ; 32BIT-NEXT: xxlxor 36, 36, 36 ; 32BIT-NEXT: xxlxor 37, 37, 37 ; 32BIT-NEXT: xxlxor 38, 38, 38 +; 32BIT-NEXT: lxvd2x 0, 0, 4 ; 32BIT-NEXT: xxlxor 39, 39, 39 ; 32BIT-NEXT: xxlxor 40, 40, 40 +; 32BIT-NEXT: li 4, 256 ; 32BIT-NEXT: xxlxor 41, 41, 41 ; 32BIT-NEXT: xxlxor 42, 42, 42 -; 32BIT-NEXT: stxvd2x 0, 1, 3 -; 32BIT-NEXT: stw 4, 152(1) -; 32BIT-NEXT: li 3, 128 -; 32BIT-NEXT: li 4, 256 ; 32BIT-NEXT: xxlxor 43, 43, 43 ; 32BIT-NEXT: xxlxor 44, 44, 44 ; 32BIT-NEXT: xxlxor 45, 45, 45 +; 32BIT-NEXT: stxvd2x 0, 1, 3 +; 32BIT-NEXT: li 3, 128 ; 32BIT-NEXT: xxlxor 3, 3, 3 ; 32BIT-NEXT: xxlxor 4, 4, 4 ; 32BIT-NEXT: xxlxor 5, 5, 5 @@ -114,23 +114,23 @@ define double @caller() { ; 64BIT-NEXT: li 3, 128 ; 64BIT-NEXT: xxlxor 43, 43, 43 ; 64BIT-NEXT: stxvw4x 0, 1, 3 -; 64BIT-NEXT: 
ld 3, L..C0(2) # %const.0 +; 64BIT-NEXT: std 4, 176(1) +; 64BIT-NEXT: ld 4, L..C0(2) # %const.0 +; 64BIT-NEXT: li 3, 512 ; 64BIT-NEXT: xxlxor 44, 44, 44 ; 64BIT-NEXT: xxlxor 45, 45, 45 -; 64BIT-NEXT: lxvd2x 0, 0, 3 +; 64BIT-NEXT: lxvd2x 0, 0, 4 +; 64BIT-NEXT: std 3, 184(1) ; 64BIT-NEXT: li 3, 80 +; 64BIT-NEXT: li 4, 256 ; 64BIT-NEXT: xxlxor 3, 3, 3 ; 64BIT-NEXT: xxlxor 4, 4, 4 ; 64BIT-NEXT: xxlxor 5, 5, 5 ; 64BIT-NEXT: stxvd2x 0, 1, 3 -; 64BIT-NEXT: li 3, 512 -; 64BIT-NEXT: std 4, 176(1) -; 64BIT-NEXT: li 4, 256 +; 64BIT-NEXT: li 3, 128 ; 64BIT-NEXT: xxlxor 6, 6, 6 ; 64BIT-NEXT: xxlxor 7, 7, 7 ; 64BIT-NEXT: xxlxor 8, 8, 8 -; 64BIT-NEXT: std 3, 184(1) -; 64BIT-NEXT: li 3, 128 ; 64BIT-NEXT: xxlxor 9, 9, 9 ; 64BIT-NEXT: xxlxor 10, 10, 10 ; 64BIT-NEXT: xxlxor 11, 11, 11 diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-vararg-caller.ll b/llvm/test/CodeGen/PowerPC/aix-vector-vararg-caller.ll index 4697a093e5d6..0ba345fb5275 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vector-vararg-caller.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vector-vararg-caller.ll @@ -23,31 +23,31 @@ define <4 x i32> @caller() { ; 32BIT-NEXT: [[LWZtoc2:%[0-9]+]]:gprc = LWZtoc %const.2, $r2 :: (load (s32) from got) ; 32BIT-NEXT: [[LXVW4X2:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc2]] :: (load (s128) from constant-pool) ; 32BIT-NEXT: [[LI2:%[0-9]+]]:gprc = LI 160 - ; 32BIT-NEXT: STXVW4X killed [[LXVW4X2]], $r1, killed [[LI2]] :: (store (s128)) + ; 32BIT-NEXT: STXVW4X killed [[LXVW4X2]], $r1, killed [[LI2]] :: (store (s128) into stack + 160) ; 32BIT-NEXT: [[LWZtoc3:%[0-9]+]]:gprc = LWZtoc %const.3, $r2 :: (load (s32) from got) ; 32BIT-NEXT: [[LXVW4X3:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc3]] :: (load (s128) from constant-pool) ; 32BIT-NEXT: [[LI3:%[0-9]+]]:gprc = LI 144 - ; 32BIT-NEXT: STXVW4X killed [[LXVW4X3]], $r1, killed [[LI3]] :: (store (s128)) + ; 32BIT-NEXT: STXVW4X killed [[LXVW4X3]], $r1, killed [[LI3]] :: (store (s128) into stack + 144) ; 32BIT-NEXT: [[LWZtoc4:%[0-9]+]]:gprc = 
LWZtoc %const.4, $r2 :: (load (s32) from got) ; 32BIT-NEXT: [[LXVW4X4:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc4]] :: (load (s128) from constant-pool) ; 32BIT-NEXT: [[LI4:%[0-9]+]]:gprc = LI 128 - ; 32BIT-NEXT: STXVW4X killed [[LXVW4X4]], $r1, killed [[LI4]] :: (store (s128)) + ; 32BIT-NEXT: STXVW4X killed [[LXVW4X4]], $r1, killed [[LI4]] :: (store (s128) into stack + 128) ; 32BIT-NEXT: [[LWZtoc5:%[0-9]+]]:gprc = LWZtoc %const.5, $r2 :: (load (s32) from got) ; 32BIT-NEXT: [[LXVW4X5:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc5]] :: (load (s128) from constant-pool) ; 32BIT-NEXT: [[LI5:%[0-9]+]]:gprc = LI 112 - ; 32BIT-NEXT: STXVW4X killed [[LXVW4X5]], $r1, killed [[LI5]] :: (store (s128)) + ; 32BIT-NEXT: STXVW4X killed [[LXVW4X5]], $r1, killed [[LI5]] :: (store (s128) into stack + 112) ; 32BIT-NEXT: [[LWZtoc6:%[0-9]+]]:gprc = LWZtoc %const.6, $r2 :: (load (s32) from got) ; 32BIT-NEXT: [[LXVW4X6:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc6]] :: (load (s128) from constant-pool) ; 32BIT-NEXT: [[LI6:%[0-9]+]]:gprc = LI 96 - ; 32BIT-NEXT: STXVW4X killed [[LXVW4X6]], $r1, killed [[LI6]] :: (store (s128)) + ; 32BIT-NEXT: STXVW4X killed [[LXVW4X6]], $r1, killed [[LI6]] :: (store (s128) into stack + 96) ; 32BIT-NEXT: [[LWZtoc7:%[0-9]+]]:gprc = LWZtoc %const.7, $r2 :: (load (s32) from got) ; 32BIT-NEXT: [[LXVW4X7:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc7]] :: (load (s128) from constant-pool) ; 32BIT-NEXT: [[LI7:%[0-9]+]]:gprc = LI 80 - ; 32BIT-NEXT: STXVW4X killed [[LXVW4X7]], $r1, killed [[LI7]] :: (store (s128)) + ; 32BIT-NEXT: STXVW4X killed [[LXVW4X7]], $r1, killed [[LI7]] :: (store (s128) into stack + 80) ; 32BIT-NEXT: [[LWZtoc8:%[0-9]+]]:gprc = LWZtoc %const.8, $r2 :: (load (s32) from got) ; 32BIT-NEXT: [[LXVW4X8:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc8]] :: (load (s128) from constant-pool) ; 32BIT-NEXT: [[LI8:%[0-9]+]]:gprc = LI 64 - ; 32BIT-NEXT: STXVW4X killed [[LXVW4X8]], $r1, killed [[LI8]] :: (store (s128)) + ; 32BIT-NEXT: STXVW4X killed 
[[LXVW4X8]], $r1, killed [[LI8]] :: (store (s128) into stack + 64) ; 32BIT-NEXT: [[LWZ:%[0-9]+]]:gprc = LWZ 52, $r1 :: (load (s32)) ; 32BIT-NEXT: [[LWZ1:%[0-9]+]]:gprc = LWZ 48, $r1 :: (load (s32)) ; 32BIT-NEXT: [[LWZ2:%[0-9]+]]:gprc = LWZ 44, $r1 :: (load (s32)) @@ -86,27 +86,27 @@ define <4 x i32> @caller() { ; 64BIT-NEXT: [[LDtocCPT3:%[0-9]+]]:g8rc = LDtocCPT %const.3, $x2 :: (load (s64) from got) ; 64BIT-NEXT: [[LXVW4X3:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT3]] :: (load (s128) from constant-pool) ; 64BIT-NEXT: [[LI8_3:%[0-9]+]]:g8rc = LI8 192 - ; 64BIT-NEXT: STXVW4X killed [[LXVW4X3]], $x1, killed [[LI8_3]] :: (store (s128)) + ; 64BIT-NEXT: STXVW4X killed [[LXVW4X3]], $x1, killed [[LI8_3]] :: (store (s128) into stack + 192) ; 64BIT-NEXT: [[LDtocCPT4:%[0-9]+]]:g8rc = LDtocCPT %const.4, $x2 :: (load (s64) from got) ; 64BIT-NEXT: [[LXVW4X4:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT4]] :: (load (s128) from constant-pool) ; 64BIT-NEXT: [[LI8_4:%[0-9]+]]:g8rc = LI8 176 - ; 64BIT-NEXT: STXVW4X killed [[LXVW4X4]], $x1, killed [[LI8_4]] :: (store (s128)) + ; 64BIT-NEXT: STXVW4X killed [[LXVW4X4]], $x1, killed [[LI8_4]] :: (store (s128) into stack + 176) ; 64BIT-NEXT: [[LDtocCPT5:%[0-9]+]]:g8rc = LDtocCPT %const.5, $x2 :: (load (s64) from got) ; 64BIT-NEXT: [[LXVW4X5:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT5]] :: (load (s128) from constant-pool) ; 64BIT-NEXT: [[LI8_5:%[0-9]+]]:g8rc = LI8 160 - ; 64BIT-NEXT: STXVW4X killed [[LXVW4X5]], $x1, killed [[LI8_5]] :: (store (s128)) + ; 64BIT-NEXT: STXVW4X killed [[LXVW4X5]], $x1, killed [[LI8_5]] :: (store (s128) into stack + 160) ; 64BIT-NEXT: [[LDtocCPT6:%[0-9]+]]:g8rc = LDtocCPT %const.6, $x2 :: (load (s64) from got) ; 64BIT-NEXT: [[LXVW4X6:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT6]] :: (load (s128) from constant-pool) ; 64BIT-NEXT: [[LI8_6:%[0-9]+]]:g8rc = LI8 144 - ; 64BIT-NEXT: STXVW4X killed [[LXVW4X6]], $x1, killed [[LI8_6]] :: (store (s128)) + ; 64BIT-NEXT: STXVW4X killed [[LXVW4X6]], 
$x1, killed [[LI8_6]] :: (store (s128) into stack + 144) ; 64BIT-NEXT: [[LDtocCPT7:%[0-9]+]]:g8rc = LDtocCPT %const.7, $x2 :: (load (s64) from got) ; 64BIT-NEXT: [[LXVW4X7:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT7]] :: (load (s128) from constant-pool) ; 64BIT-NEXT: [[LI8_7:%[0-9]+]]:g8rc = LI8 128 - ; 64BIT-NEXT: STXVW4X killed [[LXVW4X7]], $x1, killed [[LI8_7]] :: (store (s128)) + ; 64BIT-NEXT: STXVW4X killed [[LXVW4X7]], $x1, killed [[LI8_7]] :: (store (s128) into stack + 128) ; 64BIT-NEXT: [[LDtocCPT8:%[0-9]+]]:g8rc = LDtocCPT %const.8, $x2 :: (load (s64) from got) ; 64BIT-NEXT: [[LXVW4X8:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT8]] :: (load (s128) from constant-pool) ; 64BIT-NEXT: [[LI8_8:%[0-9]+]]:g8rc = LI8 112 - ; 64BIT-NEXT: STXVW4X killed [[LXVW4X8]], $x1, killed [[LI8_8]] :: (store (s128)) + ; 64BIT-NEXT: STXVW4X killed [[LXVW4X8]], $x1, killed [[LI8_8]] :: (store (s128) into stack + 112) ; 64BIT-NEXT: [[LD:%[0-9]+]]:g8rc = LD 104, $x1 :: (load (s64)) ; 64BIT-NEXT: [[LD1:%[0-9]+]]:g8rc = LD 96, $x1 :: (load (s64)) ; 64BIT-NEXT: [[LD2:%[0-9]+]]:g8rc = LD 88, $x1 :: (load (s64)) diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-vararg-fixed-caller.ll b/llvm/test/CodeGen/PowerPC/aix-vector-vararg-fixed-caller.ll index fad275f58cd0..b39a94e17563 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vector-vararg-fixed-caller.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vector-vararg-fixed-caller.ll @@ -16,13 +16,13 @@ define void @caller() { ; 32BIT-NEXT: [[LWZtoc:%[0-9]+]]:gprc = LWZtoc %const.0, $r2 :: (load (s32) from got) ; 32BIT-NEXT: [[LXVW4X:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc]] :: (load (s128) from constant-pool) ; 32BIT-NEXT: [[LI:%[0-9]+]]:gprc = LI 64 - ; 32BIT-NEXT: STXVW4X killed [[LXVW4X]], $r1, killed [[LI]] :: (store (s128)) + ; 32BIT-NEXT: STXVW4X killed [[LXVW4X]], $r1, killed [[LI]] :: (store (s128) into stack + 64) ; 32BIT-NEXT: [[LIS:%[0-9]+]]:gprc = LIS 38314 ; 32BIT-NEXT: [[ORI:%[0-9]+]]:gprc = ORI killed [[LIS]], 63376 - ; 
32BIT-NEXT: STW killed [[ORI]], 84, $r1 :: (store (s32) into unknown-address + 4, basealign 8) + ; 32BIT-NEXT: STW killed [[ORI]], 84, $r1 :: (store (s32) into stack + 84, basealign 16) ; 32BIT-NEXT: [[LIS1:%[0-9]+]]:gprc = LIS 16389 ; 32BIT-NEXT: [[ORI1:%[0-9]+]]:gprc = ORI killed [[LIS1]], 48905 - ; 32BIT-NEXT: STW killed [[ORI1]], 80, $r1 :: (store (s32), align 8) + ; 32BIT-NEXT: STW killed [[ORI1]], 80, $r1 :: (store (s32) into stack + 80, align 16) ; 32BIT-NEXT: [[LWZtoc1:%[0-9]+]]:gprc = LWZtoc %const.1, $r2 :: (load (s32) from got) ; 32BIT-NEXT: [[LXVW4X1:%[0-9]+]]:vsrc = LXVW4X $zero, killed [[LWZtoc1]] :: (load (s128) from constant-pool) ; 32BIT-NEXT: [[LWZtoc2:%[0-9]+]]:gprc_and_gprc_nor0 = LWZtoc %const.2, $r2 :: (load (s32) from got) @@ -57,7 +57,7 @@ define void @caller() { ; 64BIT-NEXT: [[RLDIC:%[0-9]+]]:g8rc = RLDIC killed [[ORI8_]], 32, 1 ; 64BIT-NEXT: [[ORIS8_:%[0-9]+]]:g8rc = ORIS8 killed [[RLDIC]], 38314 ; 64BIT-NEXT: [[ORI8_1:%[0-9]+]]:g8rc = ORI8 killed [[ORIS8_]], 63376 - ; 64BIT-NEXT: STD killed [[ORI8_1]], 112, $x1 :: (store (s64)) + ; 64BIT-NEXT: STD killed [[ORI8_1]], 112, $x1 :: (store (s64) into stack + 112, align 16) ; 64BIT-NEXT: [[LDtocCPT1:%[0-9]+]]:g8rc = LDtocCPT %const.1, $x2 :: (load (s64) from got) ; 64BIT-NEXT: [[LXVW4X1:%[0-9]+]]:vsrc = LXVW4X $zero8, killed [[LDtocCPT1]] :: (load (s128) from constant-pool) ; 64BIT-NEXT: [[LD:%[0-9]+]]:g8rc = LD 104, $x1 :: (load (s64)) -- cgit v1.2.3 From c16dc63b44ae039f2ac123a8ffbc90031767d00b Mon Sep 17 00:00:00 2001 From: Mikael Holmen Date: Wed, 18 Jun 2025 09:23:25 +0200 Subject: [OMPIRBuilder] Fix gcc -Wparentheses warning [NFC] Without this gcc warned like /repo/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp:7559:68: warning: suggest parentheses around '&&' within '||' [-Wparentheses] 7559 | NumStaleCIArgs == (OffloadingArraysToPrivatize.size() + 2) && | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~ 7560 | "Wrong number of arguments for StaleCI when shareds are present"); | 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 7cbbbff511c8..ddc9c5392f92 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -7558,10 +7558,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask( // StaleCI is exactly OffloadingArraysToPrivatize.size() + 2 const unsigned int NumStaleCIArgs = StaleCI->arg_size(); bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.size() + 1; - assert( - !HasShareds || - NumStaleCIArgs == (OffloadingArraysToPrivatize.size() + 2) && - "Wrong number of arguments for StaleCI when shareds are present"); + assert((!HasShareds || + NumStaleCIArgs == (OffloadingArraysToPrivatize.size() + 2)) && + "Wrong number of arguments for StaleCI when shareds are present"); int SharedArgOperandNo = HasShareds ? 
OffloadingArraysToPrivatize.size() + 1 : 0; -- cgit v1.2.3 From 669627d0c77ed8408358bc8c5973255fe28a36ea Mon Sep 17 00:00:00 2001 From: Philipp Jung Date: Wed, 18 Jun 2025 11:02:53 +0200 Subject: Add check 'cppcoreguidelines-use-enum-class' (#138282) Warn on non-class enum definitions as suggested by the Core Guidelines: https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#Renum-class --- .../clang-tidy/cppcoreguidelines/CMakeLists.txt | 1 + .../CppCoreGuidelinesTidyModule.cpp | 3 ++ .../cppcoreguidelines/UseEnumClassCheck.cpp | 42 +++++++++++++++ .../cppcoreguidelines/UseEnumClassCheck.h | 40 ++++++++++++++ clang-tools-extra/docs/ReleaseNotes.rst | 6 +++ .../checks/cppcoreguidelines/use-enum-class.rst | 35 ++++++++++++ clang-tools-extra/docs/clang-tidy/checks/list.rst | 1 + .../checkers/cppcoreguidelines/use-enum-class.cpp | 62 ++++++++++++++++++++++ 8 files changed, 190 insertions(+) create mode 100644 clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.cpp create mode 100644 clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines/use-enum-class.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/use-enum-class.cpp diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt b/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt index b023f76a2543..2fb4d7f1d734 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/CMakeLists.txt @@ -33,6 +33,7 @@ add_clang_library(clangTidyCppCoreGuidelinesModule STATIC RvalueReferenceParamNotMovedCheck.cpp SlicingCheck.cpp SpecialMemberFunctionsCheck.cpp + UseEnumClassCheck.cpp VirtualClassDestructorCheck.cpp LINK_LIBS diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp index 
4dd9b0904f07..4b3b7bf963fd 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp @@ -48,6 +48,7 @@ #include "RvalueReferenceParamNotMovedCheck.h" #include "SlicingCheck.h" #include "SpecialMemberFunctionsCheck.h" +#include "UseEnumClassCheck.h" #include "VirtualClassDestructorCheck.h" namespace clang::tidy { @@ -131,6 +132,8 @@ public: CheckFactories.registerCheck("cppcoreguidelines-slicing"); CheckFactories.registerCheck( "cppcoreguidelines-use-default-member-init"); + CheckFactories.registerCheck( + "cppcoreguidelines-use-enum-class"); CheckFactories.registerCheck( "cppcoreguidelines-c-copy-assignment-signature"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.cpp new file mode 100644 index 000000000000..ec7d9237afa3 --- /dev/null +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.cpp @@ -0,0 +1,42 @@ +//===--- UseEnumClassCheck.cpp - clang-tidy -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "UseEnumClassCheck.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" + +using namespace clang::ast_matchers; + +namespace clang::tidy::cppcoreguidelines { + +UseEnumClassCheck::UseEnumClassCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + IgnoreUnscopedEnumsInClasses( + Options.get("IgnoreUnscopedEnumsInClasses", false)) {} + +void UseEnumClassCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "IgnoreUnscopedEnumsInClasses", + IgnoreUnscopedEnumsInClasses); +} + +void UseEnumClassCheck::registerMatchers(MatchFinder *Finder) { + auto EnumDecl = + IgnoreUnscopedEnumsInClasses + ? enumDecl(unless(isScoped()), unless(hasParent(recordDecl()))) + : enumDecl(unless(isScoped())); + Finder->addMatcher(EnumDecl.bind("unscoped_enum"), this); +} + +void UseEnumClassCheck::check(const MatchFinder::MatchResult &Result) { + const auto *UnscopedEnum = Result.Nodes.getNodeAs("unscoped_enum"); + + diag(UnscopedEnum->getLocation(), + "enum %0 is unscoped, use 'enum class' instead") + << UnscopedEnum; +} + +} // namespace clang::tidy::cppcoreguidelines diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.h new file mode 100644 index 000000000000..dfa4b7e3fda6 --- /dev/null +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/UseEnumClassCheck.h @@ -0,0 +1,40 @@ +//===--- UseEnumClassCheck.h - clang-tidy -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_USEENUMCLASSCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_USEENUMCLASSCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang::tidy::cppcoreguidelines { + +/// Finds unscoped (non-class) enum declarations and suggests using enum class +/// instead. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/cppcoreguidelines/use-enum-class.html +class UseEnumClassCheck : public ClangTidyCheck { +public: + UseEnumClassCheck(StringRef Name, ClangTidyContext *Context); + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus11; + } + std::optional getCheckTraversalKind() const override { + return TraversalKind::TK_IgnoreUnlessSpelledInSource; + } + +private: + const bool IgnoreUnscopedEnumsInClasses; +}; + +} // namespace clang::tidy::cppcoreguidelines + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_CPPCOREGUIDELINES_USEENUMCLASSCHECK_H diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 3c1ca2f92904..7c0c534dbc73 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -136,6 +136,12 @@ New checks Finds unintended character output from ``unsigned char`` and ``signed char`` to an ``ostream``. +- New :doc:`cppcoreguidelines-use-enum-class + ` check. + + Finds unscoped (non-class) ``enum`` declarations and suggests using + ``enum class`` instead. + - New :doc:`portability-avoid-pragma-once ` check. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines/use-enum-class.rst b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines/use-enum-class.rst new file mode 100644 index 000000000000..9e9f4c99dc24 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines/use-enum-class.rst @@ -0,0 +1,35 @@ +.. title:: clang-tidy - cppcoreguidelines-use-enum-class + +cppcoreguidelines-use-enum-class +================================ + +Finds unscoped (non-class) ``enum`` declarations and suggests using +``enum class`` instead. + +This check implements `Enum.3 +<https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#Renum-class>`_ +from the C++ Core Guidelines. + +Example: + +.. code-block:: c++ + + enum E {}; // use "enum class E {};" instead + enum class E {}; // OK + + struct S { + enum E {}; // use "enum class E {};" instead + // OK with option IgnoreUnscopedEnumsInClasses + }; + + namespace N { + enum E {}; // use "enum class E {};" instead + } + +Options +------- + +.. option:: IgnoreUnscopedEnumsInClasses + + When `true`, ignores unscoped ``enum`` declarations in classes. + Default is `false`. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 5a79d61b1fd7..ccb78ee45e9c 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -212,6 +212,7 @@ Clang-Tidy Checks :doc:`cppcoreguidelines-rvalue-reference-param-not-moved `, :doc:`cppcoreguidelines-slicing `, :doc:`cppcoreguidelines-special-member-functions `, + :doc:`cppcoreguidelines-use-enum-class `, :doc:`cppcoreguidelines-virtual-class-destructor `, "Yes" :doc:`darwin-avoid-spinlock `, :doc:`darwin-dispatch-once-nonstatic `, "Yes" diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/use-enum-class.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/use-enum-class.cpp new file mode 100644 index 000000000000..f53d787f80ef --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/use-enum-class.cpp @@ -0,0 +1,62 @@ +// RUN: %check_clang_tidy -std=c++11-or-later -check-suffix=ALL,DEFAULT %s \ +// RUN: cppcoreguidelines-use-enum-class %t -- + +// RUN: %check_clang_tidy -std=c++11-or-later -check-suffix=ALL %s \ +// RUN: cppcoreguidelines-use-enum-class %t -- \ +// RUN: -config="{CheckOptions: { \ +// RUN: cppcoreguidelines-use-enum-class.IgnoreUnscopedEnumsInClasses: true \ +// RUN: }}" -- + +enum E {}; +// CHECK-MESSAGES-ALL: :[[@LINE-1]]:6: warning: enum 'E' is unscoped, use 'enum class' instead + +enum class EC {}; + +enum struct ES {}; + +struct S { + enum E {}; + // CHECK-MESSAGES-DEFAULT: :[[@LINE-1]]:8: warning: enum 'E' is unscoped, use 'enum class' instead + enum class EC {}; +}; + +class C { + enum E {}; + // CHECK-MESSAGES-DEFAULT: :[[@LINE-1]]:8: warning: enum 'E' is unscoped, use 'enum class' instead + enum class EC {}; +}; + +template +class TC { + enum E {}; + // CHECK-MESSAGES-DEFAULT: :[[@LINE-1]]:8: warning: enum 'E' is unscoped, use 'enum class' instead + enum class EC {}; +}; + +union U { + enum E 
{}; + // CHECK-MESSAGES-DEFAULT: :[[@LINE-1]]:8: warning: enum 'E' is unscoped, use 'enum class' instead + enum class EC {}; +}; + +namespace { +enum E {}; +// CHECK-MESSAGES-ALL: :[[@LINE-1]]:6: warning: enum 'E' is unscoped, use 'enum class' instead +enum class EC {}; +} // namespace + +namespace N { +enum E {}; +// CHECK-MESSAGES-ALL: :[[@LINE-1]]:6: warning: enum 'E' is unscoped, use 'enum class' instead +enum class EC {}; +} // namespace N + +template +static void foo(); + +enum ForwardE : int; +// CHECK-MESSAGES-ALL: :[[@LINE-1]]:6: warning: enum 'ForwardE' is unscoped, use 'enum class' instead + +enum class ForwardEC : int; + +enum struct ForwardES : int; -- cgit v1.2.3 From 43e1a5a411d972fe06a1afb86ffd5ba21fd2a376 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Wed, 18 Jun 2025 11:06:48 +0200 Subject: [mlir][mesh] adding option for traversal order in sharding propagation (#144079) The traversal order in sharding propagation was hard-coded. This PR provides options to the pass to select a suitable order - forward-only - backward-only - forward-backward - backward-forward Default is the previous behavior (backward-forward). 
--- mlir/include/mlir/Dialect/Mesh/IR/MeshOps.h | 3 -- mlir/include/mlir/Dialect/Mesh/Transforms/Passes.h | 12 ++++++ .../include/mlir/Dialect/Mesh/Transforms/Passes.td | 15 +++++++ mlir/lib/Dialect/Mesh/IR/MeshOps.cpp | 27 ++++++------ .../Mesh/Transforms/ShardingPropagation.cpp | 42 +++++++++++++------ .../Mesh/backward-sharding-propagation.mlir | 26 ++++++++++++ .../forward-backward-sharding-propagation.mlir | 27 ++++++++++++ .../Dialect/Mesh/forward-sharding-propagation.mlir | 49 ++++++++++++++++++++++ 8 files changed, 173 insertions(+), 28 deletions(-) create mode 100644 mlir/test/Dialect/Mesh/backward-sharding-propagation.mlir create mode 100644 mlir/test/Dialect/Mesh/forward-backward-sharding-propagation.mlir create mode 100644 mlir/test/Dialect/Mesh/forward-sharding-propagation.mlir diff --git a/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.h b/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.h index 32c2eca2cefa..3878505f8f93 100644 --- a/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.h +++ b/mlir/include/mlir/Dialect/Mesh/IR/MeshOps.h @@ -206,9 +206,6 @@ Type shardType(Type type, MeshOp mesh, MeshSharding sharding); // Use newShardOp if it is not null. Otherwise create a new one. // May insert resharding if required. // Potentially updates newShardOp. -void maybeInsertTargetShardingAnnotation(MeshSharding sharding, - OpOperand &operand, OpBuilder &builder, - ShardOp &newShardOp); void maybeInsertTargetShardingAnnotation(MeshSharding sharding, OpResult result, OpBuilder &builder); void maybeInsertSourceShardingAnnotation(MeshSharding sharding, diff --git a/mlir/include/mlir/Dialect/Mesh/Transforms/Passes.h b/mlir/include/mlir/Dialect/Mesh/Transforms/Passes.h index 83399d10beaa..a2424d43a8ba 100644 --- a/mlir/include/mlir/Dialect/Mesh/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Mesh/Transforms/Passes.h @@ -19,6 +19,18 @@ class FuncOp; namespace mesh { +/// This enum controls the traversal order for the sharding propagation. 
+enum class TraversalOrder { + /// Forward traversal. + Forward, + /// Backward traversal. + Backward, + /// Forward then backward traversal. + ForwardBackward, + /// Backward then forward traversal. + BackwardForward +}; + //===----------------------------------------------------------------------===// // Passes //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Mesh/Transforms/Passes.td b/mlir/include/mlir/Dialect/Mesh/Transforms/Passes.td index 06ebf151e7d6..11ec7e78cd5e 100644 --- a/mlir/include/mlir/Dialect/Mesh/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Mesh/Transforms/Passes.td @@ -24,6 +24,21 @@ def ShardingPropagation : InterfacePass<"sharding-propagation", "mlir::FunctionO operation, and the operations themselves are added with sharding option attributes. }]; + let options = [ + Option<"traversal", "traversal", + "mlir::mesh::TraversalOrder", /*default=*/"mlir::mesh::TraversalOrder::BackwardForward", + "Traversal order to use for sharding propagation:", + [{::llvm::cl::values( + clEnumValN(mlir::mesh::TraversalOrder::Forward, "forward", + "Forward only traversal."), + clEnumValN(mlir::mesh::TraversalOrder::Backward, "backward", + "backward only traversal."), + clEnumValN(mlir::mesh::TraversalOrder::ForwardBackward, "forward-backward", + "forward-backward traversal."), + clEnumValN(mlir::mesh::TraversalOrder::BackwardForward, "backward-forward", + "backward-forward traversal.") + )}]>, + ]; let dependentDialects = [ "mesh::MeshDialect" ]; diff --git a/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp b/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp index 304cb55a3508..a2c2d1a7470c 100644 --- a/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp +++ b/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp @@ -275,13 +275,12 @@ Type mesh::shardType(Type type, MeshOp mesh, MeshSharding sharding) { return type; } -void mlir::mesh::maybeInsertTargetShardingAnnotation(MeshSharding sharding, - OpOperand &operand, - OpBuilder &builder, - 
ShardOp &newShardOp) { +static void maybeInsertTargetShardingAnnotationImpl(MeshSharding sharding, + Value &operandValue, + Operation *operandOp, + OpBuilder &builder, + ShardOp &newShardOp) { OpBuilder::InsertionGuard insertionGuard(builder); - Value operandValue = operand.get(); - Operation *operandOp = operand.getOwner(); builder.setInsertionPointAfterValue(operandValue); ShardOp shardOp = dyn_cast(operandOp); if (shardOp && sharding == shardOp.getSharding() && @@ -300,9 +299,8 @@ void mlir::mesh::maybeInsertTargetShardingAnnotation(MeshSharding sharding, builder.create(operandValue.getLoc(), operandValue, shardingOp, /*annotate_for_users*/ false); } - IRRewriter rewriter(builder); - rewriter.replaceUsesWithIf( - operandValue, newShardOp, [operandOp, operandValue](OpOperand &use) { + operandValue.replaceUsesWithIf( + newShardOp, [operandOp, operandValue](OpOperand &use) { return use.getOwner() == operandOp && use.get() == operandValue; }); @@ -313,15 +311,20 @@ void mlir::mesh::maybeInsertTargetShardingAnnotation(MeshSharding sharding, auto newShardOp2 = builder.create(operandValue.getLoc(), newShardOp, newShardOp.getSharding(), /*annotate_for_users*/ true); - rewriter.replaceAllUsesExcept(newShardOp, newShardOp2, newShardOp2); + newShardOp.getResult().replaceAllUsesExcept(newShardOp2, newShardOp2); } void mlir::mesh::maybeInsertTargetShardingAnnotation(MeshSharding sharding, OpResult result, OpBuilder &builder) { ShardOp newShardOp; - for (auto &use : llvm::make_early_inc_range(result.getUses())) { - maybeInsertTargetShardingAnnotation(sharding, use, builder, newShardOp); + SmallVector> uses; + for (auto &use : result.getUses()) { + uses.emplace_back(use.get(), use.getOwner()); + } + for (auto &[operandValue, operandOp] : uses) { + maybeInsertTargetShardingAnnotationImpl(sharding, operandValue, operandOp, + builder, newShardOp); } } diff --git a/mlir/lib/Dialect/Mesh/Transforms/ShardingPropagation.cpp b/mlir/lib/Dialect/Mesh/Transforms/ShardingPropagation.cpp 
index 4452dd65fce9..6751fafaf177 100644 --- a/mlir/lib/Dialect/Mesh/Transforms/ShardingPropagation.cpp +++ b/mlir/lib/Dialect/Mesh/Transforms/ShardingPropagation.cpp @@ -362,6 +362,9 @@ static LogicalResult visitOp(Operation *op, OpBuilder &builder) { //===----------------------------------------------------------------------===// struct ShardingPropagation : public mesh::impl::ShardingPropagationBase { + + using ShardingPropagationBase::ShardingPropagationBase; + void runOnOperation() override { FunctionOpInterface funcOp = getOperation(); MLIRContext *ctx = funcOp.getContext(); @@ -382,18 +385,31 @@ struct ShardingPropagation shardingOp.printLoopTypesAndIndexingMaps(llvm::dbgs()); }); - // 1. propagate in reversed order - for (Operation &op : llvm::make_early_inc_range(llvm::reverse(block))) - if (failed(visitOp(&op, builder))) - return signalPassFailure(); - - LLVM_DEBUG(DBGS() << "After reversed order propagation:\n" - << funcOp << "\n"); - LLVM_DEBUG(assert(succeeded(mlir::verify(funcOp)))); - - // 2. propagate in original order - for (Operation &op : llvm::make_early_inc_range(block)) - if (failed(visitOp(&op, builder))) - return signalPassFailure(); + auto traverse = [&](auto &&range, OpBuilder &builder, + const char *order) -> bool { + for (Operation &op : range) { + if (failed(visitOp(&op, builder))) { + signalPassFailure(); + return true; + } + } + LLVM_DEBUG(DBGS() << "After " << order << " order propagation:\n" + << funcOp << "\n"); + LLVM_DEBUG(assert(succeeded(mlir::verify(funcOp)))); + return false; + }; + + // 1. Propagate in reversed order. + if (traversal == TraversalOrder::Backward || + traversal == TraversalOrder::BackwardForward) + traverse(llvm::reverse(block), builder, "backward"); + + // 2. Propagate in original order. + if (traversal != TraversalOrder::Backward) + traverse(block, builder, "forward"); + + // 3. Propagate in backward order if needed. 
+ if (traversal == TraversalOrder::ForwardBackward) + traverse(llvm::reverse(block), builder, "backward"); } }; diff --git a/mlir/test/Dialect/Mesh/backward-sharding-propagation.mlir b/mlir/test/Dialect/Mesh/backward-sharding-propagation.mlir new file mode 100644 index 000000000000..4223d01d6511 --- /dev/null +++ b/mlir/test/Dialect/Mesh/backward-sharding-propagation.mlir @@ -0,0 +1,26 @@ +// RUN: mlir-opt --pass-pipeline="builtin.module(func.func(sharding-propagation{traversal=backward}))" %s | FileCheck %s + +#map = affine_map<(d0, d1) -> (d0, d1)> +module { + mesh.mesh @mesh(shape = 1) {sym_visibility = "private"} + func.func @test_forward() -> tensor<6x6xi32> { + %c1_i32 = arith.constant 1 : i32 + // CHECK: tensor.empty() + %0 = tensor.empty() : tensor<6x6xi32> + %sharding = mesh.sharding @mesh split_axes = [[0]] : !mesh.sharding + // CHECK-COUNT-2: mesh.shard + %sharding_annotated = mesh.shard %0 to %sharding : tensor<6x6xi32> + %1 = linalg.fill ins(%c1_i32 : i32) outs(%sharding_annotated : tensor<6x6xi32>) -> tensor<6x6xi32> + // CHECK: tensor.empty() + // CHECK-NOT: mesh.shard @ + %2 = tensor.empty() : tensor<6x6xi32> + %3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%1, %1 + : tensor<6x6xi32>, tensor<6x6xi32>) outs(%2 : tensor<6x6xi32>) { + ^bb0(%in: i32, %in_2: i32, %out: i32): + %9 = arith.addi %in, %in_2 : i32 + linalg.yield %9 : i32 + } -> tensor<6x6xi32> + // CHECK: return + return %3 : tensor<6x6xi32> + } +} diff --git a/mlir/test/Dialect/Mesh/forward-backward-sharding-propagation.mlir b/mlir/test/Dialect/Mesh/forward-backward-sharding-propagation.mlir new file mode 100644 index 000000000000..dd2eee2f7def --- /dev/null +++ b/mlir/test/Dialect/Mesh/forward-backward-sharding-propagation.mlir @@ -0,0 +1,27 @@ +// RUN: mlir-opt --pass-pipeline="builtin.module(func.func(sharding-propagation{traversal=forward-backward}))" %s | FileCheck %s + +#map = affine_map<(d0, d1) -> (d0, d1)> +module { + 
mesh.mesh @mesh(shape = 1) {sym_visibility = "private"} + func.func @test_forward() -> tensor<6x6xi32> { + %c1_i32 = arith.constant 1 : i32 + // CHECK: tensor.empty() + %0 = tensor.empty() : tensor<6x6xi32> + // CHECK-COUNT-3: mesh.sharding @mesh split_axes = {{\[\[0}}]] + %sharding_row = mesh.sharding @mesh split_axes = [[0]] : !mesh.sharding + %annotated_row = mesh.shard %0 to %sharding_row : tensor<6x6xi32> + %1 = linalg.fill ins(%c1_i32 : i32) outs(%annotated_row : tensor<6x6xi32>) -> tensor<6x6xi32> + %2 = tensor.empty() : tensor<6x6xi32> + // CHECK-COUNT-4: mesh.sharding @mesh split_axes = {{\[\[1}}]] + %3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%2, %1 + : tensor<6x6xi32>, tensor<6x6xi32>) outs(%2 : tensor<6x6xi32>) { + ^bb0(%in: i32, %in_2: i32, %out: i32): + %9 = arith.addi %in, %in_2 : i32 + linalg.yield %9 : i32 + } -> tensor<6x6xi32> + %sharding_col = mesh.sharding @mesh split_axes = [[1]] : !mesh.sharding + %annotated_col = mesh.shard %3 to %sharding_col : tensor<6x6xi32> + // CHECK: return + return %annotated_col : tensor<6x6xi32> + } +} diff --git a/mlir/test/Dialect/Mesh/forward-sharding-propagation.mlir b/mlir/test/Dialect/Mesh/forward-sharding-propagation.mlir new file mode 100644 index 000000000000..98e9931b8de9 --- /dev/null +++ b/mlir/test/Dialect/Mesh/forward-sharding-propagation.mlir @@ -0,0 +1,49 @@ +// RUN: mlir-opt --pass-pipeline="builtin.module(func.func(sharding-propagation{traversal=forward}))" %s | FileCheck %s + +#map = affine_map<(d0, d1) -> (d0, d1)> +module attributes {dlti.map = #dlti.map<"MPI:Implementation" = "mpich", "MPI:comm_world_rank" = 0 : i32>} { + mesh.mesh @mesh(shape = 1) {sym_visibility = "private"} + func.func @test_forward() -> (tensor<6x6xi32>, tensor<6x6xi32>, tensor) attributes {llvm.emit_c_interface} { + %c1_i32 = arith.constant 1 : i32 + // CHECK: [[v3:%.*]] = tensor.empty() : tensor<6x6xi32> + %0 = tensor.empty() : tensor<6x6xi32> + // CHECK: 
[[v1:%.*]] = linalg.fill ins + // CHECK: [[vsharding_0:%.*]] = mesh.sharding @mesh split_axes = {{\[\[}}0]] : !mesh.sharding + // CHECK: [[vsharding_annotated_1:%.*]] = mesh.shard [[v1]] to [[vsharding_0]] : tensor<6x6xi32> + %1 = linalg.fill ins(%c1_i32 : i32) outs(%0 : tensor<6x6xi32>) -> tensor<6x6xi32> + %sharding = mesh.sharding @mesh split_axes = [[0]] : !mesh.sharding + %sharding_annotated = mesh.shard %1 to %sharding : tensor<6x6xi32> + // CHECK: [[v2:%.*]] = tensor.empty() : tensor<6x6xi32> + // CHECK: [[vsharding_2:%.*]] = mesh.sharding @mesh split_axes = {{\[\[}}0]] : !mesh.sharding + // CHECK: [[vsharding_annotated_3:%.*]] = mesh.shard [[vsharding_annotated_1]] to [[vsharding_2]] annotate_for_users : tensor<6x6xi32> + %3 = tensor.empty() : tensor<6x6xi32> + // CHECK: [[vsharding_4:%.*]] = mesh.sharding @mesh split_axes = {{\[\[}}0]] : !mesh.sharding + // CHECK: [[vsharding_annotated_5:%.*]] = mesh.shard [[v2]] to [[vsharding_4]] annotate_for_users : tensor<6x6xi32> + // CHECK: [[v3:%.*]] = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} + // CHECK-SAME: ins([[vsharding_annotated_3]], [[vsharding_annotated_3]] : tensor<6x6xi32>, tensor<6x6xi32>) outs([[vsharding_annotated_5]] : tensor<6x6xi32>) { + // CHECK: [[vsharding_6:%.*]] = mesh.sharding @mesh split_axes = {{\[\[}}0]] : !mesh.sharding + // CHECK: [[vsharding_annotated_7:%.*]] = mesh.shard [[v3]] to [[vsharding_6]] : tensor<6x6xi32> + %4 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%sharding_annotated, %sharding_annotated + : tensor<6x6xi32>, tensor<6x6xi32>) outs(%3 : tensor<6x6xi32>) { + ^bb0(%in: i32, %in_2: i32, %out: i32): + %9 = arith.addi %in, %in_2 : i32 + linalg.yield %9 : i32 + } -> tensor<6x6xi32> + %c0_i32 = arith.constant 0 : i32 + %6 = tensor.empty() : tensor + %7 = linalg.fill ins(%c0_i32 : i32) outs(%6 : tensor) -> tensor + // CHECK: [[vreduced:%.*]] = linalg.reduce ins + // CHECK: 
[[vsharding_12:%.*]] = mesh.sharding @mesh split_axes = [] partial = sum [0] : !mesh.sharding + // CHECK: [[vsharding_annotated_13:%.*]] = mesh.shard [[vreduced]] to [[vsharding_12]] : tensor + %reduced = linalg.reduce ins(%4 : tensor<6x6xi32>) outs(%7 : tensor) dimensions = [0, 1] + (%in: i32, %init: i32) { + %9 = arith.addi %in, %init : i32 + linalg.yield %9 : i32 + } + // CHECK: [[vsharding_14:%.*]] = mesh.sharding @mesh split_axes = {{\[\[}}]] : !mesh.sharding + %sharding_0 = mesh.sharding @mesh split_axes = [[]] : !mesh.sharding + // CHECK: [[vsharding_annotated_15:%.*]] = mesh.shard [[vsharding_annotated_13]] to [[vsharding_14]] annotate_for_users : tensor + %sharding_annotated_1 = mesh.shard %reduced to %sharding_0 annotate_for_users : tensor + return %sharding_annotated, %4, %sharding_annotated_1 : tensor<6x6xi32>, tensor<6x6xi32>, tensor + } +} -- cgit v1.2.3 From 355725a25e6be38d7a97cab9e206d2a16a1bd849 Mon Sep 17 00:00:00 2001 From: Kunqiu Chen Date: Wed, 18 Jun 2025 17:09:32 +0800 Subject: [TSan] Fix missing inst cleanup (#144067) Commit 44e875ad5b2ce26826dd53f9e7d1a71436c86212 introduced a change that replaces `ReplaceInstWithInst` with `Instruction::replaceAllUsesWith`, without subsequent instruction cleanup. This results in TSan leaving behind useless `load atomic` instructions after 'replacing' them. This commit adds cleanup back, consistent with the context. 
--- llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 1 + llvm/test/Instrumentation/ThreadSanitizer/atomic-non-integer.ll | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 8ae6f7745a9e..5485998164f1 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -728,6 +728,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { Value *C = IRB.CreateCall(TsanAtomicLoad[Idx], Args); Value *Cast = IRB.CreateBitOrPointerCast(C, OrigTy); I->replaceAllUsesWith(Cast); + I->eraseFromParent(); } else if (StoreInst *SI = dyn_cast(I)) { Value *Addr = SI->getPointerOperand(); int Idx = diff --git a/llvm/test/Instrumentation/ThreadSanitizer/atomic-non-integer.ll b/llvm/test/Instrumentation/ThreadSanitizer/atomic-non-integer.ll index 8bcabaecf0fd..015ee2fe711e 100644 --- a/llvm/test/Instrumentation/ThreadSanitizer/atomic-non-integer.ll +++ b/llvm/test/Instrumentation/ThreadSanitizer/atomic-non-integer.ll @@ -10,7 +10,6 @@ define float @load_float(ptr %fptr) { ; CHECK-NEXT: call void @__tsan_func_entry(ptr [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__tsan_atomic32_load(ptr [[FPTR]], i32 0) ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float -; CHECK-NEXT: [[V:%.*]] = load atomic float, ptr [[FPTR]] unordered, align 4 ; CHECK-NEXT: call void @__tsan_func_exit() ; CHECK-NEXT: ret float [[TMP3]] ; @@ -25,7 +24,6 @@ define double @load_double(ptr %fptr) { ; CHECK-NEXT: call void @__tsan_func_entry(ptr [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @__tsan_atomic64_load(ptr [[FPTR]], i32 0) ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to double -; CHECK-NEXT: [[V:%.*]] = load atomic double, ptr [[FPTR]] unordered, align 8 ; CHECK-NEXT: call void @__tsan_func_exit() ; CHECK-NEXT: ret double [[TMP3]] ; @@ -40,7 +38,6 @@ define 
fp128 @load_fp128(ptr %fptr) { ; CHECK-NEXT: call void @__tsan_func_entry(ptr [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = call i128 @__tsan_atomic128_load(ptr [[FPTR]], i32 0) ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i128 [[TMP2]] to fp128 -; CHECK-NEXT: [[V:%.*]] = load atomic fp128, ptr [[FPTR]] unordered, align 16 ; CHECK-NEXT: call void @__tsan_func_exit() ; CHECK-NEXT: ret fp128 [[TMP3]] ; -- cgit v1.2.3 From 8e157fdbb7b4af9f67b139a9f05feaa9b338d3f5 Mon Sep 17 00:00:00 2001 From: Sirui Mu Date: Wed, 18 Jun 2025 17:10:29 +0800 Subject: [CIR] Add support for __builtin_assume (#144376) This patch adds support for the `__builtin_assume` builtin function. --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 22 +++++++++++++ clang/include/clang/CIR/MissingFeatures.h | 3 ++ clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 38 ++++++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenFunction.h | 4 +++ .../lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 9 +++++ clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 10 ++++++ clang/test/CIR/CodeGen/builtin_call.cpp | 16 +++++++++ 7 files changed, 102 insertions(+) diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 8dd1f0ce361d..4655cebc82ee 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -2387,4 +2387,26 @@ def ComplexCreateOp : CIR_Op<"complex.create", [Pure, SameTypeOperands]> { let hasFolder = 1; } +//===----------------------------------------------------------------------===// +// Assume Operations +//===----------------------------------------------------------------------===// + +def AssumeOp : CIR_Op<"assume"> { + let summary = "Tell the optimizer that a boolean value is true"; + let description = [{ + The `cir.assume` operation takes a single boolean predicate as its only + argument and does not have any results. The operation tells the optimizer + that the predicate is always true. 
+ + This operation corresponds to the `__assume` and the `__builtin_assume` + builtin functions. + }]; + + let arguments = (ins CIR_BoolType:$predicate); + + let assemblyFormat = [{ + $predicate `:` type($predicate) attr-dict + }]; +} + #endif // CLANG_CIR_DIALECT_IR_CIROPS_TD diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 3dc28e6f2e5b..3d120903dea1 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -237,6 +237,9 @@ struct MissingFeatures { static bool lowerAggregateLoadStore() { return false; } static bool dataLayoutTypeAllocSize() { return false; } static bool asmLabelAttr() { return false; } + static bool builtinCall() { return false; } + static bool builtinCallF128() { return false; } + static bool builtinCallMathErrno() { return false; } // Missing types static bool dataMemberType() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index 19fac00ab873..83825f0835a1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "CIRGenCall.h" +#include "CIRGenConstantEmitter.h" #include "CIRGenFunction.h" #include "CIRGenModule.h" #include "CIRGenValue.h" @@ -66,6 +67,32 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, return emitLibraryCall(*this, fd, e, cgm.getBuiltinLibFunction(fd, builtinID)); + assert(!cir::MissingFeatures::builtinCallF128()); + + // If the builtin has been declared explicitly with an assembler label, + // disable the specialized emitting below. Ideally we should communicate the + // rename in IR, or at least avoid generating the intrinsic calls that are + // likely to get lowered to the renamed library functions. + unsigned builtinIDIfNoAsmLabel = fd->hasAttr() ? 
0 : builtinID; + + assert(!cir::MissingFeatures::builtinCallMathErrno()); + assert(!cir::MissingFeatures::builtinCall()); + + switch (builtinIDIfNoAsmLabel) { + default: + break; + + case Builtin::BI__assume: + case Builtin::BI__builtin_assume: { + if (e->getArg(0)->HasSideEffects(getContext())) + return RValue::get(nullptr); + + mlir::Value argValue = emitCheckedArgForAssume(e->getArg(0)); + builder.create(getLoc(e->getExprLoc()), argValue); + return RValue::get(nullptr); + } + } + cgm.errorNYI(e->getSourceRange(), "unimplemented builtin call"); return getUndefRValue(e->getType()); } @@ -88,3 +115,14 @@ cir::FuncOp CIRGenModule::getBuiltinLibFunction(const FunctionDecl *fd, mlir::Type type = convertType(fd->getType()); return getOrCreateCIRFunction(name, type, d, /*forVTable=*/false); } + +mlir::Value CIRGenFunction::emitCheckedArgForAssume(const Expr *e) { + mlir::Value argValue = evaluateExprAsBool(e); + if (!sanOpts.has(SanitizerKind::Builtin)) + return argValue; + + assert(!cir::MissingFeatures::sanitizers()); + cgm.errorNYI(e->getSourceRange(), + "emitCheckedArgForAssume: sanitizers are NYI"); + return {}; +} diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index de6ef2a69faf..6c490a72b2e9 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -772,6 +772,10 @@ public: LValue emitCastLValue(const CastExpr *e); + /// Emits an argument for a call to a `__builtin_assume`. If the builtin + /// sanitizer is enabled, a runtime check is also emitted. 
+ mlir::Value emitCheckedArgForAssume(const Expr *e); + LValue emitCompoundAssignmentLValue(const clang::CompoundAssignOperator *e); void emitConstructorBody(FunctionArgList &args); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 6a4e4e4a7df3..a96501ab2c38 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -407,6 +407,14 @@ struct ConvertCIRToLLVMPass StringRef getArgument() const override { return "cir-flat-to-llvm"; } }; +mlir::LogicalResult CIRToLLVMAssumeOpLowering::matchAndRewrite( + cir::AssumeOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + auto cond = adaptor.getPredicate(); + rewriter.replaceOpWithNewOp(op, cond); + return mlir::success(); +} + mlir::LogicalResult CIRToLLVMBrCondOpLowering::matchAndRewrite( cir::BrCondOp brOp, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { @@ -1811,6 +1819,7 @@ void ConvertCIRToLLVMPass::runOnOperation() { dl); patterns.add< // clang-format off + CIRToLLVMAssumeOpLowering, CIRToLLVMBaseClassAddrOpLowering, CIRToLLVMBinOpLowering, CIRToLLVMBrCondOpLowering, diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index a80981806354..a80c66ac1abf 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -29,6 +29,16 @@ mlir::Value lowerCirAttrAsValue(mlir::Operation *parentOp, mlir::Attribute attr, mlir::LLVM::Linkage convertLinkage(cir::GlobalLinkageKind linkage); +class CIRToLLVMAssumeOpLowering + : public mlir::OpConversionPattern { +public: + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::AssumeOp op, OpAdaptor, + mlir::ConversionPatternRewriter &) const override; +}; + class CIRToLLVMBrCondOpLowering : public mlir::OpConversionPattern { public: 
diff --git a/clang/test/CIR/CodeGen/builtin_call.cpp b/clang/test/CIR/CodeGen/builtin_call.cpp index 322c13c8f081..0a2226a2cc59 100644 --- a/clang/test/CIR/CodeGen/builtin_call.cpp +++ b/clang/test/CIR/CodeGen/builtin_call.cpp @@ -94,3 +94,19 @@ void library_builtins() { // OGCG: define dso_local void @_Z16library_builtinsv() // OGCG: call i32 (ptr, ...) @printf(ptr noundef null) // OGCG: call void @abort() + +void assume(bool arg) { + __builtin_assume(arg); +} + +// CIR: cir.func @_Z6assumeb +// CIR: cir.assume %{{.+}} : !cir.bool +// CIR: } + +// LLVM: define void @_Z6assumeb +// LLVM: call void @llvm.assume(i1 %{{.+}}) +// LLVM: } + +// OGCG: define {{.*}}void @_Z6assumeb +// OGCG: call void @llvm.assume(i1 %{{.+}}) +// OGCG: } -- cgit v1.2.3 From fe42d34274cac79794637bf2f69f85537dde8b74 Mon Sep 17 00:00:00 2001 From: Ying Yi Date: Wed, 18 Jun 2025 10:13:46 +0100 Subject: [clang][headers]Remove unnecessary guard of !defined(__SCE__). (#144522) Sony PlayStation now supports C++20, and we wish to change the default C++ mode to C++20 sometime in the future. As such, the !defined(__SCE__) guards are redundant and we want to remove them. This in turn makes the entire guard lines redundant (always true), so this patch removes them entirely. --- clang/lib/Headers/bmiintrin.h | 4 - clang/lib/Headers/immintrin.h | 224 ------------------------------------ clang/lib/Headers/keylockerintrin.h | 9 -- clang/lib/Headers/x86gprintrin.h | 14 --- clang/lib/Headers/x86intrin.h | 18 --- 5 files changed, 269 deletions(-) diff --git a/clang/lib/Headers/bmiintrin.h b/clang/lib/Headers/bmiintrin.h index 59c5ece3977f..8024da55379c 100644 --- a/clang/lib/Headers/bmiintrin.h +++ b/clang/lib/Headers/bmiintrin.h @@ -161,8 +161,6 @@ _mm_tzcnt_64(unsigned long long __X) { #undef __RELAXED_FN_ATTRS -#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI__) - /* Define the default attributes for the functions in this file. 
*/ #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ @@ -603,6 +601,4 @@ __blsr_u64(unsigned long long __X) { #undef __DEFAULT_FN_ATTRS -#endif /* !defined(__SCE__) || __has_feature(modules) || defined(__BMI__) */ - #endif /* __BMIINTRIN_H */ diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 19c5987257a2..35f012cc7004 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -16,231 +16,112 @@ #include -#if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__SSE4_2__) || defined(__SSE4_1__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AES__) || defined(__PCLMUL__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__) #include -#endif -/* No feature check desired due to internal checks */ #include -#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__) 
#include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512VNNI__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512BITALG__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512BW__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512CD__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512DQ__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512IFMA__) && defined(__AVX512VL__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__) #include -#endif -#if 
!defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VBMI__) && defined(__AVX512VL__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VBMI2__) && defined(__AVX512VL__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512FP16__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512BF16__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__) /// Reads the value of the IA32_TSC_AUX MSR (0xc0000103). 
/// /// \headerfile @@ -252,9 +133,7 @@ static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __ _rdpid_u32(void) { return __builtin_ia32_rdpid(); } -#endif // __RDPID__ -#if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__) /// Returns a 16-bit hardware-generated random value. /// /// \headerfile @@ -314,9 +193,7 @@ _rdrand64_step(unsigned long long *__p) } #endif } -#endif /* __RDRND__ */ -#if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__) #ifdef __x86_64__ /// Reads the FS base register. /// @@ -427,9 +304,6 @@ _writegsbase_u64(unsigned long long __V) } #endif -#endif /* __FSGSBASE__ */ - -#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__) /* The structs used below are to force the load/store to be unaligned. This * is accomplished with the __packed__ attribute. The __may_alias__ prevents @@ -543,172 +417,86 @@ _storebe_i64(void * __P, long long __D) { ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D); } #endif -#endif /* __MOVBE */ -#if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__) #include #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__) #include -#endif /* No feature check desired due to internal MSC_VER checks */ #include -#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__) #include -#endif /* Intrinsics inside adcintrin.h are available at all times. 
*/ #include -#if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) || \ - defined(__MOVDIR64B__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVRS__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX10_2__) && defined(__MOVRS__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX10_2_512__) && defined(__MOVRS__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \ - defined(__WIDEKL__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) || \ - defined(__AMX_INT8__) || defined(__AMX_BF16__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP8__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TRANSPOSE__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_MOVRS__) #include -#endif -#if 
!defined(__SCE__) || __has_feature(modules) || \ - (defined(__AMX_MOVRS__) && defined(__AMX_TRANSPOSE__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_AVX512__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TF32__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AMX_TF32__) && defined(__AMX_TRANSPOSE__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AMX_BF16__) && defined(__AMX_TRANSPOSE__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AMX_FP16__) && defined(__AMX_TRANSPOSE__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AMX_COMPLEX__) && defined(__AMX_TRANSPOSE__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - defined(__AVX512VP2INTERSECT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__) #include #include #include @@ -716,33 +504,21 @@ _storebe_i64(void * __P, long long __D) { #include #include #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__) #include #include #include #include #include #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || \ - (defined(__AVX10_2_512__) && defined(__SM4__)) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__) #include -#endif #if defined(_MSC_VER) && __has_extension(gnu_asm) /* Define the default attributes for these intrinsics */ diff --git a/clang/lib/Headers/keylockerintrin.h b/clang/lib/Headers/keylockerintrin.h index 
f76e91b4d4b3..4e9e6bec20c0 100644 --- a/clang/lib/Headers/keylockerintrin.h +++ b/clang/lib/Headers/keylockerintrin.h @@ -28,8 +28,6 @@ #ifndef _KEYLOCKERINTRIN_H #define _KEYLOCKERINTRIN_H -#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) - /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("kl"),\ @@ -326,10 +324,6 @@ _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { #undef __DEFAULT_FN_ATTRS -#endif /* !defined(__SCE__ || __has_feature(modules) || defined(__KL__) */ - -#if !defined(__SCE__) || __has_feature(modules) || defined(__WIDEKL__) - /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("kl,widekl"),\ @@ -521,7 +515,4 @@ _mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* #undef __DEFAULT_FN_ATTRS -#endif /* !defined(__SCE__) || __has_feature(modules) || defined(__WIDEKL__) \ - */ - #endif /* _KEYLOCKERINTRIN_H */ diff --git a/clang/lib/Headers/x86gprintrin.h b/clang/lib/Headers/x86gprintrin.h index 3d5cc606d7e6..8d513ceffb6d 100644 --- a/clang/lib/Headers/x86gprintrin.h +++ b/clang/lib/Headers/x86gprintrin.h @@ -10,33 +10,19 @@ #ifndef __X86GPRINTRIN_H #define __X86GPRINTRIN_H -#if !defined(__SCE__) || __has_feature(modules) || defined(__HRESET__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__UINTR__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__USERMSR__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__CRC32__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__PRFCHI__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__RAOINT__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__CMPCCXADD__) #include 
-#endif #if defined(__i386__) #define __SAVE_GPRBX "mov {%%ebx, %%eax |eax, ebx};" diff --git a/clang/lib/Headers/x86intrin.h b/clang/lib/Headers/x86intrin.h index f42e9e580f88..aaa84365ce3e 100644 --- a/clang/lib/Headers/x86intrin.h +++ b/clang/lib/Headers/x86intrin.h @@ -14,40 +14,22 @@ #include -#if !defined(__SCE__) || __has_feature(modules) || defined(__PRFCHW__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE4A__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA4__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__XOP__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__TBM__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__LWP__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__MWAITX__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__CLZERO__) #include -#endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPRU__) #include -#endif #endif /* __X86INTRIN_H */ -- cgit v1.2.3 From 58c4fa96cb111ea8d399296838f4cb6a294115ca Mon Sep 17 00:00:00 2001 From: Karlo Basioli Date: Wed, 18 Jun 2025 10:21:37 +0100 Subject: Fix bazel build for #142771 (#144659) --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index cb0f9d8c7413..c750eb733b3b 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1650,6 +1650,7 @@ td_library( srcs = [ "include/mlir/Dialect/EmitC/IR/EmitC.td", "include/mlir/Dialect/EmitC/IR/EmitCAttributes.td", + "include/mlir/Dialect/EmitC/IR/EmitCInterfaces.td", "include/mlir/Dialect/EmitC/IR/EmitCBase.td", "include/mlir/Dialect/EmitC/IR/EmitCTypes.td", ], @@ -1665,6 +1666,17 @@ td_library( 
], ) +gentbl_cc_library( + name = "EmitCInterfacesIncGen", + tbl_outs = { + "include/mlir/Dialect/EmitC/IR/EmitCInterfaces.h.inc": ["-gen-op-interface-decls"], + "include/mlir/Dialect/EmitC/IR/EmitCInterfaces.cpp.inc": ["-gen-op-interface-defs"], + }, + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/EmitC/IR/EmitCInterfaces.td", + deps = [":EmitCTdFiles"], +) + gentbl_cc_library( name = "EmitCAttributesIncGen", tbl_outs = { @@ -3679,6 +3691,7 @@ cc_library( ":BytecodeOpInterface", ":CastInterfaces", ":ControlFlowInterfaces", + ":EmitCInterfacesIncGen", ":EmitCAttributesIncGen", ":EmitCOpsIncGen", ":FunctionInterfaces", -- cgit v1.2.3 From 6fcdde2a4eb9eaf34511ac3a35075be329fe1fae Mon Sep 17 00:00:00 2001 From: Lucas Duarte Prates Date: Wed, 18 Jun 2025 10:26:46 +0100 Subject: [runtimes] Allow use of external llvm-lit on standalone builds (#144347) When creating a standalone build of the runtimes sub-project, the current CMake implementation looks for a lit executable that might potentially exist in the build tree and unconditionally overrides the value of `LLVM_EXTERNAL_LIT`. Due to this, any value passed via `-DLLVM_EXTERNAL_LIT` when configuring the CMake project is ignored. This change adds the `ALLOW_EXTERNAL` argument to the `get_llvm_lit_path` call in the runtimes' CMakeLists.txt, allowing any value previously set to be considered. --- runtimes/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 878b2eee3861..e4dd4ebfc678 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -259,7 +259,7 @@ if(LLVM_INCLUDE_TESTS) # dir rather than ${LLVM_INSTALL_DIR}/bin/llvm-lit (which may not exist if # LLVM_BINARY_DIR points at an installed LLVM tree rather than a build tree). 
set(LLVM_LIT_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/bin) - get_llvm_lit_path(_base_dir _file_name) + get_llvm_lit_path(_base_dir _file_name ALLOW_EXTERNAL) set(LLVM_EXTERNAL_LIT "${_base_dir}/${_file_name}" CACHE STRING "Command used to spawn lit" FORCE) # Avoid warning about missing llvm-lit from runtimes CMake files. This is # fine since we call configure_file() to create llvm-lit at the end of this -- cgit v1.2.3 From 757a0e6d3b6130a984960ee413a3c8a6f99c7cb5 Mon Sep 17 00:00:00 2001 From: Stephen Tozer Date: Wed, 18 Jun 2025 11:29:23 +0200 Subject: [SystemZ] Treat FAKE_USE instructions as instructions without a size (#144390) This patch fixes an error in which `FAKE_USE` instructions would trigger an assertion in SystemZLongBranch due to them having a size of 0 without being excepted in the assertion that each instruction, other than a set of known 0-size instruction types, should have a non-0 size. `FAKE_USE` instructions are no-op instructions that are emitted into LLVM by the `-fextend-variable-liveness` clang flag to help preserve the liveness of source variables in optimized code, and therefore they should be understood as being valid size 0 instructions. 
--- llvm/lib/Target/SystemZ/SystemZLongBranch.cpp | 2 +- llvm/test/CodeGen/SystemZ/fake-use-size.ll | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/SystemZ/fake-use-size.ll diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp index 54e1eb095494..21a233b2ffa1 100644 --- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -215,7 +215,7 @@ static unsigned getInstSizeInBytes(const MachineInstr &MI, // These do not have a size: MI.isDebugOrPseudoInstr() || MI.isPosition() || MI.isKill() || MI.isImplicitDef() || MI.getOpcode() == TargetOpcode::MEMBARRIER || - MI.getOpcode() == TargetOpcode::INIT_UNDEF || + MI.getOpcode() == TargetOpcode::INIT_UNDEF || MI.isFakeUse() || // These have a size that may be zero: MI.isInlineAsm() || MI.getOpcode() == SystemZ::STACKMAP || MI.getOpcode() == SystemZ::PATCHPOINT || diff --git a/llvm/test/CodeGen/SystemZ/fake-use-size.ll b/llvm/test/CodeGen/SystemZ/fake-use-size.ll new file mode 100644 index 000000000000..1690a046aad4 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/fake-use-size.ll @@ -0,0 +1,14 @@ +; RUN: llc -O0 < %s -mtriple=s390x-linux-gnu 2>&1 | FileCheck %s + +;; Tests that we can handle FAKE_USE instructions, emitting a comment for them +;; in the resulting assembly. + +; CHECK: .type idd,@function +; CHECK: # %bb.0: +; CHECK-NEXT: # fake_use: + +define double @idd(double %d) { +entry: + notail call void (...) 
@llvm.fake.use(double %d) + ret double %d +} -- cgit v1.2.3 From bb00fd087a3c3e02fb812e41218ad0a85d9f0fe1 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 18 Jun 2025 09:35:13 +0000 Subject: [gn build] Port 669627d0c77e --- .../gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn index a06b2f11b452..4f3ef5bf174b 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/cppcoreguidelines/BUILD.gn @@ -45,6 +45,7 @@ static_library("cppcoreguidelines") { "RvalueReferenceParamNotMovedCheck.cpp", "SlicingCheck.cpp", "SpecialMemberFunctionsCheck.cpp", + "UseEnumClassCheck.cpp", "VirtualClassDestructorCheck.cpp", ] } -- cgit v1.2.3 From cd8248f3e856a37cc1addcb74475b4d37dc8aa42 Mon Sep 17 00:00:00 2001 From: Scott Constable Date: Wed, 18 Jun 2025 02:52:55 -0700 Subject: Fixed a bug in `-fsanitize-kcfi-arity` (#142867) Compiling with `-fsanitize-kcfi-arity` can crash the compiler if a function has more than 6 arguments, including floating-point arguments passed in XMM registers. This patch fixes the feature by only counting integer and stack arguments toward kCFI arity. For example, the compiler crashed when it attempted to generate kCFI arity information for this function: https://github.com/torvalds/linux/blob/16b70698aa3ae7888826d0c84567c72241cf6713/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h#L680 As noted in a comment, floating-point registers are not relevant to enforcing kCFI at this time.
--- llvm/lib/Target/X86/X86AsmPrinter.cpp | 23 ++++++++++++++++++----- llvm/test/CodeGen/X86/kcfi-arity.ll | 29 +++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index 24eda602effd..c7238839c26b 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -198,14 +198,27 @@ void X86AsmPrinter::emitKCFITypeId(const MachineFunction &MF) { // Determine the function's arity (i.e., the number of arguments) at the ABI // level by counting the number of parameters that are passed // as registers, such as pointers and 64-bit (or smaller) integers. The - // Linux x86-64 ABI allows up to 6 parameters to be passed in GPRs. + // Linux x86-64 ABI allows up to 6 integer parameters to be passed in GPRs. // Additional parameters or parameters larger than 64 bits may be passed on - // the stack, in which case the arity is denoted as 7. + // the stack, in which case the arity is denoted as 7. Floating-point + // arguments passed in XMM0-XMM7 are not counted toward arity because + // floating-point values are not relevant to enforcing kCFI at this time. const unsigned ArityToRegMap[8] = {X86::EAX, X86::ECX, X86::EDX, X86::EBX, X86::ESP, X86::EBP, X86::ESI, X86::EDI}; - int Arity = MF.getInfo()->getArgumentStackSize() > 0 - ? 
7 - : MF.getRegInfo().liveins().size(); + int Arity; + if (MF.getInfo()->getArgumentStackSize() > 0) { + Arity = 7; + } else { + Arity = 0; + for (const auto &LI : MF.getRegInfo().liveins()) { + auto Reg = LI.first; + if (X86::GR8RegClass.contains(Reg) || X86::GR16RegClass.contains(Reg) || + X86::GR32RegClass.contains(Reg) || + X86::GR64RegClass.contains(Reg)) { + ++Arity; + } + } + } DestReg = ArityToRegMap[Arity]; } diff --git a/llvm/test/CodeGen/X86/kcfi-arity.ll b/llvm/test/CodeGen/X86/kcfi-arity.ll index 68d90adaf2a1..009fa7d2dc0a 100644 --- a/llvm/test/CodeGen/X86/kcfi-arity.ll +++ b/llvm/test/CodeGen/X86/kcfi-arity.ll @@ -192,9 +192,33 @@ entry: ret void } +;; Ensure that floating-point values are not counted toward the arity +; ASM-LABEL: __cfi_f12: +; ASM: movl $2253188362, %ebp +define dso_local void @f12(i32 noundef %v1, i32 noundef %v2, float noundef %v3, double noundef %v4, float noundef %v5, i32 noundef %v6, i32 noundef %v7, i32 noundef %v8) #0 !kcfi_type !7 { +entry: + %v1.addr = alloca i32, align 4 + %v2.addr = alloca i32, align 4 + %v3.addr = alloca float, align 4 + %v4.addr = alloca double, align 4 + %v5.addr = alloca float, align 4 + %v6.addr = alloca i32, align 4 + %v7.addr = alloca i32, align 4 + %v8.addr = alloca i32, align 4 + store i32 %v1, ptr %v1.addr, align 4 + store i32 %v2, ptr %v2.addr, align 4 + store float %v3, ptr %v3.addr, align 4 + store double %v4, ptr %v4.addr, align 4 + store float %v5, ptr %v5.addr, align 4 + store i32 %v6, ptr %v6.addr, align 4 + store i32 %v7, ptr %v7.addr, align 4 + store i32 %v8, ptr %v8.addr, align 4 + ret void +} + attributes #0 = { "target-features"="+retpoline-indirect-branches,+retpoline-indirect-calls" } -!llvm.module.flags = !{!0, !7} +!llvm.module.flags = !{!0, !8} !0 = !{i32 4, !"kcfi", i32 1} !1 = !{i32 12345678} !2 = !{i32 4196274163} @@ -202,4 +226,5 @@ attributes #0 = { "target-features"="+retpoline-indirect-branches,+retpoline-ind !4 = !{i32 199571451} !5 = !{i32 1046421190} !6 = !{i32 
1342488295} -!7 = !{i32 4, !"kcfi-arity", i32 1} +!7 = !{i32 2253188362} +!8 = !{i32 4, !"kcfi-arity", i32 1} -- cgit v1.2.3 From dac0820b277835b7506a9c0d1dc5e077597f6742 Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Wed, 18 Jun 2025 11:37:51 +0200 Subject: [Thumb2] Regenerate some test checks. NFC --- llvm/test/CodeGen/Thumb2/mve-shuffle.ll | 15 +- llvm/test/CodeGen/Thumb2/mve-vld3.ll | 431 ++++++++++------------------- llvm/test/CodeGen/Thumb2/schedm7-hazard.ll | 1 + 3 files changed, 159 insertions(+), 288 deletions(-) diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll index 82c8d50e518b..94d5490cead2 100644 --- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll +++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll @@ -235,7 +235,7 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) { ; CHECK-LV-NEXT: vmov.f32 s7, s1 ; CHECK-LV-NEXT: vmov q0, q1 ; CHECK-LV-NEXT: bx lr - +; ; CHECK-LIS-LABEL: shuffle3_i16: ; CHECK-LIS: @ %bb.0: @ %entry ; CHECK-LIS-NEXT: vmov q1, q0 @@ -248,6 +248,7 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) { ; CHECK-LIS-NEXT: vmov.f32 s3, s5 ; CHECK-LIS-NEXT: vins.f16 s1, s7 ; CHECK-LIS-NEXT: bx lr + entry: %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> ret <8 x i16> %out @@ -1170,7 +1171,7 @@ define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) { ; CHECK-LV-NEXT: vmov.f32 s7, s1 ; CHECK-LV-NEXT: vmov q0, q1 ; CHECK-LV-NEXT: bx lr - +; ; CHECK-LIS-LABEL: shuffle3_f16: ; CHECK-LIS: @ %bb.0: @ %entry ; CHECK-LIS-NEXT: vmov q1, q0 @@ -1183,6 +1184,7 @@ define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) { ; CHECK-LIS-NEXT: vmov.f32 s3, s5 ; CHECK-LIS-NEXT: vins.f16 s1, s7 ; CHECK-LIS-NEXT: bx lr + entry: %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> ret <8 x half> %out @@ -1514,7 +1516,7 @@ define arm_aapcs_vfpcc <8 x double> @shuffle9_f64(<4 x double> %src1, <4 x doubl ; CHECK-LV-NEXT: vmov q1, q5 ; CHECK-LV-NEXT: 
vpop {d8, d9, d10, d11} ; CHECK-LV-NEXT: bx lr - +; ; CHECK-LIS-LABEL: shuffle9_f64: ; CHECK-LIS: @ %bb.0: @ %entry ; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11} @@ -1534,6 +1536,7 @@ define arm_aapcs_vfpcc <8 x double> @shuffle9_f64(<4 x double> %src1, <4 x doubl ; CHECK-LIS-NEXT: vmov q1, q5 ; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11} ; CHECK-LIS-NEXT: bx lr + entry: %out = shufflevector <4 x double> %src1, <4 x double> %src2, <8 x i32> ret <8 x double> %out @@ -1627,7 +1630,7 @@ define arm_aapcs_vfpcc <8 x i64> @shuffle9_i64(<4 x i64> %src1, <4 x i64> %src2) ; CHECK-LV-NEXT: vmov q1, q5 ; CHECK-LV-NEXT: vpop {d8, d9, d10, d11} ; CHECK-LV-NEXT: bx lr - +; ; CHECK-LIS-LABEL: shuffle9_i64: ; CHECK-LIS: @ %bb.0: @ %entry ; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11} @@ -1647,6 +1650,7 @@ define arm_aapcs_vfpcc <8 x i64> @shuffle9_i64(<4 x i64> %src1, <4 x i64> %src2) ; CHECK-LIS-NEXT: vmov q1, q5 ; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11} ; CHECK-LIS-NEXT: bx lr + entry: %out = shufflevector <4 x i64> %src1, <4 x i64> %src2, <8 x i32> ret <8 x i64> %out @@ -1886,6 +1890,3 @@ entry: ret double %res } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK-LIS: {{.*}} -; CHECK-LV: {{.*}} diff --git a/llvm/test/CodeGen/Thumb2/mve-vld3.ll b/llvm/test/CodeGen/Thumb2/mve-vld3.ll index b6c8056891f8..4dd9173e2d41 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vld3.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vld3.ll @@ -56,7 +56,7 @@ define void @vld3_v4i32(ptr %src, ptr %dst) { ; CHECK-LV-NEXT: vstrw.32 q0, [r1] ; CHECK-LV-NEXT: vpop {d8, d9} ; CHECK-LV-NEXT: bx lr - +; ; CHECK-LIS-LABEL: vld3_v4i32: ; CHECK-LIS: @ %bb.0: @ %entry ; CHECK-LIS-NEXT: .vsave {d8, d9} @@ -80,6 +80,7 @@ define void @vld3_v4i32(ptr %src, ptr %dst) { ; CHECK-LIS-NEXT: vstrw.32 q0, [r1] ; CHECK-LIS-NEXT: vpop {d8, d9} ; CHECK-LIS-NEXT: bx lr + entry: %l1 = load <12 x i32>, ptr %src, align 4 %s1 = shufflevector <12 x i32> %l1, <12 x i32> undef, <4 x i32> @@ -132,7 +133,7 @@ define void @vld3_v8i32(ptr %src, ptr %dst) { ; CHECK-LV-NEXT: vstrw.32 q1, [r1] ; CHECK-LV-NEXT: vpop {d8, d9, d10, d11} ; CHECK-LV-NEXT: bx lr - +; ; CHECK-LIS-LABEL: vld3_v8i32: ; CHECK-LIS: @ %bb.0: @ %entry ; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11} @@ -173,6 +174,7 @@ define void @vld3_v8i32(ptr %src, ptr %dst) { ; CHECK-LIS-NEXT: vstrw.32 q1, [r1] ; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11} ; CHECK-LIS-NEXT: bx lr + entry: %l1 = load <24 x i32>, ptr %src, align 4 %s1 = shufflevector <24 x i32> %l1, <24 x i32> undef, <8 x i32> @@ -259,7 +261,7 @@ define void @vld3_v16i32(ptr %src, ptr %dst) { ; CHECK-LV-NEXT: vstrw.32 q3, [r1, #32] ; CHECK-LV-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-LV-NEXT: bx lr - +; ; CHECK-LIS-LABEL: vld3_v16i32: ; CHECK-LIS: @ %bb.0: @ %entry ; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} @@ -334,6 +336,7 @@ define void @vld3_v16i32(ptr %src, ptr %dst) { ; CHECK-LIS-NEXT: vstrw.32 q3, [r1, #32] ; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-LIS-NEXT: bx lr + entry: %l1 = load <48 x i32>, ptr %src, align 4 %s1 = shufflevector <48 x i32> %l1, <48 x i32> undef, <16 x 
i32> @@ -961,7 +964,7 @@ define void @vld3_v2i64(ptr %src, ptr %dst) { ; CHECK-LV-NEXT: vmov q0[3], q0[1], r7, r2 ; CHECK-LV-NEXT: vstrw.32 q0, [r1] ; CHECK-LV-NEXT: pop.w {r4, r5, r6, r7, r8, pc} - +; ; CHECK-LIS-LABEL: vld3_v2i64: ; CHECK-LIS: @ %bb.0: @ %entry ; CHECK-LIS-NEXT: .save {r4, r5, r6, r7, r8, lr} @@ -991,6 +994,7 @@ define void @vld3_v2i64(ptr %src, ptr %dst) { ; CHECK-LIS-NEXT: vmov q0[3], q0[1], r7, r2 ; CHECK-LIS-NEXT: vstrw.32 q0, [r1] ; CHECK-LIS-NEXT: pop.w {r4, r5, r6, r7, r8, pc} + entry: %l1 = load <6 x i64>, ptr %src, align 4 %s1 = shufflevector <6 x i64> %l1, <6 x i64> undef, <2 x i32> @@ -1147,7 +1151,7 @@ define void @vld3_v2f32(ptr %src, ptr %dst) { ; CHECK-LV-NEXT: vadd.f32 q0, q0, q1 ; CHECK-LV-NEXT: vstmia r1, {s0, s1} ; CHECK-LV-NEXT: bx lr - +; ; CHECK-LIS-LABEL: vld3_v2f32: ; CHECK-LIS: @ %bb.0: @ %entry ; CHECK-LIS-NEXT: vldrw.u32 q2, [r0] @@ -1161,6 +1165,7 @@ define void @vld3_v2f32(ptr %src, ptr %dst) { ; CHECK-LIS-NEXT: vadd.f32 q0, q1, q0 ; CHECK-LIS-NEXT: vstmia r1, {s0, s1} ; CHECK-LIS-NEXT: bx lr + entry: %l1 = load <6 x float>, ptr %src, align 4 %s1 = shufflevector <6 x float> %l1, <6 x float> undef, <2 x i32> @@ -1173,53 +1178,30 @@ entry: } define void @vld3_v4f32(ptr %src, ptr %dst) { -; CHECK-LV-LABEL: vld3_v4f32: -; CHECK-LV: @ %bb.0: @ %entry -; CHECK-LV-NEXT: .vsave {d8, d9} -; CHECK-LV-NEXT: vpush {d8, d9} -; CHECK-LV-NEXT: vldrw.u32 q0, [r0, #16] -; CHECK-LV-NEXT: vldrw.u32 q1, [r0] -; CHECK-LV-NEXT: vldrw.u32 q4, [r0, #32] -; CHECK-LV-NEXT: vmov.f32 s10, s2 -; CHECK-LV-NEXT: vmov.f32 s13, s0 -; CHECK-LV-NEXT: vmov.f32 s14, s3 -; CHECK-LV-NEXT: vmov.f32 s8, s4 -; CHECK-LV-NEXT: vmov.f32 s9, s7 -; CHECK-LV-NEXT: vmov.f32 s12, s5 -; CHECK-LV-NEXT: vmov.f32 s15, s18 -; CHECK-LV-NEXT: vmov.f32 s11, s17 -; CHECK-LV-NEXT: vadd.f32 q2, q2, q3 -; CHECK-LV-NEXT: vmov.f32 s0, s6 -; CHECK-LV-NEXT: vmov.f32 s2, s16 -; CHECK-LV-NEXT: vmov.f32 s3, s19 -; CHECK-LV-NEXT: vadd.f32 q0, q2, q0 -; CHECK-LV-NEXT: vstrw.32 q0, [r1] 
-; CHECK-LV-NEXT: vpop {d8, d9} -; CHECK-LV-NEXT: bx lr +; CHECK-LABEL: vld3_v4f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vldrw.u32 q0, [r0, #16] +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vldrw.u32 q4, [r0, #32] +; CHECK-NEXT: vmov.f32 s10, s2 +; CHECK-NEXT: vmov.f32 s13, s0 +; CHECK-NEXT: vmov.f32 s14, s3 +; CHECK-NEXT: vmov.f32 s8, s4 +; CHECK-NEXT: vmov.f32 s9, s7 +; CHECK-NEXT: vmov.f32 s12, s5 +; CHECK-NEXT: vmov.f32 s15, s18 +; CHECK-NEXT: vmov.f32 s11, s17 +; CHECK-NEXT: vadd.f32 q2, q2, q3 +; CHECK-NEXT: vmov.f32 s0, s6 +; CHECK-NEXT: vmov.f32 s2, s16 +; CHECK-NEXT: vmov.f32 s3, s19 +; CHECK-NEXT: vadd.f32 q0, q2, q0 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: bx lr -; CHECK-LIS-LABEL: vld3_v4f32: -; CHECK-LIS: @ %bb.0: @ %entry -; CHECK-LIS-NEXT: .vsave {d8, d9} -; CHECK-LIS-NEXT: vpush {d8, d9} -; CHECK-LIS-NEXT: vldrw.u32 q0, [r0, #16] -; CHECK-LIS-NEXT: vldrw.u32 q1, [r0] -; CHECK-LIS-NEXT: vldrw.u32 q4, [r0, #32] -; CHECK-LIS-NEXT: vmov.f32 s10, s2 -; CHECK-LIS-NEXT: vmov.f32 s13, s0 -; CHECK-LIS-NEXT: vmov.f32 s14, s3 -; CHECK-LIS-NEXT: vmov.f32 s8, s4 -; CHECK-LIS-NEXT: vmov.f32 s9, s7 -; CHECK-LIS-NEXT: vmov.f32 s12, s5 -; CHECK-LIS-NEXT: vmov.f32 s15, s18 -; CHECK-LIS-NEXT: vmov.f32 s11, s17 -; CHECK-LIS-NEXT: vadd.f32 q2, q2, q3 -; CHECK-LIS-NEXT: vmov.f32 s0, s6 -; CHECK-LIS-NEXT: vmov.f32 s2, s16 -; CHECK-LIS-NEXT: vmov.f32 s3, s19 -; CHECK-LIS-NEXT: vadd.f32 q0, q2, q0 -; CHECK-LIS-NEXT: vstrw.32 q0, [r1] -; CHECK-LIS-NEXT: vpop {d8, d9} -; CHECK-LIS-NEXT: bx lr entry: %l1 = load <12 x float>, ptr %src, align 4 %s1 = shufflevector <12 x float> %l1, <12 x float> undef, <4 x i32> @@ -1232,87 +1214,47 @@ entry: } define void @vld3_v8f32(ptr %src, ptr %dst) { -; CHECK-LV-LABEL: vld3_v8f32: -; CHECK-LV: @ %bb.0: @ %entry -; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-LV-NEXT: vpush {d8, d9, d10, d11} -; CHECK-LV-NEXT: vldrw.u32 q0, [r0, 
#64] -; CHECK-LV-NEXT: vldrw.u32 q1, [r0, #48] -; CHECK-LV-NEXT: vldrw.u32 q4, [r0, #80] -; CHECK-LV-NEXT: vmov.f32 s10, s2 -; CHECK-LV-NEXT: vmov.f32 s13, s0 -; CHECK-LV-NEXT: vmov.f32 s14, s3 -; CHECK-LV-NEXT: vmov.f32 s8, s4 -; CHECK-LV-NEXT: vmov.f32 s9, s7 -; CHECK-LV-NEXT: vmov.f32 s12, s5 -; CHECK-LV-NEXT: vmov.f32 s15, s18 -; CHECK-LV-NEXT: vmov.f32 s11, s17 -; CHECK-LV-NEXT: vadd.f32 q2, q2, q3 -; CHECK-LV-NEXT: vmov.f32 s0, s6 -; CHECK-LV-NEXT: vmov.f32 s2, s16 -; CHECK-LV-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-LV-NEXT: vmov.f32 s3, s19 -; CHECK-LV-NEXT: vldrw.u32 q3, [r0, #32] -; CHECK-LV-NEXT: vadd.f32 q0, q2, q0 -; CHECK-LV-NEXT: vldrw.u32 q2, [r0] -; CHECK-LV-NEXT: vmov.f32 s17, s4 -; CHECK-LV-NEXT: vstrw.32 q0, [r1, #16] -; CHECK-LV-NEXT: vmov.f32 s18, s7 -; CHECK-LV-NEXT: vmov.f32 s22, s6 -; CHECK-LV-NEXT: vmov.f32 s16, s9 -; CHECK-LV-NEXT: vmov.f32 s19, s14 -; CHECK-LV-NEXT: vmov.f32 s20, s8 -; CHECK-LV-NEXT: vmov.f32 s21, s11 -; CHECK-LV-NEXT: vmov.f32 s23, s13 -; CHECK-LV-NEXT: vadd.f32 q4, q5, q4 -; CHECK-LV-NEXT: vmov.f32 s4, s10 -; CHECK-LV-NEXT: vmov.f32 s6, s12 -; CHECK-LV-NEXT: vmov.f32 s7, s15 -; CHECK-LV-NEXT: vadd.f32 q1, q4, q1 -; CHECK-LV-NEXT: vstrw.32 q1, [r1] -; CHECK-LV-NEXT: vpop {d8, d9, d10, d11} -; CHECK-LV-NEXT: bx lr +; CHECK-LABEL: vld3_v8f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vldrw.u32 q0, [r0, #64] +; CHECK-NEXT: vldrw.u32 q1, [r0, #48] +; CHECK-NEXT: vldrw.u32 q4, [r0, #80] +; CHECK-NEXT: vmov.f32 s10, s2 +; CHECK-NEXT: vmov.f32 s13, s0 +; CHECK-NEXT: vmov.f32 s14, s3 +; CHECK-NEXT: vmov.f32 s8, s4 +; CHECK-NEXT: vmov.f32 s9, s7 +; CHECK-NEXT: vmov.f32 s12, s5 +; CHECK-NEXT: vmov.f32 s15, s18 +; CHECK-NEXT: vmov.f32 s11, s17 +; CHECK-NEXT: vadd.f32 q2, q2, q3 +; CHECK-NEXT: vmov.f32 s0, s6 +; CHECK-NEXT: vmov.f32 s2, s16 +; CHECK-NEXT: vldrw.u32 q1, [r0, #16] +; CHECK-NEXT: vmov.f32 s3, s19 +; CHECK-NEXT: vldrw.u32 q3, [r0, #32] +; 
CHECK-NEXT: vadd.f32 q0, q2, q0 +; CHECK-NEXT: vldrw.u32 q2, [r0] +; CHECK-NEXT: vmov.f32 s17, s4 +; CHECK-NEXT: vstrw.32 q0, [r1, #16] +; CHECK-NEXT: vmov.f32 s18, s7 +; CHECK-NEXT: vmov.f32 s22, s6 +; CHECK-NEXT: vmov.f32 s16, s9 +; CHECK-NEXT: vmov.f32 s19, s14 +; CHECK-NEXT: vmov.f32 s20, s8 +; CHECK-NEXT: vmov.f32 s21, s11 +; CHECK-NEXT: vmov.f32 s23, s13 +; CHECK-NEXT: vadd.f32 q4, q5, q4 +; CHECK-NEXT: vmov.f32 s4, s10 +; CHECK-NEXT: vmov.f32 s6, s12 +; CHECK-NEXT: vmov.f32 s7, s15 +; CHECK-NEXT: vadd.f32 q1, q4, q1 +; CHECK-NEXT: vstrw.32 q1, [r1] +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: bx lr -; CHECK-LIS-LABEL: vld3_v8f32: -; CHECK-LIS: @ %bb.0: @ %entry -; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11} -; CHECK-LIS-NEXT: vldrw.u32 q0, [r0, #64] -; CHECK-LIS-NEXT: vldrw.u32 q1, [r0, #48] -; CHECK-LIS-NEXT: vldrw.u32 q4, [r0, #80] -; CHECK-LIS-NEXT: vmov.f32 s10, s2 -; CHECK-LIS-NEXT: vmov.f32 s13, s0 -; CHECK-LIS-NEXT: vmov.f32 s14, s3 -; CHECK-LIS-NEXT: vmov.f32 s8, s4 -; CHECK-LIS-NEXT: vmov.f32 s9, s7 -; CHECK-LIS-NEXT: vmov.f32 s12, s5 -; CHECK-LIS-NEXT: vmov.f32 s15, s18 -; CHECK-LIS-NEXT: vmov.f32 s11, s17 -; CHECK-LIS-NEXT: vadd.f32 q2, q2, q3 -; CHECK-LIS-NEXT: vmov.f32 s0, s6 -; CHECK-LIS-NEXT: vmov.f32 s2, s16 -; CHECK-LIS-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-LIS-NEXT: vmov.f32 s3, s19 -; CHECK-LIS-NEXT: vldrw.u32 q3, [r0, #32] -; CHECK-LIS-NEXT: vadd.f32 q0, q2, q0 -; CHECK-LIS-NEXT: vldrw.u32 q2, [r0] -; CHECK-LIS-NEXT: vmov.f32 s17, s4 -; CHECK-LIS-NEXT: vstrw.32 q0, [r1, #16] -; CHECK-LIS-NEXT: vmov.f32 s18, s7 -; CHECK-LIS-NEXT: vmov.f32 s22, s6 -; CHECK-LIS-NEXT: vmov.f32 s16, s9 -; CHECK-LIS-NEXT: vmov.f32 s19, s14 -; CHECK-LIS-NEXT: vmov.f32 s20, s8 -; CHECK-LIS-NEXT: vmov.f32 s21, s11 -; CHECK-LIS-NEXT: vmov.f32 s23, s13 -; CHECK-LIS-NEXT: vadd.f32 q4, q5, q4 -; CHECK-LIS-NEXT: vmov.f32 s4, s10 -; CHECK-LIS-NEXT: vmov.f32 s6, s12 -; CHECK-LIS-NEXT: vmov.f32 s7, s15 -; CHECK-LIS-NEXT: 
vadd.f32 q1, q4, q1 -; CHECK-LIS-NEXT: vstrw.32 q1, [r1] -; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11} -; CHECK-LIS-NEXT: bx lr entry: %l1 = load <24 x float>, ptr %src, align 4 %s1 = shufflevector <24 x float> %l1, <24 x float> undef, <8 x i32> @@ -1325,155 +1267,81 @@ entry: } define void @vld3_v16f32(ptr %src, ptr %dst) { -; CHECK-LV-LABEL: vld3_v16f32: -; CHECK-LV: @ %bb.0: @ %entry -; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-LV-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-LV-NEXT: vldrw.u32 q0, [r0, #64] -; CHECK-LV-NEXT: vldrw.u32 q1, [r0, #48] -; CHECK-LV-NEXT: vldrw.u32 q4, [r0, #80] -; CHECK-LV-NEXT: vldrw.u32 q6, [r0, #176] -; CHECK-LV-NEXT: vmov.f32 s10, s2 -; CHECK-LV-NEXT: vmov.f32 s13, s0 -; CHECK-LV-NEXT: vmov.f32 s14, s3 -; CHECK-LV-NEXT: vmov.f32 s8, s4 -; CHECK-LV-NEXT: vmov.f32 s9, s7 -; CHECK-LV-NEXT: vmov.f32 s12, s5 -; CHECK-LV-NEXT: vmov.f32 s15, s18 -; CHECK-LV-NEXT: vmov.f32 s11, s17 -; CHECK-LV-NEXT: vadd.f32 q2, q2, q3 -; CHECK-LV-NEXT: vmov.f32 s0, s6 -; CHECK-LV-NEXT: vmov.f32 s2, s16 -; CHECK-LV-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-LV-NEXT: vmov.f32 s3, s19 -; CHECK-LV-NEXT: vldrw.u32 q3, [r0, #32] -; CHECK-LV-NEXT: vadd.f32 q0, q2, q0 -; CHECK-LV-NEXT: vldrw.u32 q2, [r0] -; CHECK-LV-NEXT: vmov.f32 s17, s4 -; CHECK-LV-NEXT: vmov.f32 s18, s7 -; CHECK-LV-NEXT: vmov.f32 s22, s6 -; CHECK-LV-NEXT: vmov.f32 s16, s9 -; CHECK-LV-NEXT: vmov.f32 s19, s14 -; CHECK-LV-NEXT: vmov.f32 s20, s8 -; CHECK-LV-NEXT: vmov.f32 s21, s11 -; CHECK-LV-NEXT: vmov.f32 s23, s13 -; CHECK-LV-NEXT: vmov.f32 s4, s10 -; CHECK-LV-NEXT: vldrw.u32 q2, [r0, #160] -; CHECK-LV-NEXT: vmov.f32 s6, s12 -; CHECK-LV-NEXT: vadd.f32 q4, q5, q4 -; CHECK-LV-NEXT: vmov.f32 s7, s15 -; CHECK-LV-NEXT: vldrw.u32 q3, [r0, #144] -; CHECK-LV-NEXT: vadd.f32 q1, q4, q1 -; CHECK-LV-NEXT: vmov.f32 s18, s10 -; CHECK-LV-NEXT: vmov.f32 s21, s8 -; CHECK-LV-NEXT: vmov.f32 s22, s11 -; CHECK-LV-NEXT: vmov.f32 s16, s12 -; CHECK-LV-NEXT: vmov.f32 s17, s15 -; 
CHECK-LV-NEXT: vmov.f32 s20, s13 -; CHECK-LV-NEXT: vmov.f32 s23, s26 -; CHECK-LV-NEXT: vmov.f32 s19, s25 -; CHECK-LV-NEXT: vadd.f32 q4, q4, q5 -; CHECK-LV-NEXT: vmov.f32 s8, s14 -; CHECK-LV-NEXT: vmov.f32 s10, s24 -; CHECK-LV-NEXT: vldrw.u32 q3, [r0, #112] -; CHECK-LV-NEXT: vmov.f32 s11, s27 -; CHECK-LV-NEXT: vldrw.u32 q5, [r0, #128] -; CHECK-LV-NEXT: vadd.f32 q2, q4, q2 -; CHECK-LV-NEXT: vldrw.u32 q4, [r0, #96] -; CHECK-LV-NEXT: vmov.f32 s25, s12 -; CHECK-LV-NEXT: vstrw.32 q2, [r1, #48] -; CHECK-LV-NEXT: vmov.f32 s26, s15 -; CHECK-LV-NEXT: vstrw.32 q0, [r1, #16] -; CHECK-LV-NEXT: vmov.f32 s30, s14 -; CHECK-LV-NEXT: vstrw.32 q1, [r1] -; CHECK-LV-NEXT: vmov.f32 s24, s17 -; CHECK-LV-NEXT: vmov.f32 s27, s22 -; CHECK-LV-NEXT: vmov.f32 s28, s16 -; CHECK-LV-NEXT: vmov.f32 s29, s19 -; CHECK-LV-NEXT: vmov.f32 s31, s21 -; CHECK-LV-NEXT: vadd.f32 q6, q7, q6 -; CHECK-LV-NEXT: vmov.f32 s12, s18 -; CHECK-LV-NEXT: vmov.f32 s14, s20 -; CHECK-LV-NEXT: vmov.f32 s15, s23 -; CHECK-LV-NEXT: vadd.f32 q3, q6, q3 -; CHECK-LV-NEXT: vstrw.32 q3, [r1, #32] -; CHECK-LV-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-LV-NEXT: bx lr +; CHECK-LABEL: vld3_v16f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vldrw.u32 q0, [r0, #64] +; CHECK-NEXT: vldrw.u32 q1, [r0, #48] +; CHECK-NEXT: vldrw.u32 q4, [r0, #80] +; CHECK-NEXT: vldrw.u32 q6, [r0, #176] +; CHECK-NEXT: vmov.f32 s10, s2 +; CHECK-NEXT: vmov.f32 s13, s0 +; CHECK-NEXT: vmov.f32 s14, s3 +; CHECK-NEXT: vmov.f32 s8, s4 +; CHECK-NEXT: vmov.f32 s9, s7 +; CHECK-NEXT: vmov.f32 s12, s5 +; CHECK-NEXT: vmov.f32 s15, s18 +; CHECK-NEXT: vmov.f32 s11, s17 +; CHECK-NEXT: vadd.f32 q2, q2, q3 +; CHECK-NEXT: vmov.f32 s0, s6 +; CHECK-NEXT: vmov.f32 s2, s16 +; CHECK-NEXT: vldrw.u32 q1, [r0, #16] +; CHECK-NEXT: vmov.f32 s3, s19 +; CHECK-NEXT: vldrw.u32 q3, [r0, #32] +; CHECK-NEXT: vadd.f32 q0, q2, q0 +; CHECK-NEXT: vldrw.u32 q2, 
[r0] +; CHECK-NEXT: vmov.f32 s17, s4 +; CHECK-NEXT: vmov.f32 s18, s7 +; CHECK-NEXT: vmov.f32 s22, s6 +; CHECK-NEXT: vmov.f32 s16, s9 +; CHECK-NEXT: vmov.f32 s19, s14 +; CHECK-NEXT: vmov.f32 s20, s8 +; CHECK-NEXT: vmov.f32 s21, s11 +; CHECK-NEXT: vmov.f32 s23, s13 +; CHECK-NEXT: vmov.f32 s4, s10 +; CHECK-NEXT: vldrw.u32 q2, [r0, #160] +; CHECK-NEXT: vmov.f32 s6, s12 +; CHECK-NEXT: vadd.f32 q4, q5, q4 +; CHECK-NEXT: vmov.f32 s7, s15 +; CHECK-NEXT: vldrw.u32 q3, [r0, #144] +; CHECK-NEXT: vadd.f32 q1, q4, q1 +; CHECK-NEXT: vmov.f32 s18, s10 +; CHECK-NEXT: vmov.f32 s21, s8 +; CHECK-NEXT: vmov.f32 s22, s11 +; CHECK-NEXT: vmov.f32 s16, s12 +; CHECK-NEXT: vmov.f32 s17, s15 +; CHECK-NEXT: vmov.f32 s20, s13 +; CHECK-NEXT: vmov.f32 s23, s26 +; CHECK-NEXT: vmov.f32 s19, s25 +; CHECK-NEXT: vadd.f32 q4, q4, q5 +; CHECK-NEXT: vmov.f32 s8, s14 +; CHECK-NEXT: vmov.f32 s10, s24 +; CHECK-NEXT: vldrw.u32 q3, [r0, #112] +; CHECK-NEXT: vmov.f32 s11, s27 +; CHECK-NEXT: vldrw.u32 q5, [r0, #128] +; CHECK-NEXT: vadd.f32 q2, q4, q2 +; CHECK-NEXT: vldrw.u32 q4, [r0, #96] +; CHECK-NEXT: vmov.f32 s25, s12 +; CHECK-NEXT: vstrw.32 q2, [r1, #48] +; CHECK-NEXT: vmov.f32 s26, s15 +; CHECK-NEXT: vstrw.32 q0, [r1, #16] +; CHECK-NEXT: vmov.f32 s30, s14 +; CHECK-NEXT: vstrw.32 q1, [r1] +; CHECK-NEXT: vmov.f32 s24, s17 +; CHECK-NEXT: vmov.f32 s27, s22 +; CHECK-NEXT: vmov.f32 s28, s16 +; CHECK-NEXT: vmov.f32 s29, s19 +; CHECK-NEXT: vmov.f32 s31, s21 +; CHECK-NEXT: vadd.f32 q6, q7, q6 +; CHECK-NEXT: vmov.f32 s12, s18 +; CHECK-NEXT: vmov.f32 s14, s20 +; CHECK-NEXT: vmov.f32 s15, s23 +; CHECK-NEXT: vadd.f32 q3, q6, q3 +; CHECK-NEXT: vstrw.32 q3, [r1, #32] +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: bx lr -; CHECK-LIS-LABEL: vld3_v16f32: -; CHECK-LIS: @ %bb.0: @ %entry -; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-LIS-NEXT: vldrw.u32 q0, [r0, #64] -; CHECK-LIS-NEXT: vldrw.u32 q1, [r0, #48] 
-; CHECK-LIS-NEXT: vldrw.u32 q4, [r0, #80] -; CHECK-LIS-NEXT: vldrw.u32 q6, [r0, #176] -; CHECK-LIS-NEXT: vmov.f32 s10, s2 -; CHECK-LIS-NEXT: vmov.f32 s13, s0 -; CHECK-LIS-NEXT: vmov.f32 s14, s3 -; CHECK-LIS-NEXT: vmov.f32 s8, s4 -; CHECK-LIS-NEXT: vmov.f32 s9, s7 -; CHECK-LIS-NEXT: vmov.f32 s12, s5 -; CHECK-LIS-NEXT: vmov.f32 s15, s18 -; CHECK-LIS-NEXT: vmov.f32 s11, s17 -; CHECK-LIS-NEXT: vadd.f32 q2, q2, q3 -; CHECK-LIS-NEXT: vmov.f32 s0, s6 -; CHECK-LIS-NEXT: vmov.f32 s2, s16 -; CHECK-LIS-NEXT: vldrw.u32 q1, [r0, #16] -; CHECK-LIS-NEXT: vmov.f32 s3, s19 -; CHECK-LIS-NEXT: vldrw.u32 q3, [r0, #32] -; CHECK-LIS-NEXT: vadd.f32 q0, q2, q0 -; CHECK-LIS-NEXT: vldrw.u32 q2, [r0] -; CHECK-LIS-NEXT: vmov.f32 s17, s4 -; CHECK-LIS-NEXT: vmov.f32 s18, s7 -; CHECK-LIS-NEXT: vmov.f32 s22, s6 -; CHECK-LIS-NEXT: vmov.f32 s16, s9 -; CHECK-LIS-NEXT: vmov.f32 s19, s14 -; CHECK-LIS-NEXT: vmov.f32 s20, s8 -; CHECK-LIS-NEXT: vmov.f32 s21, s11 -; CHECK-LIS-NEXT: vmov.f32 s23, s13 -; CHECK-LIS-NEXT: vmov.f32 s4, s10 -; CHECK-LIS-NEXT: vldrw.u32 q2, [r0, #160] -; CHECK-LIS-NEXT: vmov.f32 s6, s12 -; CHECK-LIS-NEXT: vadd.f32 q4, q5, q4 -; CHECK-LIS-NEXT: vmov.f32 s7, s15 -; CHECK-LIS-NEXT: vldrw.u32 q3, [r0, #144] -; CHECK-LIS-NEXT: vadd.f32 q1, q4, q1 -; CHECK-LIS-NEXT: vmov.f32 s18, s10 -; CHECK-LIS-NEXT: vmov.f32 s21, s8 -; CHECK-LIS-NEXT: vmov.f32 s22, s11 -; CHECK-LIS-NEXT: vmov.f32 s16, s12 -; CHECK-LIS-NEXT: vmov.f32 s17, s15 -; CHECK-LIS-NEXT: vmov.f32 s20, s13 -; CHECK-LIS-NEXT: vmov.f32 s23, s26 -; CHECK-LIS-NEXT: vmov.f32 s19, s25 -; CHECK-LIS-NEXT: vadd.f32 q4, q4, q5 -; CHECK-LIS-NEXT: vmov.f32 s8, s14 -; CHECK-LIS-NEXT: vmov.f32 s10, s24 -; CHECK-LIS-NEXT: vldrw.u32 q3, [r0, #112] -; CHECK-LIS-NEXT: vmov.f32 s11, s27 -; CHECK-LIS-NEXT: vldrw.u32 q5, [r0, #128] -; CHECK-LIS-NEXT: vadd.f32 q2, q4, q2 -; CHECK-LIS-NEXT: vldrw.u32 q4, [r0, #96] -; CHECK-LIS-NEXT: vmov.f32 s25, s12 -; CHECK-LIS-NEXT: vstrw.32 q2, [r1, #48] -; CHECK-LIS-NEXT: vmov.f32 s26, s15 -; CHECK-LIS-NEXT: 
vstrw.32 q0, [r1, #16] -; CHECK-LIS-NEXT: vmov.f32 s30, s14 -; CHECK-LIS-NEXT: vstrw.32 q1, [r1] -; CHECK-LIS-NEXT: vmov.f32 s24, s17 -; CHECK-LIS-NEXT: vmov.f32 s27, s22 -; CHECK-LIS-NEXT: vmov.f32 s28, s16 -; CHECK-LIS-NEXT: vmov.f32 s29, s19 -; CHECK-LIS-NEXT: vmov.f32 s31, s21 -; CHECK-LIS-NEXT: vadd.f32 q6, q7, q6 -; CHECK-LIS-NEXT: vmov.f32 s12, s18 -; CHECK-LIS-NEXT: vmov.f32 s14, s20 -; CHECK-LIS-NEXT: vmov.f32 s15, s23 -; CHECK-LIS-NEXT: vadd.f32 q3, q6, q3 -; CHECK-LIS-NEXT: vstrw.32 q3, [r1, #32] -; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-LIS-NEXT: bx lr entry: %l1 = load <48 x float>, ptr %src, align 4 %s1 = shufflevector <48 x float> %l1, <48 x float> undef, <16 x i32> @@ -1692,7 +1560,7 @@ define void @vld3_v16f16(ptr %src, ptr %dst) { ; CHECK-LV-NEXT: vstrw.32 q0, [r1] ; CHECK-LV-NEXT: vpop {d8, d9} ; CHECK-LV-NEXT: bx lr - +; ; CHECK-LIS-LABEL: vld3_v16f16: ; CHECK-LIS: @ %bb.0: @ %entry ; CHECK-LIS-NEXT: .vsave {d8, d9} @@ -1773,6 +1641,7 @@ define void @vld3_v16f16(ptr %src, ptr %dst) { ; CHECK-LIS-NEXT: vstrw.32 q0, [r1] ; CHECK-LIS-NEXT: vpop {d8, d9} ; CHECK-LIS-NEXT: bx lr + entry: %l1 = load <48 x half>, ptr %src, align 4 %s1 = shufflevector <48 x half> %l1, <48 x half> undef, <16 x i32> @@ -1841,4 +1710,4 @@ entry: %a = fadd <4 x double> %a1, %s3 store <4 x double> %a, ptr %dst ret void -} \ No newline at end of file +} diff --git a/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll b/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll index d3e31d192d57..1b222b2a131a 100644 --- a/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll +++ b/llvm/test/CodeGen/Thumb2/schedm7-hazard.ll @@ -17,6 +17,7 @@ define i32 @test(ptr %x0, i32 %y, i32 %z) { ; CHECK-NEXT: adds r1, #1 ; CHECK-NEXT: muls r0, r1, r0 ; CHECK-NEXT: bx lr +; ; NOBANK-LABEL: test: ; NOBANK: @ %bb.0: @ %entry ; NOBANK-NEXT: ldr r3, [r0] -- cgit v1.2.3 From 5a9cc93a2058e2c26d766f7be6aee63e928bf825 Mon Sep 17 00:00:00 2001 From: Karlo Basioli Date: Wed, 18 Jun 2025 10:57:03 +0100 
Subject: Fix for bazel build #142079 (#144665) --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index c750eb733b3b..0b4441c15794 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -323,7 +323,6 @@ cc_library( ]) + [ "include/mlir/IR/OpAsmOpInterface.h.inc", "include/mlir/IR/PDLPatternMatch.h.inc", - "include/mlir/Interfaces/CallInterfaces.h", "include/mlir/Interfaces/DataLayoutInterfaces.h", "include/mlir/Interfaces/InferIntRangeInterface.h", "include/mlir/Interfaces/SideEffectInterfaces.h", @@ -332,6 +331,7 @@ cc_library( "include/mlir/IR/*.h", ]) + [ "include/mlir/Interfaces/FoldInterfaces.h", + "include/mlir/Interfaces/CallInterfaces.h", ], includes = ["include"], deps = [ -- cgit v1.2.3 From a13b7cc00c5f4b9d2636ed7a22c1390cf8033baf Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 18 Jun 2025 12:24:55 +0200 Subject: [LICM] Support hoisting of non-argmemonly readonly calls (#144497) The code checking whether a readonly call is safe to hoist is currently limited to only argmemonly calls. However, the actual implementation does not depend on this in any way. It either does an MSSA clobber walk on the memory access (which will take all locations accessed by the call into account), or it will look at all MemoryDefs in an entirely location-independent manner. The current restriction dates back to the time when LICM still supported AST, in which case this code *did* reason about the individual pointer arguments. 
--- llvm/lib/Transforms/Scalar/LICM.cpp | 30 +++++----------------------- llvm/test/Transforms/LICM/call-hoisting.ll | 32 ++++++++++++++++++++++++++++++ llvm/test/Transforms/LICM/funclet.ll | 2 +- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index abb6ff1dcfe6..a6bb8b8a21b0 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1117,13 +1117,6 @@ bool isHoistableAndSinkableInst(Instruction &I) { isa(I) || isa(I) || isa(I) || isa(I)); } -/// Return true if MSSA knows there are no MemoryDefs in the loop. -bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) { - for (auto *BB : L->getBlocks()) - if (MSSAU.getMemorySSA()->getBlockDefs(BB)) - return false; - return true; -} /// Return true if I is the only Instruction with a MemoryAccess in L. bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, @@ -1234,24 +1227,11 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, if (Behavior.doesNotAccessMemory()) return true; if (Behavior.onlyReadsMemory()) { - // A readonly argmemonly function only reads from memory pointed to by - // it's arguments with arbitrary offsets. If we can prove there are no - // writes to this memory in the loop, we can hoist or sink. - if (Behavior.onlyAccessesArgPointees()) { - // TODO: expand to writeable arguments - for (Value *Op : CI->args()) - if (Op->getType()->isPointerTy() && - pointerInvalidatedByLoop( - MSSA, cast(MSSA->getMemoryAccess(CI)), CurLoop, I, - Flags, /*InvariantGroup=*/false)) - return false; - return true; - } - - // If this call only reads from memory and there are no writes to memory - // in the loop, we can hoist or sink the call as appropriate. - if (isReadOnly(MSSAU, CurLoop)) - return true; + // If we can prove there are no writes to the memory read by the call, we + // can hoist or sink. 
+ return !pointerInvalidatedByLoop( + MSSA, cast(MSSA->getMemoryAccess(CI)), CurLoop, I, Flags, + /*InvariantGroup=*/false); } // FIXME: This should use mod/ref information to see if we can hoist or diff --git a/llvm/test/Transforms/LICM/call-hoisting.ll b/llvm/test/Transforms/LICM/call-hoisting.ll index 907f13438623..7124b4e445eb 100644 --- a/llvm/test/Transforms/LICM/call-hoisting.ll +++ b/llvm/test/Transforms/LICM/call-hoisting.ll @@ -84,6 +84,38 @@ exit: ret void } +declare i32 @load_not_argmemonly() readonly nounwind willreturn + +define void @test_load_not_argmemonly(ptr noalias %sink) { +; CHECK-LABEL: define void @test_load_not_argmemonly( +; CHECK-SAME: ptr noalias [[SINK:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[RET:%.*]] = call i32 @load_not_argmemonly() +; CHECK-NEXT: store i32 [[RET]], ptr [[SINK]], align 4 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + %ret = call i32 @load_not_argmemonly() + store i32 %ret, ptr %sink + %iv.next = add i32 %iv, 1 + %cmp = icmp slt i32 %iv, 200 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + declare void @store(i32 %val, ptr %p) argmemonly writeonly nounwind define void @test(ptr %loc) { diff --git a/llvm/test/Transforms/LICM/funclet.ll b/llvm/test/Transforms/LICM/funclet.ll index 1cdd12ddc98e..03a49d8ddf69 100644 --- a/llvm/test/Transforms/LICM/funclet.ll +++ b/llvm/test/Transforms/LICM/funclet.ll @@ -153,6 +153,6 @@ else: ; preds = %postinvoke declare void @may_throw() -declare i32 @pure_computation() nounwind argmemonly readonly willreturn +declare i32 @pure_computation() nounwind willreturn 
memory(none) declare i32 @__CxxFrameHandler3(...) -- cgit v1.2.3 From ee4c2bb68752a6c4b463f3873cde278b8d348628 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Wed, 18 Jun 2025 11:32:22 +0100 Subject: [lldb][test] explicit-member-function-quals.cpp: add -glldb This will get un-XFAILed but requires `-glldb` in an upcoming patch. --- lldb/test/Shell/SymbolFile/DWARF/x86/explicit-member-function-quals.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/explicit-member-function-quals.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/explicit-member-function-quals.cpp index 5d1222795dd8..33001db69f83 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/explicit-member-function-quals.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/explicit-member-function-quals.cpp @@ -3,7 +3,7 @@ // Tests that we correctly deduce the CV-quals and storage // class of explicit object member functions. // -// RUN: %clangxx_host %s -target x86_64-pc-linux -g -std=c++23 -c -o %t +// RUN: %clangxx_host %s -glldb -target x86_64-pc-linux -g -std=c++23 -c -o %t // RUN: %lldb %t -b -o "type lookup Foo" 2>&1 | FileCheck %s // // CHECK: (lldb) type lookup Foo -- cgit v1.2.3 From 561eca44e7639ee8805d0bf65a59b9898d782538 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 18 Jun 2025 12:32:33 +0200 Subject: [PowerPC] Split tests into asm and mir parts (NFC) To allow both to be generated. 
--- .../test/CodeGen/PowerPC/aix32-cc-abi-vaarg-mir.ll | 221 +++++++++ llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll | 346 ++++---------- .../test/CodeGen/PowerPC/aix64-cc-abi-vaarg-mir.ll | 192 ++++++++ llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll | 532 +++++++-------------- 4 files changed, 686 insertions(+), 605 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg-mir.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg-mir.ll diff --git a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg-mir.ll b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg-mir.ll new file mode 100644 index 000000000000..3eef8d5ff90f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg-mir.ll @@ -0,0 +1,221 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -O2 -mtriple powerpc-ibm-aix-xcoff -mcpu=ppc -stop-after=machine-cp -verify-machineinstrs < %s | FileCheck %s + +define i32 @int_va_arg(i32 %a, ...) local_unnamed_addr { + ; CHECK-LABEL: name: int_va_arg + ; CHECK: bb.0.entry: + ; CHECK-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $r11 = ADDI %fixed-stack.0, 0 + ; CHECK-NEXT: STW killed renamable $r4, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0) + ; CHECK-NEXT: STW killed renamable $r6, 8, %fixed-stack.0 :: (store (s32)) + ; CHECK-NEXT: STW killed renamable $r7, 12, %fixed-stack.0 :: (store (s32)) + ; CHECK-NEXT: STW killed renamable $r8, 16, %fixed-stack.0 :: (store (s32)) + ; CHECK-NEXT: STW killed renamable $r9, 20, %fixed-stack.0 :: (store (s32)) + ; CHECK-NEXT: STW killed renamable $r10, 24, %fixed-stack.0 :: (store (s32)) + ; CHECK-NEXT: STW renamable $r11, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) + ; CHECK-NEXT: STW killed renamable $r11, 0, %stack.1.arg2 :: (store (s32) into %ir.arg2) + ; CHECK-NEXT: renamable $r4 = ADDI %fixed-stack.0, 4 + ; CHECK-NEXT: STW renamable $r4, 0, %stack.0.arg1 :: 
(store (s32) into %ir.arg1) + ; CHECK-NEXT: renamable $r6 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.argp.cur) + ; CHECK-NEXT: STW killed renamable $r4, 0, %stack.1.arg2 :: (store (s32) into %ir.arg2) + ; CHECK-NEXT: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.argp.cur2) + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r6, killed renamable $r3 + ; CHECK-NEXT: renamable $r4 = RLWINM killed renamable $r4, 1, 0, 30 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 + ; CHECK-NEXT: STW killed renamable $r5, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) + ; CHECK-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 +entry: + %arg1 = alloca ptr, align 4 + %arg2 = alloca ptr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %arg1) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %arg2) + call void @llvm.va_start(ptr nonnull %arg1) + call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) + %argp.cur = load ptr, ptr %arg1, align 4 + %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4 + store ptr %argp.next, ptr %arg1, align 4 + %0 = load i32, ptr %argp.cur, align 4 + %add = add nsw i32 %0, %a + %argp.cur2 = load ptr, ptr %arg2, align 4 + %argp.next3 = getelementptr inbounds i8, ptr %argp.cur2, i32 4 + store ptr %argp.next3, ptr %arg2, align 4 + %1 = load i32, ptr %argp.cur2, align 4 + %mul = shl i32 %1, 1 + %add4 = add nsw i32 %add, %mul + call void @llvm.va_end(ptr nonnull %arg1) + call void @llvm.va_end(ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %arg1) + ret i32 %add4 +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) +declare void @llvm.va_start(ptr) +declare void @llvm.va_copy(ptr, ptr) +declare void @llvm.va_end(ptr) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +define i32 @int_stack_va_arg(i32 %one, i32 %two, i32 %three, i32 %four, 
i32 %five, i32 %six, i32 %seven, i32 %eight, ...) local_unnamed_addr { + ; CHECK-LABEL: name: int_stack_va_arg + ; CHECK: bb.0.entry: + ; CHECK-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $r11 = LI 4 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r4, killed renamable $r3 + ; CHECK-NEXT: renamable $r4 = ADDI %fixed-stack.0, 0 + ; CHECK-NEXT: renamable $r4 = RLWIMI killed renamable $r4, killed renamable $r11, 0, 29, 29 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r5 + ; CHECK-NEXT: STW killed renamable $r4, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r6 + ; CHECK-NEXT: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.argp.cur9, align 8) + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r7 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r8 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r9 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r10 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r4 + ; CHECK-NEXT: renamable $r4 = RLWINM killed renamable $r4, 1, 0, 30 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 + ; CHECK-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 +entry: + %arg1 = alloca ptr, align 4 + %arg2 = alloca ptr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %arg1) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %arg2) + call void @llvm.va_start(ptr nonnull %arg1) + call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) + %add = add nsw i32 %two, %one + %add2 = add nsw i32 %add, %three + %add3 = add nsw i32 %add2, %four + %add4 = add nsw i32 %add3, %five + %add5 = add nsw i32 %add4, %six + %add6 = add nsw i32 %add5, %seven + %add7 = 
add nsw i32 %add6, %eight + %argp.cur = load ptr, ptr %arg1, align 4 + %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4 + store ptr %argp.next, ptr %arg1, align 4 + %0 = load i32, ptr %argp.cur, align 4 + %add8 = add nsw i32 %add7, %0 + %argp.cur9 = load ptr, ptr %arg2, align 4 + %argp.next10 = getelementptr inbounds i8, ptr %argp.cur9, i32 4 + store ptr %argp.next10, ptr %arg2, align 4 + %1 = load i32, ptr %argp.cur9, align 4 + %mul = shl i32 %1, 1 + %add11 = add nsw i32 %add8, %mul + call void @llvm.va_end(ptr nonnull %arg1) + call void @llvm.va_end(ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %arg1) + ret i32 %add11 +} + +define double @double_va_arg(double %a, ...) local_unnamed_addr { + ; CHECK-LABEL: name: double_va_arg + ; CHECK: bb.0.entry: + ; CHECK-NEXT: liveins: $f1, $r5, $r6, $r7, $r8, $r9, $r10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $r3 = ADDI %fixed-stack.0, 0 + ; CHECK-NEXT: STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32), align 8) + ; CHECK-NEXT: STW renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16) + ; CHECK-NEXT: STW renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) + ; CHECK-NEXT: STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32)) + ; CHECK-NEXT: STW killed renamable $r9, 16, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 16, align 16) + ; CHECK-NEXT: STW killed renamable $r10, 20, %fixed-stack.0 :: (store (s32)) + ; CHECK-NEXT: STW renamable $r3, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) + ; CHECK-NEXT: STW killed renamable $r3, 0, %stack.1.arg2 :: (store (s32) into %ir.arg2) + ; CHECK-NEXT: STW renamable $r5, 0, %stack.2 :: (store (s32) into %stack.2, align 8) + ; CHECK-NEXT: STW renamable $r6, 4, %stack.2 :: (store (s32) into %stack.2 + 4) + ; CHECK-NEXT: renamable $f0 = LFD 0, %stack.2 :: (load (s64) from %stack.2) + ; CHECK-NEXT: STW killed 
renamable $r5, 0, %stack.3 :: (store (s32) into %stack.3, align 8) + ; CHECK-NEXT: STW killed renamable $r6, 4, %stack.3 :: (store (s32) into %stack.3 + 4) + ; CHECK-NEXT: renamable $f2 = LFD 0, %stack.3 :: (load (s64) from %stack.3) + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f2, renamable $f2, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm + ; CHECK-NEXT: BLR implicit $lr, implicit $rm, implicit $f1 +entry: + %arg1 = alloca ptr, align 4 + %arg2 = alloca ptr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %arg1) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %arg2) + call void @llvm.va_start(ptr nonnull %arg1) + call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) + %argp.cur = load ptr, ptr %arg1, align 4 + %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 8 + store ptr %argp.next, ptr %arg1, align 4 + %0 = load double, ptr %argp.cur, align 4 + %add = fadd double %0, %a + %argp.cur2 = load ptr, ptr %arg2, align 4 + %argp.next3 = getelementptr inbounds i8, ptr %argp.cur2, i32 8 + store ptr %argp.next3, ptr %arg2, align 4 + %1 = load double, ptr %argp.cur2, align 4 + %mul = fmul double %1, 2.000000e+00 + %add4 = fadd double %add, %mul + call void @llvm.va_end(ptr nonnull %arg1) + call void @llvm.va_end(ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %arg1) + ret double %add4 +} + +define double @double_stack_va_arg(double %one, double %two, double %three, double %four, double %five, double %six, double %seven, double %eight, double %nine, double %ten, double %eleven, double %twelve, double %thirteen, ...) 
local_unnamed_addr { + ; CHECK-LABEL: name: double_stack_va_arg + ; CHECK: bb.0.entry: + ; CHECK-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $r3 = ADDI %fixed-stack.0, 0 + ; CHECK-NEXT: STW killed renamable $r3, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) + ; CHECK-NEXT: renamable $r3 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.argp.cur142, align 16) + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f3, implicit $rm + ; CHECK-NEXT: STW renamable $r3, 0, %stack.2 :: (store (s32) into %stack.2, align 8) + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f4, implicit $rm + ; CHECK-NEXT: renamable $r4 = LWZ 4, %fixed-stack.0 :: (load (s32) from %ir.argp.cur142 + 4) + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f5, implicit $rm + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f6, implicit $rm + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f7, implicit $rm + ; CHECK-NEXT: STW renamable $r4, 4, %stack.2 :: (store (s32) into %stack.2 + 4) + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f8, implicit $rm + ; CHECK-NEXT: renamable $f1 = LFD 0, %stack.2 :: (load (s64) from %stack.2) + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f9, implicit $rm + ; CHECK-NEXT: STW killed renamable $r3, 0, %stack.3 :: (store (s32) into %stack.3, align 8) + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f10, implicit $rm + ; CHECK-NEXT: STW killed renamable $r4, 4, %stack.3 :: (store (s32) into %stack.3 + 4) + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed 
renamable $f11, implicit $rm + ; CHECK-NEXT: renamable $f2 = LFD 0, %stack.3 :: (load (s64) from %stack.3) + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f12, implicit $rm + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f13, implicit $rm + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f2, renamable $f2, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm + ; CHECK-NEXT: BLR implicit $lr, implicit $rm, implicit $f1 +entry: + %arg1 = alloca ptr, align 4 + %arg2 = alloca ptr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %arg1) + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %arg2) + call void @llvm.va_start(ptr nonnull %arg1) + call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) + %add = fadd double %one, %two + %add2 = fadd double %add, %three + %add3 = fadd double %add2, %four + %add4 = fadd double %add3, %five + %add5 = fadd double %add4, %six + %add6 = fadd double %add5, %seven + %add7 = fadd double %add6, %eight + %add8 = fadd double %add7, %nine + %add9 = fadd double %add8, %ten + %add10 = fadd double %add9, %eleven + %add11 = fadd double %add10, %twelve + %add12 = fadd double %add11, %thirteen + %argp.cur1 = load ptr, ptr %arg1, align 4 + %0 = load double, ptr %argp.cur1, align 4 + %add13 = fadd double %add12, %0 + %argp.cur142 = load ptr, ptr %arg2, align 4 + %1 = load double, ptr %argp.cur142, align 4 + %mul = fmul double %1, 2.000000e+00 + %add16 = fadd double %add13, %mul + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %arg1) + ret double %add16 +} diff --git a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll index 3c1b28a4eff1..6ec56ffe3e25 
100644 --- a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll @@ -1,29 +1,28 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O2 -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck --check-prefix=ASM32 %s -; RUN: llc -O2 -mtriple powerpc-ibm-aix-xcoff -mcpu=ppc -stop-after=machine-cp -verify-machineinstrs < %s | FileCheck --check-prefix=32BIT %s +; RUN: llc -O2 -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s define i32 @int_va_arg(i32 %a, ...) local_unnamed_addr { -; ASM32-LABEL: int_va_arg: -; ASM32: # %bb.0: # %entry -; ASM32-NEXT: addi 11, 1, 28 -; ASM32-NEXT: stw 4, 28(1) -; ASM32-NEXT: addi 4, 1, 32 -; ASM32-NEXT: stw 6, 36(1) -; ASM32-NEXT: stw 11, -4(1) -; ASM32-NEXT: stw 11, -8(1) -; ASM32-NEXT: stw 4, -4(1) -; ASM32-NEXT: lwz 6, 28(1) -; ASM32-NEXT: stw 4, -8(1) -; ASM32-NEXT: add 3, 6, 3 -; ASM32-NEXT: lwz 4, 28(1) -; ASM32-NEXT: slwi 4, 4, 1 -; ASM32-NEXT: stw 7, 40(1) -; ASM32-NEXT: add 3, 3, 4 -; ASM32-NEXT: stw 8, 44(1) -; ASM32-NEXT: stw 9, 48(1) -; ASM32-NEXT: stw 10, 52(1) -; ASM32-NEXT: stw 5, 32(1) -; ASM32-NEXT: blr +; CHECK-LABEL: int_va_arg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi 11, 1, 28 +; CHECK-NEXT: stw 4, 28(1) +; CHECK-NEXT: addi 4, 1, 32 +; CHECK-NEXT: stw 6, 36(1) +; CHECK-NEXT: stw 11, -4(1) +; CHECK-NEXT: stw 11, -8(1) +; CHECK-NEXT: stw 4, -4(1) +; CHECK-NEXT: lwz 6, 28(1) +; CHECK-NEXT: stw 4, -8(1) +; CHECK-NEXT: add 3, 6, 3 +; CHECK-NEXT: lwz 4, 28(1) +; CHECK-NEXT: slwi 4, 4, 1 +; CHECK-NEXT: stw 7, 40(1) +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: stw 8, 44(1) +; CHECK-NEXT: stw 9, 48(1) +; CHECK-NEXT: stw 10, 52(1) +; CHECK-NEXT: stw 5, 32(1) +; CHECK-NEXT: blr entry: %arg1 = alloca ptr, align 4 %arg2 = alloca ptr, align 4 @@ -49,45 +48,6 @@ entry: ret i32 %add4 } -; 32BIT-LABEL: name: int_va_arg -; 32BIT-LABEL: liveins: -; 
32BIT-DAG: - { reg: '$r3', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r4', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r5', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r6', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r7', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r8', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r9', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r10', virtual-reg: '' } - -; 32BIT-LABEL: fixedStack: -; 32BIT-DAG: - { id: 0, type: default, offset: 28, size: 4 - -; 32BIT-LABEL: stack: -; 32BIT-DAG: - { id: 0, name: arg1, type: default, offset: 0, size: 4 -; 32BIT-DAG: - { id: 1, name: arg2, type: default, offset: 0, size: 4 - -; 32BIT-LABEL: body: | -; 32BIT-DAG: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 -; 32BIT-DAG: STW killed renamable $r4, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0) -; 32BIT-DAG: STW killed renamable $r5, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) -; 32BIT-DAG: STW killed renamable $r6, 8, %fixed-stack.0 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r7, 12, %fixed-stack.0 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r8, 16, %fixed-stack.0 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r9, 20, %fixed-stack.0 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r10, 24, %fixed-stack.0 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r4, 0, %stack.1.arg2 :: (store (s32) into %ir.arg2) -; 32BIT-DAG: renamable $r4 = ADDI %fixed-stack.0, 4 -; 32BIT-DAG: STW killed renamable $r11, 0, %stack.1.arg2 :: (store (s32) into %ir.arg2) -; 32BIT-DAG: renamable $r11 = ADDI %fixed-stack.0, 0 -; 32BIT-DAG: STW renamable $r11, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) -; 32BIT-DAG: STW renamable $r4, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) -; 32BIT-DAG: renamable $r6 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.argp.cur) -; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.argp.cur2) -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r6, killed renamable 
$r3 -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 -; 32BIT-DAG: BLR implicit $lr, implicit $rm, implicit $r3 - declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) declare void @llvm.va_start(ptr) declare void @llvm.va_copy(ptr, ptr) @@ -95,24 +55,24 @@ declare void @llvm.va_end(ptr) declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) define i32 @int_stack_va_arg(i32 %one, i32 %two, i32 %three, i32 %four, i32 %five, i32 %six, i32 %seven, i32 %eight, ...) local_unnamed_addr { -; ASM32-LABEL: int_stack_va_arg: -; ASM32: # %bb.0: # %entry -; ASM32-NEXT: add 3, 4, 3 -; ASM32-NEXT: lwz 4, 56(1) -; ASM32-NEXT: li 11, 4 -; ASM32-NEXT: add 3, 3, 5 -; ASM32-NEXT: addi 12, 1, 56 -; ASM32-NEXT: add 3, 3, 6 -; ASM32-NEXT: rlwimi 12, 11, 0, 29, 29 -; ASM32-NEXT: stw 12, -4(1) -; ASM32-NEXT: add 3, 3, 7 -; ASM32-NEXT: add 3, 3, 8 -; ASM32-NEXT: add 3, 3, 9 -; ASM32-NEXT: add 3, 3, 10 -; ASM32-NEXT: add 3, 3, 4 -; ASM32-NEXT: slwi 4, 4, 1 -; ASM32-NEXT: add 3, 3, 4 -; ASM32-NEXT: blr +; CHECK-LABEL: int_stack_va_arg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: add 3, 4, 3 +; CHECK-NEXT: lwz 4, 56(1) +; CHECK-NEXT: li 11, 4 +; CHECK-NEXT: add 3, 3, 5 +; CHECK-NEXT: addi 12, 1, 56 +; CHECK-NEXT: add 3, 3, 6 +; CHECK-NEXT: rlwimi 12, 11, 0, 29, 29 +; CHECK-NEXT: stw 12, -4(1) +; CHECK-NEXT: add 3, 3, 7 +; CHECK-NEXT: add 3, 3, 8 +; CHECK-NEXT: add 3, 3, 9 +; CHECK-NEXT: add 3, 3, 10 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: slwi 4, 4, 1 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: blr entry: %arg1 = alloca ptr, align 4 %arg2 = alloca ptr, align 4 @@ -145,63 +105,28 @@ entry: ret i32 %add11 } -; 32BIT-LABEL: name: int_stack_va_arg -; 32BIT-LABEL: liveins: -; 32BIT-DAG: - { reg: '$r3', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r4', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r5', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r6', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r7', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r8', 
virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r9', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r10', virtual-reg: '' } - -; 32BIT-LABEL: fixedStack: -; 32BIT-DAG: - { id: 0, type: default, offset: 56, size: 4 - -; 32BIT-LABEL: stack: -; 32BIT-DAG: - { id: 0, name: arg1, type: default, offset: 0, size: 4 -; 32BIT-DAG: - { id: 1, name: arg2, type: default, offset: 0, size: 4 - -; 32BIT-LABEL: body: | -; 32BIT-DAG: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r5 -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r6 -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r7 -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r8 -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r9 -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r10 -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r4, killed renamable $r3 -; 32BIT-DAG: renamable $r4 = ADDI %fixed-stack.0, 0 -; 32BIT-DAG: STW killed renamable $r4, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r4 -; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.argp.cur9, align 8) -; 32BIT-DAG: renamable $r11 = LI 4 -; 32BIT-DAG: BLR implicit $lr, implicit $rm, implicit $r3 - define double @double_va_arg(double %a, ...) 
local_unnamed_addr { -; ASM32-LABEL: double_va_arg: -; ASM32: # %bb.0: # %entry -; ASM32-NEXT: stw 5, -16(1) -; ASM32-NEXT: addi 3, 1, 32 -; ASM32-NEXT: stw 6, -12(1) -; ASM32-NEXT: lfd 0, -16(1) -; ASM32-NEXT: stw 5, -24(1) -; ASM32-NEXT: fadd 0, 0, 1 -; ASM32-NEXT: stw 6, -20(1) -; ASM32-NEXT: lfd 1, -24(1) -; ASM32-NEXT: fadd 1, 1, 1 -; ASM32-NEXT: stw 7, 40(1) -; ASM32-NEXT: fadd 1, 0, 1 -; ASM32-NEXT: stw 5, 32(1) -; ASM32-NEXT: stw 6, 36(1) -; ASM32-NEXT: stw 8, 44(1) -; ASM32-NEXT: stw 9, 48(1) -; ASM32-NEXT: stw 10, 52(1) -; ASM32-NEXT: stw 3, -4(1) -; ASM32-NEXT: stw 3, -8(1) -; ASM32-NEXT: blr +; CHECK-LABEL: double_va_arg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stw 5, -16(1) +; CHECK-NEXT: addi 3, 1, 32 +; CHECK-NEXT: stw 6, -12(1) +; CHECK-NEXT: lfd 0, -16(1) +; CHECK-NEXT: stw 5, -24(1) +; CHECK-NEXT: fadd 0, 0, 1 +; CHECK-NEXT: stw 6, -20(1) +; CHECK-NEXT: lfd 1, -24(1) +; CHECK-NEXT: fadd 1, 1, 1 +; CHECK-NEXT: stw 7, 40(1) +; CHECK-NEXT: fadd 1, 0, 1 +; CHECK-NEXT: stw 5, 32(1) +; CHECK-NEXT: stw 6, 36(1) +; CHECK-NEXT: stw 8, 44(1) +; CHECK-NEXT: stw 9, 48(1) +; CHECK-NEXT: stw 10, 52(1) +; CHECK-NEXT: stw 3, -4(1) +; CHECK-NEXT: stw 3, -8(1) +; CHECK-NEXT: blr entry: %arg1 = alloca ptr, align 4 %arg2 = alloca ptr, align 4 @@ -227,74 +152,35 @@ entry: ret double %add4 } -; 32BIT-LABEL: name: double_va_arg -; 32BIT-LABEL: liveins: -; 32BIT-DAG: - { reg: '$f1', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r5', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r6', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r7', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r8', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r9', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$r10', virtual-reg: '' } - -; 32BIT-LABEL: fixedStack: -; 32BIT-DAG: - { id: 0, type: default, offset: 32, size: 4 - -; 32BIT-LABEL: stack: -; 32BIT-DAG: - { id: 0, name: arg1, type: default, offset: 0, size: 4 -; 32BIT-DAG: - { id: 1, name: arg2, type: default, offset: 0, size: 4 - -; 32BIT-LABEL: body: | -; 
32BIT-DAG: liveins: $f1, $r5, $r6, $r7, $r8, $r9, $r10 -; 32BIT-DAG: renamable $r3 = ADDI %fixed-stack.0, 0 -; 32BIT-DAG: STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32), align 8) -; 32BIT-DAG: STW renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16) -; 32BIT-DAG: STW renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4) -; 32BIT-DAG: STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32)) -; 32BIT-DAG: STW killed renamable $r9, 16, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 16, align 16) -; 32BIT-DAG: STW killed renamable $r10, 20, %fixed-stack.0 :: (store (s32)) -; 32BIT-DAG: STW renamable $r3, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) -; 32BIT-DAG: STW killed renamable $r3, 0, %stack.1.arg2 :: (store (s32) into %ir.arg2) -; 32BIT-DAG: STW renamable $r5, 0, %stack.2 :: (store (s32) into %stack.2, align 8) -; 32BIT-DAG: STW renamable $r6, 4, %stack.2 :: (store (s32) into %stack.2 + 4) -; 32BIT-DAG: renamable $f0 = LFD 0, %stack.2 :: (load (s64) from %stack.2) -; 32BIT-DAG: STW killed renamable $r5, 0, %stack.3 :: (store (s32) into %stack.3, align 8) -; 32BIT-DAG: STW killed renamable $r6, 4, %stack.3 :: (store (s32) into %stack.3 + 4) -; 32BIT-DAG: renamable $f2 = LFD 0, %stack.3 :: (load (s64) from %stack.3) -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm -; 32BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f2, renamable $f2, implicit $rm -; 32BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm -; 32BIT-DAG: BLR implicit $lr, implicit $rm, implicit $f1 - define double @double_stack_va_arg(double %one, double %two, double %three, double %four, double %five, double %six, double %seven, double %eight, double %nine, double %ten, double %eleven, double %twelve, double %thirteen, ...) 
local_unnamed_addr { -; ASM32-LABEL: double_stack_va_arg: -; ASM32: # %bb.0: # %entry -; ASM32-NEXT: fadd 0, 1, 2 -; ASM32-NEXT: addi 3, 1, 128 -; ASM32-NEXT: lwz 4, 132(1) -; ASM32-NEXT: fadd 0, 0, 3 -; ASM32-NEXT: stw 3, -4(1) -; ASM32-NEXT: fadd 0, 0, 4 -; ASM32-NEXT: lwz 3, 128(1) -; ASM32-NEXT: fadd 0, 0, 5 -; ASM32-NEXT: stw 3, -16(1) -; ASM32-NEXT: fadd 0, 0, 6 -; ASM32-NEXT: stw 4, -12(1) -; ASM32-NEXT: fadd 0, 0, 7 -; ASM32-NEXT: lfd 1, -16(1) -; ASM32-NEXT: fadd 0, 0, 8 -; ASM32-NEXT: stw 3, -24(1) -; ASM32-NEXT: fadd 0, 0, 9 -; ASM32-NEXT: stw 4, -20(1) -; ASM32-NEXT: fadd 0, 0, 10 -; ASM32-NEXT: fadd 0, 0, 11 -; ASM32-NEXT: fadd 0, 0, 12 -; ASM32-NEXT: fadd 0, 0, 13 -; ASM32-NEXT: fadd 0, 0, 1 -; ASM32-NEXT: lfd 1, -24(1) -; ASM32-NEXT: fadd 1, 1, 1 -; ASM32-NEXT: fadd 1, 0, 1 -; ASM32-NEXT: blr +; CHECK-LABEL: double_stack_va_arg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fadd 0, 1, 2 +; CHECK-NEXT: addi 3, 1, 128 +; CHECK-NEXT: lwz 4, 132(1) +; CHECK-NEXT: fadd 0, 0, 3 +; CHECK-NEXT: stw 3, -4(1) +; CHECK-NEXT: fadd 0, 0, 4 +; CHECK-NEXT: lwz 3, 128(1) +; CHECK-NEXT: fadd 0, 0, 5 +; CHECK-NEXT: stw 3, -16(1) +; CHECK-NEXT: fadd 0, 0, 6 +; CHECK-NEXT: stw 4, -12(1) +; CHECK-NEXT: fadd 0, 0, 7 +; CHECK-NEXT: lfd 1, -16(1) +; CHECK-NEXT: fadd 0, 0, 8 +; CHECK-NEXT: stw 3, -24(1) +; CHECK-NEXT: fadd 0, 0, 9 +; CHECK-NEXT: stw 4, -20(1) +; CHECK-NEXT: fadd 0, 0, 10 +; CHECK-NEXT: fadd 0, 0, 11 +; CHECK-NEXT: fadd 0, 0, 12 +; CHECK-NEXT: fadd 0, 0, 13 +; CHECK-NEXT: fadd 0, 0, 1 +; CHECK-NEXT: lfd 1, -24(1) +; CHECK-NEXT: fadd 1, 1, 1 +; CHECK-NEXT: fadd 1, 0, 1 +; CHECK-NEXT: blr entry: %arg1 = alloca ptr, align 4 %arg2 = alloca ptr, align 4 @@ -325,57 +211,3 @@ entry: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %arg1) ret double %add16 } - -; 32BIT-LABEL: name: double_stack_va_arg -; 32BIT-LABEL: liveins: -; 32BIT-DAG: - { reg: '$f1', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f2', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f3', virtual-reg: '' } 
-; 32BIT-DAG: - { reg: '$f4', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f5', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f6', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f7', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f8', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f9', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f10', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f11', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f12', virtual-reg: '' } -; 32BIT-DAG: - { reg: '$f13', virtual-reg: '' } - -; 32BIT-LABEL: fixedStack: -; 32BIT-DAG: - { id: 0, type: default, offset: 128, size: 4 - -; 32BIT-LABEL: stack: -; 32BIT-DAG: - { id: 0, name: arg1, type: default, offset: 0, size: 4, alignment: 4, -; 32BIT-DAG: - { id: 1, name: arg2, type: default, offset: 0, size: 4, alignment: 4, -; 32BIT-DAG: - { id: 2, name: '', type: default, offset: 0, size: 8, alignment: 8, -; 32BIT-DAG: - { id: 3, name: '', type: default, offset: 0, size: 8, alignment: 8, - -; 32BIT-LABEL: body: | -; 32BIT-DAG: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 -; 32BIT-DAG: renamable $r3 = ADDI %fixed-stack.0, 0 -; 32BIT-DAG: STW killed renamable $r3, 0, %stack.0.arg1 :: (store (s32) into %ir.arg1) -; 32BIT-DAG: renamable $r3 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.argp.cur142, align 16) -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f3, implicit $rm -; 32BIT-DAG: STW renamable $r3, 0, %stack.2 :: (store (s32) into %stack.2, align 8) -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f4, implicit $rm -; 32BIT-DAG: renamable $r4 = LWZ 4, %fixed-stack.0 :: (load (s32) from %ir.argp.cur142 + 4) -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f5, implicit $rm -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f6, implicit $rm -; 
32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f7, implicit $rm -; 32BIT-DAG: STW renamable $r4, 4, %stack.2 :: (store (s32) into %stack.2 + 4) -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f8, implicit $rm -; 32BIT-DAG: renamable $f1 = LFD 0, %stack.2 :: (load (s64) from %stack.2) -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f9, implicit $rm -; 32BIT-DAG: STW killed renamable $r3, 0, %stack.3 :: (store (s32) into %stack.3, align 8) -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f10, implicit $rm -; 32BIT-DAG: STW killed renamable $r4, 4, %stack.3 :: (store (s32) into %stack.3 + 4) -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f11, implicit $rm -; 32BIT-DAG: renamable $f2 = LFD 0, %stack.3 :: (load (s64) from %stack.3) -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f12, implicit $rm -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f13, implicit $rm -; 32BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm -; 32BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f2, renamable $f2, implicit $rm -; 32BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm -; 32BIT-DAG: BLR implicit $lr, implicit $rm, implicit $f1 diff --git a/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg-mir.ll b/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg-mir.ll new file mode 100644 index 000000000000..4d7c6fb6fa31 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg-mir.ll @@ -0,0 +1,192 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -O2 -mtriple powerpc64-ibm-aix-xcoff -mcpu=ppc -stop-after=machine-cp -verify-machineinstrs < %s | FileCheck %s + +define i32 
@int_va_arg(i32 %a, ...) local_unnamed_addr { + ; CHECK-LABEL: name: int_va_arg + ; CHECK: bb.0.entry: + ; CHECK-NEXT: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x11 = ADDI8 %fixed-stack.0, 0 + ; CHECK-NEXT: STD killed renamable $x6, 16, %fixed-stack.0 :: (store (s64)) + ; CHECK-NEXT: STD killed renamable $x7, 24, %fixed-stack.0 :: (store (s64)) + ; CHECK-NEXT: STD killed renamable $x8, 32, %fixed-stack.0 :: (store (s64)) + ; CHECK-NEXT: STD killed renamable $x9, 40, %fixed-stack.0 :: (store (s64)) + ; CHECK-NEXT: STD killed renamable $x10, 48, %fixed-stack.0 :: (store (s64)) + ; CHECK-NEXT: STD renamable $x11, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2) + ; CHECK-NEXT: renamable $x6 = LD 0, %stack.1.arg2 :: (load (s64) from %ir.arg2) + ; CHECK-NEXT: renamable $x7 = disjoint ADDI8 %fixed-stack.0, 4 + ; CHECK-NEXT: renamable $r8 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0, align 8) + ; CHECK-NEXT: renamable $x9 = ADDI8 renamable $x6, 4 + ; CHECK-NEXT: STD killed renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) + ; CHECK-NEXT: STD killed renamable $x5, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8) + ; CHECK-NEXT: STD killed renamable $x11, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1) + ; CHECK-NEXT: STD killed renamable $x7, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1) + ; CHECK-NEXT: STD killed renamable $x9, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2) + ; CHECK-NEXT: renamable $r4 = LWZ 0, killed renamable $x6 :: (load (s32)) + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r8, renamable $r3, implicit killed $x3 + ; CHECK-NEXT: renamable $r4 = RLWINM killed renamable $r4, 1, 0, 30 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4, implicit-def $x3 + ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 +entry: + %arg1 = alloca ptr, align 8 + %arg2 = alloca ptr, align 8 + call void 
@llvm.lifetime.start.p0(i64 8, ptr nonnull %arg1) + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg2) + call void @llvm.va_start(ptr nonnull %arg1) + call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) + %0 = va_arg ptr %arg1, i32 + %add = add nsw i32 %0, %a + %1 = va_arg ptr %arg2, i32 + %mul = shl i32 %1, 1 + %add3 = add nsw i32 %add, %mul + call void @llvm.va_end(ptr nonnull %arg1) + call void @llvm.va_end(ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg1) + ret i32 %add3 +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) +declare void @llvm.va_start(ptr) +declare void @llvm.va_copy(ptr, ptr) +declare void @llvm.va_end(ptr) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +define i32 @int_stack_va_arg(i32 %one, i32 %two, i32 %three, i32 %four, i32 %five, i32 %six, i32 %seven, i32 %eight, ...) local_unnamed_addr { + ; CHECK-LABEL: name: int_stack_va_arg + ; CHECK: bb.0.entry: + ; CHECK-NEXT: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $r11 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0, align 16) + ; CHECK-NEXT: renamable $r3 = nsw ADD4 renamable $r4, renamable $r3, implicit killed $x3, implicit killed $x4 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r6, implicit killed $x6 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r7, implicit killed $x7 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r8, implicit killed $x8 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r9, implicit killed $x9 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r10, implicit killed $x10 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable 
$r11 + ; CHECK-NEXT: renamable $r4 = RLWINM killed renamable $r11, 1, 0, 30 + ; CHECK-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4, implicit-def $x3 + ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 +entry: + %arg1 = alloca ptr, align 8 + %arg2 = alloca ptr, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg1) + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg2) + call void @llvm.va_start(ptr nonnull %arg1) + call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) + %add = add nsw i32 %two, %one + %add2 = add nsw i32 %add, %three + %add3 = add nsw i32 %add2, %four + %add4 = add nsw i32 %add3, %five + %add5 = add nsw i32 %add4, %six + %add6 = add nsw i32 %add5, %seven + %add7 = add nsw i32 %add6, %eight + %0 = va_arg ptr %arg1, i32 + %add8 = add nsw i32 %add7, %0 + %1 = va_arg ptr %arg2, i32 + %mul = shl i32 %1, 1 + %add10 = add nsw i32 %add8, %mul + call void @llvm.va_end(ptr nonnull %arg1) + call void @llvm.va_end(ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg1) + ret i32 %add10 +} + +define double @double_va_arg(double %a, ...) 
local_unnamed_addr { + ; CHECK-LABEL: name: double_va_arg + ; CHECK: bb.0.entry: + ; CHECK-NEXT: liveins: $f1, $x4, $x5, $x6, $x7, $x8, $x9, $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x3 = ADDI8 %fixed-stack.0, 0 + ; CHECK-NEXT: STD killed renamable $x6, 16, %fixed-stack.0 :: (store (s64)) + ; CHECK-NEXT: STD killed renamable $x7, 24, %fixed-stack.0 :: (store (s64)) + ; CHECK-NEXT: STD killed renamable $x8, 32, %fixed-stack.0 :: (store (s64)) + ; CHECK-NEXT: STD killed renamable $x9, 40, %fixed-stack.0 :: (store (s64)) + ; CHECK-NEXT: STD killed renamable $x10, 48, %fixed-stack.0 :: (store (s64)) + ; CHECK-NEXT: STD renamable $x3, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2) + ; CHECK-NEXT: renamable $x6 = LD 0, %stack.1.arg2 :: (load (s64) from %ir.arg2) + ; CHECK-NEXT: renamable $x7 = ADDI8 %fixed-stack.0, 8 + ; CHECK-NEXT: STD killed renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) + ; CHECK-NEXT: STD killed renamable $x3, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1) + ; CHECK-NEXT: STD killed renamable $x7, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1) + ; CHECK-NEXT: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64)) + ; CHECK-NEXT: renamable $x3 = ADDI8 renamable $x6, 8 + ; CHECK-NEXT: STD killed renamable $x5, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8) + ; CHECK-NEXT: STD killed renamable $x3, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2) + ; CHECK-NEXT: renamable $f2 = LFD 0, killed renamable $x6 :: (load (s64)) + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f2, renamable $f2, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm + ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $f1 +entry: + %arg1 = alloca ptr, align 8 + %arg2 = alloca ptr, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull 
%arg1) + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg2) + call void @llvm.va_start(ptr nonnull %arg1) + call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) + %0 = va_arg ptr %arg1, double + %add = fadd double %0, %a + %1 = va_arg ptr %arg2, double + %mul = fmul double %1, 2.000000e+00 + %add3 = fadd double %add, %mul + call void @llvm.va_end(ptr nonnull %arg1) + call void @llvm.va_end(ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg1) + ret double %add3 +} + +define double @double_stack_va_arg(double %one, double %two, double %three, double %four, double %five, double %six, double %seven, double %eight, double %nine, double %ten, double %eleven, double %twelve, double %thirteen, ...) local_unnamed_addr { + ; CHECK-LABEL: name: double_stack_va_arg + ; CHECK: bb.0.entry: + ; CHECK-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64)) + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f3, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f4, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f5, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f6, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f7, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f8, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f9, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed 
renamable $f10, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f11, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f12, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f13, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, renamable $f0, implicit $rm + ; CHECK-NEXT: renamable $f0 = nofpexcept FADD killed renamable $f0, renamable $f0, implicit $rm + ; CHECK-NEXT: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f0, implicit $rm + ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $f1 +entry: + %arg1 = alloca ptr, align 8 + %arg2 = alloca ptr, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg1) + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg2) + call void @llvm.va_start(ptr nonnull %arg1) + call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) + %add = fadd double %one, %two + %add2 = fadd double %add, %three + %add3 = fadd double %add2, %four + %add4 = fadd double %add3, %five + %add5 = fadd double %add4, %six + %add6 = fadd double %add5, %seven + %add7 = fadd double %add6, %eight + %add8 = fadd double %add7, %nine + %add9 = fadd double %add8, %ten + %add10 = fadd double %add9, %eleven + %add11 = fadd double %add10, %twelve + %add12 = fadd double %add11, %thirteen + %0 = va_arg ptr %arg1, double + %add13 = fadd double %add12, %0 + %1 = va_arg ptr %arg2, double + %mul = fmul double %1, 2.000000e+00 + %add15 = fadd double %add13, %mul + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg1) + ret double %add15 +} diff --git a/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll b/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll index 1b9c66ad0b23..87f46fe3aca8 100644 --- a/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll +++ 
b/llvm/test/CodeGen/PowerPC/aix64-cc-abi-vaarg.ll @@ -1,349 +1,185 @@ -; RUN: llc -O2 -mtriple powerpc64-ibm-aix-xcoff -mcpu=ppc -stop-after=machine-cp -verify-machineinstrs < %s | \ -; RUN: FileCheck --check-prefix=64BIT %s - +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -O2 -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \ -; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | \ -; RUN: FileCheck --check-prefix=ASM64 %s - - define i32 @int_va_arg(i32 %a, ...) local_unnamed_addr { - entry: - %arg1 = alloca ptr, align 8 - %arg2 = alloca ptr, align 8 - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg1) - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg2) - call void @llvm.va_start(ptr nonnull %arg1) - call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) - %0 = va_arg ptr %arg1, i32 - %add = add nsw i32 %0, %a - %1 = va_arg ptr %arg2, i32 - %mul = shl i32 %1, 1 - %add3 = add nsw i32 %add, %mul - call void @llvm.va_end(ptr nonnull %arg1) - call void @llvm.va_end(ptr nonnull %arg2) - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg2) - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg1) - ret i32 %add3 - } - - declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) - declare void @llvm.va_start(ptr) - declare void @llvm.va_copy(ptr, ptr) - declare void @llvm.va_end(ptr) - declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) - -; 64BIT-LABEL: name: int_va_arg -; 64BIT-LABEL: liveins: -; 64BIT-DAG: - { reg: '$x3', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x4', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x5', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x6', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x7', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x8', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x9', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x10', virtual-reg: '' } - -; 64BIT-LABEL: fixedStack: -; 64BIT-DAG: - { id: 0, type: default, offset: 56, size: 8 - -; 
64BIT-LABEL: stack: -; 64BIT-DAG: - { id: 0, name: arg1, type: default, offset: 0, size: 8 -; 64BIT-DAG: - { id: 1, name: arg2, type: default, offset: 0, size: 8 - -; 64BIT-LABEL: body: | -; 64BIT-DAG: bb.0.entry: -; 64BIT-DAG: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 -; 64BIT-DAG: STD killed renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) -; 64BIT-DAG: STD killed renamable $x5, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8) -; 64BIT-DAG: STD killed renamable $x6, 16, %fixed-stack.0 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x7, 24, %fixed-stack.0 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x8, 32, %fixed-stack.0 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x9, 40, %fixed-stack.0 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x10, 48, %fixed-stack.0 :: (store (s64)) -; 64BIT-DAG: renamable $x11 = ADDI8 %fixed-stack.0, 0 -; 64BIT-DAG: STD renamable $x11, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2) -; 64BIT-DAG: renamable $x6 = LD 0, %stack.1.arg2 :: (load (s64) from %ir.arg2) -; 64BIT-DAG: renamable $x9 = ADDI8 renamable $x6, 4 -; 64BIT-DAG: renamable $x7 = disjoint ADDI8 %fixed-stack.0, 4 -; 64BIT-DAG: renamable $r8 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0, align 8) -; 64BIT-DAG: STD killed renamable $x11, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1) -; 64BIT-DAG: STD killed renamable $x7, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1) -; 64BIT-DAG: STD killed renamable $x9, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2) -; 64BIT-DAG: renamable $r4 = LWZ 0, killed renamable $x6 :: (load (s32)) -; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r8, renamable $r3, implicit killed $x3 -; 64BIT-DAG: renamable $r4 = RLWINM killed renamable $r4, 1, 0, 30 -; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4, implicit-def $x3 -; 64BIT-DAG: BLR8 implicit $lr8, implicit $rm, implicit $x3 - -; ASM64-LABEL: .int_va_arg: -; ASM64-DAG: std 4, 
56(1) -; ASM64-DAG: addi 4, 1, 56 -; ASM64-DAG: std 4, -16(1) -; ASM64-DAG: std 4, -8(1) -; ASM64-DAG: ld 4, -16(1) -; ASM64-DAG: std 5, 64(1) -; ASM64-DAG: addi 5, 1, 60 -; ASM64-DAG: std 5, -8(1) -; ASM64-DAG: addi 5, 4, 4 -; ASM64-DAG: std 6, 72(1) -; ASM64-DAG: std 7, 80(1) -; ASM64-DAG: std 8, 88(1) -; ASM64-DAG: std 9, 96(1) -; ASM64-DAG: std 10, 104(1) -; ASM64-DAG: std 5, -16(1) -; ASM64-DAG: lwz 11, 56(1) -; ASM64-DAG: lwz 4, 0(4) -; ASM64-DAG: add 3, 11, 3 -; ASM64-DAG: slwi 4, 4, 1 -; ASM64-DAG: add 3, 3, 4 -; ASM64-DAG: blr - - define i32 @int_stack_va_arg(i32 %one, i32 %two, i32 %three, i32 %four, i32 %five, i32 %six, i32 %seven, i32 %eight, ...) local_unnamed_addr { - entry: - %arg1 = alloca ptr, align 8 - %arg2 = alloca ptr, align 8 - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg1) - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg2) - call void @llvm.va_start(ptr nonnull %arg1) - call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) - %add = add nsw i32 %two, %one - %add2 = add nsw i32 %add, %three - %add3 = add nsw i32 %add2, %four - %add4 = add nsw i32 %add3, %five - %add5 = add nsw i32 %add4, %six - %add6 = add nsw i32 %add5, %seven - %add7 = add nsw i32 %add6, %eight - %0 = va_arg ptr %arg1, i32 - %add8 = add nsw i32 %add7, %0 - %1 = va_arg ptr %arg2, i32 - %mul = shl i32 %1, 1 - %add10 = add nsw i32 %add8, %mul - call void @llvm.va_end(ptr nonnull %arg1) - call void @llvm.va_end(ptr nonnull %arg2) - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg2) - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg1) - ret i32 %add10 - } - -; 64BIT-LABEL: name: int_stack_va_arg -; 64BIT-LABEL: liveins: -; 64BIT-DAG: - { reg: '$x3', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x4', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x5', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x6', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x7', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x8', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x9', 
virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x10', virtual-reg: '' } - -; 64BIT-LABEL: fixedStack: -; 64BIT-DAG: - { id: 0, type: default, offset: 112, size: 8, alignment: 16, stack-id: default, - -; 64BIT-LABEL: stack: -; 64BIT-DAG: - { id: 0, name: arg1, type: default, offset: 0, size: 8, alignment: 8, -; 64BIT-DAG: - { id: 1, name: arg2, type: default, offset: 0, size: 8, alignment: 8, - -; 64BIT-LABEL: body: | -; 64BIT-DAG: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 -; 64BIT-DAG: renamable $r11 = LWZ 0, %fixed-stack.0 :: (load (s32) from %fixed-stack.0, align 16) -; 64BIT-DAG: renamable $r3 = nsw ADD4 renamable $r4, renamable $r3, implicit killed $x3, implicit killed $x4 -; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 -; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r6, implicit killed $x6 -; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r7, implicit killed $x7 -; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r8, implicit killed $x8 -; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r9, implicit killed $x9 -; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r10, implicit killed $x10 -; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r11 -; 64BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4, implicit-def $x3 -; 64BIT-DAG: BLR8 implicit $lr8, implicit $rm, implicit $x3 - -; ASM64-LABEL: .int_stack_va_arg: -; ASM64-DAG: add 3, 4, 3 -; ASM64-DAG: add 3, 3, 5 -; ASM64-DAG: add 3, 3, 6 -; ASM64-DAG: add 3, 3, 7 -; ASM64-DAG: add 3, 3, 8 -; ASM64-DAG: add 3, 3, 9 -; ASM64-DAG: add 3, 3, 10 -; ASM64-DAG: lwz 11, 112(1) -; ASM64-DAG: slwi 4, 11, 1 -; ASM64-DAG: add 3, 3, 11 -; ASM64-DAG: add 3, 3, 4 -; ASM64-DAG: blr - - define double @double_va_arg(double %a, ...) 
local_unnamed_addr { - entry: - %arg1 = alloca ptr, align 8 - %arg2 = alloca ptr, align 8 - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg1) - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg2) - call void @llvm.va_start(ptr nonnull %arg1) - call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) - %0 = va_arg ptr %arg1, double - %add = fadd double %0, %a - %1 = va_arg ptr %arg2, double - %mul = fmul double %1, 2.000000e+00 - %add3 = fadd double %add, %mul - call void @llvm.va_end(ptr nonnull %arg1) - call void @llvm.va_end(ptr nonnull %arg2) - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg2) - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg1) - ret double %add3 - } - -; 64BIT-LABEL: name: double_va_arg -; 64BIT-LABEL: liveins: -; 64BIT-DAG: - { reg: '$f1', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x4', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x5', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x6', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x7', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x8', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x9', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$x10', virtual-reg: '' } - -; 64BIT-LABEL: fixedStack: -; 64BIT-DAG: - { id: 0, type: default, offset: 56, size: 8 - -; 64BIT-LABEL: stack: -; 64BIT-DAG: - { id: 0, name: arg1, type: default, offset: 0, size: 8 -; 64BIT-DAG: - { id: 1, name: arg2, type: default, offset: 0, size: 8 - -; 64BIT-LABEL: body: | -; 64BIT-DAG: liveins: $f1, $x4, $x5, $x6, $x7, $x8, $x9, $x10 -; 64BIT-DAG: renamable $x3 = ADDI8 %fixed-stack.0, 0 -; 64BIT-DAG: STD killed renamable $x4, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0) -; 64BIT-DAG: STD killed renamable $x5, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8) -; 64BIT-DAG: STD killed renamable $x6, 16, %fixed-stack.0 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x7, 24, %fixed-stack.0 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x8, 32, %fixed-stack.0 :: (store (s64)) -; 
64BIT-DAG: STD killed renamable $x9, 40, %fixed-stack.0 :: (store (s64)) -; 64BIT-DAG: STD killed renamable $x10, 48, %fixed-stack.0 :: (store (s64)) -; 64BIT-DAG: STD renamable $x3, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2) -; 64BIT-DAG: renamable $x6 = LD 0, %stack.1.arg2 :: (load (s64) from %ir.arg2) -; 64BIT-DAG: renamable $x7 = ADDI8 %fixed-stack.0, 8 -; 64BIT-DAG: STD killed renamable $x3, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1) -; 64BIT-DAG: STD killed renamable $x7, 0, %stack.0.arg1 :: (store (s64) into %ir.arg1) -; 64BIT-DAG: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64)) -; 64BIT-DAG: renamable $x3 = ADDI8 renamable $x6, 8 -; 64BIT-DAG: STD killed renamable $x3, 0, %stack.1.arg2 :: (store (s64) into %ir.arg2) -; 64BIT-DAG: renamable $f2 = LFD 0, killed renamable $x6 :: (load (s64)) -; 64BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f2, renamable $f2, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm -; 64BIT-DAG: BLR8 implicit $lr8, implicit $rm, implicit $f1 - -; ASM64-LABEL: .double_va_arg: -; ASM64-DAG: addi 3, 1, 56 -; ASM64-DAG: std 4, 56(1) -; ASM64-DAG: std 3, -8(1) -; ASM64-DAG: std 3, -16(1) -; ASM64-DAG: addi 3, 1, 64 -; ASM64-DAG: std 3, -8(1) -; ASM64-DAG: ld 3, -16(1) -; ASM64-DAG: lfd 0, 56(1) -; ASM64-DAG: addi 4, 3, 8 -; ASM64-DAG: std 5, 64(1) -; ASM64-DAG: fadd 0, 0, 1 -; ASM64-DAG: std 6, 72(1) -; ASM64-DAG: std 7, 80(1) -; ASM64-DAG: std 8, 88(1) -; ASM64-DAG: std 9, 96(1) -; ASM64-DAG: std 10, 104(1) -; ASM64-DAG: std 4, -16(1) -; ASM64-DAG: lfd 1, 0(3) -; ASM64-DAG: fadd 1, 1, 1 -; ASM64-DAG: fadd 1, 0, 1 -; ASM64-DAG: blr - - define double @double_stack_va_arg(double %one, double %two, double %three, double %four, double %five, double %six, double %seven, double %eight, double %nine, double %ten, double %eleven, double %twelve, 
double %thirteen, ...) local_unnamed_addr { - entry: - %arg1 = alloca ptr, align 8 - %arg2 = alloca ptr, align 8 - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg1) - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg2) - call void @llvm.va_start(ptr nonnull %arg1) - call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) - %add = fadd double %one, %two - %add2 = fadd double %add, %three - %add3 = fadd double %add2, %four - %add4 = fadd double %add3, %five - %add5 = fadd double %add4, %six - %add6 = fadd double %add5, %seven - %add7 = fadd double %add6, %eight - %add8 = fadd double %add7, %nine - %add9 = fadd double %add8, %ten - %add10 = fadd double %add9, %eleven - %add11 = fadd double %add10, %twelve - %add12 = fadd double %add11, %thirteen - %0 = va_arg ptr %arg1, double - %add13 = fadd double %add12, %0 - %1 = va_arg ptr %arg2, double - %mul = fmul double %1, 2.000000e+00 - %add15 = fadd double %add13, %mul - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg2) - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg1) - ret double %add15 - } - - -; 64BIT-LABEL: name: double_stack_va_arg -; 64BIT-LABEL: liveins: -; 64BIT-DAG: - { reg: '$f1', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f2', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f3', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f4', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f5', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f6', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f7', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f8', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f9', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f10', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f11', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f12', virtual-reg: '' } -; 64BIT-DAG: - { reg: '$f13', virtual-reg: '' } - -; 64BIT-LABEL: fixedStack: -; 64BIT-DAG: - { id: 0, type: default, offset: 152, size: 8 - -; 64BIT-LABEL: stack: -; 64BIT-DAG: - { id: 0, name: arg1, type: default, offset: 0, size: 8 -; 64BIT-DAG: - { 
id: 1, name: arg2, type: default, offset: 0, size: 8 - -; 64BIT-LABEL: body: | -; 64BIT-DAG: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 -; 64BIT-DAG: renamable $f0 = LFD 0, %fixed-stack.0 :: (load (s64)) -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f3, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f4, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f5, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f6, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f7, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f8, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f9, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f10, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f11, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f12, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f13, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, renamable $f0, implicit $rm -; 64BIT-DAG: renamable $f0 = nofpexcept FADD killed renamable $f0, renamable $f0, implicit $rm -; 64BIT-DAG: renamable $f1 = nofpexcept FADD killed renamable $f1, killed renamable $f0, implicit $rm -; 64BIT-DAG: BLR8 implicit $lr8, implicit $rm, implicit $f1 - -; ASM64-LABEL: .double_stack_va_arg: -; ASM64-DAG: fadd 1, 1, 2 -; ASM64-DAG: fadd 1, 1, 3 -; ASM64-DAG: fadd 1, 1, 4 -; ASM64-DAG: fadd 1, 1, 5 -; ASM64-DAG: 
fadd 1, 1, 6 -; ASM64-DAG: fadd 1, 1, 7 -; ASM64-DAG: fadd 1, 1, 8 -; ASM64-DAG: fadd 1, 1, 9 -; ASM64-DAG: fadd 1, 1, 10 -; ASM64-DAG: fadd 1, 1, 11 -; ASM64-DAG: fadd 1, 1, 12 -; ASM64-DAG: fadd 1, 1, 13 -; ASM64-DAG: lfd 0, 152(1) -; ASM64-DAG: fadd 1, 1, 0 -; ASM64-DAG: fadd 0, 0, 0 -; ASM64-DAG: fadd 1, 1, 0 -; ASM64-DAG: blr +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s + +define i32 @int_va_arg(i32 %a, ...) local_unnamed_addr { +; CHECK-LABEL: int_va_arg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: std 4, 56(1) +; CHECK-NEXT: addi 4, 1, 56 +; CHECK-NEXT: std 4, -16(1) +; CHECK-NEXT: std 4, -8(1) +; CHECK-NEXT: ld 4, -16(1) +; CHECK-NEXT: std 5, 64(1) +; CHECK-NEXT: addi 5, 1, 60 +; CHECK-NEXT: std 5, -8(1) +; CHECK-NEXT: addi 5, 4, 4 +; CHECK-NEXT: std 6, 72(1) +; CHECK-NEXT: std 7, 80(1) +; CHECK-NEXT: std 8, 88(1) +; CHECK-NEXT: std 9, 96(1) +; CHECK-NEXT: std 10, 104(1) +; CHECK-NEXT: std 5, -16(1) +; CHECK-NEXT: lwz 11, 56(1) +; CHECK-NEXT: lwz 4, 0(4) +; CHECK-NEXT: add 3, 11, 3 +; CHECK-NEXT: slwi 4, 4, 1 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: blr +entry: + %arg1 = alloca ptr, align 8 + %arg2 = alloca ptr, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg1) + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg2) + call void @llvm.va_start(ptr nonnull %arg1) + call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) + %0 = va_arg ptr %arg1, i32 + %add = add nsw i32 %0, %a + %1 = va_arg ptr %arg2, i32 + %mul = shl i32 %1, 1 + %add3 = add nsw i32 %add, %mul + call void @llvm.va_end(ptr nonnull %arg1) + call void @llvm.va_end(ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg1) + ret i32 %add3 +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) +declare void @llvm.va_start(ptr) +declare void @llvm.va_copy(ptr, ptr) +declare void @llvm.va_end(ptr) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) 
+ +define i32 @int_stack_va_arg(i32 %one, i32 %two, i32 %three, i32 %four, i32 %five, i32 %six, i32 %seven, i32 %eight, ...) local_unnamed_addr { +; CHECK-LABEL: int_stack_va_arg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: add 3, 4, 3 +; CHECK-NEXT: lwz 11, 112(1) +; CHECK-NEXT: add 3, 3, 5 +; CHECK-NEXT: add 3, 3, 6 +; CHECK-NEXT: add 3, 3, 7 +; CHECK-NEXT: add 3, 3, 8 +; CHECK-NEXT: add 3, 3, 9 +; CHECK-NEXT: add 3, 3, 10 +; CHECK-NEXT: add 3, 3, 11 +; CHECK-NEXT: slwi 4, 11, 1 +; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: blr +entry: + %arg1 = alloca ptr, align 8 + %arg2 = alloca ptr, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg1) + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg2) + call void @llvm.va_start(ptr nonnull %arg1) + call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) + %add = add nsw i32 %two, %one + %add2 = add nsw i32 %add, %three + %add3 = add nsw i32 %add2, %four + %add4 = add nsw i32 %add3, %five + %add5 = add nsw i32 %add4, %six + %add6 = add nsw i32 %add5, %seven + %add7 = add nsw i32 %add6, %eight + %0 = va_arg ptr %arg1, i32 + %add8 = add nsw i32 %add7, %0 + %1 = va_arg ptr %arg2, i32 + %mul = shl i32 %1, 1 + %add10 = add nsw i32 %add8, %mul + call void @llvm.va_end(ptr nonnull %arg1) + call void @llvm.va_end(ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg1) + ret i32 %add10 +} + +define double @double_va_arg(double %a, ...) 
local_unnamed_addr { +; CHECK-LABEL: double_va_arg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi 3, 1, 56 +; CHECK-NEXT: std 4, 56(1) +; CHECK-NEXT: std 3, -8(1) +; CHECK-NEXT: std 3, -16(1) +; CHECK-NEXT: addi 3, 1, 64 +; CHECK-NEXT: std 3, -8(1) +; CHECK-NEXT: ld 3, -16(1) +; CHECK-NEXT: lfd 0, 56(1) +; CHECK-NEXT: addi 4, 3, 8 +; CHECK-NEXT: std 5, 64(1) +; CHECK-NEXT: fadd 0, 0, 1 +; CHECK-NEXT: std 6, 72(1) +; CHECK-NEXT: std 7, 80(1) +; CHECK-NEXT: std 8, 88(1) +; CHECK-NEXT: std 9, 96(1) +; CHECK-NEXT: std 10, 104(1) +; CHECK-NEXT: std 4, -16(1) +; CHECK-NEXT: lfd 1, 0(3) +; CHECK-NEXT: fadd 1, 1, 1 +; CHECK-NEXT: fadd 1, 0, 1 +; CHECK-NEXT: blr +entry: + %arg1 = alloca ptr, align 8 + %arg2 = alloca ptr, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg1) + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg2) + call void @llvm.va_start(ptr nonnull %arg1) + call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) + %0 = va_arg ptr %arg1, double + %add = fadd double %0, %a + %1 = va_arg ptr %arg2, double + %mul = fmul double %1, 2.000000e+00 + %add3 = fadd double %add, %mul + call void @llvm.va_end(ptr nonnull %arg1) + call void @llvm.va_end(ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg1) + ret double %add3 +} + +define double @double_stack_va_arg(double %one, double %two, double %three, double %four, double %five, double %six, double %seven, double %eight, double %nine, double %ten, double %eleven, double %twelve, double %thirteen, ...) 
local_unnamed_addr { +; CHECK-LABEL: double_stack_va_arg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fadd 1, 1, 2 +; CHECK-NEXT: lfd 0, 152(1) +; CHECK-NEXT: fadd 1, 1, 3 +; CHECK-NEXT: fadd 1, 1, 4 +; CHECK-NEXT: fadd 1, 1, 5 +; CHECK-NEXT: fadd 1, 1, 6 +; CHECK-NEXT: fadd 1, 1, 7 +; CHECK-NEXT: fadd 1, 1, 8 +; CHECK-NEXT: fadd 1, 1, 9 +; CHECK-NEXT: fadd 1, 1, 10 +; CHECK-NEXT: fadd 1, 1, 11 +; CHECK-NEXT: fadd 1, 1, 12 +; CHECK-NEXT: fadd 1, 1, 13 +; CHECK-NEXT: fadd 1, 1, 0 +; CHECK-NEXT: fadd 0, 0, 0 +; CHECK-NEXT: fadd 1, 1, 0 +; CHECK-NEXT: blr +entry: + %arg1 = alloca ptr, align 8 + %arg2 = alloca ptr, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg1) + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %arg2) + call void @llvm.va_start(ptr nonnull %arg1) + call void @llvm.va_copy(ptr nonnull %arg2, ptr nonnull %arg1) + %add = fadd double %one, %two + %add2 = fadd double %add, %three + %add3 = fadd double %add2, %four + %add4 = fadd double %add3, %five + %add5 = fadd double %add4, %six + %add6 = fadd double %add5, %seven + %add7 = fadd double %add6, %eight + %add8 = fadd double %add7, %nine + %add9 = fadd double %add8, %ten + %add10 = fadd double %add9, %eleven + %add11 = fadd double %add10, %twelve + %add12 = fadd double %add11, %thirteen + %0 = va_arg ptr %arg1, double + %add13 = fadd double %add12, %0 + %1 = va_arg ptr %arg2, double + %mul = fmul double %1, 2.000000e+00 + %add15 = fadd double %add13, %mul + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg2) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %arg1) + ret double %add15 +} -- cgit v1.2.3 From acde20b5605f3a3a8da2217e4526fc045e6603ed Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 18 Jun 2025 06:39:52 -0400 Subject: [HLSL][SPIRV] Add vk::constant_id attribute. 
(#143544) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vk::constant_id attribute is used to indicate that a global const variable represents a specialization constant in SPIR-V. This PR adds this attribute to clang. The documentation for the attribute is [here](https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/SPIR-V.rst#specialization-constants). The strategy is to modify the initializer to get the value of a specialization constant for a builtin defined in the SPIR-V backend. Implements https://github.com/llvm/wg-hlsl/pull/287 Fixes https://github.com/llvm/llvm-project/issues/142448 --------- Co-authored-by: Nathan Gauër --- clang/include/clang/Basic/Attr.td | 8 + clang/include/clang/Basic/AttrDocs.td | 15 ++ clang/include/clang/Basic/Builtins.td | 13 ++ clang/include/clang/Basic/DiagnosticSemaKinds.td | 4 + clang/include/clang/Sema/SemaHLSL.h | 5 +- clang/lib/CodeGen/CGHLSLBuiltins.cpp | 74 ++++++++ clang/lib/CodeGen/CodeGenFunction.h | 6 + clang/lib/Sema/SemaDecl.cpp | 13 ++ clang/lib/Sema/SemaDeclAttr.cpp | 3 + clang/lib/Sema/SemaHLSL.cpp | 120 +++++++++++- clang/test/AST/HLSL/vk.spec-constant.usage.hlsl | 130 +++++++++++++ .../inline-spirv/SpirvType.alignment.hlsl | 16 -- clang/test/CodeGenHLSL/inline-spirv/SpirvType.hlsl | 68 ------- .../vk-features/SpirvType.alignment.hlsl | 16 ++ clang/test/CodeGenHLSL/vk-features/SpirvType.hlsl | 68 +++++++ .../CodeGenHLSL/vk-features/vk.spec-constant.hlsl | 210 +++++++++++++++++++++ clang/test/SemaHLSL/vk.spec-constant.error.hlsl | 37 ++++ 17 files changed, 720 insertions(+), 86 deletions(-) create mode 100644 clang/test/AST/HLSL/vk.spec-constant.usage.hlsl delete mode 100644 clang/test/CodeGenHLSL/inline-spirv/SpirvType.alignment.hlsl delete mode 100644 clang/test/CodeGenHLSL/inline-spirv/SpirvType.hlsl create mode 100644 clang/test/CodeGenHLSL/vk-features/SpirvType.alignment.hlsl create mode 100644 clang/test/CodeGenHLSL/vk-features/SpirvType.hlsl create 
mode 100644 clang/test/CodeGenHLSL/vk-features/vk.spec-constant.hlsl create mode 100644 clang/test/SemaHLSL/vk.spec-constant.error.hlsl diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index f113cd2ba2fb..27fea7dea0a5 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -5023,6 +5023,14 @@ def HLSLVkExtBuiltinInput : InheritableAttr { let Documentation = [HLSLVkExtBuiltinInputDocs]; } +def HLSLVkConstantId : InheritableAttr { + let Spellings = [CXX11<"vk", "constant_id">]; + let Args = [IntArgument<"Id">]; + let Subjects = SubjectList<[ExternalGlobalVar]>; + let LangOpts = [HLSL]; + let Documentation = [VkConstantIdDocs]; +} + def RandomizeLayout : InheritableAttr { let Spellings = [GCC<"randomize_layout">]; let Subjects = SubjectList<[Record]>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 6051e1fc4511..43442f177ab7 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -8252,6 +8252,21 @@ and https://microsoft.github.io/hlsl-specs/proposals/0013-wave-size-range.html }]; } +def VkConstantIdDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``vk::constant_id`` attribute specifies the id for a SPIR-V specialization +constant. The attribute applies to const global scalar variables. The variable must be initialized with a C++11 constexpr. +In SPIR-V, the +variable will be replaced with an `OpSpecConstant` with the given id. +The syntax is: + +.. 
code-block:: text + + ``[[vk::constant_id()]] const T Name = `` +}]; +} + def RootSignatureDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 68cd3d790e78..d65b3a5d2f44 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -5065,6 +5065,19 @@ def HLSLGroupMemoryBarrierWithGroupSync: LangBuiltin<"HLSL_LANG"> { let Prototype = "void()"; } +class HLSLScalarTemplate + : Template<["bool", "char", "short", "int", "long long int", + "unsigned short", "unsigned int", "unsigned long long int", + "__fp16", "float", "double"], + ["_bool", "_char", "_short", "_int", "_longlong", "_ushort", + "_uint", "_ulonglong", "_half", "_float", "_double"]>; + +def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate { + let Spellings = ["__builtin_get_spirv_spec_constant"]; + let Attributes = [NoThrow, Const, Pure]; + let Prototype = "T(unsigned int, T)"; +} + // Builtins for XRay. 
def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 979ff60b73b7..34b798a09c21 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12927,6 +12927,10 @@ def err_spirv_enum_not_int : Error< def err_spirv_enum_not_valid : Error< "invalid value for %select{storage class}0 argument">; +def err_specialization_const + : Error<"variable with 'vk::constant_id' attribute must be a const " + "int/float/enum/bool and be initialized with a literal">; + // errors of expect.with.probability def err_probability_not_constant_float : Error< "probability argument to __builtin_expect_with_probability must be constant " diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 33c4b8d1568b..97091792ba23 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -98,6 +98,8 @@ public: HLSLWaveSizeAttr *mergeWaveSizeAttr(Decl *D, const AttributeCommonInfo &AL, int Min, int Max, int Preferred, int SpelledArgsCount); + HLSLVkConstantIdAttr * + mergeVkConstantIdAttr(Decl *D, const AttributeCommonInfo &AL, int Id); HLSLShaderAttr *mergeShaderAttr(Decl *D, const AttributeCommonInfo &AL, llvm::Triple::EnvironmentType ShaderType); HLSLParamModifierAttr * @@ -135,6 +137,7 @@ public: void handleRootSignatureAttr(Decl *D, const ParsedAttr &AL); void handleNumThreadsAttr(Decl *D, const ParsedAttr &AL); void handleWaveSizeAttr(Decl *D, const ParsedAttr &AL); + void handleVkConstantIdAttr(Decl *D, const ParsedAttr &AL); void handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL); void handleSV_GroupThreadIDAttr(Decl *D, const ParsedAttr &AL); void handleSV_GroupIDAttr(Decl *D, const ParsedAttr &AL); @@ -171,7 +174,7 @@ public: QualType getInoutParameterType(QualType Ty); bool transformInitList(const InitializedEntity 
&Entity, InitListExpr *Init); - + bool handleInitialization(VarDecl *VDecl, Expr *&Init); void deduceAddressSpace(VarDecl *Decl); private: diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index ccf45c0c6ff1..cbc5ef9cb0d5 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -12,6 +12,7 @@ #include "CGBuiltin.h" #include "CGHLSLRuntime.h" +#include "CodeGenFunction.h" using namespace clang; using namespace CodeGen; @@ -214,6 +215,43 @@ static Intrinsic::ID getWaveActiveMaxIntrinsic(llvm::Triple::ArchType Arch, } } +// Returns the mangled name for a builtin function that the SPIR-V backend +// will expand into a spec Constant. +static std::string getSpecConstantFunctionName(clang::QualType SpecConstantType, + ASTContext &Context) { + // The parameter types for our conceptual intrinsic function. + QualType ClangParamTypes[] = {Context.IntTy, SpecConstantType}; + + // Create a temporary FunctionDecl for the builtin fuction. It won't be + // added to the AST. + FunctionProtoType::ExtProtoInfo EPI; + QualType FnType = + Context.getFunctionType(SpecConstantType, ClangParamTypes, EPI); + DeclarationName FuncName = &Context.Idents.get("__spirv_SpecConstant"); + FunctionDecl *FnDeclForMangling = FunctionDecl::Create( + Context, Context.getTranslationUnitDecl(), SourceLocation(), + SourceLocation(), FuncName, FnType, /*TSI=*/nullptr, SC_Extern); + + // Attach the created parameter declarations to the function declaration. + SmallVector ParamDecls; + for (QualType ParamType : ClangParamTypes) { + ParmVarDecl *PD = ParmVarDecl::Create( + Context, FnDeclForMangling, SourceLocation(), SourceLocation(), + /*IdentifierInfo*/ nullptr, ParamType, /*TSI*/ nullptr, SC_None, + /*DefaultArg*/ nullptr); + ParamDecls.push_back(PD); + } + FnDeclForMangling->setParams(ParamDecls); + + // Get the mangled name. 
+ std::string Name; + llvm::raw_string_ostream MangledNameStream(Name); + MangleContext *Mangler = Context.createMangleContext(); + Mangler->mangleName(FnDeclForMangling, MangledNameStream); + MangledNameStream.flush(); + return Name; +} + Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -773,6 +811,42 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return EmitRuntimeCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } + case Builtin::BI__builtin_get_spirv_spec_constant_bool: + case Builtin::BI__builtin_get_spirv_spec_constant_short: + case Builtin::BI__builtin_get_spirv_spec_constant_ushort: + case Builtin::BI__builtin_get_spirv_spec_constant_int: + case Builtin::BI__builtin_get_spirv_spec_constant_uint: + case Builtin::BI__builtin_get_spirv_spec_constant_longlong: + case Builtin::BI__builtin_get_spirv_spec_constant_ulonglong: + case Builtin::BI__builtin_get_spirv_spec_constant_half: + case Builtin::BI__builtin_get_spirv_spec_constant_float: + case Builtin::BI__builtin_get_spirv_spec_constant_double: { + llvm::Function *SpecConstantFn = getSpecConstantFunction(E->getType()); + llvm::Value *SpecId = EmitScalarExpr(E->getArg(0)); + llvm::Value *DefaultVal = EmitScalarExpr(E->getArg(1)); + llvm::Value *Args[] = {SpecId, DefaultVal}; + return Builder.CreateCall(SpecConstantFn, Args); + } } return nullptr; } + +llvm::Function *clang::CodeGen::CodeGenFunction::getSpecConstantFunction( + const clang::QualType &SpecConstantType) { + + // Find or create the declaration for the function. 
+ llvm::Module *M = &CGM.getModule(); + std::string MangledName = + getSpecConstantFunctionName(SpecConstantType, getContext()); + llvm::Function *SpecConstantFn = M->getFunction(MangledName); + + if (!SpecConstantFn) { + llvm::Type *IntType = ConvertType(getContext().IntTy); + llvm::Type *RetTy = ConvertType(SpecConstantType); + llvm::Type *ArgTypes[] = {IntType, RetTy}; + llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, ArgTypes, false); + SpecConstantFn = llvm::Function::Create( + FnTy, llvm::GlobalValue::ExternalLinkage, MangledName, M); + } + return SpecConstantFn; +} diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index a5ab9df01dba..59f14b3e35fd 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4850,6 +4850,12 @@ public: llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); + + // Returns a builtin function that the SPIR-V backend will expand into a spec + // constant. 
+ llvm::Function * + getSpecConstantFunction(const clang::QualType &SpecConstantType); + llvm::Value *EmitDirectXBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitSPIRVBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 1bf72e5bb7b9..e1cccf068b5a 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2890,6 +2890,8 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D, NewAttr = S.HLSL().mergeWaveSizeAttr(D, *WS, WS->getMin(), WS->getMax(), WS->getPreferred(), WS->getSpelledArgsCount()); + else if (const auto *CI = dyn_cast(Attr)) + NewAttr = S.HLSL().mergeVkConstantIdAttr(D, *CI, CI->getId()); else if (const auto *SA = dyn_cast(Attr)) NewAttr = S.HLSL().mergeShaderAttr(D, *SA, SA->getType()); else if (isa(Attr)) @@ -13757,6 +13759,10 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { return; } + if (getLangOpts().HLSL) + if (!HLSL().handleInitialization(VDecl, Init)) + return; + // Get the decls type and save a reference for later, since // CheckInitializerTypes may change it. QualType DclT = VDecl->getType(), SavT = DclT; @@ -14179,6 +14185,13 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) { } } + // HLSL variable with the `vk::constant_id` attribute must be initialized. 
+ if (!Var->isInvalidDecl() && Var->hasAttr()) { + Diag(Var->getLocation(), diag::err_specialization_const); + Var->setInvalidDecl(); + return; + } + if (!Var->isInvalidDecl() && RealDecl->hasAttr()) { if (Var->getStorageClass() == SC_Extern) { Diag(Var->getLocation(), diag::err_loader_uninitialized_extern_decl) diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 1c2fa80e782d..eba29e609cb0 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7590,6 +7590,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_HLSLVkExtBuiltinInput: S.HLSL().handleVkExtBuiltinInputAttr(D, AL); break; + case ParsedAttr::AT_HLSLVkConstantId: + S.HLSL().handleVkConstantIdAttr(D, AL); + break; case ParsedAttr::AT_HLSLSV_GroupThreadID: S.HLSL().handleSV_GroupThreadIDAttr(D, AL); break; diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index b55f4fd786b5..9b43ee00810b 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -119,6 +119,40 @@ static ResourceClass getResourceClass(RegisterType RT) { llvm_unreachable("unexpected RegisterType value"); } +static Builtin::ID getSpecConstBuiltinId(QualType Type) { + const auto *BT = dyn_cast(Type); + if (!BT) { + if (!Type->isEnumeralType()) + return Builtin::NotBuiltin; + return Builtin::BI__builtin_get_spirv_spec_constant_int; + } + + switch (BT->getKind()) { + case BuiltinType::Bool: + return Builtin::BI__builtin_get_spirv_spec_constant_bool; + case BuiltinType::Short: + return Builtin::BI__builtin_get_spirv_spec_constant_short; + case BuiltinType::Int: + return Builtin::BI__builtin_get_spirv_spec_constant_int; + case BuiltinType::LongLong: + return Builtin::BI__builtin_get_spirv_spec_constant_longlong; + case BuiltinType::UShort: + return Builtin::BI__builtin_get_spirv_spec_constant_ushort; + case BuiltinType::UInt: + return Builtin::BI__builtin_get_spirv_spec_constant_uint; + case 
BuiltinType::ULongLong: + return Builtin::BI__builtin_get_spirv_spec_constant_ulonglong; + case BuiltinType::Half: + return Builtin::BI__builtin_get_spirv_spec_constant_half; + case BuiltinType::Float: + return Builtin::BI__builtin_get_spirv_spec_constant_float; + case BuiltinType::Double: + return Builtin::BI__builtin_get_spirv_spec_constant_double; + default: + return Builtin::NotBuiltin; + } +} + DeclBindingInfo *ResourceBindings::addDeclBindingInfo(const VarDecl *VD, ResourceClass ResClass) { assert(getDeclBindingInfo(VD, ResClass) == nullptr && @@ -607,6 +641,41 @@ HLSLWaveSizeAttr *SemaHLSL::mergeWaveSizeAttr(Decl *D, return Result; } +HLSLVkConstantIdAttr * +SemaHLSL::mergeVkConstantIdAttr(Decl *D, const AttributeCommonInfo &AL, + int Id) { + + auto &TargetInfo = getASTContext().getTargetInfo(); + if (TargetInfo.getTriple().getArch() != llvm::Triple::spirv) { + Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL; + return nullptr; + } + + auto *VD = cast(D); + + if (getSpecConstBuiltinId(VD->getType()) == Builtin::NotBuiltin) { + Diag(VD->getLocation(), diag::err_specialization_const); + return nullptr; + } + + if (!VD->getType().isConstQualified()) { + Diag(VD->getLocation(), diag::err_specialization_const); + return nullptr; + } + + if (HLSLVkConstantIdAttr *CI = D->getAttr()) { + if (CI->getId() != Id) { + Diag(CI->getLocation(), diag::err_hlsl_attribute_param_mismatch) << AL; + Diag(AL.getLoc(), diag::note_conflicting_attribute); + } + return nullptr; + } + + HLSLVkConstantIdAttr *Result = + ::new (getASTContext()) HLSLVkConstantIdAttr(getASTContext(), AL, Id); + return Result; +} + HLSLShaderAttr * SemaHLSL::mergeShaderAttr(Decl *D, const AttributeCommonInfo &AL, llvm::Triple::EnvironmentType ShaderType) { @@ -1157,6 +1226,15 @@ void SemaHLSL::handleVkExtBuiltinInputAttr(Decl *D, const ParsedAttr &AL) { HLSLVkExtBuiltinInputAttr(getASTContext(), AL, ID)); } +void SemaHLSL::handleVkConstantIdAttr(Decl *D, const ParsedAttr &AL) { + uint32_t Id; + if 
(!SemaRef.checkUInt32Argument(AL, AL.getArgAsExpr(0), Id)) + return; + HLSLVkConstantIdAttr *NewAttr = mergeVkConstantIdAttr(D, AL, Id); + if (NewAttr) + D->addAttr(NewAttr); +} + bool SemaHLSL::diagnoseInputIDType(QualType T, const ParsedAttr &AL) { const auto *VT = T->getAs(); @@ -3206,6 +3284,7 @@ static bool IsDefaultBufferConstantDecl(VarDecl *VD) { return VD->getDeclContext()->isTranslationUnit() && QT.getAddressSpace() == LangAS::Default && VD->getStorageClass() != SC_Static && + !VD->hasAttr() && !isInvalidConstantBufferLeafElementType(QT.getTypePtr()); } @@ -3273,7 +3352,8 @@ void SemaHLSL::ActOnVariableDeclarator(VarDecl *VD) { const Type *VarType = VD->getType().getTypePtr(); while (VarType->isArrayType()) VarType = VarType->getArrayElementTypeNoTypeQual(); - if (VarType->isHLSLResourceRecord()) { + if (VarType->isHLSLResourceRecord() || + VD->hasAttr()) { // Make the variable for resources static. The global externally visible // storage is accessed through the handle, which is a member. The variable // itself is not externally visible. 
@@ -3696,3 +3776,41 @@ bool SemaHLSL::transformInitList(const InitializedEntity &Entity, Init->updateInit(Ctx, I, NewInit->getInit(I)); return true; } + +bool SemaHLSL::handleInitialization(VarDecl *VDecl, Expr *&Init) { + const HLSLVkConstantIdAttr *ConstIdAttr = + VDecl->getAttr(); + if (!ConstIdAttr) + return true; + + ASTContext &Context = SemaRef.getASTContext(); + + APValue InitValue; + if (!Init->isCXX11ConstantExpr(Context, &InitValue)) { + Diag(VDecl->getLocation(), diag::err_specialization_const); + VDecl->setInvalidDecl(); + return false; + } + + Builtin::ID BID = getSpecConstBuiltinId(VDecl->getType()); + + // Argument 1: The ID from the attribute + int ConstantID = ConstIdAttr->getId(); + llvm::APInt IDVal(Context.getIntWidth(Context.IntTy), ConstantID); + Expr *IdExpr = IntegerLiteral::Create(Context, IDVal, Context.IntTy, + ConstIdAttr->getLocation()); + + SmallVector Args = {IdExpr, Init}; + Expr *C = SemaRef.BuildBuiltinCallExpr(Init->getExprLoc(), BID, Args); + if (C->getType()->getCanonicalTypeUnqualified() != + VDecl->getType()->getCanonicalTypeUnqualified()) { + C = SemaRef + .BuildCStyleCastExpr(SourceLocation(), + Context.getTrivialTypeSourceInfo( + Init->getType(), Init->getExprLoc()), + SourceLocation(), C) + .get(); + } + Init = C; + return true; +} diff --git a/clang/test/AST/HLSL/vk.spec-constant.usage.hlsl b/clang/test/AST/HLSL/vk.spec-constant.usage.hlsl new file mode 100644 index 000000000000..c0955c1ea7b4 --- /dev/null +++ b/clang/test/AST/HLSL/vk.spec-constant.usage.hlsl @@ -0,0 +1,130 @@ +// RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute -x hlsl -ast-dump -o - %s | FileCheck %s + +// CHECK: VarDecl {{.*}} bool_const 'const hlsl_private bool' static cinit +// CHECK-NEXT: CallExpr {{.*}} 'bool' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'bool (*)(unsigned int, bool) noexcept' +// CHECK-NEXT: DeclRefExpr {{.*}} 'bool (unsigned int, bool) noexcept' lvalue Function {{.*}} 
'__builtin_get_spirv_spec_constant_bool' 'bool (unsigned int, bool) noexcept' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 1 +// CHECK-NEXT: CXXBoolLiteralExpr {{.*}} 'bool' true +[[vk::constant_id(1)]] +const bool bool_const = true; + +// CHECK: VarDecl {{.*}} short_const 'const hlsl_private short' static cinit +// CHECK-NEXT: CallExpr {{.*}} 'short' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'short (*)(unsigned int, short) noexcept' +// CHECK-NEXT: DeclRefExpr {{.*}} 'short (unsigned int, short) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_short' 'short (unsigned int, short) noexcept' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 2 +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'short' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 4 +[[vk::constant_id(2)]] +const short short_const = 4; + +// CHECK: VarDecl {{.*}} int_const 'const hlsl_private int' static cinit +// CHECK-NEXT: CallExpr {{.*}} 'int' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int (*)(unsigned int, int) noexcept' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int (unsigned int, int) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_int' 'int (unsigned int, int) noexcept' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 3 +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 5 +[[vk::constant_id(3)]] +const int int_const = 5; + +// CHECK: VarDecl {{.*}} long_const 'const hlsl_private long long' static cinit +// CHECK-NEXT: CallExpr {{.*}} 'long long' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'long long (*)(unsigned int, long long) noexcept' +// CHECK-NEXT: DeclRefExpr {{.*}} 'long long (unsigned int, long long) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_longlong' 'long long (unsigned int, long long) noexcept' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 4 +// 
CHECK-NEXT: ImplicitCastExpr {{.*}} 'long long' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8 +[[vk::constant_id(4)]] +const long long long_const = 8; + +// CHECK: VarDecl {{.*}} ushort_const 'const hlsl_private unsigned short' static cinit +// CHECK-NEXT: CallExpr {{.*}} 'unsigned short' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned short (*)(unsigned int, unsigned short) noexcept' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned short (unsigned int, unsigned short) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_ushort' 'unsigned short (unsigned int, unsigned short) noexcept' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 5 +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned short' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 10 +[[vk::constant_id(5)]] +const unsigned short ushort_const = 10; + +// CHECK: VarDecl {{.*}} uint_const 'const hlsl_private unsigned int' static cinit +// CHECK-NEXT: CallExpr {{.*}} 'unsigned int' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int (*)(unsigned int, unsigned int) noexcept' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int (unsigned int, unsigned int) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_uint' 'unsigned int (unsigned int, unsigned int) noexcept' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 6 +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 12 +[[vk::constant_id(6)]] +const unsigned int uint_const = 12; + + +// CHECK: VarDecl {{.*}} ulong_const 'const hlsl_private unsigned long long' static cinit +// CHECK-NEXT: CallExpr {{.*}} 'unsigned long long' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned long long (*)(unsigned int, unsigned long long) noexcept' +// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned long long (unsigned int, unsigned long long) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_ulonglong' 
'unsigned long long (unsigned int, unsigned long long) noexcept' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 7 +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned long long' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 25 +[[vk::constant_id(7)]] +const unsigned long long ulong_const = 25; + +// CHECK: VarDecl {{.*}} half_const 'const hlsl_private half' static cinit +// CHECK-NEXT: CallExpr {{.*}} 'half' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'half (*)(unsigned int, half) noexcept' +// CHECK-NEXT: DeclRefExpr {{.*}} 'half (unsigned int, half) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_half' 'half (unsigned int, half) noexcept' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8 +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'half' +// CHECK-NEXT: FloatingLiteral {{.*}} 'float' 4.040000e+01 +[[vk::constant_id(8)]] +const half half_const = 40.4; + +// CHECK: VarDecl {{.*}} float_const 'const hlsl_private float' static cinit +// CHECK-NEXT: CallExpr {{.*}} 'float' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float (*)(unsigned int, float) noexcept' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float (unsigned int, float) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_float' 'float (unsigned int, float) noexcept' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8 +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 50 +[[vk::constant_id(8)]] +const float float_const = 50; + +// CHECK: VarDecl {{.*}} double_const 'const hlsl_private double' static cinit +// CHECK-NEXT: CallExpr {{.*}} 'double' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'double (*)(unsigned int, double) noexcept' +// CHECK-NEXT: DeclRefExpr {{.*}} 'double (unsigned int, double) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_double' 'double (unsigned int, double) noexcept' +// 
CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 9 +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'double' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 100 +[[vk::constant_id(9)]] +const double double_const = 100; + +// CHECK: VarDecl {{.*}} enum_const 'const hlsl_private E' static cinit +// CHECK-NEXT: CStyleCastExpr {{.*}} 'E' +// CHECK-NEXT: CallExpr {{.*}} 'int' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int (*)(unsigned int, int) noexcept' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int (unsigned int, int) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_int' 'int (unsigned int, int) noexcept' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 10 +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' +// CHECK-NEXT: DeclRefExpr {{.*}} 'E' EnumConstant {{.*}} 'e2' 'E' +enum E { + e0 = 10, + e1 = 20, + e2 = 30 +}; + +[[vk::constant_id(10)]] +const E enum_const = e2; + +// CHECK-NOT: CXXRecordDecl {{.*}} implicit struct __cblayout_$Globals definition diff --git a/clang/test/CodeGenHLSL/inline-spirv/SpirvType.alignment.hlsl b/clang/test/CodeGenHLSL/inline-spirv/SpirvType.alignment.hlsl deleted file mode 100644 index 41cdd7d21bcb..000000000000 --- a/clang/test/CodeGenHLSL/inline-spirv/SpirvType.alignment.hlsl +++ /dev/null @@ -1,16 +0,0 @@ -// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s - -using Int = vk::SpirvType>, vk::Literal>>; - -// CHECK: %struct.S = type <{ i32, target("spirv.Type", target("spirv.Literal", 8), target("spirv.Literal", 0), 21, 4, 64), [4 x i8] }> -struct S { - int a; - Int b; -}; - -[numthreads(1,1,1)] -void main() { - S value; -} diff --git a/clang/test/CodeGenHLSL/inline-spirv/SpirvType.hlsl b/clang/test/CodeGenHLSL/inline-spirv/SpirvType.hlsl deleted file mode 100644 index 7149be0122f4..000000000000 --- 
a/clang/test/CodeGenHLSL/inline-spirv/SpirvType.hlsl +++ /dev/null @@ -1,68 +0,0 @@ -// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s - -template -using Array = vk::SpirvOpaqueType>; - -template -using ArrayBuffer = Array, Size>; - -typedef vk::SpirvType>, vk::Literal>> Int; - -typedef Array ArrayInt; - -// CHECK: %struct.S = type { target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) } -struct S { - ArrayBuffer<4> b; - Int i; -}; - -// CHECK: define hidden spir_func target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) @_Z14getArrayBufferu17spirv_type_28_0_0U5_TypeN4hlsl8RWBufferIfEEU6_ConstLm4E(target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) %v) #0 -ArrayBuffer<4> getArrayBuffer(ArrayBuffer<4> v) { - return v; -} - -// CHECK: define hidden spir_func target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) @_Z6getIntu18spirv_type_21_4_32U4_LitLi32EU4_LitLi0E(target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) %v) #0 -Int getInt(Int v) { - return v; -} - -// TODO: uncomment and test once CBuffer handles are implemented for SPIR-V -// ArrayBuffer<4> g_buffers; -// Int g_word; - -[numthreads(1, 1, 1)] -void main() { - // CHECK: [[buffers:%.*]] = alloca target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), align 4 - ArrayBuffer<4> buffers; - - // CHECK: [[longBuffers:%.*]] = alloca target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 591751049, 1), 28, 0, 0), align 4 - 
ArrayBuffer<0x123456789> longBuffers; - - // CHECK: [[word:%.*]] = alloca target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), align 4 - Int word; - - // CHECK: [[words:%.*]] = alloca [4 x target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32)], align 4 - Int words[4]; - - // CHECK: [[words2:%.*]] = alloca target("spirv.Type", target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), target("spirv.IntegralConstant", i64, 5), 28, 0, 0), align 4 - ArrayInt words2; - - // CHECK: [[value:%.*]] = alloca %struct.S, align 1 - S value; - - // CHECK: [[buffers2:%.*]] = alloca target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), align 4 - // CHECK: [[word2:%.*]] = alloca target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), align 4 - - - // CHECK: [[loaded:%[0-9]+]] = load target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), ptr [[buffers]], align 4 - // CHECK: %call1 = call spir_func target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) @_Z14getArrayBufferu17spirv_type_28_0_0U5_TypeN4hlsl8RWBufferIfEEU6_ConstLm4E(target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) [[loaded]]) - // CHECK: store target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) %call1, ptr [[buffers2]], align 4 - ArrayBuffer<4> buffers2 = getArrayBuffer(buffers); - - // CHECK: [[loaded:%[0-9]+]] = load target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), ptr [[word]], align 4 - // CHECK: %call2 = call spir_func target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) 
@_Z6getIntu18spirv_type_21_4_32U4_LitLi32EU4_LitLi0E(target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) [[loaded]]) - // CHECK: store target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) %call2, ptr [[word2]], align 4 - Int word2 = getInt(word); -} diff --git a/clang/test/CodeGenHLSL/vk-features/SpirvType.alignment.hlsl b/clang/test/CodeGenHLSL/vk-features/SpirvType.alignment.hlsl new file mode 100644 index 000000000000..41cdd7d21bcb --- /dev/null +++ b/clang/test/CodeGenHLSL/vk-features/SpirvType.alignment.hlsl @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s + +using Int = vk::SpirvType>, vk::Literal>>; + +// CHECK: %struct.S = type <{ i32, target("spirv.Type", target("spirv.Literal", 8), target("spirv.Literal", 0), 21, 4, 64), [4 x i8] }> +struct S { + int a; + Int b; +}; + +[numthreads(1,1,1)] +void main() { + S value; +} diff --git a/clang/test/CodeGenHLSL/vk-features/SpirvType.hlsl b/clang/test/CodeGenHLSL/vk-features/SpirvType.hlsl new file mode 100644 index 000000000000..7149be0122f4 --- /dev/null +++ b/clang/test/CodeGenHLSL/vk-features/SpirvType.hlsl @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s + +template +using Array = vk::SpirvOpaqueType>; + +template +using ArrayBuffer = Array, Size>; + +typedef vk::SpirvType>, vk::Literal>> Int; + +typedef Array ArrayInt; + +// CHECK: %struct.S = type { target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) } +struct S { + ArrayBuffer<4> b; + Int i; +}; + +// CHECK: define hidden spir_func target("spirv.Type", 
target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) @_Z14getArrayBufferu17spirv_type_28_0_0U5_TypeN4hlsl8RWBufferIfEEU6_ConstLm4E(target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) %v) #0 +ArrayBuffer<4> getArrayBuffer(ArrayBuffer<4> v) { + return v; +} + +// CHECK: define hidden spir_func target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) @_Z6getIntu18spirv_type_21_4_32U4_LitLi32EU4_LitLi0E(target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) %v) #0 +Int getInt(Int v) { + return v; +} + +// TODO: uncomment and test once CBuffer handles are implemented for SPIR-V +// ArrayBuffer<4> g_buffers; +// Int g_word; + +[numthreads(1, 1, 1)] +void main() { + // CHECK: [[buffers:%.*]] = alloca target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), align 4 + ArrayBuffer<4> buffers; + + // CHECK: [[longBuffers:%.*]] = alloca target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 591751049, 1), 28, 0, 0), align 4 + ArrayBuffer<0x123456789> longBuffers; + + // CHECK: [[word:%.*]] = alloca target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), align 4 + Int word; + + // CHECK: [[words:%.*]] = alloca [4 x target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32)], align 4 + Int words[4]; + + // CHECK: [[words2:%.*]] = alloca target("spirv.Type", target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), target("spirv.IntegralConstant", i64, 5), 28, 0, 0), align 4 + ArrayInt words2; + + // CHECK: [[value:%.*]] = alloca %struct.S, align 1 + S value; + + // CHECK: [[buffers2:%.*]] = alloca target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), 
target("spirv.IntegralConstant", i64, 4), 28, 0, 0), align 4 + // CHECK: [[word2:%.*]] = alloca target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), align 4 + + + // CHECK: [[loaded:%[0-9]+]] = load target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), ptr [[buffers]], align 4 + // CHECK: %call1 = call spir_func target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) @_Z14getArrayBufferu17spirv_type_28_0_0U5_TypeN4hlsl8RWBufferIfEEU6_ConstLm4E(target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) [[loaded]]) + // CHECK: store target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) %call1, ptr [[buffers2]], align 4 + ArrayBuffer<4> buffers2 = getArrayBuffer(buffers); + + // CHECK: [[loaded:%[0-9]+]] = load target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), ptr [[word]], align 4 + // CHECK: %call2 = call spir_func target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) @_Z6getIntu18spirv_type_21_4_32U4_LitLi32EU4_LitLi0E(target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) [[loaded]]) + // CHECK: store target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) %call2, ptr [[word2]], align 4 + Int word2 = getInt(word); +} diff --git a/clang/test/CodeGenHLSL/vk-features/vk.spec-constant.hlsl b/clang/test/CodeGenHLSL/vk-features/vk.spec-constant.hlsl new file mode 100644 index 000000000000..cbc1fa61eae2 --- /dev/null +++ b/clang/test/CodeGenHLSL/vk-features/vk.spec-constant.hlsl @@ -0,0 +1,210 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs --version 
5 +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s + +[[vk::constant_id(1)]] +const bool bool_const = true; + +[[vk::constant_id(1)]] +const short short_const = 4; + +[[vk::constant_id(3)]] +const int int_const = 5; + +[[vk::constant_id(4)]] +const long long long_const = 8; + +[[vk::constant_id(5)]] +const unsigned short ushort_const = 10; + +[[vk::constant_id(6)]] +const unsigned int uint_const = 12; + +[[vk::constant_id(7)]] +const unsigned long long ulong_const = 25; + +[[vk::constant_id(8)]] +const half half_const = 40.4; + +[[vk::constant_id(8)]] +const float float_const = 50.5; + +[[vk::constant_id(9)]] +const double double_const = 100.2; + +enum E { + e0 = 10, + e1 = 20, + e2 = 30 +}; + +[[vk::constant_id(10)]] +const E enum_const = e2; + +[numthreads(1,1,1)] +void main() { + bool b = bool_const; + short s = short_const; + int i = int_const; + long long l = long_const; + unsigned short us = ushort_const; + unsigned int ui = uint_const; + unsigned long long ul = ulong_const; + half h = half_const; + float f = float_const; + double d = double_const; + E e = enum_const; +} +//. 
+// CHECK: @_ZL10bool_const = internal addrspace(10) global i32 0, align 4 +// CHECK: @_ZL11short_const = internal addrspace(10) global i16 0, align 2 +// CHECK: @_ZL9int_const = internal addrspace(10) global i32 0, align 4 +// CHECK: @_ZL10long_const = internal addrspace(10) global i64 0, align 8 +// CHECK: @_ZL12ushort_const = internal addrspace(10) global i16 0, align 2 +// CHECK: @_ZL10uint_const = internal addrspace(10) global i32 0, align 4 +// CHECK: @_ZL11ulong_const = internal addrspace(10) global i64 0, align 8 +// CHECK: @_ZL10half_const = internal addrspace(10) global float 0.000000e+00, align 4 +// CHECK: @_ZL11float_const = internal addrspace(10) global float 0.000000e+00, align 4 +// CHECK: @_ZL12double_const = internal addrspace(10) global double 0.000000e+00, align 8 +// CHECK: @_ZL10enum_const = internal addrspace(10) global i32 0, align 4 +//. +// CHECK-LABEL: define internal spir_func void @_Z4mainv( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[S:%.*]] = alloca i16, align 2 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[L:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[US:%.*]] = alloca i16, align 2 +// CHECK-NEXT: [[UI:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[UL:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[H:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[F:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[D:%.*]] = alloca double, align 8 +// CHECK-NEXT: [[E:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(10) @_ZL10bool_const, align 4 +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i32 [[TMP1]] to i1 +// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i32 +// CHECK-NEXT: store i32 [[STOREDV]], ptr [[B]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(10) @_ZL11short_const, align 2 +// CHECK-NEXT: store i16 
[[TMP2]], ptr [[S]], align 2 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(10) @_ZL9int_const, align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[I]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(10) @_ZL10long_const, align 8 +// CHECK-NEXT: store i64 [[TMP4]], ptr [[L]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(10) @_ZL12ushort_const, align 2 +// CHECK-NEXT: store i16 [[TMP5]], ptr [[US]], align 2 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(10) @_ZL10uint_const, align 4 +// CHECK-NEXT: store i32 [[TMP6]], ptr [[UI]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr addrspace(10) @_ZL11ulong_const, align 8 +// CHECK-NEXT: store i64 [[TMP7]], ptr [[UL]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr addrspace(10) @_ZL10half_const, align 4 +// CHECK-NEXT: store float [[TMP8]], ptr [[H]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load float, ptr addrspace(10) @_ZL11float_const, align 4 +// CHECK-NEXT: store float [[TMP9]], ptr [[F]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load double, ptr addrspace(10) @_ZL12double_const, align 8 +// CHECK-NEXT: store double [[TMP10]], ptr [[D]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(10) @_ZL10enum_const, align 4 +// CHECK-NEXT: store i32 [[TMP11]], ptr [[E]], align 4 +// CHECK-NEXT: ret void +// +// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init( +// CHECK-SAME: ) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[TMP1:%.*]] = call i1 @_Z20__spirv_SpecConstantib(i32 1, i1 true) +// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP1]] to i32 +// CHECK-NEXT: store i32 [[STOREDV]], ptr addrspace(10) @_ZL10bool_const, align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.1( +// CHECK-SAME: ) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token 
@llvm.experimental.convergence.entry() +// CHECK-NEXT: [[TMP1:%.*]] = call i16 @_Z20__spirv_SpecConstantis(i32 1, i16 4) +// CHECK-NEXT: store i16 [[TMP1]], ptr addrspace(10) @_ZL11short_const, align 2 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.2( +// CHECK-SAME: ) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @_Z20__spirv_SpecConstantii(i32 3, i32 5) +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(10) @_ZL9int_const, align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.3( +// CHECK-SAME: ) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @_Z20__spirv_SpecConstantix(i32 4, i64 8) +// CHECK-NEXT: store i64 [[TMP1]], ptr addrspace(10) @_ZL10long_const, align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.4( +// CHECK-SAME: ) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[TMP1:%.*]] = call i16 @_Z20__spirv_SpecConstantit(i32 5, i16 10) +// CHECK-NEXT: store i16 [[TMP1]], ptr addrspace(10) @_ZL12ushort_const, align 2 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.5( +// CHECK-SAME: ) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @_Z20__spirv_SpecConstantij(i32 6, i32 12) +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(10) @_ZL10uint_const, align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.6( +// CHECK-SAME: ) #[[ATTR3]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @_Z20__spirv_SpecConstantiy(i32 7, i64 25) +// CHECK-NEXT: store i64 [[TMP1]], ptr addrspace(10) @_ZL11ulong_const, align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.7( +// CHECK-SAME: ) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn float @_Z20__spirv_SpecConstantiDh(i32 8, float 0x4044333340000000) +// CHECK-NEXT: store float [[TMP1]], ptr addrspace(10) @_ZL10half_const, align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.8( +// CHECK-SAME: ) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn float @_Z20__spirv_SpecConstantif(i32 8, float 5.050000e+01) +// CHECK-NEXT: store float [[TMP1]], ptr addrspace(10) @_ZL11float_const, align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.9( +// CHECK-SAME: ) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn double @_Z20__spirv_SpecConstantid(i32 9, double 0x40590CCCC0000000) +// CHECK-NEXT: store double [[TMP1]], ptr addrspace(10) @_ZL12double_const, align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.10( +// CHECK-SAME: ) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @_Z20__spirv_SpecConstantii(i32 10, i32 
30) +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(10) @_ZL10enum_const, align 4 +// CHECK-NEXT: ret void diff --git a/clang/test/SemaHLSL/vk.spec-constant.error.hlsl b/clang/test/SemaHLSL/vk.spec-constant.error.hlsl new file mode 100644 index 000000000000..24873d272a54 --- /dev/null +++ b/clang/test/SemaHLSL/vk.spec-constant.error.hlsl @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan1.3-compute -verify %s +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.8-compute -verify %s + +#ifndef __spirv__ +// expected-warning@+2{{'constant_id' attribute ignored}} +#endif +[[vk::constant_id(0)]] +const bool sc0 = true; + +#ifdef __spirv__ +// expected-error@+2{{variable with 'vk::constant_id' attribute must be a const int/float/enum/bool and be initialized with a literal}} +[[vk::constant_id(1)]] +const bool sc1 = sc0; // error + +// expected-warning@+1{{'constant_id' attribute only applies to external global variables}} +[[vk::constant_id(2)]] +static const bool sc2 = false; // error + +// expected-error@+2{{variable with 'vk::constant_id' attribute must be a const int/float/enum/bool and be initialized with a literal}} +[[vk::constant_id(3)]] +const bool sc3; // error + +// expected-error@+2{{variable with 'vk::constant_id' attribute must be a const int/float/enum/bool and be initialized with a literal}} +[[vk::constant_id(4)]] +bool sc4 = false; // error + +// expected-error@+2{{variable with 'vk::constant_id' attribute must be a const int/float/enum/bool and be initialized with a literal}} +[[vk::constant_id(5)]] +const int2 sc5 = {0,0}; // error + +[numthreads(1,1,1)] +void main() { + // expected-warning@+1{{'constant_id' attribute only applies to external global variables}} + [[vk::constant_id(6)]] + const bool sc6 = false; // error +} +#endif -- cgit v1.2.3 From d3441f7348203cc2a1d9c44fd24c1113954aa2b2 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Wed, 18 Jun 2025 11:45:20 +0100 Subject: [LV] Change 
getSmallBestKnownTC to return an ElementCount (NFC) (#141793) This is prep work for enabling better UF calculations when using vscale based VFs to vectorise loops with vscale based tripcounts. NOTE: NFC because All uses remain fixed-length until a following PR changes LoopVectorize's version of getSmallConstantTripCount(). --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 50 +++++++++++++++---------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 16d48b06dce4..2f4416d2782e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -419,6 +419,13 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) { return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); } +/// A version of ScalarEvolution::getSmallConstantTripCount that returns an +/// ElementCount to include loops whose trip count is a function of vscale. +static ElementCount getSmallConstantTripCount(ScalarEvolution *SE, + const Loop *L) { + return ElementCount::getFixed(SE->getSmallConstantTripCount(L)); +} + /// Returns "best known" trip count, which is either a valid positive trip count /// or std::nullopt when an estimate cannot be made (including when the trip /// count would overflow), for the specified loop \p L as defined by the @@ -427,24 +434,24 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) { /// 2) Returns expected trip count according to profile data if any. /// 3) Returns upper bound estimate if known, and if \p CanUseConstantMax. /// 4) Returns std::nullopt if all of the above failed. -static std::optional +static std::optional getSmallBestKnownTC(PredicatedScalarEvolution &PSE, Loop *L, bool CanUseConstantMax = true) { // Check if exact trip count is known. 
- if (unsigned ExpectedTC = PSE.getSE()->getSmallConstantTripCount(L)) + if (auto ExpectedTC = getSmallConstantTripCount(PSE.getSE(), L)) return ExpectedTC; // Check if there is an expected trip count available from profile data. if (LoopVectorizeWithBlockFrequency) if (auto EstimatedTC = getLoopEstimatedTripCount(L)) - return *EstimatedTC; + return ElementCount::getFixed(*EstimatedTC); if (!CanUseConstantMax) return std::nullopt; // Check if upper bound estimate is known. if (unsigned ExpectedTC = PSE.getSmallConstantMaxTripCount()) - return ExpectedTC; + return ElementCount::getFixed(ExpectedTC); return std::nullopt; } @@ -1960,7 +1967,8 @@ public: // Get the best known TC estimate. if (auto EstimatedTC = getSmallBestKnownTC( PSE, OuterLoop, /* CanUseConstantMax = */ false)) - BestTripCount = *EstimatedTC; + if (EstimatedTC->isFixed()) + BestTripCount = EstimatedTC->getFixedValue(); InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount; @@ -3750,12 +3758,12 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { } ScalarEvolution *SE = PSE.getSE(); - unsigned TC = SE->getSmallConstantTripCount(TheLoop); + ElementCount TC = getSmallConstantTripCount(SE, TheLoop); unsigned MaxTC = PSE.getSmallConstantMaxTripCount(); LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); - if (TC != MaxTC) + if (TC != ElementCount::getFixed(MaxTC)) LLVM_DEBUG(dbgs() << "LV: Found maximum trip count: " << MaxTC << '\n'); - if (TC == 1) { + if (TC.isScalar()) { reportVectorizationFailure("Single iteration (non) loop", "loop trip count is one, irrelevant for vectorization", "SingleIterationLoop", ORE, TheLoop); @@ -3869,7 +3877,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { } auto ExpectedTC = getSmallBestKnownTC(PSE, TheLoop); - if (ExpectedTC && ExpectedTC <= TTI.getMinTripCountTailFoldingThreshold()) { + if (ExpectedTC && ExpectedTC->isFixed() && + ExpectedTC->getFixedValue() <= + 
TTI.getMinTripCountTailFoldingThreshold()) { if (MaxPowerOf2RuntimeVF > 0u) { // If we have a low-trip-count, and the fixed-width VF is known to divide // the trip count but the scalable factor does not, use the fixed-width @@ -3927,7 +3937,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { return FixedScalableVFPair::getNone(); } - if (TC == 0) { + if (TC.isZero()) { reportVectorizationFailure( "unable to calculate the loop count due to complex control flow", "UnknownLoopCountComplexCFG", ORE, TheLoop); @@ -4816,13 +4826,13 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF, // At least one iteration must be scalar when this constraint holds. So the // maximum available iterations for interleaving is one less. unsigned AvailableTC = requiresScalarEpilogue(VF.isVector()) - ? (*BestKnownTC) - 1 - : *BestKnownTC; + ? BestKnownTC->getFixedValue() - 1 + : BestKnownTC->getFixedValue(); unsigned InterleaveCountLB = bit_floor(std::max( 1u, std::min(AvailableTC / (EstimatedVF * 2), MaxInterleaveCount))); - if (PSE.getSE()->getSmallConstantTripCount(TheLoop) > 0) { + if (getSmallConstantTripCount(PSE.getSE(), TheLoop).isNonZero()) { // If the best known trip count is exact, we select between two // prospective ICs, where // @@ -5182,8 +5192,8 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) { // costs of comparison and induction instructions, as they'll get simplified // away. SmallPtrSet ValuesToIgnoreForVF; - auto TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); - if (VF.isFixed() && TC == VF.getFixedValue() && !foldTailByMasking()) + auto TC = getSmallConstantTripCount(PSE.getSE(), TheLoop); + if (TC == VF && !foldTailByMasking()) addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(), ValuesToIgnoreForVF); @@ -6878,8 +6888,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, // simplified away. 
// TODO: Remove this code after stepping away from the legacy cost model and // adding code to simplify VPlans before calculating their costs. - auto TC = PSE.getSE()->getSmallConstantTripCount(OrigLoop); - if (VF.isFixed() && TC == VF.getFixedValue() && !CM.foldTailByMasking()) + auto TC = getSmallConstantTripCount(PSE.getSE(), OrigLoop); + if (TC == VF && !CM.foldTailByMasking()) addFullyUnrolledInstructionsToIgnore(OrigLoop, Legal->getInductionVars(), CostCtx.SkipCostComputation); @@ -9647,8 +9657,7 @@ static bool isOutsideLoopWorkProfitable(GeneratedRTChecks &Checks, // Skip vectorization if the expected trip count is less than the minimum // required trip count. if (auto ExpectedTC = getSmallBestKnownTC(PSE, L)) { - if (ElementCount::isKnownLT(ElementCount::getFixed(*ExpectedTC), - VF.MinProfitableTripCount)) { + if (ElementCount::isKnownLT(*ExpectedTC, VF.MinProfitableTripCount)) { LLVM_DEBUG(dbgs() << "LV: Vectorization is not beneficial: expected " "trip count < minimum profitable VF (" << *ExpectedTC << " < " << VF.MinProfitableTripCount @@ -10018,7 +10027,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Check the loop for a trip count threshold: vectorize loops with a tiny trip // count by optimizing for size, to minimize overheads. auto ExpectedTC = getSmallBestKnownTC(PSE, L); - if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) { + if (ExpectedTC && ExpectedTC->isFixed() && + ExpectedTC->getFixedValue() < TinyTripCountVectorThreshold) { LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " << "This loop is worth vectorizing only if no scalar " << "iteration overheads are incurred."); -- cgit v1.2.3 From b7ef5dbac91f9ccaf335ae4dd998e5783523f24e Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Wed, 18 Jun 2025 11:53:27 +0100 Subject: [LLVM][ComplexDeinterleaving] Update splat identification to include vector ConstantInt/FP. 
(#144516) --- llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp | 3 +++ llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll | 1 + 2 files changed, 4 insertions(+) diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp index ae12423d827d..8855740f0cc8 100644 --- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp +++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp @@ -2005,6 +2005,9 @@ ComplexDeinterleavingGraph::identifySplat(Value *R, Value *I) { if (isa(V)) return true; + if (isa(V) || isa(V)) + return isa(V->getType()); + VectorType *VTy; ArrayRef Mask; // Splats are represented differently depending on whether the repeated diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll index 661531361315..e7a00fc90e31 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s --mattr=+sve -o - | FileCheck %s +; RUN: llc -use-constant-int-for-scalable-splat -use-constant-fp-for-scalable-splat < %s --mattr=+sve -o - | FileCheck %s target triple = "aarch64" -- cgit v1.2.3 From b5967264b0fbfd502b3a7edec27409e966fb68be Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 18 Jun 2025 19:56:26 +0900 Subject: CodeGen: Move ABI option enums to support (#142912) Move these out of TargetOptions and into Support to avoid the dependency on Target. There are similar ABI options already in Support/CodeGen.h. 
--- llvm/include/llvm/Support/CodeGen.h | 16 ++++++++++++++++ llvm/include/llvm/Target/TargetOptions.h | 17 +---------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/Support/CodeGen.h b/llvm/include/llvm/Support/CodeGen.h index 48745f7f4d2a..90733b50385a 100644 --- a/llvm/include/llvm/Support/CodeGen.h +++ b/llvm/include/llvm/Support/CodeGen.h @@ -50,6 +50,22 @@ namespace llvm { }; } + namespace FloatABI { + enum ABIType { + Default, // Target-specific (either soft or hard depending on triple, etc). + Soft, // Soft float. + Hard // Hard float. + }; + } + + enum class EABI { + Unknown, + Default, // Default means not specified + EABI4, // Target-specific (either 4, 5 or gnu depending on triple). + EABI5, + GNU + }; + /// Code generation optimization level. enum class CodeGenOptLevel { None = 0, ///< -O0 diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index a7c46921255b..f420798aa46f 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -16,6 +16,7 @@ #include "llvm/ADT/FloatingPointMode.h" #include "llvm/MC/MCTargetOptions.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" #include @@ -25,14 +26,6 @@ struct fltSemantics; class MachineFunction; class MemoryBuffer; -namespace FloatABI { -enum ABIType { - Default, // Target-specific (either soft or hard depending on triple, etc). - Soft, // Soft float. - Hard // Hard float. -}; -} - namespace FPOpFusion { enum FPOpFusionMode { Fast, // Enable fusion of FP ops wherever it's profitable. @@ -71,14 +64,6 @@ enum class BasicBlockSection { None // Do not use Basic Block Sections. }; -enum class EABI { - Unknown, - Default, // Default means not specified - EABI4, // Target-specific (either 4, 5 or gnu depending on triple). - EABI5, - GNU -}; - /// Identify a debugger for "tuning" the debug info. 
/// /// The "debugger tuning" concept allows us to present a more intuitive -- cgit v1.2.3 From 4aca3dc48b0919b81bd86302b141f29869266c45 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Wed, 18 Jun 2025 13:04:24 +0200 Subject: Reland: [GlobalISel] prevent G_UNMERGE_VALUES for vectors with different elements (#144661) This commit prevents building a G_UNMERGE_VALUES instruction with different source and destination vector elements in `LegalizationArtifactCombiner::ArtifactValueFinder::tryCombineMergeLike()`, e.g.: `%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)` This LLVM defect was identified via the AMD Fuzzing project. --- .../GlobalISel/LegalizationArtifactCombiner.h | 5 ++- .../CodeGen/AMDGPU/GlobalISel/insertelement.ll | 44 ++++++++++++++++++++++ .../buffer-fat-pointers-contents-legalization.ll | 10 ++--- 3 files changed, 53 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 22f6a5fde546..8f560c42082f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -997,6 +997,7 @@ public: // Recognize UnmergeSrc that can be unmerged to DstTy directly. // Types have to be either both vector or both non-vector types. + // In case of vector types, the scalar elements need to match. // Merge-like opcodes are combined one at the time. First one creates new // unmerge, following should use the same unmerge (builder performs CSE). 
// @@ -1005,7 +1006,9 @@ public: // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3 // // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc - if ((DstTy.isVector() == UnmergeSrcTy.isVector()) && + if (((!DstTy.isVector() && !UnmergeSrcTy.isVector()) || + (DstTy.isVector() && UnmergeSrcTy.isVector() && + DstTy.getScalarType() == UnmergeSrcTy.getScalarType())) && (Elt0UnmergeIdx % NumMIElts == 0) && getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) { if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index 8134eb3ca2af..51d0b225b2a2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -6506,3 +6506,47 @@ entry: %insert = insertelement <5 x double> %vec, double %val, i32 %idx ret <5 x double> %insert } + +; Found by fuzzer, reduced with llvm-reduce. +define void @insert_very_small_from_very_large(<32 x i16> %L3, ptr %ptr) { +; GPRIDX-LABEL: insert_very_small_from_very_large: +; GPRIDX: ; %bb.0: ; %bb +; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: v_lshrrev_b32_e32 v0, 1, v0 +; GPRIDX-NEXT: v_and_b32_e32 v0, 1, v0 +; GPRIDX-NEXT: v_lshlrev_b16_e32 v0, 1, v0 +; GPRIDX-NEXT: v_and_b32_e32 v0, 3, v0 +; GPRIDX-NEXT: flat_store_byte v[16:17], v0 +; GPRIDX-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GPRIDX-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: insert_very_small_from_very_large: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0 +; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 +; GFX10-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX10-NEXT: flat_store_byte v[16:17], v0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: insert_very_small_from_very_large: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_lshrrev_b16 v0.l, 1, v0.l +; GFX11-NEXT: v_and_b16 v0.l, v0.l, 1 +; GFX11-NEXT: v_lshlrev_b16 v0.l, 1, v0.l +; GFX11-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX11-NEXT: flat_store_b8 v[16:17], v0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] +bb: + %a = bitcast <32 x i16> %L3 to i512 + %b = trunc i512 %a to i8 + %c = trunc i8 %b to i2 + %d = bitcast i2 %c to <2 x i1> + %insert = insertelement <2 x i1> %d, i1 false, i32 0 + store <2 x i1> %insert, ptr %ptr, align 1 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll index fdc1dd6cce8e..53b2542cf9a7 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll @@ -2166,14 +2166,14 @@ define <6 x i8> @load_v6i8(ptr addrspace(8) inreg %buf) { ; GISEL-LABEL: load_v6i8: ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4 ; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 +; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4 ; GISEL-NEXT: s_waitcnt vmcnt(1) -; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4 -; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 ; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4 ; GISEL-NEXT: s_setpc_b64 s[30:31] %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) %ret = load <6 x i8>, ptr addrspace(7) %p @@ -3630,10 +3630,10 @@ define <6 x i8> @volatile_load_v6i8(ptr addrspace(8) inreg %buf) { ; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4 glc ; GISEL-NEXT: s_waitcnt vmcnt(1) ; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; 
GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4 ; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GISEL-NEXT: s_waitcnt vmcnt(0) +; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4 ; GISEL-NEXT: s_setpc_b64 s[30:31] %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) %ret = load volatile <6 x i8>, ptr addrspace(7) %p -- cgit v1.2.3 From c3efe7d64cebcd8679bec3ba7ff8154f8b0a1fa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 18 Jun 2025 14:12:14 +0300 Subject: [libcxx] [test] Fix odr_signature tests with optimizations enabled (#144317) If optimization is enabled, the inline `f()` function actually gets inlined, meaning that the functions `tu1()` and `tu2()` trivially return 1 and 2, instead of actually referencing the potentially linker deduplicated function `f()`, which is what the test tries to test. Therefore, this test previously actually failed to test what it was supposed to test, if optimization was enabled. Mark the inline functions with `TEST_NOINLINE` to make sure that they don't get inlined even with optimizations enabled. Also update the TODO comments to explain why we have an XFAIL for msvc mode here. This avoids these tests unexpectedly passing if building in msvc mode, with optimizations enabled (`-DLIBCXX_TEST_PARAMS="optimization=speed"`). --- libcxx/test/libcxx/odr_signature.exceptions.sh.cpp | 10 +++++++--- libcxx/test/libcxx/odr_signature.hardening.sh.cpp | 14 +++++++++----- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/libcxx/test/libcxx/odr_signature.exceptions.sh.cpp b/libcxx/test/libcxx/odr_signature.exceptions.sh.cpp index 6bf60b5e82d3..c0ba48eb245d 100644 --- a/libcxx/test/libcxx/odr_signature.exceptions.sh.cpp +++ b/libcxx/test/libcxx/odr_signature.exceptions.sh.cpp @@ -6,9 +6,11 @@ // //===----------------------------------------------------------------------===// -// TODO: Investigate +// ABI tags have no effect in MSVC mode. 
// XFAIL: msvc +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + // Test that we encode whether exceptions are supported in an ABI tag to avoid // ODR violations when linking TUs that have different values for it. @@ -18,17 +20,19 @@ // RUN: %{cxx} %t.tu1.o %t.tu2.o %t.main.o %{flags} %{link_flags} -o %t.exe // RUN: %{exec} %t.exe +#include "test_macros.h" + // -fno-exceptions #ifdef TU1 # include <__config> -_LIBCPP_HIDE_FROM_ABI inline int f() { return 1; } +_LIBCPP_HIDE_FROM_ABI TEST_NOINLINE inline int f() { return 1; } int tu1() { return f(); } #endif // TU1 // -fexceptions #ifdef TU2 # include <__config> -_LIBCPP_HIDE_FROM_ABI inline int f() { return 2; } +_LIBCPP_HIDE_FROM_ABI TEST_NOINLINE inline int f() { return 2; } int tu2() { return f(); } #endif // TU2 diff --git a/libcxx/test/libcxx/odr_signature.hardening.sh.cpp b/libcxx/test/libcxx/odr_signature.hardening.sh.cpp index 0dc280bf2818..8daf3f3fd046 100644 --- a/libcxx/test/libcxx/odr_signature.hardening.sh.cpp +++ b/libcxx/test/libcxx/odr_signature.hardening.sh.cpp @@ -6,9 +6,11 @@ // //===----------------------------------------------------------------------===// -// TODO: Investigate +// ABI tags have no effect in MSVC mode. // XFAIL: msvc +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + // Test that we encode the hardening mode in an ABI tag to avoid ODR violations // when linking TUs that have different values for it. 
@@ -21,31 +23,33 @@ // RUN: %{cxx} %t.tu1.o %t.tu2.o %t.tu3.o %t.tu4.o %t.main.o %{flags} %{link_flags} -o %t.exe // RUN: %{exec} %t.exe +#include "test_macros.h" + // fast hardening mode #ifdef TU1 # include <__config> -_LIBCPP_HIDE_FROM_ABI inline int f() { return 1; } +_LIBCPP_HIDE_FROM_ABI TEST_NOINLINE inline int f() { return 1; } int tu1() { return f(); } #endif // TU1 // extensive hardening mode #ifdef TU2 # include <__config> -_LIBCPP_HIDE_FROM_ABI inline int f() { return 2; } +_LIBCPP_HIDE_FROM_ABI TEST_NOINLINE inline int f() { return 2; } int tu2() { return f(); } #endif // TU2 // debug hardening mode #ifdef TU3 # include <__config> -_LIBCPP_HIDE_FROM_ABI inline int f() { return 3; } +_LIBCPP_HIDE_FROM_ABI TEST_NOINLINE inline int f() { return 3; } int tu3() { return f(); } #endif // TU3 // No hardening #ifdef TU4 # include <__config> -_LIBCPP_HIDE_FROM_ABI inline int f() { return 4; } +_LIBCPP_HIDE_FROM_ABI TEST_NOINLINE inline int f() { return 4; } int tu4() { return f(); } #endif // TU4 -- cgit v1.2.3 From 66d6964a55014e7fabb7c80fbba19d2145262b6b Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Wed, 18 Jun 2025 16:50:48 +0530 Subject: Fix tests failing on fuchsia clang x86_64 builders (#144655) Fuchsia sets CLANG_DEFAULT_UNWINDLIB to libunwind. As a result, when rtlib is set to libgcc and unwindlib is not explicitly specified, tests using Fuchsia as the default platform will fail. 
To address this, the affected tests are now xfailed. This change fixes the following tests introduced in https://github.com/llvm/llvm-project/commit/45ea46c44636094e9fcdbbeabfd11f9d0fad5e38: clang/test/Driver/aarch64-toolchain-extra.c clang/test/Driver/arm-toolchain-extra.c clang/test/Driver/aarch64-toolchain.c clang/test/Driver/arm-toolchain.c --- clang/test/Driver/aarch64-toolchain-extra.c | 1 + clang/test/Driver/aarch64-toolchain.c | 1 + clang/test/Driver/arm-toolchain-extra.c | 1 + clang/test/Driver/arm-toolchain.c | 1 + 4 files changed, 4 insertions(+) diff --git a/clang/test/Driver/aarch64-toolchain-extra.c b/clang/test/Driver/aarch64-toolchain-extra.c index 2610e962bd69..eb8c741ae1ad 100644 --- a/clang/test/Driver/aarch64-toolchain-extra.c +++ b/clang/test/Driver/aarch64-toolchain-extra.c @@ -3,6 +3,7 @@ // The tests here are similar to those in aarch64-toolchain.c, however // these tests need to create symlinks to test directory trees in order to // set up the environment and therefore shell support is required. +// XFAIL: target={{.*}}-fuchsia{{.*}} // REQUIRES: shell // UNSUPPORTED: system-windows diff --git a/clang/test/Driver/aarch64-toolchain.c b/clang/test/Driver/aarch64-toolchain.c index 7f2c01d928e4..74841eec598b 100644 --- a/clang/test/Driver/aarch64-toolchain.c +++ b/clang/test/Driver/aarch64-toolchain.c @@ -1,3 +1,4 @@ +// XFAIL: target={{.*}}-fuchsia{{.*}} // UNSUPPORTED: system-windows // RUN: %clang -### %s -fuse-ld= \ diff --git a/clang/test/Driver/arm-toolchain-extra.c b/clang/test/Driver/arm-toolchain-extra.c index 114de0a8154a..67206818f211 100644 --- a/clang/test/Driver/arm-toolchain-extra.c +++ b/clang/test/Driver/arm-toolchain-extra.c @@ -3,6 +3,7 @@ // The tests here are similar to those in arm-toolchain.c, however // these tests need to create symlinks to test directory trees in order to // set up the environment and therefore shell support is required.
+// XFAIL: target={{.*}}-fuchsia{{.*}} // REQUIRES: shell // UNSUPPORTED: system-windows diff --git a/clang/test/Driver/arm-toolchain.c b/clang/test/Driver/arm-toolchain.c index 2e38461fb7a3..56a0e0de7ba7 100644 --- a/clang/test/Driver/arm-toolchain.c +++ b/clang/test/Driver/arm-toolchain.c @@ -1,3 +1,4 @@ +// XFAIL: target={{.*}}-fuchsia{{.*}} // UNSUPPORTED: system-windows // RUN: %clang -### %s -fuse-ld= \ -- cgit v1.2.3 From 8a469da8b2342dd9104faf25deeddd8ad66ca6a6 Mon Sep 17 00:00:00 2001 From: "Oleksandr \"Alex\" Zinenko" Date: Wed, 18 Jun 2025 13:32:46 +0200 Subject: [mlir] remove unnecessary atomic_rmw expansions (#144515) The expansion of `memref.atomic_rmw` into a `memref.generic_atomic_rmw` for floating-point min/max operations is no longer necessary as those are now supported by the LLVM dialect and LLVM IR. Furthermore, combining this expansion with direct lowering of `generic_atomic_rmw` could lead to invalid LLVM dialect IR with `cmpxchg` operating on floating-point values that it does not support. --- mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp | 56 +----------------------- mlir/test/Dialect/MemRef/expand-ops.mlir | 38 ++-------------- 2 files changed, 5 insertions(+), 89 deletions(-) diff --git a/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp index 020aabd9db6d..a617029ce470 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp @@ -1,16 +1,10 @@ -//===- StdExpandDivs.cpp - Code to prepare Std for lowering Divs to LLVM -===// +//===- ExpandDivs.cpp - Expansion patterns for MemRef operations ----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file Std transformations to expand Divs operation to help for the -// lowering to LLVM. Currently implemented transformations are Ceil and Floor -// for Signed Integers. -// -//===----------------------------------------------------------------------===// #include "mlir/Dialect/MemRef/Transforms/Passes.h" @@ -33,44 +27,6 @@ using namespace mlir; namespace { -/// Converts `atomic_rmw` that cannot be lowered to a simple atomic op with -/// AtomicRMWOpLowering pattern, such as minimum and maximum operations for -/// floating-point numbers, to `memref.generic_atomic_rmw` with the expanded -/// code. -/// -/// %x = atomic_rmw maximumf %fval, %F[%i] : (f32, memref<10xf32>) -> f32 -/// -/// will be lowered to -/// -/// %x = memref.generic_atomic_rmw %F[%i] : memref<10xf32> { -/// ^bb0(%current: f32): -/// %1 = arith.maximumf %current, %fval : f32 -/// memref.atomic_yield %1 : f32 -/// } -struct AtomicRMWOpConverter : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(memref::AtomicRMWOp op, - PatternRewriter &rewriter) const final { - auto loc = op.getLoc(); - auto genericOp = rewriter.create( - loc, op.getMemref(), op.getIndices()); - OpBuilder bodyBuilder = - OpBuilder::atBlockEnd(genericOp.getBody(), rewriter.getListener()); - - Value lhs = genericOp.getCurrentValue(); - Value rhs = op.getValue(); - - Value arithOp = - mlir::arith::getReductionOp(op.getKind(), bodyBuilder, loc, lhs, rhs); - bodyBuilder.create(loc, arithOp); - - rewriter.replaceOp(op, genericOp.getResult()); - return success(); - } -}; - /// Converts `memref.reshape` that has a target shape of a statically-known /// size to `memref.reinterpret_cast`. 
struct MemRefReshapeOpConverter : public OpRewritePattern { @@ -139,13 +95,6 @@ struct ExpandOpsPass : public memref::impl::ExpandOpsPassBase { ConversionTarget target(ctx); target.addLegalDialect(); - target.addDynamicallyLegalOp( - [](memref::AtomicRMWOp op) { - constexpr std::array shouldBeExpandedKinds = { - arith::AtomicRMWKind::maximumf, arith::AtomicRMWKind::minimumf, - arith::AtomicRMWKind::minnumf, arith::AtomicRMWKind::maxnumf}; - return !llvm::is_contained(shouldBeExpandedKinds, op.getKind()); - }); target.addDynamicallyLegalOp([](memref::ReshapeOp op) { return !cast(op.getShape().getType()).hasStaticShape(); }); @@ -158,6 +107,5 @@ struct ExpandOpsPass : public memref::impl::ExpandOpsPassBase { } // namespace void mlir::memref::populateExpandOpsPatterns(RewritePatternSet &patterns) { - patterns.add( - patterns.getContext()); + patterns.add(patterns.getContext()); } diff --git a/mlir/test/Dialect/MemRef/expand-ops.mlir b/mlir/test/Dialect/MemRef/expand-ops.mlir index 65932b5814a6..fc8db546d918 100644 --- a/mlir/test/Dialect/MemRef/expand-ops.mlir +++ b/mlir/test/Dialect/MemRef/expand-ops.mlir @@ -1,42 +1,10 @@ // RUN: mlir-opt -memref-expand %s -split-input-file | FileCheck %s -// CHECK-LABEL: func @atomic_rmw_to_generic -// CHECK-SAME: ([[F:%.*]]: memref<10xf32>, [[f:%.*]]: f32, [[i:%.*]]: index) -func.func @atomic_rmw_to_generic(%F: memref<10xf32>, %f: f32, %i: index) -> f32 { - %a = memref.atomic_rmw maximumf %f, %F[%i] : (f32, memref<10xf32>) -> f32 - %b = memref.atomic_rmw minimumf %f, %F[%i] : (f32, memref<10xf32>) -> f32 - %c = memref.atomic_rmw maxnumf %f, %F[%i] : (f32, memref<10xf32>) -> f32 - %d = memref.atomic_rmw minnumf %f, %F[%i] : (f32, memref<10xf32>) -> f32 - return %a : f32 -} -// CHECK: [[RESULT:%.*]] = memref.generic_atomic_rmw %arg0[%arg2] : memref<10xf32> { -// CHECK: ^bb0([[CUR_VAL:%.*]]: f32): -// CHECK: [[MAXIMUM:%.*]] = arith.maximumf [[CUR_VAL]], [[f]] : f32 -// CHECK: memref.atomic_yield [[MAXIMUM]] : f32 -// CHECK: } -// 
CHECK: memref.generic_atomic_rmw %arg0[%arg2] : memref<10xf32> { -// CHECK: ^bb0([[CUR_VAL:%.*]]: f32): -// CHECK: [[MINIMUM:%.*]] = arith.minimumf [[CUR_VAL]], [[f]] : f32 -// CHECK: memref.atomic_yield [[MINIMUM]] : f32 -// CHECK: } -// CHECK: memref.generic_atomic_rmw %arg0[%arg2] : memref<10xf32> { -// CHECK: ^bb0([[CUR_VAL:%.*]]: f32): -// CHECK: [[MAXNUM:%.*]] = arith.maxnumf [[CUR_VAL]], [[f]] : f32 -// CHECK: memref.atomic_yield [[MAXNUM]] : f32 -// CHECK: } -// CHECK: memref.generic_atomic_rmw %arg0[%arg2] : memref<10xf32> { -// CHECK: ^bb0([[CUR_VAL:%.*]]: f32): -// CHECK: [[MINNUM:%.*]] = arith.minnumf [[CUR_VAL]], [[f]] : f32 -// CHECK: memref.atomic_yield [[MINNUM]] : f32 -// CHECK: } -// CHECK: return [[RESULT]] : f32 - -// ----- - // CHECK-LABEL: func @atomic_rmw_no_conversion -func.func @atomic_rmw_no_conversion(%F: memref<10xf32>, %f: f32, %i: index) -> f32 { +func.func @atomic_rmw_no_conversion(%F: memref<10xf32>, %f: f32, %i: index) -> (f32, f32) { %x = memref.atomic_rmw addf %f, %F[%i] : (f32, memref<10xf32>) -> f32 - return %x : f32 + %y = memref.atomic_rmw maximumf %f, %F[%i] : (f32, memref<10xf32>) -> f32 + return %x, %y : f32, f32 } // CHECK-NOT: generic_atomic_rmw -- cgit v1.2.3 From d8e8ab79773f739c602c5869f80c6c5b5962c558 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Wed, 18 Jun 2025 12:58:17 +0100 Subject: [AArch64][SME] Fix restoring callee-saves from FP with hazard padding (#143371) Currently, when hazard-padding is enabled a (fixed-size) hazard slot is placed in the CS area, just after the frame record. The size of this slot is part of the "CalleeSaveBaseToFrameRecordOffset". The SVE epilogue emission code assumed this offset was always zero, and was incorrectly setting the stack pointer, resulting in all SVE registers being reloaded from incorrect offsets. ``` | prev_lr | | prev_fp | | (a.k.a.
"frame record") | |-----------------------------------| <- fp(=x29) | | |-----------------------------------| <- callee-saved base | | | callee-saved fp/simd/SVE regs | | | |-----------------------------------| <- SVE callee-save base ``` i.e. in the above diagram, the code assumed `fp == callee-saved base`. --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 37 +- llvm/test/CodeGen/AArch64/stack-hazard.ll | 1173 ++++++++++++++++++++++ 2 files changed, 1198 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 7ffe779f2408..a71668e71c23 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2578,20 +2578,33 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, DeallocateAfter, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); } else if (SVEStackSize) { - // If we have stack realignment or variable sized objects on the stack, - // restore the stack pointer from the frame pointer prior to SVE CSR - // restoration. - if (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) { - if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { - // Set SP to start of SVE callee-save area from which they can - // be reloaded. The code below will deallocate the stack space - // space by moving FP -> SP. - emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP, - StackOffset::getScalable(-CalleeSavedSize), TII, + int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize(); + // If we have stack realignment or variable-sized objects we must use the + // FP to restore SVE callee saves (as there is an unknown amount of + // data/padding between the SP and SVE CS area). + Register BaseForSVEDealloc = + (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? 
AArch64::FP + : AArch64::SP; + if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) { + Register CalleeSaveBase = AArch64::FP; + if (int64_t CalleeSaveBaseOffset = + AFI->getCalleeSaveBaseToFrameRecordOffset()) { + // If we have have an non-zero offset to the non-SVE CS base we need to + // compute the base address by subtracting the offest in a temporary + // register first (to avoid briefly deallocating the SVE CS). + CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister( + &AArch64::GPR64RegClass); + emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP, + StackOffset::getFixed(-CalleeSaveBaseOffset), TII, MachineInstr::FrameDestroy); } - } else { - if (AFI->getSVECalleeSavedStackSize()) { + // The code below will deallocate the stack space space by moving the + // SP to the start of the SVE callee-save area. + emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase, + StackOffset::getScalable(-SVECalleeSavedSize), TII, + MachineInstr::FrameDestroy); + } else if (BaseForSVEDealloc == AArch64::SP) { + if (SVECalleeSavedSize) { // Deallocate the non-SVE locals first before we can deallocate (and // restore callee saves) from the SVE area. emitFrameOffset( diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll index e169b199733b..3a3340520013 100644 --- a/llvm/test/CodeGen/AArch64/stack-hazard.ll +++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll @@ -3143,3 +3143,1176 @@ entry: call void @bar(ptr noundef nonnull %b) ret i32 0 } + + +define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" { +; CHECK0-LABEL: svecc_call_dynamic_alloca: +; CHECK0: // %bb.0: // %entry +; CHECK0-NEXT: stp x29, x30, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK0-NEXT: .cfi_def_cfa_offset 64 +; CHECK0-NEXT: cntd x9 +; CHECK0-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill +; CHECK0-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w20, -16 +; CHECK0-NEXT: .cfi_offset w26, -24 +; CHECK0-NEXT: .cfi_offset w27, -32 +; CHECK0-NEXT: .cfi_offset w28, -40 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte 
Folded Spill +; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG +; CHECK0-NEXT: mov w9, w0 +; CHECK0-NEXT: mov x8, sp +; CHECK0-NEXT: mov w2, w1 +; CHECK0-NEXT: add x9, x9, #15 +; CHECK0-NEXT: mov x19, sp +; CHECK0-NEXT: and x9, x9, #0x1fffffff0 +; CHECK0-NEXT: sub x8, x8, x9 +; CHECK0-NEXT: mov sp, x8 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: bl __arm_sme_state +; CHECK0-NEXT: and x20, x0, #0x1 +; CHECK0-NEXT: .cfi_offset vg, -48 +; CHECK0-NEXT: tbz w20, #0, .LBB35_2 +; CHECK0-NEXT: // 
%bb.1: // %entry +; CHECK0-NEXT: smstop sm +; CHECK0-NEXT: .LBB35_2: // %entry +; CHECK0-NEXT: mov x0, x8 +; CHECK0-NEXT: mov w1, #45 // =0x2d +; CHECK0-NEXT: bl memset +; CHECK0-NEXT: tbz w20, #0, .LBB35_4 +; CHECK0-NEXT: // %bb.3: // %entry +; CHECK0-NEXT: smstart sm +; CHECK0-NEXT: .LBB35_4: // %entry +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: .cfi_restore vg +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; 
CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: .cfi_restore z8 +; CHECK0-NEXT: .cfi_restore z9 +; CHECK0-NEXT: .cfi_restore z10 +; CHECK0-NEXT: .cfi_restore z11 +; CHECK0-NEXT: .cfi_restore z12 +; CHECK0-NEXT: .cfi_restore z13 +; CHECK0-NEXT: .cfi_restore z14 +; CHECK0-NEXT: .cfi_restore z15 +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: .cfi_def_cfa wsp, 64 +; CHECK0-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldr x28, [sp, #24] // 8-byte Folded Reload +; CHECK0-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: .cfi_def_cfa_offset 0 +; CHECK0-NEXT: .cfi_restore w19 +; CHECK0-NEXT: .cfi_restore w20 +; CHECK0-NEXT: .cfi_restore w26 +; CHECK0-NEXT: .cfi_restore w27 +; CHECK0-NEXT: .cfi_restore w28 +; CHECK0-NEXT: .cfi_restore w30 +; CHECK0-NEXT: .cfi_restore w29 +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_dynamic_alloca: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 128 +; CHECK64-NEXT: cntd x9 +; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill +; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -8 +; CHECK64-NEXT: .cfi_offset w20, -16 +; CHECK64-NEXT: .cfi_offset w26, -24 +; CHECK64-NEXT: .cfi_offset w27, -32 +; CHECK64-NEXT: .cfi_offset w28, -40 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 +; CHECK64-NEXT: addvl 
sp, sp, #-18 +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // 
$d8 @ cfa - 128 - 8 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG +; CHECK64-NEXT: sub sp, sp, #64 +; CHECK64-NEXT: mov w9, w0 +; CHECK64-NEXT: mov x8, sp +; CHECK64-NEXT: mov w2, w1 +; CHECK64-NEXT: add x9, x9, #15 +; CHECK64-NEXT: mov x19, sp +; CHECK64-NEXT: and x9, x9, #0x1fffffff0 +; CHECK64-NEXT: sub x8, x8, x9 +; CHECK64-NEXT: mov sp, x8 +; CHECK64-NEXT: //APP +; CHECK64-NEXT: //NO_APP +; CHECK64-NEXT: bl __arm_sme_state +; CHECK64-NEXT: and x20, x0, #0x1 +; CHECK64-NEXT: .cfi_offset vg, -48 +; CHECK64-NEXT: tbz w20, #0, .LBB35_2 +; CHECK64-NEXT: // %bb.1: // %entry +; CHECK64-NEXT: smstop sm +; CHECK64-NEXT: .LBB35_2: // %entry +; CHECK64-NEXT: mov x0, x8 +; CHECK64-NEXT: mov w1, #45 // =0x2d +; CHECK64-NEXT: bl memset +; CHECK64-NEXT: tbz w20, #0, .LBB35_4 +; CHECK64-NEXT: // %bb.3: // %entry +; CHECK64-NEXT: smstart sm +; CHECK64-NEXT: .LBB35_4: // %entry +; CHECK64-NEXT: mov w0, #22647 // =0x5877 +; CHECK64-NEXT: movk w0, #59491, lsl #16 +; CHECK64-NEXT: .cfi_restore vg +; CHECK64-NEXT: sub x8, x29, #64 +; CHECK64-NEXT: 
addvl sp, x8, #-18 +; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: .cfi_restore z8 +; CHECK64-NEXT: .cfi_restore z9 +; CHECK64-NEXT: 
.cfi_restore z10 +; CHECK64-NEXT: .cfi_restore z11 +; CHECK64-NEXT: .cfi_restore z12 +; CHECK64-NEXT: .cfi_restore z13 +; CHECK64-NEXT: .cfi_restore z14 +; CHECK64-NEXT: .cfi_restore z15 +; CHECK64-NEXT: sub sp, x29, #64 +; CHECK64-NEXT: .cfi_def_cfa wsp, 128 +; CHECK64-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK64-NEXT: ldr x28, [sp, #88] // 8-byte Folded Reload +; CHECK64-NEXT: ldp x27, x26, [sp, #96] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 0 +; CHECK64-NEXT: .cfi_restore w19 +; CHECK64-NEXT: .cfi_restore w20 +; CHECK64-NEXT: .cfi_restore w26 +; CHECK64-NEXT: .cfi_restore w27 +; CHECK64-NEXT: .cfi_restore w28 +; CHECK64-NEXT: .cfi_restore w30 +; CHECK64-NEXT: .cfi_restore w29 +; CHECK64-NEXT: ret +; +; CHECK1024-LABEL: svecc_call_dynamic_alloca: +; CHECK1024: // %bb.0: // %entry +; CHECK1024-NEXT: sub sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 1088 +; CHECK1024-NEXT: cntd x9 +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x20, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1080] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -8 +; CHECK1024-NEXT: .cfi_offset w20, -16 +; CHECK1024-NEXT: .cfi_offset w26, -24 +; CHECK1024-NEXT: .cfi_offset w27, -32 +; CHECK1024-NEXT: .cfi_offset w28, -40 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 +; CHECK1024-NEXT: addvl sp, sp, #-18 +; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte 
Folded Spill +; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 
1088 - 8 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG +; CHECK1024-NEXT: sub sp, sp, #1024 +; CHECK1024-NEXT: mov w9, w0 +; CHECK1024-NEXT: mov x8, sp +; CHECK1024-NEXT: mov w2, w1 +; CHECK1024-NEXT: add x9, x9, #15 +; CHECK1024-NEXT: mov x19, sp +; CHECK1024-NEXT: and x9, x9, #0x1fffffff0 +; CHECK1024-NEXT: sub x8, x8, x9 +; CHECK1024-NEXT: mov sp, x8 +; CHECK1024-NEXT: //APP +; CHECK1024-NEXT: //NO_APP +; CHECK1024-NEXT: bl __arm_sme_state +; CHECK1024-NEXT: and x20, x0, #0x1 +; CHECK1024-NEXT: .cfi_offset vg, -48 +; CHECK1024-NEXT: tbz w20, #0, .LBB35_2 +; CHECK1024-NEXT: // %bb.1: // %entry +; CHECK1024-NEXT: smstop sm +; CHECK1024-NEXT: .LBB35_2: // %entry +; CHECK1024-NEXT: mov x0, x8 +; CHECK1024-NEXT: mov w1, #45 // =0x2d +; CHECK1024-NEXT: bl memset +; CHECK1024-NEXT: tbz w20, #0, .LBB35_4 +; CHECK1024-NEXT: // %bb.3: // %entry +; CHECK1024-NEXT: smstart sm +; CHECK1024-NEXT: .LBB35_4: // %entry +; CHECK1024-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NEXT: 
.cfi_restore vg +; CHECK1024-NEXT: sub x8, x29, #1024 +; CHECK1024-NEXT: addvl sp, x8, #-18 +; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr 
p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: .cfi_restore z8 +; CHECK1024-NEXT: .cfi_restore z9 +; CHECK1024-NEXT: .cfi_restore z10 +; CHECK1024-NEXT: .cfi_restore z11 +; CHECK1024-NEXT: .cfi_restore z12 +; CHECK1024-NEXT: .cfi_restore z13 +; CHECK1024-NEXT: .cfi_restore z14 +; CHECK1024-NEXT: .cfi_restore z15 +; CHECK1024-NEXT: sub sp, x29, #1024 +; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088 +; CHECK1024-NEXT: ldr x19, [sp, #1080] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x20, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: add sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-NEXT: .cfi_restore w19 +; CHECK1024-NEXT: .cfi_restore w20 +; CHECK1024-NEXT: .cfi_restore w26 +; CHECK1024-NEXT: .cfi_restore w27 +; CHECK1024-NEXT: .cfi_restore w28 +; CHECK1024-NEXT: .cfi_restore w30 +; CHECK1024-NEXT: .cfi_restore w29 +; CHECK1024-NEXT: ret +entry: + %ptr = alloca i8, i32 %P1 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + %call = call ptr @memset(ptr noundef nonnull %ptr, i32 noundef 45, i32 noundef %P2) + ret i32 -396142473 +} + + +define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" { +; CHECK0-LABEL: svecc_call_realign: +; CHECK0: // %bb.0: // %entry +; CHECK0-NEXT: stp x29, x30, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK0-NEXT: .cfi_def_cfa_offset 64 +; CHECK0-NEXT: cntd x9 +; CHECK0-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK0-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w26, -16 +; CHECK0-NEXT: .cfi_offset w27, -24 +; CHECK0-NEXT: .cfi_offset w28, -32 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z14, [sp, 
#11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG +; CHECK0-NEXT: sub x9, sp, #1024 +; CHECK0-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK0-NEXT: mov w2, w1 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: bl __arm_sme_state +; CHECK0-NEXT: and x19, x0, #0x1 +; CHECK0-NEXT: .cfi_offset vg, -48 +; CHECK0-NEXT: tbz w19, #0, .LBB36_2 +; CHECK0-NEXT: // %bb.1: // %entry +; CHECK0-NEXT: smstop sm +; CHECK0-NEXT: .LBB36_2: // %entry +; CHECK0-NEXT: mov x0, sp +; CHECK0-NEXT: mov w1, #45 // =0x2d +; CHECK0-NEXT: bl memset +; 
CHECK0-NEXT: tbz w19, #0, .LBB36_4 +; CHECK0-NEXT: // %bb.3: // %entry +; CHECK0-NEXT: smstart sm +; CHECK0-NEXT: .LBB36_4: // %entry +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: .cfi_restore vg +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] 
// 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: .cfi_restore z8 +; CHECK0-NEXT: .cfi_restore z9 +; CHECK0-NEXT: .cfi_restore z10 +; CHECK0-NEXT: .cfi_restore z11 +; CHECK0-NEXT: .cfi_restore z12 +; CHECK0-NEXT: .cfi_restore z13 +; CHECK0-NEXT: .cfi_restore z14 +; CHECK0-NEXT: .cfi_restore z15 +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: .cfi_def_cfa wsp, 64 +; CHECK0-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: .cfi_def_cfa_offset 0 +; CHECK0-NEXT: .cfi_restore w19 +; CHECK0-NEXT: .cfi_restore w26 +; CHECK0-NEXT: .cfi_restore w27 +; CHECK0-NEXT: .cfi_restore w28 +; CHECK0-NEXT: .cfi_restore w30 +; CHECK0-NEXT: .cfi_restore w29 +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_realign: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 128 +; CHECK64-NEXT: cntd x9 +; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill +; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -16 +; CHECK64-NEXT: .cfi_offset w26, -24 +; CHECK64-NEXT: .cfi_offset w27, -32 +; CHECK64-NEXT: .cfi_offset w28, -40 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 +; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p11, [sp, #8, mul 
vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG +; 
CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG +; CHECK64-NEXT: sub x9, sp, #1088 +; CHECK64-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK64-NEXT: mov w2, w1 +; CHECK64-NEXT: //APP +; CHECK64-NEXT: //NO_APP +; CHECK64-NEXT: bl __arm_sme_state +; CHECK64-NEXT: and x19, x0, #0x1 +; CHECK64-NEXT: .cfi_offset vg, -48 +; CHECK64-NEXT: tbz w19, #0, .LBB36_2 +; CHECK64-NEXT: // %bb.1: // %entry +; CHECK64-NEXT: smstop sm +; CHECK64-NEXT: .LBB36_2: // %entry +; CHECK64-NEXT: mov x0, sp +; CHECK64-NEXT: mov w1, #45 // =0x2d +; CHECK64-NEXT: bl memset +; CHECK64-NEXT: tbz w19, #0, .LBB36_4 +; CHECK64-NEXT: // %bb.3: // %entry +; CHECK64-NEXT: smstart sm +; CHECK64-NEXT: .LBB36_4: // %entry +; CHECK64-NEXT: mov w0, #22647 // =0x5877 +; CHECK64-NEXT: movk w0, #59491, lsl #16 +; CHECK64-NEXT: .cfi_restore vg +; CHECK64-NEXT: sub x8, x29, #64 +; CHECK64-NEXT: addvl sp, x8, #-18 +; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte 
Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: .cfi_restore z8 +; CHECK64-NEXT: .cfi_restore z9 +; CHECK64-NEXT: .cfi_restore z10 +; CHECK64-NEXT: .cfi_restore z11 +; CHECK64-NEXT: .cfi_restore z12 +; CHECK64-NEXT: .cfi_restore z13 +; CHECK64-NEXT: .cfi_restore z14 +; CHECK64-NEXT: .cfi_restore z15 +; CHECK64-NEXT: sub sp, x29, #64 +; CHECK64-NEXT: .cfi_def_cfa wsp, 128 +; CHECK64-NEXT: ldp x26, x19, [sp, #104] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x28, x27, [sp, #88] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK64-NEXT: add 
sp, sp, #128 +; CHECK64-NEXT: .cfi_def_cfa_offset 0 +; CHECK64-NEXT: .cfi_restore w19 +; CHECK64-NEXT: .cfi_restore w26 +; CHECK64-NEXT: .cfi_restore w27 +; CHECK64-NEXT: .cfi_restore w28 +; CHECK64-NEXT: .cfi_restore w30 +; CHECK64-NEXT: .cfi_restore w29 +; CHECK64-NEXT: ret +; +; CHECK1024-LABEL: svecc_call_realign: +; CHECK1024: // %bb.0: // %entry +; CHECK1024-NEXT: sub sp, sp, #1088 +; CHECK1024-NEXT: .cfi_def_cfa_offset 1088 +; CHECK1024-NEXT: cntd x9 +; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -16 +; CHECK1024-NEXT: .cfi_offset w26, -24 +; CHECK1024-NEXT: .cfi_offset w27, -32 +; CHECK1024-NEXT: .cfi_offset w28, -40 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 +; CHECK1024-NEXT: addvl sp, sp, #-18 +; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded 
Spill +; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 
0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 1088 - 64 * VG +; CHECK1024-NEXT: sub x9, sp, #2048 +; CHECK1024-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK1024-NEXT: mov w2, w1 +; CHECK1024-NEXT: //APP +; CHECK1024-NEXT: //NO_APP +; CHECK1024-NEXT: bl __arm_sme_state +; CHECK1024-NEXT: and x19, x0, #0x1 +; CHECK1024-NEXT: .cfi_offset vg, -48 +; CHECK1024-NEXT: tbz w19, #0, .LBB36_2 +; CHECK1024-NEXT: // %bb.1: // %entry +; CHECK1024-NEXT: smstop sm +; CHECK1024-NEXT: .LBB36_2: // %entry +; CHECK1024-NEXT: mov x0, sp +; CHECK1024-NEXT: mov w1, #45 // =0x2d +; CHECK1024-NEXT: bl memset +; CHECK1024-NEXT: tbz w19, #0, .LBB36_4 +; CHECK1024-NEXT: // %bb.3: // %entry +; CHECK1024-NEXT: smstart sm +; CHECK1024-NEXT: .LBB36_4: // %entry +; CHECK1024-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NEXT: .cfi_restore vg +; CHECK1024-NEXT: sub x8, x29, #1024 +; CHECK1024-NEXT: addvl sp, x8, #-18 +; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload 
+; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: .cfi_restore z8 +; CHECK1024-NEXT: .cfi_restore z9 +; CHECK1024-NEXT: .cfi_restore z10 +; CHECK1024-NEXT: .cfi_restore z11 +; CHECK1024-NEXT: .cfi_restore z12 +; CHECK1024-NEXT: .cfi_restore z13 +; CHECK1024-NEXT: .cfi_restore z14 +; CHECK1024-NEXT: .cfi_restore z15 +; CHECK1024-NEXT: sub sp, x29, #1024 +; CHECK1024-NEXT: .cfi_def_cfa wsp, 1088 +; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: add sp, sp, #1088 +; CHECK1024-NEXT: 
.cfi_def_cfa_offset 0 +; CHECK1024-NEXT: .cfi_restore w19 +; CHECK1024-NEXT: .cfi_restore w26 +; CHECK1024-NEXT: .cfi_restore w27 +; CHECK1024-NEXT: .cfi_restore w28 +; CHECK1024-NEXT: .cfi_restore w30 +; CHECK1024-NEXT: .cfi_restore w29 +; CHECK1024-NEXT: ret +entry: + %ptr = alloca i8, i32 1000, align 32 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + %call = call ptr @memset(ptr noundef nonnull %ptr, i32 noundef 45, i32 noundef %P2) + ret i32 -396142473 +} + + +define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" { +; CHECK0-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK0: // %bb.0: // %entry +; CHECK0-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill +; CHECK0-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded 
Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: sub sp, sp, #48 +; CHECK0-NEXT: addvl sp, sp, #-1 +; CHECK0-NEXT: mov x19, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w20, -16 +; CHECK0-NEXT: .cfi_offset w26, -24 +; CHECK0-NEXT: .cfi_offset w27, -32 +; CHECK0-NEXT: .cfi_offset w28, -48 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 64 - 8 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 64 - 16 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 64 - 24 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 64 - 32 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 64 - 40 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x40, 0x22, 0x11, 
0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 64 - 48 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 64 - 56 * VG +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x40, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 64 - 64 * VG +; CHECK0-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK0-NEXT: ubfiz x8, x0, #2, #32 +; CHECK0-NEXT: mov x9, sp +; CHECK0-NEXT: add x8, x8, #15 +; CHECK0-NEXT: and x8, x8, #0x7fffffff0 +; CHECK0-NEXT: sub x20, x9, x8 +; CHECK0-NEXT: mov sp, x20 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: add x0, x19, #8 +; CHECK0-NEXT: bl bar +; CHECK0-NEXT: addvl x0, x29, #-19 +; CHECK0-NEXT: bl bar +; CHECK0-NEXT: mov x0, x20 +; CHECK0-NEXT: bl bar +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; 
CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK0-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: sub sp, sp, #128 +; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK64-NEXT: add x29, sp, #64 +; CHECK64-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill +; CHECK64-NEXT: stp x26, x20, [sp, #96] // 16-byte Folded Spill +; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill +; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded 
Spill +; CHECK64-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: sub sp, sp, #112 +; CHECK64-NEXT: addvl sp, sp, #-1 +; CHECK64-NEXT: mov x19, sp +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -16 +; CHECK64-NEXT: .cfi_offset w20, -24 +; CHECK64-NEXT: .cfi_offset w26, -32 +; CHECK64-NEXT: .cfi_offset w27, -40 +; CHECK64-NEXT: .cfi_offset w28, -48 +; CHECK64-NEXT: .cfi_offset w30, -56 +; CHECK64-NEXT: .cfi_offset w29, -64 +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 128 - 8 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 
0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 128 - 16 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 128 - 24 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 128 - 32 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 128 - 40 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 128 - 48 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 128 - 56 * VG +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0x80, 0x7f, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 128 - 64 * VG +; CHECK64-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK64-NEXT: ubfiz x8, x0, #2, #32 +; CHECK64-NEXT: mov x9, sp +; CHECK64-NEXT: add x8, x8, #15 +; CHECK64-NEXT: and x8, x8, #0x7fffffff0 +; CHECK64-NEXT: sub x20, x9, x8 +; CHECK64-NEXT: mov sp, x20 +; CHECK64-NEXT: //APP +; CHECK64-NEXT: //NO_APP +; CHECK64-NEXT: add x0, x19, #8 +; CHECK64-NEXT: bl bar +; CHECK64-NEXT: sub x0, x29, #64 +; CHECK64-NEXT: addvl x0, x0, #-19 +; CHECK64-NEXT: bl bar +; CHECK64-NEXT: mov x0, x20 +; CHECK64-NEXT: bl bar +; CHECK64-NEXT: mov w0, #22647 // =0x5877 +; CHECK64-NEXT: sub x8, x29, #64 +; CHECK64-NEXT: movk w0, #59491, lsl #16 +; CHECK64-NEXT: addvl sp, x8, #-18 +; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] 
// 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK64-NEXT: sub sp, x29, #64 +; CHECK64-NEXT: ldp x20, x19, [sp, #104] // 16-byte Folded Reload +; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload +; CHECK64-NEXT: ldp x27, x26, [sp, #88] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x30, x28, [sp, #72] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #128 +; CHECK64-NEXT: ret +; +; CHECK1024-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK1024: // %bb.0: // %entry +; CHECK1024-NEXT: sub sp, sp, #1088 +; 
CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NEXT: add x29, sp, #1024 +; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NEXT: str x27, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NEXT: str x26, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NEXT: str x20, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NEXT: addvl sp, sp, #-18 +; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z14, [sp, #11, mul vl] 
// 16-byte Folded Spill +; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NEXT: sub sp, sp, #1072 +; CHECK1024-NEXT: addvl sp, sp, #-1 +; CHECK1024-NEXT: mov x19, sp +; CHECK1024-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NEXT: .cfi_offset w19, -16 +; CHECK1024-NEXT: .cfi_offset w20, -24 +; CHECK1024-NEXT: .cfi_offset w26, -32 +; CHECK1024-NEXT: .cfi_offset w27, -40 +; CHECK1024-NEXT: .cfi_offset w28, -48 +; CHECK1024-NEXT: .cfi_offset w30, -56 +; CHECK1024-NEXT: .cfi_offset w29, -64 +; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 1088 - 8 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 1088 - 16 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 1088 - 24 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 1088 - 32 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 1088 - 40 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 1088 - 48 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 1088 - 56 * VG +; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x11, 0xc0, 0x77, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // 
$d15 @ cfa - 1088 - 64 * VG +; CHECK1024-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK1024-NEXT: ubfiz x8, x0, #2, #32 +; CHECK1024-NEXT: mov x9, sp +; CHECK1024-NEXT: add x8, x8, #15 +; CHECK1024-NEXT: and x8, x8, #0x7fffffff0 +; CHECK1024-NEXT: sub x20, x9, x8 +; CHECK1024-NEXT: mov sp, x20 +; CHECK1024-NEXT: //APP +; CHECK1024-NEXT: //NO_APP +; CHECK1024-NEXT: add x0, x19, #8 +; CHECK1024-NEXT: bl bar +; CHECK1024-NEXT: sub x0, x29, #1024 +; CHECK1024-NEXT: addvl x0, x0, #-19 +; CHECK1024-NEXT: bl bar +; CHECK1024-NEXT: mov x0, x20 +; CHECK1024-NEXT: bl bar +; CHECK1024-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NEXT: sub x8, x29, #1024 +; CHECK1024-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NEXT: addvl sp, x8, #-18 +; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded 
Reload +; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NEXT: sub sp, x29, #1024 +; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x20, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x26, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x27, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NEXT: add sp, sp, #1088 +; CHECK1024-NEXT: ret +entry: + %a = alloca i32, i32 10 + %b = alloca + %c = alloca i32, i32 %P1, align 4 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + call void @bar(ptr noundef nonnull %a) + call void @bar(ptr noundef nonnull %b) + call void @bar(ptr noundef nonnull %c) + ret i32 -396142473 +} -- cgit v1.2.3 From 34a48941498d95ec2682f7adaeb6115b7b4d70ba Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 18 Jun 2025 13:06:49 +0100 Subject: [X86] detectZextAbsDiff - use SDPatternMatch::m_Abs() matcher. NFC. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1ca5fc5376f0..4751361c71f2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -46009,11 +46009,9 @@ static bool detectZextAbsDiff(SDValue Abs, SDValue &Op0, SDValue &Op1) { // Check if the operands of the sub are zero-extended from vectors of i8. EVT SrcVT0, SrcVT1; - return sd_match( - Abs, - m_UnaryOp(ISD::ABS, - m_Sub(m_AllOf(m_Value(Op0), m_ZExt(m_VT(SrcVT0))), - m_AllOf(m_Value(Op1), m_ZExt(m_VT(SrcVT1)))))) && + return sd_match(Abs, + m_Abs(m_Sub(m_AllOf(m_Value(Op0), m_ZExt(m_VT(SrcVT0))), + m_AllOf(m_Value(Op1), m_ZExt(m_VT(SrcVT1)))))) && SrcVT0.getVectorElementType() == MVT::i8 && SrcVT1.getVectorElementType() == MVT::i8; } -- cgit v1.2.3 From 7c15edb306932e41c159f3d69c161ed0d89d47b7 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Wed, 18 Jun 2025 14:37:29 +0200 Subject: =?UTF-8?q?Reapply=20"[clang][bytecode]=20Allocate=20IntegralAP=20?= =?UTF-8?q?and=20Floating=20types=20usi=E2=80=A6=20(#144676)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ng an allocator (#144246)" This reverts commit 57828fec760f086b334ce0cb1c465fc559dcaea4. 
--- clang/lib/AST/ByteCode/Compiler.cpp | 112 ++++--- clang/lib/AST/ByteCode/Compiler.h | 1 + clang/lib/AST/ByteCode/Descriptor.cpp | 2 +- clang/lib/AST/ByteCode/Disasm.cpp | 60 ++-- clang/lib/AST/ByteCode/Floating.h | 252 ++++++++++----- clang/lib/AST/ByteCode/Integral.h | 3 + clang/lib/AST/ByteCode/IntegralAP.h | 234 ++++++++------ clang/lib/AST/ByteCode/Interp.cpp | 106 ++++++- clang/lib/AST/ByteCode/Interp.h | 337 +++++++++++++++++---- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 55 +++- clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp | 4 +- clang/lib/AST/ByteCode/InterpState.h | 30 ++ clang/lib/AST/ByteCode/Opcodes.td | 14 +- clang/lib/AST/ByteCode/PrimType.h | 17 ++ clang/lib/AST/ByteCode/Program.h | 24 +- .../AST/ByteCode/builtin-bit-cast-long-double.cpp | 10 +- clang/test/AST/ByteCode/builtin-functions.cpp | 12 +- 17 files changed, 930 insertions(+), 343 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 9fe4803ce98e..3f884ed8d094 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -748,7 +748,8 @@ bool Compiler::VisitFloatingLiteral(const FloatingLiteral *E) { if (DiscardResult) return true; - return this->emitConstFloat(E->getValue(), E); + APFloat F = E->getValue(); + return this->emitFloat(F, E); } template @@ -4185,8 +4186,10 @@ bool Compiler::visitZeroInitializer(PrimType T, QualType QT, nullptr, E); case PT_MemberPtr: return this->emitNullMemberPtr(0, nullptr, E); - case PT_Float: - return this->emitConstFloat(APFloat::getZero(Ctx.getFloatSemantics(QT)), E); + case PT_Float: { + APFloat F = APFloat::getZero(Ctx.getFloatSemantics(QT)); + return this->emitFloat(F, E); + } case PT_FixedPoint: { auto Sem = Ctx.getASTContext().getFixedPointSemantics(E->getType()); return this->emitConstFixedPoint(FixedPoint::zero(Sem), E); @@ -4674,10 +4677,7 @@ VarCreationState Compiler::visitVarDecl(const VarDecl *VD, if (!visitInitializer(Init)) return false; - if 
(!this->emitFinishInit(Init)) - return false; - - return this->emitPopPtr(Init); + return this->emitFinishInitGlobal(Init); }; DeclScope LocalScope(this, VD); @@ -4698,51 +4698,45 @@ VarCreationState Compiler::visitVarDecl(const VarDecl *VD, return false; return !Init || (checkDecl() && initGlobal(*GlobalIndex)); - } else { - InitLinkScope ILS(this, InitLink::Decl(VD)); - - if (VarT) { - unsigned Offset = this->allocateLocalPrimitive( - VD, *VarT, VD->getType().isConstQualified(), nullptr, - ScopeKind::Block, IsConstexprUnknown); - if (Init) { - // If this is a toplevel declaration, create a scope for the - // initializer. - if (Toplevel) { - LocalScope Scope(this); - if (!this->visit(Init)) - return false; - return this->emitSetLocal(*VarT, Offset, VD) && Scope.destroyLocals(); - } else { - if (!this->visit(Init)) - return false; - return this->emitSetLocal(*VarT, Offset, VD); - } - } - } else { - if (std::optional Offset = - this->allocateLocal(VD, VD->getType(), nullptr, ScopeKind::Block, - IsConstexprUnknown)) { - if (!Init) - return true; + } + // Local variables. + InitLinkScope ILS(this, InitLink::Decl(VD)); - if (!this->emitGetPtrLocal(*Offset, Init)) + if (VarT) { + unsigned Offset = this->allocateLocalPrimitive( + VD, *VarT, VD->getType().isConstQualified(), nullptr, ScopeKind::Block, + IsConstexprUnknown); + if (Init) { + // If this is a toplevel declaration, create a scope for the + // initializer. 
+ if (Toplevel) { + LocalScope Scope(this); + if (!this->visit(Init)) return false; - - if (!visitInitializer(Init)) + return this->emitSetLocal(*VarT, Offset, VD) && Scope.destroyLocals(); + } else { + if (!this->visit(Init)) return false; + return this->emitSetLocal(*VarT, Offset, VD); + } + } + } else { + if (std::optional Offset = this->allocateLocal( + VD, VD->getType(), nullptr, ScopeKind::Block, IsConstexprUnknown)) { + if (!Init) + return true; - if (!this->emitFinishInit(Init)) - return false; + if (!this->emitGetPtrLocal(*Offset, Init)) + return false; - return this->emitPopPtr(Init); - } - return false; + if (!visitInitializer(Init)) + return false; + + return this->emitFinishInitPop(Init); } - return true; + return false; } - - return false; + return true; } template @@ -4751,8 +4745,10 @@ bool Compiler::visitAPValue(const APValue &Val, PrimType ValType, assert(!DiscardResult); if (Val.isInt()) return this->emitConst(Val.getInt(), ValType, E); - else if (Val.isFloat()) - return this->emitConstFloat(Val.getFloat(), E); + else if (Val.isFloat()) { + APFloat F = Val.getFloat(); + return this->emitFloat(F, E); + } if (Val.isLValue()) { if (Val.isNullPointer()) @@ -6133,8 +6129,10 @@ bool Compiler::VisitUnaryOperator(const UnaryOperator *E) { const auto &TargetSemantics = Ctx.getFloatSemantics(E->getType()); if (!this->emitLoadFloat(E)) return false; - if (!this->emitConstFloat(llvm::APFloat(TargetSemantics, 1), E)) + APFloat F(TargetSemantics, 1); + if (!this->emitFloat(F, E)) return false; + if (!this->emitAddf(getFPOptions(E), E)) return false; if (!this->emitStoreFloat(E)) @@ -6176,8 +6174,10 @@ bool Compiler::VisitUnaryOperator(const UnaryOperator *E) { const auto &TargetSemantics = Ctx.getFloatSemantics(E->getType()); if (!this->emitLoadFloat(E)) return false; - if (!this->emitConstFloat(llvm::APFloat(TargetSemantics, 1), E)) + APFloat F(TargetSemantics, 1); + if (!this->emitFloat(F, E)) return false; + if (!this->emitSubf(getFPOptions(E), E)) return 
false; if (!this->emitStoreFloat(E)) @@ -6953,6 +6953,20 @@ bool Compiler::emitDummyPtr(const DeclTy &D, const Expr *E) { return true; } +template +bool Compiler::emitFloat(const APFloat &F, const Expr *E) { + assert(!DiscardResult && "Should've been checked before"); + + if (Floating::singleWord(F.getSemantics())) + return this->emitConstFloat(Floating(F), E); + + APInt I = F.bitcastToAPInt(); + return this->emitConstFloat( + Floating(const_cast(I.getRawData()), + llvm::APFloatBase::SemanticsToEnum(F.getSemantics())), + E); +} + // This function is constexpr if and only if To, From, and the types of // all subobjects of To and From are types T such that... // (3.1) - is_union_v is false; diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index ac3ad84766dc..a1d068cc7e0a 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -391,6 +391,7 @@ private: bool emitRecordDestruction(const Record *R, SourceInfo Loc); bool emitDestruction(const Descriptor *Desc, SourceInfo Loc); bool emitDummyPtr(const DeclTy &D, const Expr *E); + bool emitFloat(const APFloat &F, const Expr *E); unsigned collectBaseOffset(const QualType BaseType, const QualType DerivedType); bool emitLambdaStaticInvokerBody(const CXXMethodDecl *MD); diff --git a/clang/lib/AST/ByteCode/Descriptor.cpp b/clang/lib/AST/ByteCode/Descriptor.cpp index 5531295dfa2f..46e4d0d940b3 100644 --- a/clang/lib/AST/ByteCode/Descriptor.cpp +++ b/clang/lib/AST/ByteCode/Descriptor.cpp @@ -368,7 +368,7 @@ Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD, bool IsTemporary, bool IsConst, UnknownSize) : Source(D), ElemSize(primSize(Type)), Size(UnknownSizeMark), MDSize(MD.value_or(0)), - AllocSize(MDSize + sizeof(InitMapPtr) + alignof(void *)), + AllocSize(MDSize + sizeof(InitMapPtr) + alignof(void *)), PrimT(Type), IsConst(IsConst), IsMutable(false), IsTemporary(IsTemporary), IsArray(true), CtorFn(getCtorArrayPrim(Type)), 
DtorFn(getDtorArrayPrim(Type)), MoveFn(getMoveArrayPrim(Type)) { diff --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp index 846dc2fe92a7..7c6b78386b14 100644 --- a/clang/lib/AST/ByteCode/Disasm.cpp +++ b/clang/lib/AST/ByteCode/Disasm.cpp @@ -50,34 +50,56 @@ inline static std::string printArg(Program &P, CodePtr &OpPC) { } template <> inline std::string printArg(Program &P, CodePtr &OpPC) { - auto F = Floating::deserialize(*OpPC); - OpPC += align(F.bytesToSerialize()); + auto Sem = Floating::deserializeSemantics(*OpPC); - std::string Result; - llvm::raw_string_ostream SS(Result); - SS << F; - return Result; + unsigned BitWidth = llvm::APFloatBase::semanticsSizeInBits( + llvm::APFloatBase::EnumToSemantics(Sem)); + auto Memory = + std::make_unique(llvm::APInt::getNumWords(BitWidth)); + Floating Result(Memory.get(), Sem); + Floating::deserialize(*OpPC, &Result); + + OpPC += align(Result.bytesToSerialize()); + + std::string S; + llvm::raw_string_ostream SS(S); + SS << Result; + return S; } template <> inline std::string printArg>(Program &P, CodePtr &OpPC) { - auto F = IntegralAP::deserialize(*OpPC); - OpPC += align(F.bytesToSerialize()); - - std::string Result; - llvm::raw_string_ostream SS(Result); - SS << F; - return Result; + using T = IntegralAP; + unsigned BitWidth = T::deserializeSize(*OpPC); + auto Memory = + std::make_unique(llvm::APInt::getNumWords(BitWidth)); + + T Result(Memory.get(), BitWidth); + T::deserialize(*OpPC, &Result); + + OpPC += Result.bytesToSerialize(); + std::string Str; + llvm::raw_string_ostream SS(Str); + SS << Result; + return Str; } + template <> inline std::string printArg>(Program &P, CodePtr &OpPC) { - auto F = IntegralAP::deserialize(*OpPC); - OpPC += align(F.bytesToSerialize()); + using T = IntegralAP; + unsigned BitWidth = T::deserializeSize(*OpPC); + auto Memory = + std::make_unique(llvm::APInt::getNumWords(BitWidth)); - std::string Result; - llvm::raw_string_ostream SS(Result); - SS << F; - return 
Result; + T Result(Memory.get(), BitWidth); + T::deserialize(*OpPC, &Result); + + std::string Str; + llvm::raw_string_ostream SS(Str); + SS << Result; + + OpPC += Result.bytesToSerialize(); + return Str; } template <> inline std::string printArg(Program &P, CodePtr &OpPC) { diff --git a/clang/lib/AST/ByteCode/Floating.h b/clang/lib/AST/ByteCode/Floating.h index 3750568fc23c..659892e720ab 100644 --- a/clang/lib/AST/ByteCode/Floating.h +++ b/clang/lib/AST/ByteCode/Floating.h @@ -17,63 +17,79 @@ #include "clang/AST/APValue.h" #include "llvm/ADT/APFloat.h" +// XXX This is just a debugging help. Setting this to 1 will heap-allocate ALL +// floating values. +#define ALLOCATE_ALL 0 + namespace clang { namespace interp { using APFloat = llvm::APFloat; using APSInt = llvm::APSInt; +using APInt = llvm::APInt; +/// If a Floating is constructed from Memory, it DOES NOT OWN THAT MEMORY. +/// It will NOT copy the memory (unless, of course, copy() is called) and it +/// won't alllocate anything. The allocation should happen via InterpState or +/// Program. class Floating final { private: - // The underlying value storage. - APFloat F; + union { + uint64_t Val = 0; + uint64_t *Memory; + }; + llvm::APFloatBase::Semantics Semantics; + + APFloat getValue() const { + unsigned BitWidth = bitWidth(); + if (singleWord()) + return APFloat(getSemantics(), APInt(BitWidth, Val)); + unsigned NumWords = numWords(); + return APFloat(getSemantics(), APInt(BitWidth, NumWords, Memory)); + } public: - /// Zero-initializes a Floating. - Floating() : F(0.0f) {} - Floating(const APFloat &F) : F(F) {} + Floating() = default; + Floating(llvm::APFloatBase::Semantics Semantics) + : Val(0), Semantics(Semantics) {} + Floating(const APFloat &F) { - // Static constructors for special floating point values. 
- static Floating getInf(const llvm::fltSemantics &Sem) { - return Floating(APFloat::getInf(Sem)); + Semantics = llvm::APFloatBase::SemanticsToEnum(F.getSemantics()); + this->copy(F); } - const APFloat &getAPFloat() const { return F; } + Floating(uint64_t *Memory, llvm::APFloatBase::Semantics Semantics) + : Memory(Memory), Semantics(Semantics) {} + + APFloat getAPFloat() const { return getValue(); } - bool operator<(Floating RHS) const { return F < RHS.F; } - bool operator>(Floating RHS) const { return F > RHS.F; } - bool operator<=(Floating RHS) const { return F <= RHS.F; } - bool operator>=(Floating RHS) const { return F >= RHS.F; } - bool operator==(Floating RHS) const { return F == RHS.F; } - bool operator!=(Floating RHS) const { return F != RHS.F; } - Floating operator-() const { return Floating(-F); } + bool operator<(Floating RHS) const { return getValue() < RHS.getValue(); } + bool operator>(Floating RHS) const { return getValue() > RHS.getValue(); } + bool operator<=(Floating RHS) const { return getValue() <= RHS.getValue(); } + bool operator>=(Floating RHS) const { return getValue() >= RHS.getValue(); } APFloat::opStatus convertToInteger(APSInt &Result) const { bool IsExact; - return F.convertToInteger(Result, llvm::APFloat::rmTowardZero, &IsExact); + return getValue().convertToInteger(Result, llvm::APFloat::rmTowardZero, + &IsExact); } - Floating toSemantics(const llvm::fltSemantics *Sem, - llvm::RoundingMode RM) const { - APFloat Copy = F; + void toSemantics(const llvm::fltSemantics *Sem, llvm::RoundingMode RM, + Floating *Result) const { + APFloat Copy = getValue(); bool LosesInfo; Copy.convert(*Sem, RM, &LosesInfo); (void)LosesInfo; - return Floating(Copy); - } - - /// Convert this Floating to one with the same semantics as \Other. 
- Floating toSemantics(const Floating &Other, llvm::RoundingMode RM) const { - return toSemantics(&Other.F.getSemantics(), RM); + Result->copy(Copy); } APSInt toAPSInt(unsigned NumBits = 0) const { - return APSInt(F.bitcastToAPInt()); + return APSInt(getValue().bitcastToAPInt()); } - APValue toAPValue(const ASTContext &) const { return APValue(F); } + APValue toAPValue(const ASTContext &) const { return APValue(getValue()); } void print(llvm::raw_ostream &OS) const { // Can't use APFloat::print() since it appends a newline. SmallVector Buffer; - F.toString(Buffer); + getValue().toString(Buffer); OS << Buffer; } std::string toDiagnosticString(const ASTContext &Ctx) const { @@ -83,25 +99,62 @@ public: return NameStr; } - unsigned bitWidth() const { return F.semanticsSizeInBits(F.getSemantics()); } + unsigned bitWidth() const { + return llvm::APFloatBase::semanticsSizeInBits(getSemantics()); + } + unsigned numWords() const { return llvm::APInt::getNumWords(bitWidth()); } + bool singleWord() const { +#if ALLOCATE_ALL + return false; +#endif + return numWords() == 1; + } + static bool singleWord(const llvm::fltSemantics &Sem) { +#if ALLOCATE_ALL + return false; +#endif + return APInt::getNumWords(llvm::APFloatBase::getSizeInBits(Sem)) == 1; + } + const llvm::fltSemantics &getSemantics() const { + return llvm::APFloatBase::EnumToSemantics(Semantics); + } + + void copy(const APFloat &F) { + if (singleWord()) { + Val = F.bitcastToAPInt().getZExtValue(); + } else { + assert(Memory); + std::memcpy(Memory, F.bitcastToAPInt().getRawData(), + numWords() * sizeof(uint64_t)); + } + } + + void take(uint64_t *NewMemory) { + if (singleWord()) + return; + + if (Memory) + std::memcpy(NewMemory, Memory, numWords() * sizeof(uint64_t)); + Memory = NewMemory; + } bool isSigned() const { return true; } - bool isNegative() const { return F.isNegative(); } - bool isZero() const { return F.isZero(); } - bool isNonZero() const { return F.isNonZero(); } - bool isMin() const { return 
F.isSmallest(); } - bool isMinusOne() const { return F.isExactlyValue(-1.0); } - bool isNan() const { return F.isNaN(); } - bool isSignaling() const { return F.isSignaling(); } - bool isInf() const { return F.isInfinity(); } - bool isFinite() const { return F.isFinite(); } - bool isNormal() const { return F.isNormal(); } - bool isDenormal() const { return F.isDenormal(); } - llvm::FPClassTest classify() const { return F.classify(); } - APFloat::fltCategory getCategory() const { return F.getCategory(); } + bool isNegative() const { return getValue().isNegative(); } + bool isZero() const { return getValue().isZero(); } + bool isNonZero() const { return getValue().isNonZero(); } + bool isMin() const { return getValue().isSmallest(); } + bool isMinusOne() const { return getValue().isExactlyValue(-1.0); } + bool isNan() const { return getValue().isNaN(); } + bool isSignaling() const { return getValue().isSignaling(); } + bool isInf() const { return getValue().isInfinity(); } + bool isFinite() const { return getValue().isFinite(); } + bool isNormal() const { return getValue().isNormal(); } + bool isDenormal() const { return getValue().isDenormal(); } + llvm::FPClassTest classify() const { return getValue().classify(); } + APFloat::fltCategory getCategory() const { return getValue().getCategory(); } ComparisonCategoryResult compare(const Floating &RHS) const { - llvm::APFloatBase::cmpResult CmpRes = F.compare(RHS.F); + llvm::APFloatBase::cmpResult CmpRes = getValue().compare(RHS.getValue()); switch (CmpRes) { case llvm::APFloatBase::cmpLessThan: return ComparisonCategoryResult::Less; @@ -118,97 +171,130 @@ public: static APFloat::opStatus fromIntegral(APSInt Val, const llvm::fltSemantics &Sem, llvm::RoundingMode RM, - Floating &Result) { + Floating *Result) { APFloat F = APFloat(Sem); APFloat::opStatus Status = F.convertFromAPInt(Val, Val.isSigned(), RM); - Result = Floating(F); + Result->copy(F); return Status; } - static Floating bitcastFromMemory(const std::byte *Buff, 
- const llvm::fltSemantics &Sem) { + static void bitcastFromMemory(const std::byte *Buff, + const llvm::fltSemantics &Sem, + Floating *Result) { size_t Size = APFloat::semanticsSizeInBits(Sem); llvm::APInt API(Size, true); llvm::LoadIntFromMemory(API, (const uint8_t *)Buff, Size / 8); - - return Floating(APFloat(Sem, API)); + Result->copy(APFloat(Sem, API)); } void bitcastToMemory(std::byte *Buff) const { - llvm::APInt API = F.bitcastToAPInt(); + llvm::APInt API = getValue().bitcastToAPInt(); llvm::StoreIntToMemory(API, (uint8_t *)Buff, bitWidth() / 8); } // === Serialization support === size_t bytesToSerialize() const { - return sizeof(llvm::fltSemantics *) + - (APFloat::semanticsSizeInBits(F.getSemantics()) / 8); + return sizeof(Semantics) + (numWords() * sizeof(uint64_t)); } void serialize(std::byte *Buff) const { - // Semantics followed by an APInt. - *reinterpret_cast(Buff) = &F.getSemantics(); - - llvm::APInt API = F.bitcastToAPInt(); - llvm::StoreIntToMemory(API, (uint8_t *)(Buff + sizeof(void *)), - bitWidth() / 8); + std::memcpy(Buff, &Semantics, sizeof(Semantics)); + if (singleWord()) { + std::memcpy(Buff + sizeof(Semantics), &Val, sizeof(uint64_t)); + } else { + std::memcpy(Buff + sizeof(Semantics), Memory, + numWords() * sizeof(uint64_t)); + } } - static Floating deserialize(const std::byte *Buff) { - const llvm::fltSemantics *Sem; - std::memcpy((void *)&Sem, Buff, sizeof(void *)); - return bitcastFromMemory(Buff + sizeof(void *), *Sem); + static llvm::APFloatBase::Semantics + deserializeSemantics(const std::byte *Buff) { + return *reinterpret_cast(Buff); } - static Floating abs(const Floating &F) { - APFloat V = F.F; - if (V.isNegative()) - V.changeSign(); - return Floating(V); + static void deserialize(const std::byte *Buff, Floating *Result) { + llvm::APFloatBase::Semantics Semantics; + std::memcpy(&Semantics, Buff, sizeof(Semantics)); + + unsigned BitWidth = llvm::APFloat::semanticsSizeInBits( + llvm::APFloatBase::EnumToSemantics(Semantics)); + 
unsigned NumWords = llvm::APInt::getNumWords(BitWidth); + + Result->Semantics = Semantics; + if (NumWords == 1 && !ALLOCATE_ALL) { + std::memcpy(&Result->Val, Buff + sizeof(Semantics), sizeof(uint64_t)); + } else { + assert(Result->Memory); + std::memcpy(Result->Memory, Buff + sizeof(Semantics), + NumWords * sizeof(uint64_t)); + } } // ------- static APFloat::opStatus add(const Floating &A, const Floating &B, llvm::RoundingMode RM, Floating *R) { - *R = Floating(A.F); - return R->F.add(B.F, RM); + APFloat LHS = A.getValue(); + APFloat RHS = B.getValue(); + + auto Status = LHS.add(RHS, RM); + R->copy(LHS); + return Status; } static APFloat::opStatus increment(const Floating &A, llvm::RoundingMode RM, Floating *R) { - APFloat One(A.F.getSemantics(), 1); - *R = Floating(A.F); - return R->F.add(One, RM); + APFloat One(A.getSemantics(), 1); + APFloat LHS = A.getValue(); + + auto Status = LHS.add(One, RM); + R->copy(LHS); + return Status; } static APFloat::opStatus sub(const Floating &A, const Floating &B, llvm::RoundingMode RM, Floating *R) { - *R = Floating(A.F); - return R->F.subtract(B.F, RM); + APFloat LHS = A.getValue(); + APFloat RHS = B.getValue(); + + auto Status = LHS.subtract(RHS, RM); + R->copy(LHS); + return Status; } static APFloat::opStatus decrement(const Floating &A, llvm::RoundingMode RM, Floating *R) { - APFloat One(A.F.getSemantics(), 1); - *R = Floating(A.F); - return R->F.subtract(One, RM); + APFloat One(A.getSemantics(), 1); + APFloat LHS = A.getValue(); + + auto Status = LHS.subtract(One, RM); + R->copy(LHS); + return Status; } static APFloat::opStatus mul(const Floating &A, const Floating &B, llvm::RoundingMode RM, Floating *R) { - *R = Floating(A.F); - return R->F.multiply(B.F, RM); + + APFloat LHS = A.getValue(); + APFloat RHS = B.getValue(); + + auto Status = LHS.multiply(RHS, RM); + R->copy(LHS); + return Status; } static APFloat::opStatus div(const Floating &A, const Floating &B, llvm::RoundingMode RM, Floating *R) { - *R = Floating(A.F); - 
return R->F.divide(B.F, RM); + APFloat LHS = A.getValue(); + APFloat RHS = B.getValue(); + + auto Status = LHS.divide(RHS, RM); + R->copy(LHS); + return Status; } static bool neg(const Floating &A, Floating *R) { - *R = -A; + R->copy(-A.getValue()); return false; } }; diff --git a/clang/lib/AST/ByteCode/Integral.h b/clang/lib/AST/ByteCode/Integral.h index 13fdb5369f2b..af5cd2d13ecc 100644 --- a/clang/lib/AST/ByteCode/Integral.h +++ b/clang/lib/AST/ByteCode/Integral.h @@ -99,6 +99,9 @@ public: bool operator>=(Integral RHS) const { return V >= RHS.V; } bool operator==(Integral RHS) const { return V == RHS.V; } bool operator!=(Integral RHS) const { return V != RHS.V; } + bool operator>=(unsigned RHS) const { + return static_cast(V) >= RHS; + } bool operator>(unsigned RHS) const { return V >= 0 && static_cast(V) > RHS; diff --git a/clang/lib/AST/ByteCode/IntegralAP.h b/clang/lib/AST/ByteCode/IntegralAP.h index 8ee08dfb5cfe..61cbd14ad174 100644 --- a/clang/lib/AST/ByteCode/IntegralAP.h +++ b/clang/lib/AST/ByteCode/IntegralAP.h @@ -28,12 +28,19 @@ namespace interp { using APInt = llvm::APInt; using APSInt = llvm::APSInt; -template class Integral; +/// If an IntegralAP is constructed from Memory, it DOES NOT OWN THAT MEMORY. +/// It will NOT copy the memory (unless, of course, copy() is called) and it +/// won't alllocate anything. The allocation should happen via InterpState or +/// Program. 
template class IntegralAP final { -private: +public: + union { + uint64_t *Memory = nullptr; + uint64_t Val; + }; + uint32_t BitWidth = 0; friend IntegralAP; - APInt V; template static T truncateCast(const APInt &V) { @@ -52,106 +59,133 @@ private: : V.trunc(BitSize).getZExtValue(); } + APInt getValue() const { + if (singleWord()) + return APInt(BitWidth, Val, Signed); + unsigned NumWords = llvm::APInt::getNumWords(BitWidth); + return llvm::APInt(BitWidth, NumWords, Memory); + } + public: using AsUnsigned = IntegralAP; - template - IntegralAP(T Value, unsigned BitWidth) - : V(APInt(BitWidth, static_cast(Value), Signed)) {} + void take(uint64_t *NewMemory) { + assert(!singleWord()); + std::memcpy(NewMemory, Memory, numWords() * sizeof(uint64_t)); + Memory = NewMemory; + } - IntegralAP(APInt V) : V(V) {} - /// Arbitrary value for uninitialized variables. - IntegralAP() : IntegralAP(Signed ? -1 : 7, 3) {} + void copy(const APInt &V) { + assert(BitWidth == V.getBitWidth()); + assert(numWords() == V.getNumWords()); - IntegralAP operator-() const { return IntegralAP(-V); } + if (V.isSingleWord()) { + if constexpr (Signed) + Val = V.getSExtValue(); + else + Val = V.getZExtValue(); + return; + } + assert(Memory); + std::memcpy(Memory, V.getRawData(), V.getNumWords() * sizeof(uint64_t)); + } + + // Constructors. + IntegralAP() = default; + IntegralAP(unsigned BitWidth) : BitWidth(BitWidth) {} + IntegralAP(uint64_t *Memory, unsigned BitWidth) + : Memory(Memory), BitWidth(BitWidth) {} + IntegralAP(const APInt &V) : BitWidth(V.getBitWidth()) { + if (V.isSingleWord()) { + Val = Signed ? 
V.getSExtValue() : V.getZExtValue(); + } else { + Memory = const_cast(V.getRawData()); + } + } + + IntegralAP operator-() const { return IntegralAP(-getValue()); } IntegralAP operator-(const IntegralAP &Other) const { - return IntegralAP(V - Other.V); + return IntegralAP(getValue() - Other.getValue()); } bool operator>(const IntegralAP &RHS) const { if constexpr (Signed) - return V.ugt(RHS.V); - return V.sgt(RHS.V); + return getValue().sgt(RHS.getValue()); + return getValue().ugt(RHS.getValue()); } - bool operator>=(IntegralAP RHS) const { + bool operator>=(unsigned RHS) const { if constexpr (Signed) - return V.uge(RHS.V); - return V.sge(RHS.V); + return getValue().sge(RHS); + return getValue().uge(RHS); } bool operator<(IntegralAP RHS) const { if constexpr (Signed) - return V.slt(RHS.V); - return V.slt(RHS.V); - } - bool operator<=(IntegralAP RHS) const { - if constexpr (Signed) - return V.ult(RHS.V); - return V.ult(RHS.V); + return getValue().slt(RHS.getValue()); + return getValue().ult(RHS.getValue()); } template >> explicit operator Ty() const { - return truncateCast(V); + return truncateCast(getValue()); } template static IntegralAP from(T Value, unsigned NumBits = 0) { + if (NumBits == 0) + NumBits = sizeof(T) * 8; assert(NumBits > 0); APInt Copy = APInt(NumBits, static_cast(Value), Signed); - + assert(false); return IntegralAP(Copy); } + static IntegralAP from(const APInt &Value) { + return IntegralAP(Value); + } + template static IntegralAP from(IntegralAP V, unsigned NumBits = 0) { if (NumBits == 0) NumBits = V.bitWidth(); if constexpr (InputSigned) - return IntegralAP(V.V.sextOrTrunc(NumBits)); - return IntegralAP(V.V.zextOrTrunc(NumBits)); - } - - template - static IntegralAP from(Integral I, unsigned BitWidth) { - return IntegralAP(I.toAPInt(BitWidth)); - } - - static IntegralAP zero(int32_t BitWidth) { - APInt V = APInt(BitWidth, 0LL, Signed); - return IntegralAP(V); + return IntegralAP(V.getValue().sextOrTrunc(NumBits)); + return 
IntegralAP(V.getValue().zextOrTrunc(NumBits)); } - constexpr unsigned bitWidth() const { return V.getBitWidth(); } + constexpr unsigned bitWidth() const { return BitWidth; } + constexpr unsigned numWords() const { return APInt::getNumWords(BitWidth); } + constexpr bool singleWord() const { return numWords() == 1; } APSInt toAPSInt(unsigned Bits = 0) const { if (Bits == 0) Bits = bitWidth(); + APInt V = getValue(); if constexpr (Signed) - return APSInt(V.sext(Bits), !Signed); + return APSInt(getValue().sext(Bits), !Signed); else - return APSInt(V.zext(Bits), !Signed); + return APSInt(getValue().zext(Bits), !Signed); } APValue toAPValue(const ASTContext &) const { return APValue(toAPSInt()); } - bool isZero() const { return V.isZero(); } + bool isZero() const { return getValue().isZero(); } bool isPositive() const { if constexpr (Signed) - return V.isNonNegative(); + return getValue().isNonNegative(); return true; } bool isNegative() const { if constexpr (Signed) - return !V.isNonNegative(); + return !getValue().isNonNegative(); return false; } - bool isMin() const { return V.isMinValue(); } - bool isMax() const { return V.isMaxValue(); } + bool isMin() const { return getValue().isMinValue(); } + bool isMax() const { return getValue().isMaxValue(); } static constexpr bool isSigned() { return Signed; } - bool isMinusOne() const { return Signed && V == -1; } + bool isMinusOne() const { return Signed && getValue().isAllOnes(); } - unsigned countLeadingZeros() const { return V.countl_zero(); } + unsigned countLeadingZeros() const { return getValue().countl_zero(); } - void print(llvm::raw_ostream &OS) const { V.print(OS, Signed);} + void print(llvm::raw_ostream &OS) const { getValue().print(OS, Signed); } std::string toDiagnosticString(const ASTContext &Ctx) const { std::string NameStr; llvm::raw_string_ostream OS(NameStr); @@ -161,53 +195,57 @@ public: IntegralAP truncate(unsigned BitWidth) const { if constexpr (Signed) - return 
IntegralAP(V.trunc(BitWidth).sextOrTrunc(this->bitWidth())); + return IntegralAP( + getValue().trunc(BitWidth).sextOrTrunc(this->bitWidth())); else - return IntegralAP(V.trunc(BitWidth).zextOrTrunc(this->bitWidth())); + return IntegralAP( + getValue().trunc(BitWidth).zextOrTrunc(this->bitWidth())); } IntegralAP toUnsigned() const { - APInt Copy = V; - return IntegralAP(Copy); + return IntegralAP(Memory, BitWidth); } void bitcastToMemory(std::byte *Dest) const { - llvm::StoreIntToMemory(V, (uint8_t *)Dest, bitWidth() / 8); + llvm::StoreIntToMemory(getValue(), (uint8_t *)Dest, bitWidth() / 8); } - static IntegralAP bitcastFromMemory(const std::byte *Src, unsigned BitWidth) { + static void bitcastFromMemory(const std::byte *Src, unsigned BitWidth, + IntegralAP *Result) { APInt V(BitWidth, static_cast(0), Signed); llvm::LoadIntFromMemory(V, (const uint8_t *)Src, BitWidth / 8); - return IntegralAP(V); + Result->copy(V); } ComparisonCategoryResult compare(const IntegralAP &RHS) const { assert(Signed == RHS.isSigned()); assert(bitWidth() == RHS.bitWidth()); + APInt V1 = getValue(); + APInt V2 = RHS.getValue(); if constexpr (Signed) { - if (V.slt(RHS.V)) + if (V1.slt(V2)) return ComparisonCategoryResult::Less; - if (V.sgt(RHS.V)) + if (V1.sgt(V2)) return ComparisonCategoryResult::Greater; return ComparisonCategoryResult::Equal; } assert(!Signed); - if (V.ult(RHS.V)) + if (V1.ult(V2)) return ComparisonCategoryResult::Less; - if (V.ugt(RHS.V)) + if (V1.ugt(V2)) return ComparisonCategoryResult::Greater; return ComparisonCategoryResult::Equal; } static bool increment(IntegralAP A, IntegralAP *R) { - IntegralAP One(1, A.bitWidth()); - return add(A, One, A.bitWidth() + 1, R); + APSInt One(APInt(A.bitWidth(), 1ull, Signed), !Signed); + return add(A, IntegralAP(One), A.bitWidth() + 1, R); } static bool decrement(IntegralAP A, IntegralAP *R) { - IntegralAP One(1, A.bitWidth()); - return sub(A, One, A.bitWidth() + 1, R); + APSInt One(APInt(A.bitWidth(), 1ull, Signed), !Signed); + 
return sub(A, IntegralAP(One), A.bitWidth() + 1, R); } static bool add(IntegralAP A, IntegralAP B, unsigned OpBits, IntegralAP *R) { @@ -224,87 +262,97 @@ public: static bool rem(IntegralAP A, IntegralAP B, unsigned OpBits, IntegralAP *R) { if constexpr (Signed) - *R = IntegralAP(A.V.srem(B.V)); + R->copy(A.getValue().srem(B.getValue())); else - *R = IntegralAP(A.V.urem(B.V)); + R->copy(A.getValue().urem(B.getValue())); return false; } static bool div(IntegralAP A, IntegralAP B, unsigned OpBits, IntegralAP *R) { if constexpr (Signed) - *R = IntegralAP(A.V.sdiv(B.V)); + R->copy(A.getValue().sdiv(B.getValue())); else - *R = IntegralAP(A.V.udiv(B.V)); + R->copy(A.getValue().udiv(B.getValue())); return false; } static bool bitAnd(IntegralAP A, IntegralAP B, unsigned OpBits, IntegralAP *R) { - *R = IntegralAP(A.V & B.V); + R->copy(A.getValue() & B.getValue()); return false; } static bool bitOr(IntegralAP A, IntegralAP B, unsigned OpBits, IntegralAP *R) { - *R = IntegralAP(A.V | B.V); + R->copy(A.getValue() | B.getValue()); return false; } static bool bitXor(IntegralAP A, IntegralAP B, unsigned OpBits, IntegralAP *R) { - *R = IntegralAP(A.V ^ B.V); + R->copy(A.getValue() ^ B.getValue()); return false; } static bool neg(const IntegralAP &A, IntegralAP *R) { - APInt AI = A.V; + APInt AI = A.getValue(); AI.negate(); - *R = IntegralAP(AI); + R->copy(AI); return false; } static bool comp(IntegralAP A, IntegralAP *R) { - *R = IntegralAP(~A.V); + R->copy(~A.getValue()); return false; } static void shiftLeft(const IntegralAP A, const IntegralAP B, unsigned OpBits, IntegralAP *R) { - *R = IntegralAP(A.V.shl(B.V.getZExtValue())); + *R = IntegralAP(A.getValue().shl(B.getValue().getZExtValue())); } static void shiftRight(const IntegralAP A, const IntegralAP B, unsigned OpBits, IntegralAP *R) { - unsigned ShiftAmount = B.V.getZExtValue(); + unsigned ShiftAmount = B.getValue().getZExtValue(); if constexpr (Signed) - *R = IntegralAP(A.V.ashr(ShiftAmount)); + 
R->copy(A.getValue().ashr(ShiftAmount)); else - *R = IntegralAP(A.V.lshr(ShiftAmount)); + R->copy(A.getValue().lshr(ShiftAmount)); } // === Serialization support === size_t bytesToSerialize() const { - // 4 bytes for the BitWidth followed by N bytes for the actual APInt. - return sizeof(uint32_t) + (V.getBitWidth() / CHAR_BIT); + assert(BitWidth != 0); + uint64_t NumWords = llvm::APInt::getNumWords(bitWidth()); + return sizeof(uint32_t) + (NumWords * sizeof(uint64_t)); } void serialize(std::byte *Buff) const { - assert(V.getBitWidth() < std::numeric_limits::max()); - uint32_t BitWidth = V.getBitWidth(); - std::memcpy(Buff, &BitWidth, sizeof(uint32_t)); - llvm::StoreIntToMemory(V, (uint8_t *)(Buff + sizeof(uint32_t)), - BitWidth / CHAR_BIT); + if (singleWord()) + std::memcpy(Buff + sizeof(uint32_t), &Val, sizeof(uint64_t)); + else { + uint64_t NumWords = llvm::APInt::getNumWords(bitWidth()); + std::memcpy(Buff + sizeof(uint32_t), Memory, NumWords * sizeof(uint64_t)); + } } - static IntegralAP deserialize(const std::byte *Buff) { - uint32_t BitWidth; - std::memcpy(&BitWidth, Buff, sizeof(uint32_t)); - IntegralAP Val(APInt(BitWidth, 0ull, !Signed)); + static uint64_t deserializeSize(const std::byte *Buff) { + return *reinterpret_cast(Buff); + } + + static void deserialize(const std::byte *Buff, IntegralAP *Result) { + uint64_t BitWidth = Result->BitWidth; + uint64_t NumWords = llvm::APInt::getNumWords(BitWidth); + assert(BitWidth == Result->BitWidth); - llvm::LoadIntFromMemory(Val.V, (const uint8_t *)Buff + sizeof(uint32_t), - BitWidth / CHAR_BIT); - return Val; + if (NumWords == 1) + std::memcpy(&Result->Val, Buff + sizeof(uint32_t), sizeof(uint64_t)); + else { + assert(Result->Memory); + std::memcpy(Result->Memory, Buff + sizeof(uint32_t), + NumWords * sizeof(uint64_t)); + } } private: @@ -312,7 +360,7 @@ private: static bool CheckAddSubMulUB(const IntegralAP &A, const IntegralAP &B, unsigned BitWidth, IntegralAP *R) { if constexpr (!Signed) { - R->V = Op{}(A.V, 
B.V); + R->copy(Op{}(A.getValue(), B.getValue())); return false; } @@ -320,7 +368,7 @@ private: const APSInt &RHS = B.toAPSInt(); APSInt Value = Op{}(LHS.extend(BitWidth), RHS.extend(BitWidth)); APSInt Result = Value.trunc(LHS.getBitWidth()); - R->V = Result; + R->copy(Result); return Result.extend(BitWidth) != Value; } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 5c8abffb3a99..1e2032feabb6 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1935,8 +1935,10 @@ bool CastPointerIntegralAP(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { if (!CheckPointerToIntegralCast(S, OpPC, Ptr, BitWidth)) return false; - S.Stk.push>( - IntegralAP::from(Ptr.getIntegerRepresentation(), BitWidth)); + auto Result = S.allocAP>(BitWidth); + Result.copy(APInt(BitWidth, Ptr.getIntegerRepresentation())); + + S.Stk.push>(Result); return true; } @@ -1946,8 +1948,10 @@ bool CastPointerIntegralAPS(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { if (!CheckPointerToIntegralCast(S, OpPC, Ptr, BitWidth)) return false; - S.Stk.push>( - IntegralAP::from(Ptr.getIntegerRepresentation(), BitWidth)); + auto Result = S.allocAP>(BitWidth); + Result.copy(APInt(BitWidth, Ptr.getIntegerRepresentation())); + + S.Stk.push>(Result); return true; } @@ -2053,6 +2057,100 @@ bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS, return Shorter == Longer.take_front(Shorter.size()); } +static void copyPrimitiveMemory(InterpState &S, const Pointer &Ptr, + PrimType T) { + + if (T == PT_IntAPS) { + auto &Val = Ptr.deref>(); + if (!Val.singleWord()) { + uint64_t *NewMemory = new (S.P) uint64_t[Val.numWords()]; + Val.take(NewMemory); + } + } else if (T == PT_IntAP) { + auto &Val = Ptr.deref>(); + if (!Val.singleWord()) { + uint64_t *NewMemory = new (S.P) uint64_t[Val.numWords()]; + Val.take(NewMemory); + } + } else if (T == PT_Float) { + auto &Val = Ptr.deref(); + if (!Val.singleWord()) { + uint64_t *NewMemory = new (S.P) 
uint64_t[Val.numWords()]; + Val.take(NewMemory); + } + } +} + +template +static void copyPrimitiveMemory(InterpState &S, const Pointer &Ptr) { + assert(needsAlloc()); + auto &Val = Ptr.deref(); + if (!Val.singleWord()) { + uint64_t *NewMemory = new (S.P) uint64_t[Val.numWords()]; + Val.take(NewMemory); + } +} + +static void finishGlobalRecurse(InterpState &S, const Pointer &Ptr) { + if (const Record *R = Ptr.getRecord()) { + for (const Record::Field &Fi : R->fields()) { + if (Fi.Desc->isPrimitive()) { + TYPE_SWITCH_ALLOC(Fi.Desc->getPrimType(), { + copyPrimitiveMemory(S, Ptr.atField(Fi.Offset)); + }); + copyPrimitiveMemory(S, Ptr.atField(Fi.Offset), Fi.Desc->getPrimType()); + } else + finishGlobalRecurse(S, Ptr.atField(Fi.Offset)); + } + return; + } + + if (const Descriptor *D = Ptr.getFieldDesc(); D && D->isArray()) { + unsigned NumElems = D->getNumElems(); + if (NumElems == 0) + return; + + if (D->isPrimitiveArray()) { + PrimType PT = D->getPrimType(); + if (!needsAlloc(PT)) + return; + assert(NumElems >= 1); + const Pointer EP = Ptr.atIndex(0); + bool AllSingleWord = true; + TYPE_SWITCH_ALLOC(PT, { + if (!EP.deref().singleWord()) { + copyPrimitiveMemory(S, EP); + AllSingleWord = false; + } + }); + if (AllSingleWord) + return; + for (unsigned I = 1; I != D->getNumElems(); ++I) { + const Pointer EP = Ptr.atIndex(I); + copyPrimitiveMemory(S, EP, PT); + } + } else { + assert(D->isCompositeArray()); + for (unsigned I = 0; I != D->getNumElems(); ++I) { + const Pointer EP = Ptr.atIndex(I).narrow(); + finishGlobalRecurse(S, EP); + } + } + } +} + +bool FinishInitGlobal(InterpState &S, CodePtr OpPC) { + const Pointer &Ptr = S.Stk.pop(); + + finishGlobalRecurse(S, Ptr); + if (Ptr.canBeInitialized()) { + Ptr.initialize(); + Ptr.activate(); + } + + return true; +} + // https://github.com/llvm/llvm-project/issues/102513 #if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG) #pragma optimize("", off) diff --git a/clang/lib/AST/ByteCode/Interp.h 
b/clang/lib/AST/ByteCode/Interp.h index ae3d4a441a79..66d3e6d79e8b 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -189,7 +189,7 @@ bool CheckShift(InterpState &S, CodePtr OpPC, const LT &LHS, const RT &RHS, // C++11 [expr.shift]p1: Shift width must be less than the bit width of // the shifted type. - if (Bits > 1 && RHS >= RT::from(Bits, RHS.bitWidth())) { + if (Bits > 1 && RHS >= Bits) { const Expr *E = S.Current->getExpr(OpPC); const APSInt Val = RHS.toAPSInt(); QualType Ty = E->getType(); @@ -370,6 +370,9 @@ bool AddSubMulHelper(InterpState &S, CodePtr OpPC, unsigned Bits, const T &LHS, const T &RHS) { // Fast path - add the numbers with fixed width. T Result; + if constexpr (needsAlloc()) + Result = S.allocAP(LHS.bitWidth()); + if (!OpFW(LHS, RHS, Bits, &Result)) { S.Stk.push(Result); return true; @@ -408,6 +411,7 @@ bool Add(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); const unsigned Bits = RHS.bitWidth() + 1; + return AddSubMulHelper(S, OpPC, Bits, LHS, RHS); } @@ -423,7 +427,7 @@ inline bool Addf(InterpState &S, CodePtr OpPC, uint32_t FPOI) { const Floating &LHS = S.Stk.pop(); FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - Floating Result; + Floating Result = S.allocFloat(LHS.getSemantics()); auto Status = Floating::add(LHS, RHS, getRoundingMode(FPO), &Result); S.Stk.push(Result); return CheckFloatResult(S, OpPC, Result, Status, FPO); @@ -434,6 +438,7 @@ bool Sub(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); const unsigned Bits = RHS.bitWidth() + 1; + return AddSubMulHelper(S, OpPC, Bits, LHS, RHS); } @@ -442,7 +447,7 @@ inline bool Subf(InterpState &S, CodePtr OpPC, uint32_t FPOI) { const Floating &LHS = S.Stk.pop(); FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - Floating Result; + Floating Result = S.allocFloat(LHS.getSemantics()); auto Status = Floating::sub(LHS, RHS, getRoundingMode(FPO), &Result); S.Stk.push(Result); 
return CheckFloatResult(S, OpPC, Result, Status, FPO); @@ -453,6 +458,7 @@ bool Mul(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); const unsigned Bits = RHS.bitWidth() * 2; + return AddSubMulHelper(S, OpPC, Bits, LHS, RHS); } @@ -461,8 +467,10 @@ inline bool Mulf(InterpState &S, CodePtr OpPC, uint32_t FPOI) { const Floating &LHS = S.Stk.pop(); FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - Floating Result; + Floating Result = S.allocFloat(LHS.getSemantics()); + auto Status = Floating::mul(LHS, RHS, getRoundingMode(FPO), &Result); + S.Stk.push(Result); return CheckFloatResult(S, OpPC, Result, Status, FPO); } @@ -484,9 +492,14 @@ inline bool Mulc(InterpState &S, CodePtr OpPC) { HandleComplexComplexMul(A, B, C, D, ResR, ResI); // Copy into the result. - Result.atIndex(0).deref() = Floating(ResR); + Floating RA = S.allocFloat(A.getSemantics()); + RA.copy(ResR); + Result.atIndex(0).deref() = RA; // Floating(ResR); Result.atIndex(0).initialize(); - Result.atIndex(1).deref() = Floating(ResI); + + Floating RI = S.allocFloat(A.getSemantics()); + RI.copy(ResI); + Result.atIndex(1).deref() = RI; // Floating(ResI); Result.atIndex(1).initialize(); Result.initialize(); } else { @@ -539,10 +552,20 @@ inline bool Divc(InterpState &S, CodePtr OpPC) { HandleComplexComplexDiv(A, B, C, D, ResR, ResI); // Copy into the result. 
- Result.atIndex(0).deref() = Floating(ResR); + // Result.atIndex(0).deref() = Floating(ResR); + // Result.atIndex(0).initialize(); + // Result.atIndex(1).deref() = Floating(ResI); + // Result.atIndex(1).initialize(); + + Floating RA = S.allocFloat(A.getSemantics()); + RA.copy(ResR); + Result.atIndex(0).deref() = RA; // Floating(ResR); Result.atIndex(0).initialize(); - Result.atIndex(1).deref() = Floating(ResI); - Result.atIndex(1).initialize(); + + Floating RI = S.allocFloat(A.getSemantics()); + RI.copy(ResI); + Result.atIndex(1).deref() = RI; // Floating(ResI); + Result.initialize(); } else { // Integer element type. @@ -608,9 +631,12 @@ template ::T> bool BitAnd(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); - unsigned Bits = RHS.bitWidth(); + T Result; + if constexpr (needsAlloc()) + Result = S.allocAP(Bits); + if (!T::bitAnd(LHS, RHS, Bits, &Result)) { S.Stk.push(Result); return true; @@ -625,9 +651,12 @@ template ::T> bool BitOr(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); - unsigned Bits = RHS.bitWidth(); + T Result; + if constexpr (needsAlloc()) + Result = S.allocAP(Bits); + if (!T::bitOr(LHS, RHS, Bits, &Result)) { S.Stk.push(Result); return true; @@ -644,7 +673,11 @@ bool BitXor(InterpState &S, CodePtr OpPC) { const T &LHS = S.Stk.pop(); unsigned Bits = RHS.bitWidth(); + T Result; + if constexpr (needsAlloc()) + Result = S.allocAP(Bits); + if (!T::bitXor(LHS, RHS, Bits, &Result)) { S.Stk.push(Result); return true; @@ -659,12 +692,15 @@ template ::T> bool Rem(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); + const unsigned Bits = RHS.bitWidth() * 2; if (!CheckDivRem(S, OpPC, LHS, RHS)) return false; - const unsigned Bits = RHS.bitWidth() * 2; T Result; + if constexpr (needsAlloc()) + Result = S.allocAP(LHS.bitWidth()); + if (!T::rem(LHS, RHS, Bits, &Result)) { S.Stk.push(Result); return true; @@ -679,12 +715,15 @@ template ::T> bool 
Div(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); + const unsigned Bits = RHS.bitWidth() * 2; if (!CheckDivRem(S, OpPC, LHS, RHS)) return false; - const unsigned Bits = RHS.bitWidth() * 2; T Result; + if constexpr (needsAlloc()) + Result = S.allocAP(LHS.bitWidth()); + if (!T::div(LHS, RHS, Bits, &Result)) { S.Stk.push(Result); return true; @@ -707,8 +746,10 @@ inline bool Divf(InterpState &S, CodePtr OpPC, uint32_t FPOI) { return false; FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - Floating Result; + + Floating Result = S.allocFloat(LHS.getSemantics()); auto Status = Floating::div(LHS, RHS, getRoundingMode(FPO), &Result); + S.Stk.push(Result); return CheckFloatResult(S, OpPC, Result, Status, FPO); } @@ -730,31 +771,44 @@ inline bool Inv(InterpState &S, CodePtr OpPC) { template ::T> bool Neg(InterpState &S, CodePtr OpPC) { const T &Value = S.Stk.pop(); - T Result; - if (!T::neg(Value, &Result)) { + if constexpr (std::is_same_v) { + T Result = S.allocFloat(Value.getSemantics()); + + if (!T::neg(Value, &Result)) { + S.Stk.push(Result); + return true; + } + return false; + } else { + T Result; + if constexpr (needsAlloc()) + Result = S.allocAP(Value.bitWidth()); + + if (!T::neg(Value, &Result)) { + S.Stk.push(Result); + return true; + } + + assert(isIntegralType(Name) && + "don't expect other types to fail at constexpr negation"); S.Stk.push(Result); - return true; - } - assert(isIntegralType(Name) && - "don't expect other types to fail at constexpr negation"); - S.Stk.push(Result); + APSInt NegatedValue = -Value.toAPSInt(Value.bitWidth() + 1); + if (S.checkingForUndefinedBehavior()) { + const Expr *E = S.Current->getExpr(OpPC); + QualType Type = E->getType(); + SmallString<32> Trunc; + NegatedValue.trunc(Result.bitWidth()) + .toString(Trunc, 10, Result.isSigned(), /*formatAsCLiteral=*/false, + /*UpperCase=*/true, /*InsertSeparators=*/true); + S.report(E->getExprLoc(), diag::warn_integer_constant_overflow) + << Trunc << 
Type << E->getSourceRange(); + return true; + } - APSInt NegatedValue = -Value.toAPSInt(Value.bitWidth() + 1); - if (S.checkingForUndefinedBehavior()) { - const Expr *E = S.Current->getExpr(OpPC); - QualType Type = E->getType(); - SmallString<32> Trunc; - NegatedValue.trunc(Result.bitWidth()) - .toString(Trunc, 10, Result.isSigned(), /*formatAsCLiteral=*/false, - /*UpperCase=*/true, /*InsertSeparators=*/true); - S.report(E->getExprLoc(), diag::warn_integer_constant_overflow) - << Trunc << Type << E->getSourceRange(); - return true; + return handleOverflow(S, OpPC, NegatedValue); } - - return handleOverflow(S, OpPC, NegatedValue); } enum class PushVal : bool { @@ -783,6 +837,8 @@ bool IncDecHelper(InterpState &S, CodePtr OpPC, const Pointer &Ptr, const T &Value = Ptr.deref(); T Result; + if constexpr (needsAlloc()) + Result = S.allocAP(Value.bitWidth()); if constexpr (DoPush == PushVal::Yes) S.Stk.push(Value); @@ -890,7 +946,6 @@ bool PreDec(InterpState &S, CodePtr OpPC, bool CanOverflow) { const Pointer &Ptr = S.Stk.peek(); if (!CheckLoad(S, OpPC, Ptr, AK_Decrement)) return false; - return IncDecHelper(S, OpPC, Ptr, CanOverflow); } @@ -898,7 +953,7 @@ template bool IncDecFloatHelper(InterpState &S, CodePtr OpPC, const Pointer &Ptr, uint32_t FPOI) { Floating Value = Ptr.deref(); - Floating Result; + Floating Result = S.allocFloat(Value.getSemantics()); if constexpr (DoPush == PushVal::Yes) S.Stk.push(Value); @@ -952,12 +1007,15 @@ inline bool DecfPop(InterpState &S, CodePtr OpPC, uint32_t FPOI) { template ::T> bool Comp(InterpState &S, CodePtr OpPC) { const T &Val = S.Stk.pop(); + T Result; + if constexpr (needsAlloc()) + Result = S.allocAP(Val.bitWidth()); + if (!T::comp(Val, &Result)) { S.Stk.push(Result); return true; } - return false; } @@ -1325,10 +1383,23 @@ bool Flip(InterpState &S, CodePtr OpPC) { template ::T> bool Const(InterpState &S, CodePtr OpPC, const T &Arg) { + if constexpr (needsAlloc()) { + T Result = S.allocAP(Arg.bitWidth()); + 
Result.copy(Arg.toAPSInt()); + S.Stk.push(Result); + return true; + } S.Stk.push(Arg); return true; } +inline bool ConstFloat(InterpState &S, CodePtr OpPC, const Floating &F) { + Floating Result = S.allocFloat(F.getSemantics()); + Result.copy(F.getAPFloat()); + S.Stk.push(Result); + return true; +} + //===----------------------------------------------------------------------===// // Get/Set Local/Param/Global/This //===----------------------------------------------------------------------===// @@ -1483,7 +1554,24 @@ bool SetGlobal(InterpState &S, CodePtr OpPC, uint32_t I) { template ::T> bool InitGlobal(InterpState &S, CodePtr OpPC, uint32_t I) { const Pointer &P = S.P.getGlobal(I); + P.deref() = S.Stk.pop(); + + if constexpr (std::is_same_v) { + auto &Val = P.deref(); + if (!Val.singleWord()) { + uint64_t *NewMemory = new (S.P) uint64_t[Val.numWords()]; + Val.take(NewMemory); + } + + } else if constexpr (needsAlloc()) { + auto &Val = P.deref(); + if (!Val.singleWord()) { + uint64_t *NewMemory = new (S.P) uint64_t[Val.numWords()]; + Val.take(NewMemory); + } + } + P.initialize(); return true; } @@ -1585,7 +1673,22 @@ bool InitBitField(InterpState &S, CodePtr OpPC, const Record::Field *F) { assert(F->isBitField()); const T &Value = S.Stk.pop(); const Pointer &Field = S.Stk.peek().atField(F->Offset); - Field.deref() = Value.truncate(F->Decl->getBitWidthValue()); + + if constexpr (needsAlloc()) { + T Result = S.allocAP(Value.bitWidth()); + if (T::isSigned()) + Result.copy(Value.toAPSInt() + .trunc(F->Decl->getBitWidthValue()) + .sextOrTrunc(Value.bitWidth())); + else + Result.copy(Value.toAPSInt() + .trunc(F->Decl->getBitWidthValue()) + .zextOrTrunc(Value.bitWidth())); + + Field.deref() = Result; + } else { + Field.deref() = Value.truncate(F->Decl->getBitWidthValue()); + } Field.activate(); Field.initialize(); return true; @@ -1765,6 +1868,8 @@ inline bool FinishInit(InterpState &S, CodePtr OpPC) { return true; } +bool FinishInitGlobal(InterpState &S, CodePtr OpPC); + 
inline bool Dump(InterpState &S, CodePtr OpPC) { S.Stk.dump(); return true; @@ -2271,7 +2376,8 @@ template bool Cast(InterpState &S, CodePtr OpPC) { inline bool CastFP(InterpState &S, CodePtr OpPC, const llvm::fltSemantics *Sem, llvm::RoundingMode RM) { Floating F = S.Stk.pop(); - Floating Result = F.toSemantics(Sem, RM); + Floating Result = S.allocFloat(*Sem); + F.toSemantics(Sem, RM, &Result); S.Stk.push(Result); return true; } @@ -2295,15 +2401,25 @@ inline bool CastFixedPoint(InterpState &S, CodePtr OpPC, uint32_t FPS) { /// to know what bitwidth the result should be. template ::T> bool CastAP(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { - S.Stk.push>( - IntegralAP::from(S.Stk.pop(), BitWidth)); + auto Result = S.allocAP>(BitWidth); + // Copy data. + { + APInt Source = S.Stk.pop().toAPSInt().extOrTrunc(BitWidth); + Result.copy(Source); + } + S.Stk.push>(Result); return true; } template ::T> bool CastAPS(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { - S.Stk.push>( - IntegralAP::from(S.Stk.pop(), BitWidth)); + auto Result = S.allocAP>(BitWidth); + // Copy data. 
+ { + APInt Source = S.Stk.pop().toAPSInt().extOrTrunc(BitWidth); + Result.copy(Source); + } + S.Stk.push>(Result); return true; } @@ -2312,11 +2428,11 @@ bool CastIntegralFloating(InterpState &S, CodePtr OpPC, const llvm::fltSemantics *Sem, uint32_t FPOI) { const T &From = S.Stk.pop(); APSInt FromAP = From.toAPSInt(); - Floating Result; FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); + Floating Result = S.allocFloat(*Sem); auto Status = - Floating::fromIntegral(FromAP, *Sem, getRoundingMode(FPO), Result); + Floating::fromIntegral(FromAP, *Sem, getRoundingMode(FPO), &Result); S.Stk.push(Result); return CheckFloatResult(S, OpPC, Result, Status, FPO); @@ -2365,7 +2481,12 @@ static inline bool CastFloatingIntegralAP(InterpState &S, CodePtr OpPC, return handleOverflow(S, OpPC, F.getAPFloat()); FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - S.Stk.push>(IntegralAP(Result)); + + auto ResultAP = S.allocAP>(BitWidth); + ResultAP.copy(Result); + + S.Stk.push>(ResultAP); + return CheckFloatResult(S, OpPC, F, Status, FPO); } @@ -2381,7 +2502,12 @@ static inline bool CastFloatingIntegralAPS(InterpState &S, CodePtr OpPC, return handleOverflow(S, OpPC, F.getAPFloat()); FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - S.Stk.push>(IntegralAP(Result)); + + auto ResultAP = S.allocAP>(BitWidth); + ResultAP.copy(Result); + + S.Stk.push>(ResultAP); + return CheckFloatResult(S, OpPC, F, Status, FPO); } @@ -2441,8 +2567,9 @@ static inline bool CastFloatingFixedPoint(InterpState &S, CodePtr OpPC, static inline bool CastFixedPointFloating(InterpState &S, CodePtr OpPC, const llvm::fltSemantics *Sem) { const auto &Fixed = S.Stk.pop(); - - S.Stk.push(Fixed.toFloat(Sem)); + Floating Result = S.allocFloat(*Sem); + Result.copy(Fixed.toFloat(Sem)); + S.Stk.push(Result); return true; } @@ -2506,12 +2633,18 @@ bool Zero(InterpState &S, CodePtr OpPC) { } static inline bool ZeroIntAP(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { - S.Stk.push>(IntegralAP::zero(BitWidth)); + auto 
Result = S.allocAP>(BitWidth); + if (!Result.singleWord()) + std::memset(Result.Memory, 0, Result.numWords() * sizeof(uint64_t)); + S.Stk.push>(Result); return true; } static inline bool ZeroIntAPS(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { - S.Stk.push>(IntegralAP::zero(BitWidth)); + auto Result = S.allocAP>(BitWidth); + if (!Result.singleWord()) + std::memset(Result.Memory, 0, Result.numWords() * sizeof(uint64_t)); + S.Stk.push>(Result); return true; } @@ -2578,7 +2711,9 @@ inline bool RVOPtr(InterpState &S, CodePtr OpPC) { //===----------------------------------------------------------------------===// template -inline bool DoShift(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS) { +inline bool DoShift(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS, + LT *Result) { + const unsigned Bits = LHS.bitWidth(); // OpenCL 6.3j: shift values are effectively % word size of LHS. @@ -2596,7 +2731,7 @@ inline bool DoShift(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS) { RHS = -RHS; return DoShift( - S, OpPC, LHS, RHS); + S, OpPC, LHS, RHS, Result); } if (!CheckShift(S, OpPC, LHS, RHS, Bits)) @@ -2644,6 +2779,7 @@ inline bool DoShift(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS) { // Do the shift on potentially signed LT, then convert to unsigned type. LT A; LT::shiftRight(LHS, LT::from(RHS, Bits), Bits, &A); + // LT::shiftRight(LHS, LT(RHSTemp), Bits, &A); R = LT::AsUnsigned::from(A); } } @@ -2652,6 +2788,48 @@ inline bool DoShift(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS) { return true; } +/// A version of DoShift that works on IntegralAP. +template +inline bool DoShiftAP(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS, + LT *Result) { + const unsigned Bits = LHS.bitWidth(); + const APSInt &LHSAP = LHS.toAPSInt(); + APSInt RHSAP = RHS.toAPSInt(); + + // OpenCL 6.3j: shift values are effectively % word size of LHS. 
+ if (S.getLangOpts().OpenCL) + RHSAP &= APSInt(llvm::APInt(RHSAP.getBitWidth(), + static_cast(LHSAP.getBitWidth() - 1)), + RHSAP.isUnsigned()); + + if (RHS.isNegative()) { + // During constant-folding, a negative shift is an opposite shift. Such a + // shift is not a constant expression. + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.CCEDiag(Loc, diag::note_constexpr_negative_shift) << RHS.toAPSInt(); + if (!S.noteUndefinedBehavior()) + return false; + RHS = -RHS; + return DoShiftAP( + S, OpPC, LHS, RHS, Result); + } + + if (!CheckShift(S, OpPC, LHS, RHS, Bits)) + return false; + + if constexpr (Dir == ShiftDir::Left) { + unsigned SA = (unsigned)RHSAP.getLimitedValue(LHS.bitWidth() - 1); + Result->copy(LHSAP << SA); + } else { + unsigned SA = (unsigned)RHSAP.getLimitedValue(LHS.bitWidth() - 1); + Result->copy(LHSAP >> SA); + } + + S.Stk.push(*Result); + return true; +} + template inline bool Shr(InterpState &S, CodePtr OpPC) { using LT = typename PrimConv::T; @@ -2659,7 +2837,13 @@ inline bool Shr(InterpState &S, CodePtr OpPC) { auto RHS = S.Stk.pop(); auto LHS = S.Stk.pop(); - return DoShift(S, OpPC, LHS, RHS); + if constexpr (needsAlloc()) { + LT Result = S.allocAP(LHS.bitWidth()); + return DoShiftAP(S, OpPC, LHS, RHS, &Result); + } else { + LT Result; + return DoShift(S, OpPC, LHS, RHS, &Result); + } } template @@ -2668,8 +2852,13 @@ inline bool Shl(InterpState &S, CodePtr OpPC) { using RT = typename PrimConv::T; auto RHS = S.Stk.pop(); auto LHS = S.Stk.pop(); - - return DoShift(S, OpPC, LHS, RHS); + if constexpr (needsAlloc()) { + LT Result = S.allocAP(LHS.bitWidth()); + return DoShiftAP(S, OpPC, LHS, RHS, &Result); + } else { + LT Result; + return DoShift(S, OpPC, LHS, RHS, &Result); + } } static inline bool ShiftFixedPoint(InterpState &S, CodePtr OpPC, bool Left) { @@ -3252,7 +3441,15 @@ inline bool BitCastPrim(InterpState &S, CodePtr OpPC, bool TargetIsUCharOrByte, if constexpr (std::is_same_v) { assert(Sem); - 
S.Stk.push(T::bitcastFromMemory(Buff.data(), *Sem)); + Floating Result = S.allocFloat(*Sem); + Floating::bitcastFromMemory(Buff.data(), *Sem, &Result); + S.Stk.push(Result); + + // S.Stk.push(T::bitcastFromMemory(Buff.data(), *Sem)); + } else if constexpr (needsAlloc()) { + T Result = S.allocAP(ResultBitWidth); + T::bitcastFromMemory(Buff.data(), ResultBitWidth, &Result); + S.Stk.push(Result); } else { assert(!Sem); S.Stk.push(T::bitcastFromMemory(Buff.data(), ResultBitWidth)); @@ -3310,7 +3507,11 @@ template inline T ReadArg(InterpState &S, CodePtr &OpPC) { } template <> inline Floating ReadArg(InterpState &S, CodePtr &OpPC) { - Floating F = Floating::deserialize(*OpPC); + auto &Semantics = + llvm::APFloatBase::EnumToSemantics(Floating::deserializeSemantics(*OpPC)); + + auto F = S.allocFloat(Semantics); + Floating::deserialize(*OpPC, &F); OpPC += align(F.bytesToSerialize()); return F; } @@ -3318,17 +3519,25 @@ template <> inline Floating ReadArg(InterpState &S, CodePtr &OpPC) { template <> inline IntegralAP ReadArg>(InterpState &S, CodePtr &OpPC) { - IntegralAP I = IntegralAP::deserialize(*OpPC); - OpPC += align(I.bytesToSerialize()); - return I; + uint32_t BitWidth = IntegralAP::deserializeSize(*OpPC); + auto Result = S.allocAP>(BitWidth); + assert(Result.bitWidth() == BitWidth); + + IntegralAP::deserialize(*OpPC, &Result); + OpPC += align(Result.bytesToSerialize()); + return Result; } template <> inline IntegralAP ReadArg>(InterpState &S, CodePtr &OpPC) { - IntegralAP I = IntegralAP::deserialize(*OpPC); - OpPC += align(I.bytesToSerialize()); - return I; + uint32_t BitWidth = IntegralAP::deserializeSize(*OpPC); + auto Result = S.allocAP>(BitWidth); + assert(Result.bitWidth() == BitWidth); + + IntegralAP::deserialize(*OpPC, &Result); + OpPC += align(Result.bytesToSerialize()); + return Result; } template <> diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index d01e3d042a8b..5304bd77f2c0 100644 --- 
a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -57,6 +57,21 @@ static void pushInteger(InterpState &S, const APSInt &Val, QualType QT) { assert(T); unsigned BitWidth = S.getASTContext().getTypeSize(QT); + + if (T == PT_IntAPS) { + auto Result = S.allocAP>(BitWidth); + Result.copy(Val); + S.Stk.push>(Result); + return; + } + + if (T == PT_IntAP) { + auto Result = S.allocAP>(BitWidth); + Result.copy(Val); + S.Stk.push>(Result); + return; + } + if (QT->isSignedIntegerOrEnumerationType()) { int64_t V = Val.getSExtValue(); INT_TYPE_SWITCH(*T, { S.Stk.push(T::from(V, BitWidth)); }); @@ -327,13 +342,13 @@ static bool interp__builtin_nan(InterpState &S, CodePtr OpPC, S.getASTContext().getFloatTypeSemantics( Call->getDirectCallee()->getReturnType()); - Floating Result; + Floating Result = S.allocFloat(TargetSemantics); if (S.getASTContext().getTargetInfo().isNan2008()) { if (Signaling) - Result = Floating( + Result.copy( llvm::APFloat::getSNaN(TargetSemantics, /*Negative=*/false, &Fill)); else - Result = Floating( + Result.copy( llvm::APFloat::getQNaN(TargetSemantics, /*Negative=*/false, &Fill)); } else { // Prior to IEEE 754-2008, architectures were allowed to choose whether @@ -342,10 +357,10 @@ static bool interp__builtin_nan(InterpState &S, CodePtr OpPC, // 2008 revisions, MIPS interpreted sNaN-2008 as qNan and qNaN-2008 as // sNaN. This is now known as "legacy NaN" encoding. 
if (Signaling) - Result = Floating( + Result.copy( llvm::APFloat::getQNaN(TargetSemantics, /*Negative=*/false, &Fill)); else - Result = Floating( + Result.copy( llvm::APFloat::getSNaN(TargetSemantics, /*Negative=*/false, &Fill)); } @@ -360,7 +375,9 @@ static bool interp__builtin_inf(InterpState &S, CodePtr OpPC, S.getASTContext().getFloatTypeSemantics( Call->getDirectCallee()->getReturnType()); - S.Stk.push(Floating::getInf(TargetSemantics)); + Floating Result = S.allocFloat(TargetSemantics); + Result.copy(APFloat::getInf(TargetSemantics)); + S.Stk.push(Result); return true; } @@ -368,10 +385,12 @@ static bool interp__builtin_copysign(InterpState &S, CodePtr OpPC, const InterpFrame *Frame) { const Floating &Arg2 = S.Stk.pop(); const Floating &Arg1 = S.Stk.pop(); + Floating Result = S.allocFloat(Arg1.getSemantics()); APFloat Copy = Arg1.getAPFloat(); Copy.copySign(Arg2.getAPFloat()); - S.Stk.push(Floating(Copy)); + Result.copy(Copy); + S.Stk.push(Result); return true; } @@ -380,11 +399,13 @@ static bool interp__builtin_fmin(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, bool IsNumBuiltin) { const Floating &RHS = S.Stk.pop(); const Floating &LHS = S.Stk.pop(); + Floating Result = S.allocFloat(LHS.getSemantics()); if (IsNumBuiltin) - S.Stk.push(llvm::minimumnum(LHS.getAPFloat(), RHS.getAPFloat())); + Result.copy(llvm::minimumnum(LHS.getAPFloat(), RHS.getAPFloat())); else - S.Stk.push(minnum(LHS.getAPFloat(), RHS.getAPFloat())); + Result.copy(minnum(LHS.getAPFloat(), RHS.getAPFloat())); + S.Stk.push(Result); return true; } @@ -392,11 +413,13 @@ static bool interp__builtin_fmax(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, bool IsNumBuiltin) { const Floating &RHS = S.Stk.pop(); const Floating &LHS = S.Stk.pop(); + Floating Result = S.allocFloat(LHS.getSemantics()); if (IsNumBuiltin) - S.Stk.push(llvm::maximumnum(LHS.getAPFloat(), RHS.getAPFloat())); + Result.copy(llvm::maximumnum(LHS.getAPFloat(), RHS.getAPFloat())); else - 
S.Stk.push(maxnum(LHS.getAPFloat(), RHS.getAPFloat())); + Result.copy(maxnum(LHS.getAPFloat(), RHS.getAPFloat())); + S.Stk.push(Result); return true; } @@ -571,8 +594,16 @@ static bool interp__builtin_fpclassify(InterpState &S, CodePtr OpPC, static bool interp__builtin_fabs(InterpState &S, CodePtr OpPC, const InterpFrame *Frame) { const Floating &Val = S.Stk.pop(); + APFloat F = Val.getAPFloat(); + if (!F.isNegative()) { + S.Stk.push(Val); + return true; + } - S.Stk.push(Floating::abs(Val)); + Floating Result = S.allocFloat(Val.getSemantics()); + F.changeSign(); + Result.copy(F); + S.Stk.push(Result); return true; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp index 239b3104e89f..2569cac018b3 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp @@ -402,7 +402,9 @@ bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC, if (llvm::sys::IsBigEndianHost) swapBytes(M.get(), NumBits.roundToBytes()); - P.deref() = Floating::bitcastFromMemory(M.get(), Semantics); + Floating R = S.allocFloat(Semantics); + Floating::bitcastFromMemory(M.get(), Semantics, &R); + P.deref() = R; P.initialize(); return true; } diff --git a/clang/lib/AST/ByteCode/InterpState.h b/clang/lib/AST/ByteCode/InterpState.h index e8dc6f0483d6..08765561985e 100644 --- a/clang/lib/AST/ByteCode/InterpState.h +++ b/clang/lib/AST/ByteCode/InterpState.h @@ -15,6 +15,7 @@ #include "Context.h" #include "DynamicAllocator.h" +#include "Floating.h" #include "Function.h" #include "InterpFrame.h" #include "InterpStack.h" @@ -126,6 +127,33 @@ public: StdAllocatorCaller getStdAllocatorCaller(StringRef Name) const; + void *allocate(size_t Size, unsigned Align = 8) const { + return Allocator.Allocate(Size, Align); + } + template T *allocate(size_t Num = 1) const { + return static_cast(allocate(Num * sizeof(T), alignof(T))); + } + + template T allocAP(unsigned BitWidth) { + unsigned NumWords 
= APInt::getNumWords(BitWidth); + if (NumWords == 1) + return T(BitWidth); + uint64_t *Mem = (uint64_t *)this->allocate(NumWords * sizeof(uint64_t)); + // std::memset(Mem, 0, NumWords * sizeof(uint64_t)); // Debug + return T(Mem, BitWidth); + } + + Floating allocFloat(const llvm::fltSemantics &Sem) { + if (Floating::singleWord(Sem)) + return Floating(llvm::APFloatBase::SemanticsToEnum(Sem)); + + unsigned NumWords = + APInt::getNumWords(llvm::APFloatBase::getSizeInBits(Sem)); + uint64_t *Mem = (uint64_t *)this->allocate(NumWords * sizeof(uint64_t)); + // std::memset(Mem, 0, NumWords * sizeof(uint64_t)); // Debug + return Floating(Mem, llvm::APFloatBase::SemanticsToEnum(Sem)); + } + private: friend class EvaluationResult; friend class InterpStateCCOverride; @@ -161,6 +189,8 @@ public: llvm::SmallVector< std::pair> SeenGlobalTemporaries; + + mutable llvm::BumpPtrAllocator Allocator; }; class InterpStateCCOverride final { diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td index c76ac5f8ae86..57e01f7bd9da 100644 --- a/clang/lib/AST/ByteCode/Opcodes.td +++ b/clang/lib/AST/ByteCode/Opcodes.td @@ -48,6 +48,7 @@ def ArgUint64 : ArgType { let Name = "uint64_t"; } def ArgIntAP : ArgType { let Name = "IntegralAP"; let AsRef = true; } def ArgIntAPS : ArgType { let Name = "IntegralAP"; let AsRef = true; } def ArgFloat : ArgType { let Name = "Floating"; let AsRef = true; } + def ArgBool : ArgType { let Name = "bool"; } def ArgFixedPoint : ArgType { let Name = "FixedPoint"; let AsRef = true; } @@ -88,6 +89,9 @@ def IntegerAndFixedTypeClass : TypeClass { Uint32, Sint64, Uint64, IntAP, IntAPS, FixedPoint]; } +def IntegralTypeClass : TypeClass { + let Types = !listconcat(IntegerTypeClass.Types, [Bool]); +} def FixedSizeIntegralTypeClass : TypeClass { let Types = [Sint8, Uint8, Sint16, Uint16, Sint32, Uint32, Sint64, Uint64, Bool]; @@ -265,12 +269,13 @@ def ConstSint32 : ConstOpcode; def ConstUint32 : ConstOpcode; def ConstSint64 : ConstOpcode; def 
ConstUint64 : ConstOpcode; -def ConstFloat : ConstOpcode; -def constIntAP : ConstOpcode; -def constIntAPS : ConstOpcode; +def ConstIntAP : ConstOpcode; +def ConstIntAPS : ConstOpcode; def ConstBool : ConstOpcode; def ConstFixedPoint : ConstOpcode; +def ConstFloat : Opcode { let Args = [ArgFloat]; } + // [] -> [Integer] def Zero : Opcode { let Types = [FixedSizeIntegralTypeClass]; @@ -328,6 +333,7 @@ def GetMemberPtrBasePop : Opcode { def FinishInitPop : Opcode; def FinishInit : Opcode; +def FinishInitGlobal : Opcode; def GetPtrDerivedPop : Opcode { let Args = [ArgUint32, ArgBool, ArgTypePtr]; } @@ -389,7 +395,7 @@ class AccessOpcode : Opcode { } class BitFieldOpcode : Opcode { - let Types = [AluTypeClass]; + let Types = [IntegralTypeClass]; let Args = [ArgRecordField]; let HasGroup = 1; } diff --git a/clang/lib/AST/ByteCode/PrimType.h b/clang/lib/AST/ByteCode/PrimType.h index 6152fbfbe3a7..a156cccbb3c1 100644 --- a/clang/lib/AST/ByteCode/PrimType.h +++ b/clang/lib/AST/ByteCode/PrimType.h @@ -76,6 +76,13 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, } constexpr bool isIntegralType(PrimType T) { return T <= PT_FixedPoint; } +template constexpr bool needsAlloc() { + return std::is_same_v> || + std::is_same_v> || std::is_same_v; +} +constexpr bool needsAlloc(PrimType T) { + return T == PT_IntAP || T == PT_IntAPS || T == PT_Float; +} /// Mapping from primitive types to their representation. 
template struct PrimConv; @@ -209,6 +216,16 @@ static inline bool aligned(const void *P) { } \ } while (0) +#define TYPE_SWITCH_ALLOC(Expr, B) \ + do { \ + switch (Expr) { \ + TYPE_SWITCH_CASE(PT_Float, B) \ + TYPE_SWITCH_CASE(PT_IntAP, B) \ + TYPE_SWITCH_CASE(PT_IntAPS, B) \ + default:; \ + } \ + } while (0) + #define COMPOSITE_TYPE_SWITCH(Expr, B, D) \ do { \ switch (Expr) { \ diff --git a/clang/lib/AST/ByteCode/Program.h b/clang/lib/AST/ByteCode/Program.h index 23ba1bbd193b..5d9c42244749 100644 --- a/clang/lib/AST/ByteCode/Program.h +++ b/clang/lib/AST/ByteCode/Program.h @@ -132,6 +132,14 @@ public: bool IsMutable = false, bool IsVolatile = false, const Expr *Init = nullptr); + void *Allocate(size_t Size, unsigned Align = 8) const { + return Allocator.Allocate(Size, Align); + } + template T *Allocate(size_t Num = 1) const { + return static_cast(Allocate(Num * sizeof(T), alignof(T))); + } + void Deallocate(void *Ptr) const {} + /// Context to manage declaration lifetimes. class DeclScope { public: @@ -204,7 +212,7 @@ private: }; /// Allocator for globals. - PoolAllocTy Allocator; + mutable PoolAllocTy Allocator; /// Global objects. 
std::vector Globals; @@ -238,4 +246,18 @@ public: } // namespace interp } // namespace clang +inline void *operator new(size_t Bytes, const clang::interp::Program &C, + size_t Alignment = 8) { + return C.Allocate(Bytes, Alignment); +} + +inline void operator delete(void *Ptr, const clang::interp::Program &C, + size_t) { + C.Deallocate(Ptr); +} +inline void *operator new[](size_t Bytes, const clang::interp::Program &C, + size_t Alignment = 8) { + return C.Allocate(Bytes, Alignment); +} + #endif diff --git a/clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp b/clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp index 710612bef8fd..1013a771d13b 100644 --- a/clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp +++ b/clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp @@ -21,6 +21,9 @@ template constexpr To bit_cast(const From &from) { static_assert(sizeof(To) == sizeof(From)); return __builtin_bit_cast(To, from); +#if __x86_64 + // both-note@-2 {{indeterminate value can only initialize an object of type}} +#endif } template @@ -38,11 +41,8 @@ constexpr Init round_trip(const Init &init) { namespace test_long_double { #if __x86_64 -/// FIXME: We could enable this, but since it aborts, it causes the usual mempory leak. 
-#if 0 -constexpr __int128_t test_cast_to_int128 = bit_cast<__int128_t>((long double)0); // expected-error{{must be initialized by a constant expression}}\ - // expected-note{{in call}} -#endif +constexpr __int128_t test_cast_to_int128 = bit_cast<__int128_t>((long double)0); // both-error{{must be initialized by a constant expression}}\ + // both-note{{in call}} constexpr long double ld = 3.1425926539; struct bytes { diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index 21dca15a4577..174c1ffa79a4 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -208,7 +208,7 @@ namespace nan { constexpr double NaN3 = __builtin_nan("foo"); // both-error {{must be initialized by a constant expression}} constexpr float NaN4 = __builtin_nanf(""); - //constexpr long double NaN5 = __builtin_nanf128(""); + constexpr long double NaN5 = __builtin_nanf128(""); /// FIXME: This should be accepted by the current interpreter as well. constexpr char f[] = {'0', 'x', 'A', 'E', '\0'}; @@ -655,8 +655,6 @@ void test_noexcept(int *i) { } // end namespace test_launder -/// FIXME: The commented out tests here use a IntAP value and fail. -/// This currently means we will leak the IntAP value since nothing cleans it up. namespace clz { char clz1[__builtin_clz(1) == BITSIZE(int) - 1 ? 1 : -1]; char clz2[__builtin_clz(7) == BITSIZE(int) - 3 ? 1 : -1]; @@ -709,7 +707,7 @@ namespace clz { char clz48[__builtin_clzg(1ULL << (BITSIZE(long long) - 1)) == 0 ? 1 : -1]; char clz49[__builtin_clzg(1ULL << (BITSIZE(long long) - 1), 42) == 0 ? 1 : -1]; #ifdef __SIZEOF_INT128__ - // int clz50 = __builtin_clzg((unsigned __int128)0); + int clz50 = __builtin_clzg((unsigned __int128)0); char clz51[__builtin_clzg((unsigned __int128)0, 42) == 42 ? 1 : -1]; char clz52[__builtin_clzg((unsigned __int128)0x1) == BITSIZE(__int128) - 1 ? 
1 : -1]; char clz53[__builtin_clzg((unsigned __int128)0x1, 42) == BITSIZE(__int128) - 1 ? 1 : -1]; @@ -717,7 +715,7 @@ namespace clz { char clz55[__builtin_clzg((unsigned __int128)0xf, 42) == BITSIZE(__int128) - 4 ? 1 : -1]; #endif #ifndef __AVR__ - // int clz58 = __builtin_clzg((unsigned _BitInt(128))0); + int clz58 = __builtin_clzg((unsigned _BitInt(128))0); char clz59[__builtin_clzg((unsigned _BitInt(128))0, 42) == 42 ? 1 : -1]; char clz60[__builtin_clzg((unsigned _BitInt(128))0x1) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1]; char clz61[__builtin_clzg((unsigned _BitInt(128))0x1, 42) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1]; @@ -775,7 +773,7 @@ namespace ctz { char ctz46[__builtin_ctzg(1ULL << (BITSIZE(long long) - 1)) == BITSIZE(long long) - 1 ? 1 : -1]; char ctz47[__builtin_ctzg(1ULL << (BITSIZE(long long) - 1), 42) == BITSIZE(long long) - 1 ? 1 : -1]; #ifdef __SIZEOF_INT128__ - // int ctz48 = __builtin_ctzg((unsigned __int128)0); + int ctz48 = __builtin_ctzg((unsigned __int128)0); char ctz49[__builtin_ctzg((unsigned __int128)0, 42) == 42 ? 1 : -1]; char ctz50[__builtin_ctzg((unsigned __int128)0x1) == 0 ? 1 : -1]; char ctz51[__builtin_ctzg((unsigned __int128)0x1, 42) == 0 ? 1 : -1]; @@ -785,7 +783,7 @@ namespace ctz { char ctz55[__builtin_ctzg((unsigned __int128)1 << (BITSIZE(__int128) - 1), 42) == BITSIZE(__int128) - 1 ? 1 : -1]; #endif #ifndef __AVR__ - // int ctz56 = __builtin_ctzg((unsigned _BitInt(128))0); + int ctz56 = __builtin_ctzg((unsigned _BitInt(128))0); char ctz57[__builtin_ctzg((unsigned _BitInt(128))0, 42) == 42 ? 1 : -1]; char ctz58[__builtin_ctzg((unsigned _BitInt(128))0x1) == 0 ? 1 : -1]; char ctz59[__builtin_ctzg((unsigned _BitInt(128))0x1, 42) == 0 ? 
1 : -1]; -- cgit v1.2.3 From 6265ca686dfe18e6032e59637f144bad7ea6cf2b Mon Sep 17 00:00:00 2001 From: Ties Stuij Date: Wed, 18 Jun 2025 13:38:49 +0100 Subject: [AArch64] Add Cortex-A320 scheduling model (#144385) Instead of using the Cortex-A510 scheduling model, Cortex-A320 now uses its own scheduling model, based off of the Cortex-A320 Software Optimization Guide: https://developer.arm.com/documentation/110285/r0p1 --------- Co-authored-by: Nashe Mncube --- llvm/lib/Target/AArch64/AArch64.td | 1 + llvm/lib/Target/AArch64/AArch64Processors.td | 2 +- llvm/lib/Target/AArch64/AArch64SchedA320.td | 1415 +++ .../AArch64/Cortex/A320-basic-instructions.s | 3721 +++++++ .../AArch64/Cortex/A320-neon-instructions.s | 3208 ++++++ .../AArch64/Cortex/A320-sve-instructions.s | 10258 +++++++++++++++++++ 6 files changed, 18604 insertions(+), 1 deletion(-) create mode 100644 llvm/lib/Target/AArch64/AArch64SchedA320.td create mode 100644 llvm/test/tools/llvm-mca/AArch64/Cortex/A320-basic-instructions.s create mode 100644 llvm/test/tools/llvm-mca/AArch64/Cortex/A320-neon-instructions.s create mode 100644 llvm/test/tools/llvm-mca/AArch64/Cortex/A320-sve-instructions.s diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index f303819f411d..eb5a5199b895 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -103,6 +103,7 @@ def MTEUnsupported : AArch64Unsupported { let F = [HasPAuth, HasPAuthLR] in def PAUnsupported : AArch64Unsupported; +include "AArch64SchedA320.td" include "AArch64SchedA53.td" include "AArch64SchedA55.td" include "AArch64SchedA510.td" diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index c7ea6393e2ad..e1b82953aad8 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -1131,7 +1131,7 @@ def : ProcessorModel<"cortex-a34", CortexA53Model, ProcessorFeatures.A53, [TuneA35]>; def : 
ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53, [TuneA35]>; -def : ProcessorModel<"cortex-a320", CortexA510Model, ProcessorFeatures.A320, +def : ProcessorModel<"cortex-a320", CortexA320Model, ProcessorFeatures.A320, [TuneA320]>; def : ProcessorModel<"cortex-a53", CortexA53Model, ProcessorFeatures.A53, [TuneA53]>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedA320.td b/llvm/lib/Target/AArch64/AArch64SchedA320.td new file mode 100644 index 000000000000..89ed13389daf --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64SchedA320.td @@ -0,0 +1,1415 @@ +//==- AArch64SchedCortexA320.td - ARM Cortex-A320 Scheduling Definitions -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the ARM Cortex-A320 processor. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the per-operand machine model. +// This works with MachineScheduler. See MCSchedModel.h for details. + +// Cortex-A320 machine model for scheduling and other instruction cost heuristics. +def CortexA320Model : SchedMachineModel { + let MicroOpBufferSize = 0; // Cortex-A320 is an in-order processor + let IssueWidth = 1; // Cortex-A320 is a single-issue processor + let LoadLatency = 5; + let PostRAScheduler = 1; // Enable PostRA scheduler pass. + let CompleteModel = 0; // Covers instructions applicable to Cortex-A320. 
+ + let FullInstRWOverlapCheck = 0; +} + + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types + +let SchedModel = CortexA320Model in { + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +// Modeling each pipeline as a ProcResource using the BufferSize = 0 since the +// Cortex-A320 is in-order. +let BufferSize = 0 in { + def CortexA320UnitALU : ProcResource<1>; // Int ALU + def CortexA320UnitMAC : ProcResource<1>; // Int MAC, 64-bit wide + def CortexA320UnitDiv : ProcResource<1>; // Int Division, not pipelined + def CortexA320UnitLdSt : ProcResource<1>; // Load/Store shared pipe + def CortexA320UnitB : ProcResource<1>; // Branch + def CortexA320UnitPAC : ProcResource<1>; // Pointer Authentication (PAC) pipe + + // The FP DIV/SQRT instructions execute totally differently from the FP ALU + // instructions; that's why for now we model them with 2 resources. + def CortexA320UnitVALU : ProcResource<1>; // SIMD/FP/SVE ALU + def CortexA320UnitVMAC : ProcResource<1>; // SIMD/FP/SVE MAC + def CortexA320UnitVMC : ProcResource<1>; // SIMD/FP/SVE multicycle instrs (e.g Div, SQRT, cryptography) +} + +// These latencies are modeled without taking into account forwarding paths +// (the software optimisation guide lists latencies taking into account +// typical forwarding paths). 
+def : WriteRes { let Latency = 1; } // MOVN, MOVZ +def : WriteRes { let Latency = 1; } // ALU +def : WriteRes { let Latency = 2; } // ALU of Shifted-Reg +def : WriteRes { let Latency = 2; } // ALU of Extended-Reg +def : WriteRes { let Latency = 2; } // EXTR from a reg pair +def : WriteRes { let Latency = 2; } // Shift/Scale + +// MAC +def : WriteRes { let Latency = 3; } // 32-bit Multiply +def : WriteRes { let Latency = 5; let ReleaseAtCycles = [2];} // 64-bit Multiply + +// Div +def : WriteRes { + let Latency = 12; let ReleaseAtCycles = [12]; +} +def : WriteRes { + let Latency = 20; let ReleaseAtCycles = [20]; +} + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to Cortex-A320 + +//===----------------------------------------------------------------------===// +class CortexA320Write : SchedWriteRes<[res]> { + let Latency = n; +} + +class CortexA320MCWrite : SchedWriteRes<[res]> { + let Latency = n; + let ReleaseAtCycles = [m]; + let BeginGroup = 1; +} + +class CortexA320MC_RC0Write : SchedWriteRes<[res]> { + let Latency = n; + let BeginGroup = 1; +} + +//===----------------------------------------------------------------------===// + +// Define generic 2 micro-op types +def CortexA320Write_11cyc_1VMAC_1VALU : SchedWriteRes<[CortexA320UnitVALU, CortexA320UnitVMAC]> { + let Latency = 11; + let NumMicroOps = 2; +} + +def CortexA320Write_16cyc_1VMAC_1VALU : SchedWriteRes<[CortexA320UnitVALU, CortexA320UnitVMAC]> { + let Latency = 16; + let NumMicroOps = 2; +} + +class CortexA320Write_PAC_B : SchedWriteRes<[CortexA320UnitPAC, CortexA320UnitB]> { + let Latency = lat; + let NumMicroOps = 2; +} + +// Load +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +def CortexA320WriteVLD1 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 3; } +def CortexA320WriteVLD1SI : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 3; 
let SingleIssue = 1; } + +def CortexA320WriteVLD2 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 4; + let ReleaseAtCycles = [2]; } + +def CortexA320WriteVLD3 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 5; + let ReleaseAtCycles = [3]; } + +def CortexA320WriteVLD4 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 6; + let ReleaseAtCycles = [4]; } + +def CortexA320WriteVLD6 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 5; + let ReleaseAtCycles = [3]; } + +def CortexA320WriteVLD8 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 6; + let ReleaseAtCycles = [4]; } + +def CortexA320WriteLDP1 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 4; } +def CortexA320WriteLDP2 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 4; } +def CortexA320WriteLDP4 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 4; } + +// Pre/Post Indexing - Performed as part of address generation +def : WriteRes { let Latency = 0; } + +// Store +let RetireOOO = 1 in { +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 1; } +} +def : WriteRes { let Latency = 3; } + +// Vector Store - Similar to vector loads, can take 1-3 cycles to issue. +def : WriteRes { let Latency = 5; + let ReleaseAtCycles = [2];} +def CortexA320WriteVST1 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 4; } +def CortexA320WriteVST2 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 5; + let ReleaseAtCycles = [2]; } +def CortexA320WriteVST3 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 5; + let ReleaseAtCycles = [3]; } +def CortexA320WriteVST4 : SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 5; + let ReleaseAtCycles = [4]; } + +def : WriteRes { let Unsupported = 1; } + +// Branch +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// FP ALU +// As the WriteF result is produced in F5 and can mostly be forwarded +// to the consumer at F1, the effective latency is set to 4.
+def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } + +class CortexA320VSt : SchedWriteRes<[CortexA320UnitLdSt]> { + let RetireOOO = 1; + let ReleaseAtCycles = [n]; +} + +def CortexA320VSt0 : SchedWriteRes<[CortexA320UnitLdSt]> { + let RetireOOO = 1; +} + +def : SchedAlias>; +def : SchedAlias>; + +// FP ALU specific new schedwrite definitions +def CortexA320WriteFPALU_F3 : SchedWriteRes<[CortexA320UnitVALU]> { let Latency = 3;} +def CortexA320WriteFPALU_F4 : SchedWriteRes<[CortexA320UnitVALU]> { let Latency = 4;} + +// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined +def : WriteRes { let Latency = 4; } + +let RetireOOO = 1 in { +def : WriteRes { let Latency = 22; + let ReleaseAtCycles = [29]; } +def CortexA320WriteVMAC : SchedWriteRes<[CortexA320UnitVMAC]> { let Latency = 4; } +def CortexA320WriteFDivHP : SchedWriteRes<[CortexA320UnitVMC]> { let Latency = 8; + let ReleaseAtCycles = [5]; } +def CortexA320WriteFDivSP : SchedWriteRes<[CortexA320UnitVMC]> { let Latency = 13; + let ReleaseAtCycles = [10]; } +def CortexA320WriteFDivDP : SchedWriteRes<[CortexA320UnitVMC]> { let Latency = 22; + let ReleaseAtCycles = [19]; } +def CortexA320WriteFSqrtHP : SchedWriteRes<[CortexA320UnitVMC]> { let Latency = 8; + let ReleaseAtCycles = [5]; } +def CortexA320WriteFSqrtSP : SchedWriteRes<[CortexA320UnitVMC]> { let Latency = 12; + let ReleaseAtCycles = [9]; } +def CortexA320WriteFSqrtDP : SchedWriteRes<[CortexA320UnitVMC]> { let Latency = 22; + let ReleaseAtCycles = [19]; } +} + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedRead types. 
+ +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + + +// MUL +def : ReadAdvance; +def : ReadAdvance; + +// Div +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Subtarget-specific InstRWs. + +def CortexA320WriteISReg : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : InstRW<[CortexA320WriteISReg], (instregex ".*rs$")>; +def : InstRW<[WriteIS], (instrs RBITWr, RBITXr)>; + +// Pointer Authentication Instructions (v8.3 PAC) +// ----------------------------------------------------------------------------- + +// Authenticate data address +// Authenticate instruction address +// Compute pointer authentication code for data address +// Compute pointer authentication code, using generic key +// Compute pointer authentication code for instruction address +def : InstRW<[CortexA320Write<4, CortexA320UnitPAC>], (instregex "^AUT", "^PAC")>; + +// Branch and link, register, with pointer authentication +// Branch, register, with pointer authentication +// Branch, return, with pointer authentication +def : InstRW<[CortexA320Write_PAC_B<1>], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA, + BRAAZ, BRAB, BRABZ, RETAA, RETAB, + ERETAA, ERETAB)>; + +// Load register, with pointer authentication +def : InstRW<[CortexA320Write<2, CortexA320UnitPAC>], (instregex "^LDRA[AB](indexed|writeback)")>; + +// Strip pointer authentication code +def : InstRW<[CortexA320Write<5, CortexA320UnitPAC>], (instrs XPACD, XPACI, XPACLRI)>; +//--- +// Miscellaneous +//--- +def : InstRW<[CortexA320WriteVLD1SI,CortexA320WriteLDP1], (instregex "LDPS?Wi")>; +def : InstRW<[CortexA320WriteVLD1,CortexA320WriteLDP1], (instregex "LDPSi")>; +def : InstRW<[CortexA320WriteVLD1,CortexA320WriteLDP2], (instregex "LDP(X|D)i")>; +def : InstRW<[CortexA320WriteVLD1,CortexA320WriteLDP4], (instregex "LDPQi")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD1SI,CortexA320WriteLDP1], 
(instregex "LDPS?W(pre|post)")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD1,CortexA320WriteLDP1], (instregex "LDPS(pre|post)")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD1,CortexA320WriteLDP2], (instregex "LDP(X|D)(pre|post)")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD1,CortexA320WriteLDP4], (instregex "LDPQ(pre|post)")>; +def : InstRW<[WriteI], (instrs COPY)>; +//--- +// Vector Loads - 128-bit per cycle +//--- +// 1-element structures +def CortexA320WriteVLD1Latency3: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 3; let ReleaseAtCycles = [1]; } +def CortexA320WriteVLD1Latency4: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 4; let ReleaseAtCycles = [2]; } +def CortexA320WriteVLD1Latency5: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 5; let ReleaseAtCycles = [3]; } +def CortexA320WriteVLD1Latency6: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 6; let ReleaseAtCycles = [4]; } + +def : InstRW<[CortexA320WriteVLD1Latency3], (instregex "LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; +def : InstRW<[CortexA320WriteVLD1Latency3], (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[CortexA320WriteVLD1Latency4], (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[CortexA320WriteVLD1Latency4], (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[CortexA320WriteVLD1Latency5], (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[CortexA320WriteVLD1Latency4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[CortexA320WriteVLD1Latency6], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[CortexA320WriteVLD1Latency3], (instregex "LD1i(8|16|32|64)$")>; // single element +def : InstRW<[CortexA320WriteVLD1Latency3], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate + +def : InstRW<[WriteAdr, CortexA320WriteVLD1Latency3], (instregex "LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD1Latency3], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD1Latency4], 
(instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD1Latency4], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD1Latency5], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD1Latency4], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD1Latency6], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD1Latency3], (instregex "LD1i(8|16|32|64)_POST$")>; // single element +def : InstRW<[WriteAdr, CortexA320WriteVLD1Latency3], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // replicate + +// 2-element structures +def CortexA320WriteVLD2Latency3: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 3; let ReleaseAtCycles = [2]; } +def CortexA320WriteVLD2Latency4Release1: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 4; let ReleaseAtCycles = [1]; } +def CortexA320WriteVLD2Latency4Release2: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 4; let ReleaseAtCycles = [2]; } +def CortexA320WriteVLD2Latency4Release6: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 4; let ReleaseAtCycles = [6]; } + +def : InstRW<[CortexA320WriteVLD2Latency4Release1], (instregex "LD2Twov(8b|4h|2s|1d)$")>; +def : InstRW<[CortexA320WriteVLD2Latency4Release2], (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[CortexA320WriteVLD2Latency4Release6], (instregex "LD2i(8|16|32|64)$")>; +def : InstRW<[CortexA320WriteVLD2Latency3], (instregex "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; + +def : InstRW<[WriteAdr, CortexA320WriteVLD2Latency4Release1], (instregex "LD2Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD2Latency4Release2], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD2Latency4Release6], (instregex "LD2i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD2Latency3], (instregex "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; + +// 
3-element structures +def CortexA320WriteVLD3Latency4: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 4; let ReleaseAtCycles = [3]; } +def CortexA320WriteVLD3Latency5Release6: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 5; let ReleaseAtCycles = [6]; } +def CortexA320WriteVLD3Latency5Release7: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 5; let ReleaseAtCycles = [7]; } + +def : InstRW<[CortexA320WriteVLD3Latency5Release6], (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; +def : InstRW<[CortexA320WriteVLD3Latency5Release7], (instregex "LD3i(8|16|32|64)$")>; +def : InstRW<[CortexA320WriteVLD3Latency4], (instregex "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; + +def : InstRW<[WriteAdr, CortexA320WriteVLD3Latency5Release6], (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD3Latency5Release7], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD3Latency4], (instregex "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; + +// 4-element structures +def CortexA320WriteVLD4Latency4: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 4; let ReleaseAtCycles = [4]; } +def CortexA320WriteVLD4Latency5Release7: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 5; let ReleaseAtCycles = [7]; } +def CortexA320WriteVLD4Latency5Release8: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 5; let ReleaseAtCycles = [8]; } +def CortexA320WriteVLD4Latency6: SchedWriteRes<[CortexA320UnitLdSt]> { let Latency = 6; let ReleaseAtCycles = [7]; } + +def : InstRW<[CortexA320WriteVLD4Latency5Release7], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[CortexA320WriteVLD4Latency5Release8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[CortexA320WriteVLD4Latency6], (instregex "LD4i(8|16|32|64)$")>; +def : InstRW<[CortexA320WriteVLD4Latency4], (instregex "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; + +def : InstRW<[WriteAdr, CortexA320WriteVLD4Latency5Release7], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>; 
+def : InstRW<[WriteAdr, CortexA320WriteVLD4Latency5Release8], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD4Latency6], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVLD4Latency4], (instregex "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; +//--- +// Vector Stores +//--- +// 1 Element structures +def : InstRW<[CortexA320WriteVST1], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[CortexA320WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[CortexA320WriteVST2], (instregex "ST1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[CortexA320WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[CortexA320WriteVST2], (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[CortexA320WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[CortexA320WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, CortexA320WriteVST1], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVST2], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVST2], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// 2 Element structures +def : InstRW<[CortexA320WriteVST2], (instregex "ST2i(8|16|32|64)$")>; +def : InstRW<[CortexA320WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[CortexA320WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>; + +def : InstRW<[WriteAdr, CortexA320WriteVST2], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVST2], (instregex "ST2Twov(8b|4h|2s)_POST$")>; 
+def : InstRW<[WriteAdr, CortexA320WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; + +// 3 Element structures +def : InstRW<[CortexA320WriteVST2], (instregex "ST3i(8|16|32|64)$")>; +def : InstRW<[CortexA320WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; + +def : InstRW<[WriteAdr, CortexA320WriteVST2], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +// 4 Element structures +def : InstRW<[CortexA320WriteVST2], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[CortexA320WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; + +def : InstRW<[WriteAdr, CortexA320WriteVST2], (instregex "ST4i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, CortexA320WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +//--- +// Floating Point Conversions, MAC, DIV, SQRT +//--- +def : InstRW<[CortexA320WriteFPALU_F3], (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)")>; +def : InstRW<[CortexA320WriteFPALU_F4], (instregex "^XTN")>; +def : InstRW<[CortexA320WriteFPALU_F4], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>; +def : InstRW<[CortexA320WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>; + +def : InstRW<[CortexA320WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>; +def : InstRW<[CortexA320WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>; +def : InstRW<[CortexA320WriteFPALU_F4], (instregex "^(S|U)CVTFv")>; + +def : InstRW<[CortexA320WriteVMAC], (instregex "^FN?M(ADD|SUB).*")>; +def : InstRW<[CortexA320WriteVMAC], (instregex "^FML(A|S)v.*")>; +def : InstRW<[CortexA320WriteFDivHP], (instrs FDIVHrr)>; +def : InstRW<[CortexA320WriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[CortexA320WriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[CortexA320WriteFDivHP], (instregex "^FDIVv.*16$")>; +def : InstRW<[CortexA320WriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[CortexA320WriteFDivDP], (instregex "^FDIVv.*64$")>; +def : 
InstRW<[CortexA320WriteFSqrtHP], (instregex "^.*SQRT.*16$")>; +def : InstRW<[CortexA320WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[CortexA320WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; + +def : InstRW<[CortexA320WriteFPALU_F3], (instrs FCSELHrrr, FCSELSrrr, FCSELDrrr)>; + +// Advanced SIMD integer instructions +// ASIMD absolute diff +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[SU]ABDv(2i32|4i16|8i8)")>; +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[SU]ABDv(16i8|4i32|8i16)")>; +// ASIMD absolute diff accum +def : InstRW<[CortexA320Write<6, CortexA320UnitVALU>], (instregex "[SU]ABAL?v")>; +// ASIMD absolute diff long +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[SU]ABDLv")>; +// ASIMD arith #1 +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "(ADD|SUB|NEG)v", + "[SU]R?HADDv", "[SU]HSUBv")>; +// ASIMD arith #2 +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "ABSv(1i64|2i32|4i16|8i8)$", + "[SU]ADDLPv(2i32_v1i64|4i16_v2i32|8i8_v4i16)$", + "ADDPv(2i32|4i16|8i8)$")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(1i16|1i32|1i64|1i8|2i32|4i16|8i8)$")>; +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "ABSv(2i64|4i32|8i16|16i8)$", + "[SU]ADDLPv(16i8_v8i16|4i32_v2i64|8i16_v4i32)$", + "ADDPv(16i8|2i64|4i32|8i16)$")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(16i8|2i64|4i32|8i16)$")>; +// ASIMD arith #3 +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "SADDLv", "UADDLv", "SADDWv", + "UADDWv", "SSUBLv", "USUBLv", "SSUBWv", "USUBWv")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "ADDHNv", "SUBHNv")>; +// ASIMD arith #5 +def : InstRW<[CortexA320Write<8, CortexA320UnitVALU>], (instregex "RADDHNv", "RSUBHNv")>; +// ASIMD arith, reduce +def : InstRW<[CortexA320Write<3, 
CortexA320UnitVALU>], (instregex "ADDVv")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "SADDLVv", "UADDLVv")>; +// ASIMD compare #1 +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(1i64|2i32|4i16|8i8)")>; +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(2i64|4i32|8i16|16i8)")>; +// ASIMD compare #2 +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "CMTSTv(1i64|2i32|4i16|8i8)")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "CMTSTv(2i64|4i32|8i16|16i8)")>; +// ASIMD logical #1 +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "(AND|EOR|NOT|ORN)v8i8", + "(ORR|BIC)v(2i32|4i16|8i8)$", "MVNIv(2i|2s|4i16)")>; +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "(AND|EOR|NOT|ORN)v16i8", + "(ORR|BIC)v(16i8|4i32|8i16)$", "MVNIv(4i32|4s|8i16)")>; +// ASIMD max/min, basic +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[SU](MIN|MAX)P?v(2i32|4i16|8i8)")>; +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[SU](MIN|MAX)P?v(16i8|4i32|8i16)")>; +// ASIMD max/min, reduce +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "[SU](MAX|MIN)Vv")>; +// ASIMD multiply, by element +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "MULv(2i32|4i16|4i32|8i16)_indexed$", + "SQR?DMULHv(1i16|1i32|2i32|4i16|4i32|8i16)_indexed$")>; +// ASIMD multiply +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instrs PMULv8i8)>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instrs PMULv16i8)>; +// ASIMD multiply accumulate +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "ML[AS]v(2i32|4i16|8i8)$")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "ML[AS]v(16i8|4i32|8i16)$")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "ML[AS]v(2i32|4i16|4i32|8i16)_indexed$")>; +// ASIMD multiply 
accumulate half +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "SQRDML[AS]H[vi]")>; +// ASIMD multiply accumulate long +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "[SU]ML[AS]Lv")>; +// ASIMD multiply accumulate long #2 +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "SQDML[AS]L[iv]")>; +// ASIMD dot product +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "[SU]DOTv8i8")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "[SU]DOTv16i8")>; +// ASIMD dot product, by scalar +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "[SU]DOTlanev")>; +// ASIMD multiply long +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "[SU]MULLv", "SQDMULL[iv]")>; +// ASIMD polynomial (8x8) multiply long +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instrs PMULLv8i8, PMULLv16i8)>; +// ASIMD pairwise add and accumulate +def : InstRW<[CortexA320MCWrite<7, 2, CortexA320UnitVALU>], (instregex "[SU]ADALPv")>; +// ASIMD shift accumulate +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[SU]SRA(d|v2i32|v4i16|v8i8)")>; +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[SU]SRAv(16i8|2i64|4i32|8i16)")>; +// ASIMD shift accumulate #2 +def : InstRW<[CortexA320MCWrite<7, 2, CortexA320UnitVALU>], (instregex "[SU]RSRA[vd]")>; +// ASIMD shift by immed +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "SHLd$", "SHLv", + "SLId$", "SRId$", "[SU]SHR[vd]", "SHRNv")>; +// ASIMD shift by immed +// SXTL and UXTL are aliases for SHLL +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[US]?SHLLv")>; +// ASIMD shift by immed #2 +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[SU]RSHR(d|v2i32|v4i16|v8i8)", + "[SU]RSHRv(16i8|2i64|4i32|8i16)")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "RSHRNv(2i32|4i16|8i8)", + "RSHRNv(16i8|4i32|8i16)")>; +// ASIMD shift by 
register +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[SU]SHLv(1i64|2i32|4i16|8i8)")>; +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[SU]SHLv(2i64|4i32|8i16|16i8)")>; +// ASIMD shift by register #2 +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[SU]RSHLv(1i64|2i32|4i16|8i8)")>; +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "[SU]RSHLv(2i64|4i32|8i16|16i8)")>; + +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "[SU]QSHLv(1i64|2i32|4i16|8i8)")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "[SU]QSHLv(2i64|4i32|8i16|16i8)")>; + +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "[SU]QRSHLv(1i64|2i32|4i16|8i8)")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "[SU]QRSHLv(2i64|4i32|8i16|16i8)")>; + +// Cryptography extensions +// ----------------------------------------------------------------------------- + +// Crypto AES ops +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^AES[DE]rr$", "^AESI?MCrr")>; + +// Crypto polynomial (64x64) multiply long +def : InstRW<[CortexA320MCWrite<4, 0, CortexA320UnitVMC>], (instrs PMULLv1i64, PMULLv2i64)>; + +// Crypto SHA1 hash acceleration op +// Crypto SHA1 schedule acceleration ops +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^SHA1(H|SU0|SU1)")>; + +// Crypto SHA1 hash acceleration ops +// Crypto SHA256 hash acceleration ops +def : InstRW<[CortexA320MCWrite<4, 0, CortexA320UnitVMC>], (instregex "^SHA1[CMP]", "^SHA256H2?")>; + +// Crypto SHA256 schedule acceleration ops +def : InstRW<[CortexA320MCWrite<4, 0, CortexA320UnitVMC>], (instregex "^SHA256SU[01]")>; + +// Crypto SHA512 hash acceleration ops +def : InstRW<[CortexA320MCWrite<9, 0, CortexA320UnitVMC>], (instregex "^SHA512(H|H2|SU0|SU1)")>; + +// Crypto SHA3 ops +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instrs BCAX, EOR3)>; +def : InstRW<[CortexA320Write<4, 
CortexA320UnitVALU>], (instrs XAR)>; +def : InstRW<[CortexA320MCWrite<9, 0, CortexA320UnitVMC>], (instrs RAX1)>; + + +// Crypto SM3 ops +def : InstRW<[CortexA320MCWrite<9, 0, CortexA320UnitVMC>], (instregex "^SM3PARTW[12]$", "^SM3SS1$", + "^SM3TT[12][AB]$")>; + +// Crypto SM4 ops +def : InstRW<[CortexA320MCWrite<9, 0, CortexA320UnitVMC>], (instrs SM4E, SM4ENCKEY)>; + +// CRC +// ----------------------------------------------------------------------------- + +def : InstRW<[CortexA320MCWrite<2, 0, CortexA320UnitMAC>], (instregex "^CRC32")>; + +// SVE Predicate instructions + +// Loop control, based on predicate +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instrs BRKA_PPmP, BRKA_PPzP, + BRKB_PPmP, BRKB_PPzP)>; + +// Loop control, based on predicate and flag setting +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instrs BRKAS_PPzP, BRKBS_PPzP)>; + +// Loop control, propagating +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>; + +// Loop control, propagating and flag setting +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instrs BRKNS_PPzP)>; +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instrs BRKPAS_PPzPP, BRKPBS_PPzPP)>; + + +// Loop control, based on GPR +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], + (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>; + +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>; + +// Loop terminate +def : InstRW<[CortexA320Write<1, CortexA320UnitALU>], (instregex "^CTERM(EQ|NE)_(WW|XX)")>; + +// Predicate counting scalar +def : InstRW<[CortexA320Write<1, CortexA320UnitALU>], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; + +def : InstRW<[CortexA320Write<3, CortexA320UnitALU>], + (instregex "^CNT[BHWD]_XPiI")>; + +def : InstRW<[CortexA320Write<3, CortexA320UnitALU>], + (instregex "^(INC|DEC)[BHWD]_XPiI")>; + +def : InstRW<[CortexA320Write<5, CortexA320UnitALU>], + (instregex 
"^(SQINC|SQDEC|UQINC|UQDEC)[BHWD]_[XW]Pi(Wd)?I")>; + +// Predicate counting scalar, active predicate +def : InstRW<[CortexA320Write<1, CortexA320UnitVALU>], + (instregex "^CNTP_XPP_[BHSD]")>; + +def : InstRW<[CortexA320Write<1, CortexA320UnitVALU>], + (instregex "^(DEC|INC)P_XP_[BHSD]")>; + +def : InstRW<[CortexA320Write<9, CortexA320UnitVALU>], + (instregex "^(SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]", + "^(UQDEC|UQINC)P_WP_[BHSD]", + "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]")>; + + +// Predicate counting vector, active predicate +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], + (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>; + +// Predicate logical +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], + (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>; + +// Predicate logical, flag setting +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], + (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>; + +// Predicate reverse +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instregex "^REV_PP_[BHSD]")>; + +// Predicate select +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instrs SEL_PPPP)>; + +// Predicate set +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instregex "^PFALSE", "^PTRUE_[BHSD]")>; + +// Predicate set/initialize, set flags +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instregex "^PTRUES_[BHSD]")>; + +// Predicate find first/next +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>; + +// Predicate test +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instrs PTEST_PP)>; + +// Predicate transpose +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instregex "^TRN[12]_PPP_[BHSDQ]")>; + +// Predicate unpack and widen +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instrs PUNPKHI_PP, PUNPKLO_PP)>; + +// Predicate zip/unzip +def : InstRW<[CortexA320Write<2, CortexA320UnitVALU>], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]")>; + + 
+// SVE integer instructions +// ----------------------------------------------------------------------------- +// Arithmetic, absolute diff +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^[SU]ABD_(ZPmZ|ZPZZ)_[BHSD]")>; + +// Arithmetic, absolute diff accum +def : InstRW<[CortexA320MCWrite<6, 2, CortexA320UnitVALU>], (instregex "^[SU]ABA_ZZZ_[BHSD]")>; + +// Arithmetic, absolute diff accum long +def : InstRW<[CortexA320MCWrite<6, 2, CortexA320UnitVALU>], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>; + +// Arithmetic, absolute diff long +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>; + +// Arithmetic, basic +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], + (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]", + "^(ADD|SUB|SUBR)_ZPmZ_[BHSD]", + "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]", + "^(ADD|SUB)_ZZZ_[BHSD]", + "^(ADD|SUB|SUBR)_ZI_[BHSD]", + "^ADR_[SU]XTW_ZZZ_D_[0123]", + "^ADR_LSL_ZZZ_[SD]_[0123]", + "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], + (instregex "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]", + "^SADDLBT_ZZZ_[HSD]", + "^SSUBL(BT|TB)_ZZZ_[HSD]")>; + +// Arithmetic, complex +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], + (instregex "^SQ(ABS|NEG)_ZPmZ_[BHSD]", + "^SQ(ADD|SUB|SUBR)_ZPmZ_?[BHSD]", + "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]", + "^[SU]Q(ADD|SUB)_ZI_[BHSD]", + "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]", + "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>; +def : InstRW<[CortexA320Write<8, CortexA320UnitVALU>], + (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]")>; + +// Arithmetic, large integer +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>; + +// Arithmetic, pairwise add +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^ADDP_ZPmZ_[BHSD]")>; + +// Arithmetic, pairwise add and accum long +def : InstRW<[CortexA320MCWrite<7, 2, CortexA320UnitVALU>], (instregex "^[SU]ADALP_ZPmZ_[HSD]")>; + +// Arithmetic, shift +def : 
InstRW<[CortexA320Write<3, CortexA320UnitVALU>], + (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]", + "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]", + "^(ASR|LSL|LSR)_ZPmI_[BHSD]", + "^(ASR|LSL|LSR)_ZPZI_[BHSD]", + "^(ASR|LSL|LSR)_ZPmZ_[BHSD]", + "^(ASR|LSL|LSR)_ZPZZ_[BHSD]", + "^(ASR|LSL|LSR)_ZZI_[BHSD]", + "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>; +// Arithmetic, shift right for divide +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], + (instregex "^ASRD_ZPmI_[BHSD]", + "^ASRD_ZPZI_[BHSD]")>; + +// Arithmetic, shift and accumulate +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], + (instregex "^(SSRA|USRA)_ZZI_[BHSD]")>; + +def : InstRW<[CortexA320MCWrite<7, 2, CortexA320UnitVALU>], + (instregex "^(SRSRA|URSRA)_ZZI_[BHSD]")>; + + +// Arithmetic, shift by immediate +// Arithmetic, shift by immediate and insert +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], + (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]")>; + +// Arithmetic, shift complex +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], + (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]", + "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_(ZPmZ|ZPZZ)_[BHSD]", + "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]", + "^SQSHRU?N[BT]_ZZI_[BHS]", + "^UQR?SHRN[BT]_ZZI_[BHS]")>; + +// Arithmetic, shift rounding +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], + (instregex "^(SRSHL|SRSHR|SRSHLR|URSHL|URSHLR|URSHR)_(ZPmZ|ZPZZ|ZPZI)_[BHSD]", + "^[SU]RSHR_ZPmI_[BHSD]")>; + +// Bit manipulation +def : InstRW<[CortexA320MCWrite<13, 12, CortexA320UnitVMC>], + (instregex "^(BDEP|BEXT|BGRP)_ZZZ_B")>; + +def : InstRW<[CortexA320MCWrite<21, 20, CortexA320UnitVMC>], + (instregex "^(BDEP|BEXT|BGRP)_ZZZ_H")>; + +def : InstRW<[CortexA320MCWrite<37, 36, CortexA320UnitVMC>], + (instregex "^(BDEP|BEXT|BGRP)_ZZZ_S")>; + +def : InstRW<[CortexA320MCWrite<68, 67, CortexA320UnitVMC>], + (instregex "^(BDEP|BEXT|BGRP)_ZZZ_D")>; + + +// Bitwise select +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], 
(instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>; + +// Count/reverse bits +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^(CLS|CLZ|RBIT)_ZPmZ_[BHSD]")>; +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^CNT_ZPmZ_[BH]")>; +def : InstRW<[CortexA320Write<8, CortexA320UnitVALU>], (instregex "^CNT_ZPmZ_S")>; +def : InstRW<[CortexA320Write<12, CortexA320UnitVALU>], (instregex "^CNT_ZPmZ_D")>; +// Broadcast logical bitmask immediate to vector +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instrs DUPM_ZI)>; + +// Compare and set flags +def : InstRW<[CortexA320Write<5, CortexA320UnitVALU>], + (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]", + "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>; + +// Complex add +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^CADD_ZZI_[BHSD]")>; + +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^SQCADD_ZZI_[BHSD]")>; + +// Complex dot product 8-bit element +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>; + +// Complex dot product 16-bit element +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>; + +// Complex multiply-add B, H, S element size +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^CMLA_ZZZ_[BHS]", + "^CMLA_ZZZI_[HS]")>; + +// Complex multiply-add D element size +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instrs CMLA_ZZZ_D)>; + +// Conditional extract operations, scalar form +def : InstRW<[CortexA320MCWrite<8, 2, CortexA320UnitVALU>], (instregex "^CLAST[AB]_RPZ_[BHSD]")>; + +// Conditional extract operations, SIMD&FP scalar and vector forms +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]", + "^COMPACT_ZPZ_[SD]", + "^SPLICE_ZPZZ?_[BHSD]")>; + +// Convert to floating point, 64b to float or convert to double +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], 
(instregex "^[SU]CVTF_ZPmZ_Dto[SD]")>; + +// Convert to floating point, 64b to half +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_DtoH")>; + +// Convert to floating point, 32b to single or half +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>; + +// Convert to floating point, 32b to double +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_StoD")>; + +// Convert to floating point, 16b to half +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_HtoH")>; + +// Copy, scalar +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>],(instregex "^CPY_ZPmR_[BHSD]")>; + +// Copy, scalar SIMD&FP or imm +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^CPY_ZPm[IV]_[BHSD]", + "^CPY_ZPzI_[BHSD]")>; + +// Divides, 32 bit +def : InstRW<[CortexA320MCWrite<15, 12, CortexA320UnitVMC>], (instregex "^[SU]DIVR?_(ZPmZ|ZPZZ)_S")>; + +// Divides, 64 bit +def : InstRW<[CortexA320MCWrite<26, 23, CortexA320UnitVMC>], (instregex "^[SU]DIVR?_(ZPmZ|ZPZZ)_D")>; + +// Dot product, 8 bit +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_S")>; + +// Dot product, 8 bit, using signed and unsigned integers +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>; + +// Dot product, 16 bit +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_D")>; + +// Duplicate, immediate and indexed form +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^DUP_ZI_[BHSD]", + "^DUP_ZZI_[BHSDQ]")>; + +// Duplicate, scalar form +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^DUP_ZR_[BHSD]")>; + +// Extend, sign or zero +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^[SU]XTB_ZPmZ_[HSD]", + "^[SU]XTH_ZPmZ_[SD]", + "^[SU]XTW_ZPmZ_[D]")>; + +// Extract +def : InstRW<[CortexA320Write<3, 
CortexA320UnitVALU>], (instrs EXT_ZZI, EXT_ZZI_B)>; + +// Extract narrow saturating +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]", + "^SQXTUN[BT]_ZZ_[BHS]")>; + +// Extract/insert operation, SIMD and FP scalar form +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^LAST[AB]_VPZ_[BHSD]", + "^INSR_ZV_[BHSD]")>; + +// Extract/insert operation, scalar +def : InstRW<[CortexA320MCWrite<8, 2, CortexA320UnitVALU>], (instregex "^LAST[AB]_RPZ_[BHSD]", + "^INSR_ZR_[BHSD]")>; + +// Histogram operations +def : InstRW<[CortexA320MCWrite<8, 2, CortexA320UnitVALU>], (instregex "^HISTCNT_ZPzZZ_[SD]", + "^HISTSEG_ZZZ")>; + +// Horizontal operations, B, H, S form, immediate operands only +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^INDEX_II_[BHS]")>; + +// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar +// operands only / immediate, scalar operands +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>; + +// Horizontal operations, D form, immediate operands only +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instrs INDEX_II_D)>; + +// Horizontal operations, D form, scalar, immediate operands/ scalar operands +// only / immediate, scalar operands +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_D")>; + +// Logical +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], + (instregex "^(AND|EOR|ORR)_ZI", + "^(AND|BIC|EOR|EOR|ORR)_ZZZ", + "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]", + "^(AND|BIC|EOR|NOT|ORR)_ZPZZ_[BHSD]")>; + +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], + (instregex "^EOR(BT|TB)_ZZZ_[BHSD]")>; + +// Max/min, basic and pairwise +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]", + "^[SU](MAX|MIN)P?_(ZPmZ|ZPZZ)_[BHSD]")>; + +// Matching operations +def : InstRW<[CortexA320MCWrite<9, 2, CortexA320UnitVALU>], (instregex 
"^N?MATCH_PPzZZ_[BH]")>; + +// Matrix multiply-accumulate +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; + +// Move prefix +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]", + "^MOVPRFX_ZZ")>; + +// Multiply, B, H, S element size +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ|ZPZZ)_[BHS]", + "^[SU]MULH_(ZPmZ|ZZZ|ZPZZ)_[BHS]")>; + +// Multiply, D element size +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ|ZPZZ)_D", + "^[SU]MULH_(ZPmZ|ZZZ|ZPZZ)_D")>; + +// Multiply long +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^[SU]MULL[BT]_ZZZI_[SD]", + "^[SU]MULL[BT]_ZZZ_[HSD]")>; + +// Multiply accumulate, B, H, S element size +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^ML[AS]_(ZZZI|ZPZZZ)_[BHS]", + "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>; + +// Multiply accumulate, D element size +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^ML[AS]_(ZZZI|ZPZZZ)_D", + "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>; + +// Multiply accumulate long +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]", + "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>; + +// Multiply accumulate saturating doubling long regular +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]", + "^SQDML[AS](LB|LT)_ZZZI_[SD]")>; + +// Multiply saturating doubling high, B, H, S element size +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^SQDMULH_ZZZ_[BHS]", + "^SQDMULH_ZZZI_[HS]")>; + +// Multiply saturating doubling high, D element size +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>; + +// Multiply saturating doubling long +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^SQDMULL[BT]_ZZZ_[HSD]", + "^SQDMULL[BT]_ZZZI_[SD]")>; + +// Multiply 
saturating rounding doubling regular/complex accumulate, B, H, S +// element size +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZ_[BHS]", + "^SQRDCMLAH_ZZZ_[BHS]", + "^SQRDML[AS]H_ZZZI_[HS]", + "^SQRDCMLAH_ZZZI_[HS]")>; + +// Multiply saturating rounding doubling regular/complex accumulate, D element +// size +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZI?_D", + "^SQRDCMLAH_ZZZ_D")>; + +// Multiply saturating rounding doubling regular/complex, B, H, S element size +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^SQRDMULH_ZZZ_[BHS]", + "^SQRDMULH_ZZZI_[HS]")>; + +// Multiply saturating rounding doubling regular/complex, D element size +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^SQRDMULH_ZZZI?_D")>; + +// Multiply/multiply long, (8x8) polynomial +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^PMUL_ZZZ_B")>; + +def : InstRW<[CortexA320Write<9, CortexA320UnitVMC>], (instregex "^PMULL[BT]_ZZZ_[HDQ]")>; + + +// Predicate counting vector +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], + (instregex "^(DEC|INC)[HWD]_ZPiI")>; +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], + (instregex "^(SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI")>; + +// Reciprocal estimate +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>; + +// Reduction, arithmetic, B form +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; + +// Reduction, arithmetic, H form +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>; + +// Reduction, arithmetic, S form +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>; + +// Reduction, arithmetic, D form +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>; + +// Reduction, logical +def : 
InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]")>; + +// Reverse, vector +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^REV_ZZ_[BHSD]", + "^REVB_ZPmZ_[HSD]", + "^REVH_ZPmZ_[SD]", + "^REVW_ZPmZ_D")>; + +// Select, vector form +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^SEL_ZPZZ_[BHSD]")>; + +// Table lookup +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^TBL_ZZZZ?_[BHSD]")>; + +// Table lookup extension +def : InstRW<[CortexA320Write<8, CortexA320UnitVALU>], (instregex "^TBX_ZZZ_[BHSD]")>; + +// Transpose, vector form +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>; + +// Unpack and extend +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>; + +// Zip/unzip +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>; + +// SVE floating-point instructions +// ----------------------------------------------------------------------------- + +// Floating point absolute value/difference +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^FAB[SD]_ZPmZ_[HSD]", + "^FAB[SD]_ZPZZ_[HSD]")>; + +// Floating point arithmetic +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ|ZPZI|ZPZZ)_[HSD]", + "^FADDP_ZPmZZ_[HSD]", + "^FNEG_ZPmZ_[HSD]", + "^FSUBR_(ZPm[IZ]|ZPZ[IZ])_[HSD]")>; + +// Floating point associative add, F16 +def : InstRW<[CortexA320MCWrite<32, 29, CortexA320UnitVALU>], (instrs FADDA_VPZ_H)>; + +// Floating point associative add, F32 +def : InstRW<[CortexA320MCWrite<16, 13, CortexA320UnitVALU>], (instrs FADDA_VPZ_S)>; + +// Floating point associative add, F64 +def : InstRW<[CortexA320MCWrite<8, 5, CortexA320UnitVALU>], (instrs FADDA_VPZ_D)>; + +// Floating point compare +def : InstRW<[CortexA320Write<5, CortexA320UnitVALU>], (instregex "^FACG[ET]_PPzZZ_[HSD]", + 
"^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]", + "^FCM(LE|LT)_PPzZ0_[HSD]", + "^FCMUO_PPzZZ_[HSD]")>; + +// Floating point complex add +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^FCADD_ZPmZ_[HSD]")>; + +// Floating point complex multiply add +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^FCMLA_ZPmZZ_[HSD]", + "^FCMLA_ZZZI_[HS]")>; + +// Floating point convert, long or narrow (F16 to F32 or F32 to F16) +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^FCVT_ZPmZ_(HtoS|StoH)", + "^FCVTLT_ZPmZ_HtoS", + "^FCVTNT_ZPmZ_StoH")>; + +// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 +// or F64 to F16) +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)", + "^FCVTLT_ZPmZ_StoD", + "^FCVTNT_ZPmZ_DtoS")>; + +// Floating point convert, round to odd +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^FCVTX_ZPmZ_DtoS", "FCVTXNT_ZPmZ_DtoS")>; + +// Floating point base2 log, F16 +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>; + +// Floating point base2 log, F32 +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>; + +// Floating point base2 log, F64 +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>; + +// Floating point convert to integer, F16 +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>; + +// Floating point convert to integer, F32 +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>; + +// Floating point convert to integer, F64 +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], + (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>; + +// Floating point copy +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^FCPY_ZPmI_[HSD]", + "^FDUP_ZI_[HSD]")>; + +// Floating point divide, F16 +def : 
InstRW<[CortexA320MCWrite<8, 5, CortexA320UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>; + +// Floating point divide, F32 +def : InstRW<[CortexA320MCWrite<13, 10, CortexA320UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>; + +// Floating point divide, F64 +def : InstRW<[CortexA320MCWrite<22, 19, CortexA320UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>; + +// Floating point min/max pairwise +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>; + +// Floating point min/max +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^F(MAX|MIN)(NM)?_(ZPm[IZ]|ZPZZ|ZPZI)_[HSD]")>; + +// Floating point multiply +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^(FSCALE|FMULX)_(ZPmZ|ZPZZ)_[HSD]", + "^FMUL_(ZPm[IZ]|ZZZI?|ZPZI|ZPZZ)_[HSD]")>; + +// Floating point multiply accumulate +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], + (instregex "^FML[AS]_(ZPmZZ|ZZZI|ZPZZZ)_[HSD]", + "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_(ZPmZZ|ZPZZZ)_[HSD]")>; + +// Floating point multiply add/sub accumulate long +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>; + +// Floating point reciprocal estimate, F16 +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^FRECPE_ZZ_H", "^FRECPX_ZPmZ_H", + "^FRSQRTE_ZZ_H")>; + +// Floating point reciprocal estimate, F32 +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^FRECPE_ZZ_S", "^FRECPX_ZPmZ_S", + "^FRSQRTE_ZZ_S")>; +// Floating point reciprocal estimate, F64 +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>],(instregex "^FRECPE_ZZ_D", "^FRECPX_ZPmZ_D", + "^FRSQRTE_ZZ_D")>; + +// Floating point reciprocal step +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>; + +// Floating point reduction, min/max (F16, F32 and F64) +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], + (instregex "^(FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_[HSD]")>; + +// Floating point reduction, add (F16, F32 and F64) 
+def : InstRW<[CortexA320MCWrite<12, 11, CortexA320UnitVALU>], + (instregex "^FADDV_VPZ_H")>; + +def : InstRW<[CortexA320MCWrite<8, 5, CortexA320UnitVALU>], + (instregex "^FADDV_VPZ_S")>; + +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], + (instregex "^FADDV_VPZ_D")>; + + +// Floating point round to integral, F16 +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>; + +// Floating point round to integral, F32 +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>; + +// Floating point round to integral, F64 +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>; + +// Floating point square root, F16 +def : InstRW<[CortexA320MCWrite<11, 5, CortexA320UnitVMC>], (instregex "^FSQRT_ZPmZ_H")>; + +// Floating point square root, F32 +def : InstRW<[CortexA320MCWrite<14, 9, CortexA320UnitVMC>], (instregex "^FSQRT_ZPmZ_S")>; + +// Floating point square root, F64 +def : InstRW<[CortexA320MCWrite<25, 19, CortexA320UnitVMC>], (instregex "^FSQRT_ZPmZ_D")>; + +// Floating point trigonometric exponentiation +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^FEXPA_ZZ_[HSD]")>; + +// Floating point trigonometric multiply add +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^FTMAD_ZZI_[HSD]")>; + +// Floating point trigonometric, miscellaneous +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^FTSMUL_ZZZ_[HSD]")>; +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^FTSSEL_ZZZ_[HSD]")>; + + +// SVE BFloat16 (BF16) instructions +// ----------------------------------------------------------------------------- + +// Convert, F32 to BF16 +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>; + +// Dot product +def : InstRW<[CortexA320Write_11cyc_1VMAC_1VALU], (instrs BFDOT_ZZI, BFDOT_ZZZ)>; + +// Matrix multiply accumulate +def : 
InstRW<[CortexA320Write_16cyc_1VMAC_1VALU], (instrs BFMMLA_ZZZ)>; + +// Multiply accumulate long +def : InstRW<[CortexA320Write<4, CortexA320UnitVMAC>], (instregex "^BFMLAL[BT]_ZZZ(I)?")>; + +// SVE Load instructions +// ----------------------------------------------------------------------------- + +// Load vector +def : InstRW<[CortexA320Write<3, CortexA320UnitLdSt>], (instrs LDR_ZXI)>; + +// Load predicate +def : InstRW<[CortexA320Write<3, CortexA320UnitLdSt>], (instrs LDR_PXI)>; + +// Contiguous load, scalar + imm +def : InstRW<[CortexA320Write<3, CortexA320UnitLdSt>], (instregex "^LD1[BHWD]_IMM$", + "^LD1S?B_[HSD]_IMM$", + "^LD1S?H_[SD]_IMM$", + "^LD1S?W_D_IMM$" )>; +// Contiguous load, scalar + scalar +def : InstRW<[CortexA320Write<3, CortexA320UnitLdSt>], (instregex "^LD1[BHWD]$", + "^LD1S?B_[HSD]$", + "^LD1S?H_[SD]$", + "^LD1S?W_D$" )>; + +// Contiguous load broadcast, scalar + imm +def : InstRW<[CortexA320Write<3, CortexA320UnitLdSt>], (instregex "^LD1R[BHWD]_IMM$", + "^LD1RSW_IMM$", + "^LD1RS?B_[HSD]_IMM$", + "^LD1RS?H_[SD]_IMM$", + "^LD1RS?W_D_IMM$", + "^LD1RQ_[BHWD]_IMM$")>; + +// Contiguous load broadcast, scalar + scalar +def : InstRW<[CortexA320Write<3, CortexA320UnitLdSt>], (instregex "^LD1RQ_[BHWD]$")>; + +// Non temporal load, scalar + imm +def : InstRW<[CortexA320Write<3, CortexA320UnitLdSt>], (instregex "^LDNT1[BHWD]_ZRI$")>; + +// Non temporal load, scalar + scalar +def : InstRW<[CortexA320Write<3, CortexA320UnitLdSt>], (instregex "^LDNT1[BHWD]_ZRR$")>; + +// Non temporal gather load, vector + scalar 32-bit element size +def : InstRW<[CortexA320MCWrite<9, 9, CortexA320UnitLdSt>], (instregex "^LDNT1[BHW]_ZZR_S$", + "^LDNT1S[BH]_ZZR_S$")>; + +// Non temporal gather load, vector + scalar 64-bit element size +def : InstRW<[CortexA320MCWrite<7, 7, CortexA320UnitLdSt>], (instregex "^LDNT1S?[BHW]_ZZR_D$")>; +def : InstRW<[CortexA320MCWrite<7, 7, CortexA320UnitLdSt>], (instrs LDNT1D_ZZR_D)>; + +// Contiguous first faulting load, scalar + scalar +def : 
InstRW<[CortexA320Write<3, CortexA320UnitLdSt>], (instregex "^LDFF1[BHWD]$", + "^LDFF1S?B_[HSD]$", + "^LDFF1S?H_[SD]$", + "^LDFF1S?W_D$")>; + +// Contiguous non faulting load, scalar + imm +def : InstRW<[CortexA320Write<3, CortexA320UnitLdSt>], (instregex "^LDNF1[BHWD]_IMM$", + "^LDNF1S?B_[HSD]_IMM$", + "^LDNF1S?H_[SD]_IMM$", + "^LDNF1S?W_D_IMM$")>; + +// Contiguous Load two structures to two vectors, scalar + imm +def : InstRW<[CortexA320MCWrite<3, 1, CortexA320UnitLdSt>], (instregex "^LD2[BHWD]_IMM$")>; + +// Contiguous Load two structures to two vectors, scalar + scalar +def : InstRW<[CortexA320MCWrite<3, 2, CortexA320UnitLdSt>], (instregex "^LD2[BHWD]$")>; + +// Contiguous Load three structures to three vectors, scalar + imm +def : InstRW<[CortexA320MCWrite<5, 3, CortexA320UnitLdSt>], (instregex "^LD3[BHWD]_IMM$")>; + +// Contiguous Load three structures to three vectors, scalar + scalar +def : InstRW<[CortexA320MCWrite<5, 3, CortexA320UnitLdSt>], (instregex "^LD3[BHWD]$")>; + +// Contiguous Load four structures to four vectors, scalar + imm +def : InstRW<[CortexA320MCWrite<5, 3, CortexA320UnitLdSt>], (instregex "^LD4[BHWD]_IMM$")>; + +// Contiguous Load four structures to four vectors, scalar + scalar +def : InstRW<[CortexA320MCWrite<5, 3, CortexA320UnitLdSt>], (instregex "^LD4[BHWD]$")>; + +// Gather load, vector + imm, 32-bit element size +def : InstRW<[CortexA320MCWrite<9, 9, CortexA320UnitLdSt>], (instregex "^GLD(FF)?1S?[BH]_S_IMM$", + "^GLD(FF)?1W_IMM$")>; + +// Gather load, vector + imm, 64-bit element size +def : InstRW<[CortexA320MCWrite<7, 7, CortexA320UnitLdSt>], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$", + "^GLD(FF)?1D_IMM$")>; + +// Gather load, 64-bit element size +def : InstRW<[CortexA320MCWrite<7, 7, CortexA320UnitLdSt>], + (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$", + "^GLD(FF)?1S?[BHW]_D(_SCALED)?$", + "^GLD(FF)?1D_[SU]XTW(_SCALED)?$", + "^GLD(FF)?1D(_SCALED)?$")>; + +// Gather load, 32-bit scaled offset +def : 
InstRW<[CortexA320MCWrite<7, 7, CortexA320UnitLdSt>], + (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$", + "^GLD(FF)?1W_[SU]XTW_SCALED")>; + +// Gather load, 32-bit unpacked unscaled offset +def : InstRW<[CortexA320MCWrite<7, 7, CortexA320UnitLdSt>], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$", + "^GLD(FF)?1W_[SU]XTW$")>; + +def : InstRW<[CortexA320Write<0, CortexA320UnitVALU>], (instregex "^PRF(B|H|W|D).*")>; +// SVE Store instructions +// ----------------------------------------------------------------------------- + +// Store from predicate reg +def : InstRW<[CortexA320VSt0], (instrs STR_PXI)>; + +// Store from vector reg +def : InstRW<[CortexA320VSt0], (instrs STR_ZXI)>; + +// Contiguous store, scalar + imm +def : InstRW<[CortexA320VSt0], (instregex "^ST1[BHWD]_IMM$", + "^ST1B_[HSD]_IMM$", + "^ST1H_[SD]_IMM$", + "^ST1W_D_IMM$")>; + +// Contiguous store, scalar + scalar +def : InstRW<[CortexA320VSt0], (instregex "^ST1H(_[SD])?$")>; +def : InstRW<[CortexA320VSt0], (instregex "^ST1[BWD]$", + "^ST1B_[HSD]$", + "^ST1W_D$")>; + +// Contiguous store two structures from two vectors, scalar + imm +def : InstRW<[CortexA320VSt<11>], (instregex "^ST2[BHWD]_IMM$")>; + +// Contiguous store two structures from two vectors, scalar + scalar +def : InstRW<[CortexA320VSt<11>], (instrs ST2H)>; + +// Contiguous store two structures from two vectors, scalar + scalar +def : InstRW<[CortexA320VSt<11>], (instregex "^ST2[BWD]$")>; + +// Contiguous store three structures from three vectors, scalar + imm +def : InstRW<[CortexA320VSt<25>], (instregex "^ST3[BHW]_IMM$")>; +def : InstRW<[CortexA320VSt<14>], (instregex "^ST3D_IMM$")>; + +// Contiguous store three structures from three vectors, scalar + scalar +def : InstRW<[CortexA320VSt<25>], (instregex "^ST3[BHW]$")>; +def : InstRW<[CortexA320VSt<14>], (instregex "^ST3D$")>; + +// Contiguous store four structures from four vectors, scalar + imm +def : InstRW<[CortexA320VSt<50>], (instregex "^ST4[BHW]_IMM$")>; +def : InstRW<[CortexA320VSt<25>], 
(instregex "^ST4D_IMM$")>; + +// Contiguous store four structures from four vectors, scalar + scalar +def : InstRW<[CortexA320VSt<50>], (instregex "^ST4[BHW]$")>; + +// Contiguous store four structures from four vectors, scalar + scalar +def : InstRW<[CortexA320VSt<25>], (instregex "^ST4D$")>; + +// Non temporal store, scalar + imm +def : InstRW<[CortexA320VSt0], (instregex "^STNT1[BHWD]_ZRI$")>; + +// Non temporal store, scalar + scalar +def : InstRW<[CortexA320VSt0], (instrs STNT1H_ZRR)>; +def : InstRW<[CortexA320VSt0], (instregex "^STNT1[BWD]_ZRR$")>; + +// Scatter non temporal store, vector + scalar 32-bit element size +def : InstRW<[CortexA320VSt<9>], (instregex "^STNT1[BHW]_ZZR_S")>; + +// Scatter non temporal store, vector + scalar 64-bit element size +def : InstRW<[CortexA320VSt<7>], (instregex "^STNT1[BHWD]_ZZR_D")>; + +// Scatter store vector + imm 32-bit element size +def : InstRW<[CortexA320VSt<9>], (instregex "^SST1[BH]_S_IMM$", + "^SST1W_IMM$")>; + +// Scatter store vector + imm 64-bit element size +def : InstRW<[CortexA320VSt<7>], (instregex "^SST1[BHW]_D_IMM$", + "^SST1D_IMM$")>; + +// Scatter store, 32-bit scaled offset +def : InstRW<[CortexA320VSt<8>], + (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>; + +// Scatter store, 32-bit unpacked unscaled offset +def : InstRW<[CortexA320VSt<8>], (instregex "^SST1[BHW]_D_[SU]XTW$", + "^SST1D_[SU]XTW$")>; + +// Scatter store, 32-bit unpacked scaled offset +def : InstRW<[CortexA320VSt<8>], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$", + "^SST1D_[SU]XTW_SCALED$")>; + +// Scatter store, 32-bit unscaled offset +def : InstRW<[CortexA320VSt<8>], (instregex "^SST1[BH]_S_[SU]XTW$", + "^SST1W_[SU]XTW$")>; + +// Scatter store, 64-bit scaled offset +def : InstRW<[CortexA320VSt<8>], (instregex "^SST1[HW]_D_SCALED$", + "^SST1D_SCALED$")>; + +// Scatter store, 64-bit unscaled offset +def : InstRW<[CortexA320VSt<8>], (instregex "^SST1[BHW]_D$", + "^SST1D$")>; + +// SVE Miscellaneous instructions +// 
----------------------------------------------------------------------------- + +// Read first fault register, unpredicated +def : InstRW<[CortexA320Write<1, CortexA320UnitALU>], (instrs RDFFR_P)>; + +// Read first fault register, predicated +def : InstRW<[CortexA320Write<3, CortexA320UnitALU>], (instrs RDFFR_PPz)>; + +// Read first fault register and set flags +def : InstRW<[CortexA320Write<3, CortexA320UnitALU>], (instrs RDFFRS_PPz)>; + +// Set first fault register +// Write to first fault register +def : InstRW<[CortexA320Write<1, CortexA320UnitALU>], (instrs SETFFR, WRFFR)>; + +// SVE Cryptographic instructions +// ----------------------------------------------------------------------------- + +// Crypto AES ops +def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>], (instregex "^AES[DE]_ZZZ_B$", + "^AESI?MC_ZZ_B$")>; + +// Crypto SHA3 ops +def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>], (instregex "^(BCAX|EOR3)_ZZZZ$", + "^XAR_ZZZI_[BHSD]$")>; + +def : InstRW<[CortexA320MC_RC0Write<9, CortexA320UnitVMC>], (instregex "^RAX1_ZZZ_D$")>; + +// Crypto SM4 ops +def : InstRW<[CortexA320MC_RC0Write<9, CortexA320UnitVMC>], (instregex "^SM4E(KEY)?_ZZZ_S$")>; + +} diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A320-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A320-basic-instructions.s new file mode 100644 index 000000000000..35b5d5b2ce43 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A320-basic-instructions.s @@ -0,0 +1,3721 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a320 -instruction-tables < %s | FileCheck %s + +#------------------------------------------------------------------------------ +# Add/sub (immediate) +#------------------------------------------------------------------------------ + +add w2, w3, #4095 +add w30, w29, #1, lsl #12 +add w13, w5, #4095, lsl #12 +add x5, x7, #1638 +add w20, wsp, #801 +add wsp, wsp, #1104 +add wsp, w30, 
#4084 +add x0, x24, #291 +add x3, x24, #4095, lsl #12 +add x8, sp, #1074 +add sp, x29, #3816 +sub w0, wsp, #4077 +sub w4, w20, #546, lsl #12 +sub sp, sp, #288 +sub wsp, w19, #16 +adds w13, w23, #291, lsl #12 +cmn w2, #4095 +adds w20, wsp, #0 +cmn x3, #1, lsl #12 +cmp sp, #20, lsl #12 +cmp x30, #4095 +subs x4, sp, #3822 +cmn w3, #291, lsl #12 +cmn wsp, #1365 +cmn sp, #1092, lsl #12 +mov sp, x30 +mov wsp, w20 +mov x11, sp +mov w24, wsp + +#------------------------------------------------------------------------------ +# Add-subtract (shifted register) +#------------------------------------------------------------------------------ + +add w3, w5, w7 +add wzr, w3, w5 +add w20, wzr, w4 +add w4, w6, wzr +add w11, w13, w15 +add w9, w3, wzr, lsl #10 +add w17, w29, w20, lsl #31 +add w21, w22, w23, lsr #0 +add w24, w25, w26, lsr #18 +add w27, w28, w29, lsr #31 +add w2, w3, w4, asr #0 +add w5, w6, w7, asr #21 +add w8, w9, w10, asr #31 +add x3, x5, x7 +add xzr, x3, x5 +add x20, xzr, x4 +add x4, x6, xzr +add x11, x13, x15 +add x9, x3, xzr, lsl #10 +add x17, x29, x20, lsl #63 +add x21, x22, x23, lsr #0 +add x24, x25, x26, lsr #18 +add x27, x28, x29, lsr #63 +add x2, x3, x4, asr #0 +add x5, x6, x7, asr #21 +add x8, x9, x10, asr #63 +adds w3, w5, w7 +cmn w3, w5 +adds w20, wzr, w4 +adds w4, w6, wzr +adds w11, w13, w15 +adds w9, w3, wzr, lsl #10 +adds w17, w29, w20, lsl #31 +adds w21, w22, w23, lsr #0 +adds w24, w25, w26, lsr #18 +adds w27, w28, w29, lsr #31 +adds w2, w3, w4, asr #0 +adds w5, w6, w7, asr #21 +adds w8, w9, w10, asr #31 +adds x3, x5, x7 +cmn x3, x5 +adds x20, xzr, x4 +adds x4, x6, xzr +adds x11, x13, x15 +adds x9, x3, xzr, lsl #10 +adds x17, x29, x20, lsl #63 +adds x21, x22, x23, lsr #0 +adds x24, x25, x26, lsr #18 +adds x27, x28, x29, lsr #63 +adds x2, x3, x4, asr #0 +adds x5, x6, x7, asr #21 +adds x8, x9, x10, asr #63 +sub w3, w5, w7 +sub wzr, w3, w5 +sub w4, w6, wzr +sub w11, w13, w15 +sub w9, w3, wzr, lsl #10 +sub w17, w29, w20, lsl #31 +sub w21, w22, w23, lsr #0 
+sub w24, w25, w26, lsr #18 +sub w27, w28, w29, lsr #31 +sub w2, w3, w4, asr #0 +sub w5, w6, w7, asr #21 +sub w8, w9, w10, asr #31 +sub x3, x5, x7 +sub xzr, x3, x5 +sub x4, x6, xzr +sub x11, x13, x15 +sub x9, x3, xzr, lsl #10 +sub x17, x29, x20, lsl #63 +sub x21, x22, x23, lsr #0 +sub x24, x25, x26, lsr #18 +sub x27, x28, x29, lsr #63 +sub x2, x3, x4, asr #0 +sub x5, x6, x7, asr #21 +sub x8, x9, x10, asr #63 +subs w3, w5, w7 +cmp w3, w5 +subs w4, w6, wzr +subs w11, w13, w15 +subs w9, w3, wzr, lsl #10 +subs w17, w29, w20, lsl #31 +subs w21, w22, w23, lsr #0 +subs w24, w25, w26, lsr #18 +subs w27, w28, w29, lsr #31 +subs w2, w3, w4, asr #0 +subs w5, w6, w7, asr #21 +subs w8, w9, w10, asr #31 +subs x3, x5, x7 +cmp x3, x5 +subs x4, x6, xzr +subs x11, x13, x15 +subs x9, x3, xzr, lsl #10 +subs x17, x29, x20, lsl #63 +subs x21, x22, x23, lsr #0 +subs x24, x25, x26, lsr #18 +subs x27, x28, x29, lsr #63 +subs x2, x3, x4, asr #0 +subs x5, x6, x7, asr #21 +subs x8, x9, x10, asr #63 +cmn wzr, w4 +cmn w5, wzr +cmn w6, w7 +cmn w8, w9, lsl #15 +cmn w10, w11, lsl #31 +cmn w12, w13, lsr #0 +cmn w14, w15, lsr #21 +cmn w16, w17, lsr #31 +cmn w18, w19, asr #0 +cmn w20, w21, asr #22 +cmn w22, w23, asr #31 +cmn x0, x3 +cmn xzr, x4 +cmn x5, xzr +cmn x6, x7 +cmn x8, x9, lsl #15 +cmn x10, x11, lsl #63 +cmn x12, x13, lsr #0 +cmn x14, x15, lsr #41 +cmn x16, x17, lsr #63 +cmn x18, x19, asr #0 +cmn x20, x21, asr #55 +cmn x22, x23, asr #63 +cmp w0, w3 +cmp wzr, w4 +cmp w5, wzr +cmp w6, w7 +cmp w8, w9, lsl #15 +cmp w10, w11, lsl #31 +cmp w12, w13, lsr #0 +cmp w14, w15, lsr #21 +cmp w18, w19, asr #0 +cmp w20, w21, asr #22 +cmp w22, w23, asr #31 +cmp x0, x3 +cmp xzr, x4 +cmp x5, xzr +cmp x6, x7 +cmp x8, x9, lsl #15 +cmp x10, x11, lsl #63 +cmp x12, x13, lsr #0 +cmp x14, x15, lsr #41 +cmp x16, x17, lsr #63 +cmp x18, x19, asr #0 +cmp x20, x21, asr #55 +cmp x22, x23, asr #63 +cmp wzr, w0 +cmp xzr, x0 + +#------------------------------------------------------------------------------ +# Add-subtract 
(with carry) +#------------------------------------------------------------------------------ + +adc w29, w27, w25 +adc wzr, w3, w4 +adc w9, wzr, w10 +adc w20, w0, wzr +adc x29, x27, x25 +adc xzr, x3, x4 +adc x9, xzr, x10 +adc x20, x0, xzr +adcs w29, w27, w25 +adcs wzr, w3, w4 +adcs w9, wzr, w10 +adcs w20, w0, wzr +adcs x29, x27, x25 +adcs xzr, x3, x4 +adcs x9, xzr, x10 +adcs x20, x0, xzr +sbc w29, w27, w25 +sbc wzr, w3, w4 +ngc w9, w10 +sbc w20, w0, wzr +sbc x29, x27, x25 +sbc xzr, x3, x4 +ngc x9, x10 +sbc x20, x0, xzr +sbcs w29, w27, w25 +sbcs wzr, w3, w4 +ngcs w9, w10 +sbcs w20, w0, wzr +sbcs x29, x27, x25 +sbcs xzr, x3, x4 +ngcs x9, x10 +sbcs x20, x0, xzr +ngc w3, w12 +ngc wzr, w9 +ngc w23, wzr +ngc x29, x30 +ngc xzr, x0 +ngc x0, xzr +ngcs w3, w12 +ngcs wzr, w9 +ngcs w23, wzr +ngcs x29, x30 +ngcs xzr, x0 +ngcs x0, xzr + +#------------------------------------------------------------------------------ +# Bitfield +#------------------------------------------------------------------------------ + +sbfx x1, x2, #3, #2 +asr x3, x4, #63 +asr wzr, wzr, #31 +sbfx w12, w9, #0, #1 +ubfiz x4, x5, #52, #11 +ubfx xzr, x4, #0, #1 +ubfiz x4, xzr, #1, #6 +lsr x5, x6, #12 +bfi x4, x5, #52, #11 +bfxil xzr, x4, #0, #1 +bfi x4, xzr, #1, #6 +bfxil x5, x6, #12, #52 +sxtb w1, w2 +sxtb xzr, w3 +sxth w9, w10 +sxth x0, w1 +sxtw x3, w30 +uxtb w1, w2 +uxth w9, w10 +ubfx x3, x30, #0, #32 +asr w3, w2, #0 +asr w9, w10, #31 +asr x20, x21, #63 +asr w1, wzr, #3 +lsr w3, w2, #0 +lsr w9, w10, #31 +lsr x20, x21, #63 +lsr wzr, wzr, #3 +lsr w3, w2, #0 +lsl w9, w10, #31 +lsl x20, x21, #63 +lsl w1, wzr, #3 +sbfx w9, w10, #0, #1 +sbfiz x2, x3, #63, #1 +asr x19, x20, #0 +sbfiz x9, x10, #5, #59 +asr w9, w10, #0 +sbfiz w11, w12, #31, #1 +sbfiz w13, w14, #29, #3 +sbfiz xzr, xzr, #10, #11 +sbfx w9, w10, #0, #1 +asr x2, x3, #63 +asr x19, x20, #0 +asr x9, x10, #5 +asr w9, w10, #0 +asr w11, w12, #31 +asr w13, w14, #29 +sbfx xzr, xzr, #10, #11 +bfxil w9, w10, #0, #1 +bfi x2, x3, #63, 
#1 +bfxil x19, x20, #0, #64 +bfi x9, x10, #5, #59 +bfxil w9, w10, #0, #32 +bfi w11, w12, #31, #1 +bfi w13, w14, #29, #3 +bfi xzr, xzr, #10, #11 +bfxil w9, w10, #0, #1 +bfxil x2, x3, #63, #1 +bfxil x19, x20, #0, #64 +bfxil x9, x10, #5, #59 +bfxil w9, w10, #0, #32 +bfxil w11, w12, #31, #1 +bfxil w13, w14, #29, #3 +bfxil xzr, xzr, #10, #11 +ubfx w9, w10, #0, #1 +lsl x2, x3, #63 +lsr x19, x20, #0 +lsl x9, x10, #5 +lsr w9, w10, #0 +lsl w11, w12, #31 +lsl w13, w14, #29 +ubfiz xzr, xzr, #10, #11 +ubfx w9, w10, #0, #1 +lsr x2, x3, #63 +lsr x19, x20, #0 +lsr x9, x10, #5 +lsr w9, w10, #0 +lsr w11, w12, #31 +lsr w13, w14, #29 +ubfx xzr, xzr, #10, #11 + +#------------------------------------------------------------------------------ +# Compare and branch (immediate) +#------------------------------------------------------------------------------ + +cbz w5, #4 +cbz x5, #0 +cbnz x2, #-4 +cbnz x26, #1048572 +cbz wzr, #0 +cbnz xzr, #0 + +#------------------------------------------------------------------------------ +# Conditional branch (immediate) +#------------------------------------------------------------------------------ + +b.ne #4 +b.ge #1048572 +b.ge #-4 + +#------------------------------------------------------------------------------ +# Conditional compare (immediate) +#------------------------------------------------------------------------------ + +ccmp w1, #31, #0, eq +ccmp w3, #0, #15, hs +ccmp wzr, #15, #13, hs +ccmp x9, #31, #0, le +ccmp x3, #0, #15, gt +ccmp xzr, #5, #7, ne +ccmn w1, #31, #0, eq +ccmn w3, #0, #15, hs +ccmn wzr, #15, #13, hs +ccmn x9, #31, #0, le +ccmn x3, #0, #15, gt +ccmn xzr, #5, #7, ne + +#------------------------------------------------------------------------------ +# Conditional compare (register) +#------------------------------------------------------------------------------ + +ccmp w1, wzr, #0, eq +ccmp w3, w0, #15, hs +ccmp wzr, w15, #13, hs +ccmp x9, xzr, #0, le +ccmp x3, x0, #15, gt +ccmp xzr, x5, #7, ne +ccmn w1, wzr, #0, eq +ccmn 
w3, w0, #15, hs +ccmn wzr, w15, #13, hs +ccmn x9, xzr, #0, le +ccmn x3, x0, #15, gt +ccmn xzr, x5, #7, ne + +#------------------------------------------------------------------------------ +# Conditional select +#------------------------------------------------------------------------------ + +csel w1, w0, w19, ne +csel wzr, w5, w9, eq +csel w9, wzr, w30, gt +csel w1, w28, wzr, mi +csel x19, x23, x29, lt +csel xzr, x3, x4, ge +csel x5, xzr, x6, hs +csel x7, x8, xzr, lo +csinc w1, w0, w19, ne +csinc wzr, w5, w9, eq +csinc w9, wzr, w30, gt +csinc w1, w28, wzr, mi +csinc x19, x23, x29, lt +csinc xzr, x3, x4, ge +csinc x5, xzr, x6, hs +csinc x7, x8, xzr, lo +csinv w1, w0, w19, ne +csinv wzr, w5, w9, eq +csinv w9, wzr, w30, gt +csinv w1, w28, wzr, mi +csinv x19, x23, x29, lt +csinv xzr, x3, x4, ge +csinv x5, xzr, x6, hs +csinv x7, x8, xzr, lo +csneg w1, w0, w19, ne +csneg wzr, w5, w9, eq +csneg w9, wzr, w30, gt +csneg w1, w28, wzr, mi +csneg x19, x23, x29, lt +csneg xzr, x3, x4, ge +csneg x5, xzr, x6, hs +csneg x7, x8, xzr, lo +cset w3, eq +cset x9, pl +csetm w20, ne +csetm x30, ge +csinc w2, wzr, wzr, al +csinv x3, xzr, xzr, nv +cinc w3, w5, gt +cinc wzr, w4, le +cset w9, lt +cinc x3, x5, gt +cinc xzr, x4, le +cset x9, lt +csinc w5, w6, w6, nv +csinc x1, x2, x2, al +cinv w3, w5, gt +cinv wzr, w4, le +csetm w9, lt +cinv x3, x5, gt +cinv xzr, x4, le +csetm x9, lt +csinv x1, x0, x0, al +csinv w9, w8, w8, nv +cneg w3, w5, gt +cneg wzr, w4, le +cneg w9, wzr, lt +cneg x3, x5, gt +cneg xzr, x4, le +cneg x9, xzr, lt +csneg x4, x8, x8, al +csinv w9, w8, w8, nv + +#------------------------------------------------------------------------------ +# Data-processing (1 source) +#------------------------------------------------------------------------------ + +rbit w0, w7 +rbit x18, x3 +rev16 w17, w1 +rev16 x5, x2 +rev w18, w0 +rev32 x20, x1 +rev x22, x2 +clz w24, w3 +clz x26, x4 +cls w3, w5 +cls x20, x5 + 
+#------------------------------------------------------------------------------ +# Data-processing (2 source) +#------------------------------------------------------------------------------ + +udiv w0, w7, w10 +udiv x9, x22, x4 +sdiv w12, w21, w0 +sdiv x13, x2, x1 +lsl w11, w12, w13 +lsl x14, x15, x16 +lsr w17, w18, w19 +lsr x20, x21, x22 +asr w23, w24, w25 +asr x26, x27, x28 +ror w0, w1, w2 +ror x3, x4, x5 +lsl w6, w7, w8 +lsl x9, x10, x11 +lsr w12, w13, w14 +lsr x15, x16, x17 +asr w18, w19, w20 +asr x21, x22, x23 +ror w24, w25, w26 +ror x27, x28, x29 + +#------------------------------------------------------------------------------ +# Data-processing (3 sources) +#------------------------------------------------------------------------------ + +smulh x30, x29, x28 +smulh xzr, x27, x26 +umulh x30, x29, x28 +umulh x23, x30, xzr +madd w1, w3, w7, w4 +madd wzr, w0, w9, w11 +madd w13, wzr, w4, w4 +madd w19, w30, wzr, w29 +mul w4, w5, w6 +madd x1, x3, x7, x4 +madd xzr, x0, x9, x11 +madd x13, xzr, x4, x4 +madd x19, x30, xzr, x29 +mul x4, x5, x6 +msub w1, w3, w7, w4 +msub wzr, w0, w9, w11 +msub w13, wzr, w4, w4 +msub w19, w30, wzr, w29 +mneg w4, w5, w6 +msub x1, x3, x7, x4 +msub xzr, x0, x9, x11 +msub x13, xzr, x4, x4 +msub x19, x30, xzr, x29 +mneg x4, x5, x6 +smaddl x3, w5, w2, x9 +smaddl xzr, w10, w11, x12 +smaddl x13, wzr, w14, x15 +smaddl x16, w17, wzr, x18 +smull x19, w20, w21 +smsubl x3, w5, w2, x9 +smsubl xzr, w10, w11, x12 +smsubl x13, wzr, w14, x15 +smsubl x16, w17, wzr, x18 +smnegl x19, w20, w21 +umaddl x3, w5, w2, x9 +umaddl xzr, w10, w11, x12 +umaddl x13, wzr, w14, x15 +umaddl x16, w17, wzr, x18 +umull x19, w20, w21 +umsubl x3, w5, w2, x9 +umsubl x16, w17, wzr, x18 +umnegl x19, w20, w21 +smulh x30, x29, x28 +smulh x23, x22, xzr +umulh x23, x22, xzr +mul x19, x20, xzr +mneg w21, w22, w23 +smull x11, w13, w17 +umull x11, w13, w17 +smnegl x11, w13, w17 +umnegl x11, w13, w17 + +#------------------------------------------------------------------------------ +# 
Extract (immediate) +#------------------------------------------------------------------------------ + +extr w3, w5, w7, #0 +extr w11, w13, w17, #31 +extr x3, x5, x7, #15 +extr x11, x13, x17, #63 +ror x19, x23, #24 +ror x29, xzr, #63 +ror w9, w13, #31 + +#------------------------------------------------------------------------------ +# Floating-point compare +#------------------------------------------------------------------------------ + +fcmp s3, s5 +fcmp s31, #0.0 +fcmp s31, #0.0 +fcmpe s29, s30 +fcmpe s15, #0.0 +fcmpe s15, #0.0 +fcmp d4, d12 +fcmp d23, #0.0 +fcmp d23, #0.0 +fcmpe d26, d22 +fcmpe d29, #0.0 +fcmpe d29, #0.0 + +#------------------------------------------------------------------------------ +# Floating-point conditional compare +#------------------------------------------------------------------------------ + +fccmp s1, s31, #0, eq +fccmp s3, s0, #15, hs +fccmp s31, s15, #13, hs +fccmp d9, d31, #0, le +fccmp d3, d0, #15, gt +fccmp d31, d5, #7, ne +fccmpe s1, s31, #0, eq +fccmpe s3, s0, #15, hs +fccmpe s31, s15, #13, hs +fccmpe d9, d31, #0, le +fccmpe d3, d0, #15, gt +fccmpe d31, d5, #7, ne + +#------------------------------------------------------------------------------- +# Floating-point conditional select +#------------------------------------------------------------------------------- + +fcsel s3, s20, s9, pl +fcsel d9, d10, d11, mi + +#------------------------------------------------------------------------------ +# Floating-point data-processing (1 source) +#------------------------------------------------------------------------------ + +fmov s0, s1 +fabs s2, s3 +fneg s4, s5 +fsqrt s6, s7 +fcvt d8, s9 +fcvt h10, s11 +frintn s12, s13 +frintp s14, s15 +frintm s16, s17 +frintz s18, s19 +frinta s20, s21 +frintx s22, s23 +frinti s24, s25 +fmov d0, d1 +fabs d2, d3 +fneg d4, d5 +fsqrt d6, d7 +fcvt s8, d9 +fcvt h10, d11 +frintn d12, d13 +frintp d14, d15 +frintm d16, d17 +frintz d18, d19 +frinta d20, d21 +frintx d22, d23 +frinti d24, d25 +fcvt s26, 
h27 +fcvt d28, h29 + +#------------------------------------------------------------------------------ +# Floating-point data-processing (2 sources) +#------------------------------------------------------------------------------ + +fmul s20, s19, s17 +fdiv s1, s2, s3 +fadd s4, s5, s6 +fsub s7, s8, s9 +fmax s10, s11, s12 +fmin s13, s14, s15 +fmaxnm s16, s17, s18 +fminnm s19, s20, s21 +fnmul s22, s23, s2 +fmul d20, d19, d17 +fdiv d1, d2, d3 +fadd d4, d5, d6 +fsub d7, d8, d9 +fmax d10, d11, d12 +fmin d13, d14, d15 +fmaxnm d16, d17, d18 +fminnm d19, d20, d21 +fnmul d22, d23, d24 + +#------------------------------------------------------------------------------ +# Floating-point data-processing (3 sources) +#------------------------------------------------------------------------------ + +fmadd s3, s5, s6, s31 +fmadd d3, d13, d0, d23 +fmsub s3, s5, s6, s31 +fmsub d3, d13, d0, d23 +fnmadd s3, s5, s6, s31 +fnmadd d3, d13, d0, d23 +fnmsub s3, s5, s6, s31 +fnmsub d3, d13, d0, d23 + +#------------------------------------------------------------------------------ +# Floating-point <-> fixed-point conversion +#------------------------------------------------------------------------------ + +fcvtzs w3, h5, #1 +fcvtzs wzr, h20, #13 +fcvtzs w19, h0, #32 +fcvtzs x3, h5, #1 +fcvtzs x12, h30, #45 +fcvtzs x19, h0, #64 +fcvtzs w3, s5, #1 +fcvtzs wzr, s20, #13 +fcvtzs w19, s0, #32 +fcvtzs x3, s5, #1 +fcvtzs x12, s30, #45 +fcvtzs x19, s0, #64 +fcvtzs w3, d5, #1 +fcvtzs wzr, d20, #13 +fcvtzs w19, d0, #32 +fcvtzs x3, d5, #1 +fcvtzs x12, d30, #45 +fcvtzs x19, d0, #64 +fcvtzu w3, h5, #1 +fcvtzu wzr, h20, #13 +fcvtzu w19, h0, #32 +fcvtzu x3, h5, #1 +fcvtzu x12, h30, #45 +fcvtzu x19, h0, #64 +fcvtzu w3, s5, #1 +fcvtzu wzr, s20, #13 +fcvtzu w19, s0, #32 +fcvtzu x3, s5, #1 +fcvtzu x12, s30, #45 +fcvtzu x19, s0, #64 +fcvtzu w3, d5, #1 +fcvtzu wzr, d20, #13 +fcvtzu w19, d0, #32 +fcvtzu x3, d5, #1 +fcvtzu x12, d30, #45 +fcvtzu x19, d0, #64 +scvtf h23, w19, #1 +scvtf h31, wzr, #20 +scvtf h14, w0, 
#32 +scvtf h23, x19, #1 +scvtf h31, xzr, #20 +scvtf h14, x0, #64 +scvtf s23, w19, #1 +scvtf s31, wzr, #20 +scvtf s14, w0, #32 +scvtf s23, x19, #1 +scvtf s31, xzr, #20 +scvtf s14, x0, #64 +scvtf d23, w19, #1 +scvtf d31, wzr, #20 +scvtf d14, w0, #32 +scvtf d23, x19, #1 +scvtf d31, xzr, #20 +scvtf d14, x0, #64 +ucvtf h23, w19, #1 +ucvtf h31, wzr, #20 +ucvtf h14, w0, #32 +ucvtf h23, x19, #1 +ucvtf h31, xzr, #20 +ucvtf h14, x0, #64 +ucvtf s23, w19, #1 +ucvtf s31, wzr, #20 +ucvtf s14, w0, #32 +ucvtf s23, x19, #1 +ucvtf s31, xzr, #20 +ucvtf s14, x0, #64 +ucvtf d23, w19, #1 +ucvtf d31, wzr, #20 +ucvtf d14, w0, #32 +ucvtf d23, x19, #1 +ucvtf d31, xzr, #20 +ucvtf d14, x0, #64 + +#------------------------------------------------------------------------------ +# Floating-point <-> integer conversion +#------------------------------------------------------------------------------ + +fcvtns w3, h31 +fcvtns xzr, h12 +fcvtnu wzr, h12 +fcvtnu x0, h0 +fcvtps wzr, h9 +fcvtps x12, h20 +fcvtpu w30, h23 +fcvtpu x29, h3 +fcvtms w2, h3 +fcvtms x4, h5 +fcvtmu w6, h7 +fcvtmu x8, h9 +fcvtzs w10, h11 +fcvtzs x12, h13 +fcvtzu w14, h15 +fcvtzu x15, h16 +scvtf h17, w18 +scvtf h19, x20 +ucvtf h21, w22 +scvtf h23, x24 +fcvtas w25, h26 +fcvtas x27, h28 +fcvtau w29, h30 +fcvtau xzr, h0 +fcvtns w3, s31 +fcvtns xzr, s12 +fcvtnu wzr, s12 +fcvtnu x0, s0 +fcvtps wzr, s9 +fcvtps x12, s20 +fcvtpu w30, s23 +fcvtpu x29, s3 +fcvtms w2, s3 +fcvtms x4, s5 +fcvtmu w6, s7 +fcvtmu x8, s9 +fcvtzs w10, s11 +fcvtzs x12, s13 +fcvtzu w14, s15 +fcvtzu x15, s16 +scvtf s17, w18 +scvtf s19, x20 +ucvtf s21, w22 +scvtf s23, x24 +fcvtas w25, s26 +fcvtas x27, s28 +fcvtau w29, s30 +fcvtau xzr, s0 +fcvtns w3, d31 +fcvtns xzr, d12 +fcvtnu wzr, d12 +fcvtnu x0, d0 +fcvtps wzr, d9 +fcvtps x12, d20 +fcvtpu w30, d23 +fcvtpu x29, d3 +fcvtms w2, d3 +fcvtms x4, d5 +fcvtmu w6, d7 +fcvtmu x8, d9 +fcvtzs w10, d11 +fcvtzs x12, d13 +fcvtzu w14, d15 +fcvtzu x15, d16 +scvtf d17, w18 +scvtf d19, x20 +ucvtf d21, w22 +ucvtf d23, x24 +fcvtas w25, 
d26 +fcvtas x27, d28 +fcvtau w29, d30 +fcvtau xzr, d0 +fmov w3, s9 +fmov s9, w3 +fmov x20, d31 +fmov d1, x15 +fmov x3, v12.d[1] +fmov v1.d[1], x19 + +#------------------------------------------------------------------------------ +# Floating-point immediate +#------------------------------------------------------------------------------ + +fmov s2, #0.12500000 +fmov s3, #1.00000000 +fmov d30, #16.00000000 +fmov s4, #1.06250000 +fmov d10, #1.93750000 +fmov s12, #-1.00000000 +fmov d16, #8.50000000 + +#------------------------------------------------------------------------------ +# Load-register (literal) +#------------------------------------------------------------------------------ + +ldr w3, #0 +ldr x29, #4 +ldrsw xzr, #-4 +ldr s0, #8 +ldr d0, #1048572 +ldr q0, #-1048576 +prfm pldl1strm, #0 +prfm #22, #0 + +#------------------------------------------------------------------------------ +# Load/store exclusive +#------------------------------------------------------------------------------ + +stxrb w18, w8, [sp] +stxrh w24, w15, [x16] +stxr w5, w6, [x17] +stxr w1, x10, [x21] +ldxrb w30, [x0] +ldxrh w17, [x4] +ldxr w22, [sp] +ldxr x11, [x29] +ldxr x11, [x29] +ldxr x11, [x29] +stxp w12, w11, w10, [sp] +stxp wzr, x27, x9, [x12] +ldxp w0, wzr, [sp] +ldxp x17, x0, [x18] +ldxp x17, x0, [x18] +stlxrb w12, w22, [x0] +stlxrh w10, w1, [x1] +stlxr w9, w2, [x2] +stlxr w9, x3, [sp] +ldaxrb w8, [x4] +ldaxrh w7, [x5] +ldaxr w6, [sp] +ldaxr x5, [x6] +ldaxr x5, [x6] +ldaxr x5, [x6] +stlxp w4, w5, w6, [sp] +stlxp wzr, x6, x7, [x1] +ldaxp w5, w18, [sp] +ldaxp x6, x19, [x22] +ldaxp x6, x19, [x22] +stlrb w24, [sp] +stlrh w25, [x30] +stlr w26, [x29] +stlr x27, [x28] +stlr x27, [x28] +stlr x27, [x28] +ldarb w23, [sp] +ldarh w22, [x30] +ldar wzr, [x29] +ldar x21, [x28] +ldar x21, [x28] +ldar x21, [x28] + +#------------------------------------------------------------------------------ +# Load/store (unscaled immediate) 
+#------------------------------------------------------------------------------ + +sturb w9, [sp] +sturh wzr, [x12, #255] +stur w16, [x0, #-256] +stur x28, [x14, #1] +ldurb w1, [x20, #255] +ldurh w20, [x1, #255] +ldur w12, [sp, #255] +ldur xzr, [x12, #255] +ldursb x9, [x7, #-256] +ldursh x17, [x19, #-256] +ldursw x20, [x15, #-256] +prfum pldl2keep, [sp, #-256] +ldursb w19, [x1, #-256] +ldursh w15, [x21, #-256] +stur b0, [sp, #1] +stur h12, [x12, #-1] +stur s15, [x0, #255] +stur d31, [x5, #25] +stur q9, [x5] +ldur b3, [sp] +ldur h5, [x4, #-256] +ldur s7, [x12, #-1] +ldur d11, [x19, #4] +ldur q13, [x1, #2] + +#------------------------------------------------------------------------------ +# Load/store (immediate post-indexed) +#------------------------------------------------------------------------------ + +strb w9, [x2], #255 +strb w10, [x3], #1 +strb w10, [x3], #-256 +strh w9, [x2], #255 +strh w9, [x2], #1 +strh w10, [x3], #-256 +str w19, [sp], #255 +str w20, [x30], #1 +str w21, [x12], #-256 +str xzr, [x9], #255 +str x2, [x3], #1 +str x19, [x12], #-256 +ldrb w9, [x2], #255 +ldrb w10, [x3], #1 +ldrb w10, [x3], #-256 +ldrh w9, [x2], #255 +ldrh w9, [x2], #1 +ldrh w10, [x3], #-256 +ldr w19, [sp], #255 +ldr w20, [x30], #1 +ldr w21, [x12], #-256 +ldr xzr, [x9], #255 +ldr x2, [x3], #1 +ldr x19, [x12], #-256 +ldrsb xzr, [x9], #255 +ldrsb x2, [x3], #1 +ldrsb x19, [x12], #-256 +ldrsh xzr, [x9], #255 +ldrsh x2, [x3], #1 +ldrsh x19, [x12], #-256 +ldrsw xzr, [x9], #255 +ldrsw x2, [x3], #1 +ldrsw x19, [x12], #-256 +ldrsb wzr, [x9], #255 +ldrsb w2, [x3], #1 +ldrsb w19, [x12], #-256 +ldrsh wzr, [x9], #255 +ldrsh w2, [x3], #1 +ldrsh w19, [x12], #-256 +str b0, [x0], #255 +str b3, [x3], #1 +str b5, [sp], #-256 +str h10, [x10], #255 +str h13, [x23], #1 +str h15, [sp], #-256 +str s20, [x20], #255 +str s23, [x23], #1 +str s25, [x0], #-256 +str d20, [x20], #255 +str d23, [x23], #1 +str d25, [x0], #-256 +ldr b0, [x0], #255 +ldr b3, [x3], #1 +ldr b5, [sp], #-256 +ldr h10, [x10], #255 
+ldr h13, [x23], #1 +ldr h15, [sp], #-256 +ldr s20, [x20], #255 +ldr s23, [x23], #1 +ldr s25, [x0], #-256 +ldr d20, [x20], #255 +ldr d23, [x23], #1 +ldr d25, [x0], #-256 +ldr q20, [x1], #255 +ldr q23, [x9], #1 +ldr q25, [x20], #-256 +str q10, [x1], #255 +str q22, [sp], #1 +str q21, [x20], #-256 + +#------------------------------------------------------------------------------- +# Load-store register (immediate pre-indexed) +#------------------------------------------------------------------------------- + +ldr x3, [x4, #0]! +strb w9, [x2, #255]! +strb w10, [x3, #1]! +strb w10, [x3, #-256]! +strh w9, [x2, #255]! +strh w9, [x2, #1]! +strh w10, [x3, #-256]! +str w19, [sp, #255]! +str w20, [x30, #1]! +str w21, [x12, #-256]! +str xzr, [x9, #255]! +str x2, [x3, #1]! +str x19, [x12, #-256]! +ldrb w9, [x2, #255]! +ldrb w10, [x3, #1]! +ldrb w10, [x3, #-256]! +ldrh w9, [x2, #255]! +ldrh w9, [x2, #1]! +ldrh w10, [x3, #-256]! +ldr w19, [sp, #255]! +ldr w20, [x30, #1]! +ldr w21, [x12, #-256]! +ldr xzr, [x9, #255]! +ldr x2, [x3, #1]! +ldr x19, [x12, #-256]! +ldrsb xzr, [x9, #255]! +ldrsb x2, [x3, #1]! +ldrsb x19, [x12, #-256]! +ldrsh xzr, [x9, #255]! +ldrsh x2, [x3, #1]! +ldrsh x19, [x12, #-256]! +ldrsw xzr, [x9, #255]! +ldrsw x2, [x3, #1]! +ldrsw x19, [x12, #-256]! +ldrsb wzr, [x9, #255]! +ldrsb w2, [x3, #1]! +ldrsb w19, [x12, #-256]! +ldrsh wzr, [x9, #255]! +ldrsh w2, [x3, #1]! +ldrsh w19, [x12, #-256]! +str b0, [x0, #255]! +str b3, [x3, #1]! +str b5, [sp, #-256]! +str h10, [x10, #255]! +str h13, [x23, #1]! +str h15, [sp, #-256]! +str s20, [x20, #255]! +str s23, [x23, #1]! +str s25, [x0, #-256]! +str d20, [x20, #255]! +str d23, [x23, #1]! +str d25, [x0, #-256]! +ldr b0, [x0, #255]! +ldr b3, [x3, #1]! +ldr b5, [sp, #-256]! +ldr h10, [x10, #255]! +ldr h13, [x23, #1]! +ldr h15, [sp, #-256]! +ldr s20, [x20, #255]! +ldr s23, [x23, #1]! +ldr s25, [x0, #-256]! +ldr d20, [x20, #255]! +ldr d23, [x23, #1]! +ldr d25, [x0, #-256]! +ldr q20, [x1, #255]! +ldr q23, [x9, #1]! 
+ldr q25, [x20, #-256]! +str q10, [x1, #255]! +str q22, [sp, #1]! +str q21, [x20, #-256]! + +#------------------------------------------------------------------------------ +# Load/store (unprivileged) +#------------------------------------------------------------------------------ + +sttrb w9, [sp] +sttrh wzr, [x12, #255] +sttr w16, [x0, #-256] +sttr x28, [x14, #1] +ldtrb w1, [x20, #255] +ldtrh w20, [x1, #255] +ldtr w12, [sp, #255] +ldtr xzr, [x12, #255] +ldtrsb x9, [x7, #-256] +ldtrsh x17, [x19, #-256] +ldtrsw x20, [x15, #-256] +ldtrsb w19, [x1, #-256] +ldtrsh w15, [x21, #-256] + +#------------------------------------------------------------------------------ +# Load/store (unsigned immediate) +#------------------------------------------------------------------------------ + +ldr x4, [x29] +ldr x30, [x12, #32760] +ldr x20, [sp, #8] +ldr xzr, [sp] +ldr w2, [sp] +ldr w17, [sp, #16380] +ldr w13, [x2, #4] +ldrsw x2, [x5, #4] +ldrsw x23, [sp, #16380] +ldrh w2, [x4] +ldrsh w23, [x6, #8190] +ldrsh wzr, [sp, #2] +ldrsh x29, [x2, #2] +ldrb w26, [x3, #121] +ldrb w12, [x2] +ldrsb w27, [sp, #4095] +ldrsb xzr, [x15] +str x30, [sp] +str w20, [x4, #16380] +strh w17, [sp, #8190] +strb w23, [x3, #4095] +strb wzr, [x2] +ldr b31, [sp, #4095] +ldr h20, [x2, #8190] +ldr s10, [x19, #16380] +ldr d3, [x10, #32760] +str q12, [sp, #65520] + +#------------------------------------------------------------------------------ +# Load/store (register offset) +#------------------------------------------------------------------------------ + +ldrb w3, [sp, x5] +ldrb w9, [x27, x6] +ldrsb w10, [x30, x7] +ldrb w11, [x29, x3, sxtx] +strb w12, [x28, xzr, sxtx] +ldrb w14, [x26, w6, uxtw] +ldrsb w15, [x25, w7, uxtw] +ldrb w17, [x23, w9, sxtw] +ldrsb x18, [x22, w10, sxtw] +ldrsh w3, [sp, x5] +ldrsh w9, [x27, x6] +ldrh w10, [x30, x7, lsl #1] +strh w11, [x29, x3, sxtx] +ldrh w12, [x28, xzr, sxtx] +ldrsh x13, [x27, x5, sxtx #1] +ldrh w14, [x26, w6, uxtw] +ldrh w15, [x25, w7, uxtw] +ldrsh w16, [x24, w8, uxtw 
#1] +ldrh w17, [x23, w9, sxtw] +ldrh w18, [x22, w10, sxtw] +strh w19, [x21, wzr, sxtw #1] +ldr w3, [sp, x5] +ldr s9, [x27, x6] +ldr w10, [x30, x7, lsl #2] +ldr w11, [x29, x3, sxtx] +str s12, [x28, xzr, sxtx] +str w13, [x27, x5, sxtx #2] +str w14, [x26, w6, uxtw] +ldr w15, [x25, w7, uxtw] +ldr w16, [x24, w8, uxtw #2] +ldrsw x17, [x23, w9, sxtw] +ldr w18, [x22, w10, sxtw] +ldrsw x19, [x21, wzr, sxtw #2] +ldr x3, [sp, x5] +str x9, [x27, x6] +ldr d10, [x30, x7, lsl #3] +str x11, [x29, x3, sxtx] +ldr x12, [x28, xzr, sxtx] +ldr x13, [x27, x5, sxtx #3] +prfm pldl1keep, [x26, w6, uxtw] +ldr x15, [x25, w7, uxtw] +ldr x16, [x24, w8, uxtw #3] +ldr x17, [x23, w9, sxtw] +ldr x18, [x22, w10, sxtw] +str d19, [x21, wzr, sxtw #3] +ldr q3, [sp, x5] +ldr q9, [x27, x6] +ldr q10, [x30, x7, lsl #4] +str q11, [x29, x3, sxtx] +str q12, [x28, xzr, sxtx] +str q13, [x27, x5, sxtx #4] +ldr q14, [x26, w6, uxtw] +ldr q15, [x25, w7, uxtw] +ldr q16, [x24, w8, uxtw #4] +ldr q17, [x23, w9, sxtw] +str q18, [x22, w10, sxtw] +ldr q19, [x21, wzr, sxtw #4] + +#------------------------------------------------------------------------------ +# Load/store register pair (offset) +#------------------------------------------------------------------------------ + +ldp w3, w5, [sp] +stp wzr, w9, [sp, #252] +ldp w2, wzr, [sp, #-256] +ldp w9, w10, [sp, #4] +ldpsw x9, x10, [sp, #4] +ldpsw x9, x10, [x2, #-256] +ldpsw x20, x30, [sp, #252] +ldp x21, x29, [x2, #504] +ldp x22, x23, [x3, #-512] +ldp x24, x25, [x4, #8] +ldp s29, s28, [sp, #252] +stp s27, s26, [sp, #-256] +ldp s1, s2, [x3, #44] +stp d3, d5, [x9, #504] +stp d7, d11, [x10, #-512] +ldp d2, d3, [x30, #-8] +stp q3, q5, [sp] +stp q17, q19, [sp, #1008] +ldp q23, q29, [x1, #-1024] + +#------------------------------------------------------------------------------ +# Load/store register pair (post-indexed) +#------------------------------------------------------------------------------ + +ldp w3, w5, [sp], #0 +stp wzr, w9, [sp], #252 +ldp w2, wzr, [sp], #-256 +ldp 
w9, w10, [sp], #4 +ldpsw x9, x10, [sp], #4 +ldpsw x9, x10, [x2], #-256 +ldpsw x20, x30, [sp], #252 +ldp x21, x29, [x2], #504 +ldp x22, x23, [x3], #-512 +ldp x24, x25, [x4], #8 +ldp s29, s28, [sp], #252 +stp s27, s26, [sp], #-256 +ldp s1, s2, [x3], #44 +stp d3, d5, [x9], #504 +stp d7, d11, [x10], #-512 +ldp d2, d3, [x30], #-8 +stp q3, q5, [sp], #0 +stp q17, q19, [sp], #1008 +ldp q23, q29, [x1], #-1024 + +#------------------------------------------------------------------------------ +# Load/store register pair (pre-indexed) +#------------------------------------------------------------------------------ + +ldp w3, w5, [sp, #0]! +stp wzr, w9, [sp, #252]! +ldp w2, wzr, [sp, #-256]! +ldp w9, w10, [sp, #4]! +ldpsw x9, x10, [sp, #4]! +ldpsw x9, x10, [x2, #-256]! +ldpsw x20, x30, [sp, #252]! +ldp x21, x29, [x2, #504]! +ldp x22, x23, [x3, #-512]! +ldp x24, x25, [x4, #8]! +ldp s29, s28, [sp, #252]! +stp s27, s26, [sp, #-256]! +ldp s1, s2, [x3, #44]! +stp d3, d5, [x9, #504]! +stp d7, d11, [x10, #-512]! +ldp d2, d3, [x30, #-8]! +stp q3, q5, [sp, #0]! +stp q17, q19, [sp, #1008]! +ldp q23, q29, [x1, #-1024]! 
+ +#------------------------------------------------------------------------------ +# Load/store non-temporal register pair (offset) +#------------------------------------------------------------------------------ + +ldnp w3, w5, [sp] +stnp wzr, w9, [sp, #252] +ldnp w2, wzr, [sp, #-256] +ldnp w9, w10, [sp, #4] +ldnp x21, x29, [x2, #504] +ldnp x22, x23, [x3, #-512] +ldnp x24, x25, [x4, #8] +ldnp s29, s28, [sp, #252] +stnp s27, s26, [sp, #-256] +ldnp s1, s2, [x3, #44] +stnp d3, d5, [x9, #504] +stnp d7, d11, [x10, #-512] +ldnp d2, d3, [x30, #-8] +stnp q3, q5, [sp] +stnp q17, q19, [sp, #1008] +ldnp q23, q29, [x1, #-1024] + +#------------------------------------------------------------------------------ +# Logical (immediate) +#------------------------------------------------------------------------------ + +mov w3, #983055 +mov x10, #-6148914691236517206 + +#------------------------------------------------------------------------------ +# Logical (shifted register) +#------------------------------------------------------------------------------ + +and w12, w23, w21 +and w16, w15, w1, lsl #1 +and w9, w4, w10, lsl #31 +and w3, w30, w11 +and x3, x5, x7, lsl #63 +and x5, x14, x19, asr #4 +and w3, w17, w19, ror #31 +and w0, w2, wzr, lsr #17 +and w3, w30, w11, asr #2 +and xzr, x4, x26 +and w3, wzr, w20, ror #2 +and x7, x20, xzr, asr #63 +bic x13, x20, x14, lsl #47 +bic w2, w7, w9 +orr w2, w7, w0, asr #31 +orr x8, x9, x10, lsl #12 +orn x3, x5, x7, asr #2 +orn w2, w5, w29 +ands w7, wzr, w9, lsl #1 +ands x3, x5, x20, ror #63 +bics w3, w5, w7 +bics x3, xzr, x3, lsl #1 +tst w3, w7, lsl #31 +tst x2, x20, asr #2 +mov x3, x6 +mov x3, xzr +mov wzr, w2 +mov w3, w5 + +#------------------------------------------------------------------------------ +# Move wide (immediate) +#------------------------------------------------------------------------------ + +movz w2, #0, lsl #16 +mov w2, #-1235 +mov x2, #5299989643264 +mov x2, #0 +movk w3, #0 +movz x4, #0, lsl #16 +movk w5, #0, lsl #16 +movz x6, #0, lsl 
#32 +movk x7, #0, lsl #32 +movz x8, #0, lsl #48 +movk x9, #0, lsl #48 + +#------------------------------------------------------------------------------ +# PC-relative addressing +#------------------------------------------------------------------------------ + +adr x2, #1600 +adrp x21, #6553600 +adr x0, #262144 + +#------------------------------------------------------------------------------ +# Test and branch (immediate) +#------------------------------------------------------------------------------ + +tbz x12, #62, #0 +tbz x12, #62, #4 +tbz x12, #62, #-32768 +tbnz x12, #60, #32764 + +#------------------------------------------------------------------------------ +# Unconditional branch (immediate) +#------------------------------------------------------------------------------ + +b #4 +b #-4 +b #134217724 + +#------------------------------------------------------------------------------ +# Unconditional branch (register) +#------------------------------------------------------------------------------ + +br x20 +blr xzr +ret x10 +ret +eret +drps + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 add w2, w3, #4095 +# CHECK-NEXT: 1 1 1.00 add w30, w29, #1, lsl #12 +# CHECK-NEXT: 1 1 1.00 add w13, w5, #4095, lsl #12 +# CHECK-NEXT: 1 1 1.00 add x5, x7, #1638 +# CHECK-NEXT: 1 1 1.00 add w20, wsp, #801 +# CHECK-NEXT: 1 1 1.00 add wsp, wsp, #1104 +# CHECK-NEXT: 1 1 1.00 add wsp, w30, #4084 +# CHECK-NEXT: 1 1 1.00 add x0, x24, #291 +# CHECK-NEXT: 1 1 1.00 add x3, x24, #4095, lsl #12 +# CHECK-NEXT: 1 1 1.00 add x8, sp, #1074 +# CHECK-NEXT: 1 1 1.00 add sp, x29, #3816 +# CHECK-NEXT: 1 1 1.00 sub w0, wsp, #4077 +# CHECK-NEXT: 1 1 1.00 sub w4, w20, #546, lsl #12 +# CHECK-NEXT: 1 1 1.00 sub sp, sp, #288 +# CHECK-NEXT: 1 1 1.00 sub wsp, w19, 
#16 +# CHECK-NEXT: 1 1 1.00 adds w13, w23, #291, lsl #12 +# CHECK-NEXT: 1 1 1.00 cmn w2, #4095 +# CHECK-NEXT: 1 1 1.00 adds w20, wsp, #0 +# CHECK-NEXT: 1 1 1.00 cmn x3, #1, lsl #12 +# CHECK-NEXT: 1 1 1.00 cmp sp, #20, lsl #12 +# CHECK-NEXT: 1 1 1.00 cmp x30, #4095 +# CHECK-NEXT: 1 1 1.00 subs x4, sp, #3822 +# CHECK-NEXT: 1 1 1.00 cmn w3, #291, lsl #12 +# CHECK-NEXT: 1 1 1.00 cmn wsp, #1365 +# CHECK-NEXT: 1 1 1.00 cmn sp, #1092, lsl #12 +# CHECK-NEXT: 1 1 1.00 mov sp, x30 +# CHECK-NEXT: 1 1 1.00 mov wsp, w20 +# CHECK-NEXT: 1 1 1.00 mov x11, sp +# CHECK-NEXT: 1 1 1.00 mov w24, wsp +# CHECK-NEXT: 1 1 1.00 add w3, w5, w7 +# CHECK-NEXT: 1 1 1.00 add wzr, w3, w5 +# CHECK-NEXT: 1 1 1.00 add w20, wzr, w4 +# CHECK-NEXT: 1 1 1.00 add w4, w6, wzr +# CHECK-NEXT: 1 1 1.00 add w11, w13, w15 +# CHECK-NEXT: 1 2 1.00 add w9, w3, wzr, lsl #10 +# CHECK-NEXT: 1 2 1.00 add w17, w29, w20, lsl #31 +# CHECK-NEXT: 1 2 1.00 add w21, w22, w23, lsr #0 +# CHECK-NEXT: 1 2 1.00 add w24, w25, w26, lsr #18 +# CHECK-NEXT: 1 2 1.00 add w27, w28, w29, lsr #31 +# CHECK-NEXT: 1 2 1.00 add w2, w3, w4, asr #0 +# CHECK-NEXT: 1 2 1.00 add w5, w6, w7, asr #21 +# CHECK-NEXT: 1 2 1.00 add w8, w9, w10, asr #31 +# CHECK-NEXT: 1 1 1.00 add x3, x5, x7 +# CHECK-NEXT: 1 1 1.00 add xzr, x3, x5 +# CHECK-NEXT: 1 1 1.00 add x20, xzr, x4 +# CHECK-NEXT: 1 1 1.00 add x4, x6, xzr +# CHECK-NEXT: 1 1 1.00 add x11, x13, x15 +# CHECK-NEXT: 1 2 1.00 add x9, x3, xzr, lsl #10 +# CHECK-NEXT: 1 2 1.00 add x17, x29, x20, lsl #63 +# CHECK-NEXT: 1 2 1.00 add x21, x22, x23, lsr #0 +# CHECK-NEXT: 1 2 1.00 add x24, x25, x26, lsr #18 +# CHECK-NEXT: 1 2 1.00 add x27, x28, x29, lsr #63 +# CHECK-NEXT: 1 2 1.00 add x2, x3, x4, asr #0 +# CHECK-NEXT: 1 2 1.00 add x5, x6, x7, asr #21 +# CHECK-NEXT: 1 2 1.00 add x8, x9, x10, asr #63 +# CHECK-NEXT: 1 1 1.00 adds w3, w5, w7 +# CHECK-NEXT: 1 1 1.00 cmn w3, w5 +# CHECK-NEXT: 1 1 1.00 adds w20, wzr, w4 +# CHECK-NEXT: 1 1 1.00 adds w4, w6, wzr +# CHECK-NEXT: 1 1 1.00 adds w11, w13, w15 +# CHECK-NEXT: 1 
2 1.00 adds w9, w3, wzr, lsl #10 +# CHECK-NEXT: 1 2 1.00 adds w17, w29, w20, lsl #31 +# CHECK-NEXT: 1 2 1.00 adds w21, w22, w23, lsr #0 +# CHECK-NEXT: 1 2 1.00 adds w24, w25, w26, lsr #18 +# CHECK-NEXT: 1 2 1.00 adds w27, w28, w29, lsr #31 +# CHECK-NEXT: 1 2 1.00 adds w2, w3, w4, asr #0 +# CHECK-NEXT: 1 2 1.00 adds w5, w6, w7, asr #21 +# CHECK-NEXT: 1 2 1.00 adds w8, w9, w10, asr #31 +# CHECK-NEXT: 1 1 1.00 adds x3, x5, x7 +# CHECK-NEXT: 1 1 1.00 cmn x3, x5 +# CHECK-NEXT: 1 1 1.00 adds x20, xzr, x4 +# CHECK-NEXT: 1 1 1.00 adds x4, x6, xzr +# CHECK-NEXT: 1 1 1.00 adds x11, x13, x15 +# CHECK-NEXT: 1 2 1.00 adds x9, x3, xzr, lsl #10 +# CHECK-NEXT: 1 2 1.00 adds x17, x29, x20, lsl #63 +# CHECK-NEXT: 1 2 1.00 adds x21, x22, x23, lsr #0 +# CHECK-NEXT: 1 2 1.00 adds x24, x25, x26, lsr #18 +# CHECK-NEXT: 1 2 1.00 adds x27, x28, x29, lsr #63 +# CHECK-NEXT: 1 2 1.00 adds x2, x3, x4, asr #0 +# CHECK-NEXT: 1 2 1.00 adds x5, x6, x7, asr #21 +# CHECK-NEXT: 1 2 1.00 adds x8, x9, x10, asr #63 +# CHECK-NEXT: 1 1 1.00 sub w3, w5, w7 +# CHECK-NEXT: 1 1 1.00 sub wzr, w3, w5 +# CHECK-NEXT: 1 1 1.00 sub w4, w6, wzr +# CHECK-NEXT: 1 1 1.00 sub w11, w13, w15 +# CHECK-NEXT: 1 2 1.00 sub w9, w3, wzr, lsl #10 +# CHECK-NEXT: 1 2 1.00 sub w17, w29, w20, lsl #31 +# CHECK-NEXT: 1 2 1.00 sub w21, w22, w23, lsr #0 +# CHECK-NEXT: 1 2 1.00 sub w24, w25, w26, lsr #18 +# CHECK-NEXT: 1 2 1.00 sub w27, w28, w29, lsr #31 +# CHECK-NEXT: 1 2 1.00 sub w2, w3, w4, asr #0 +# CHECK-NEXT: 1 2 1.00 sub w5, w6, w7, asr #21 +# CHECK-NEXT: 1 2 1.00 sub w8, w9, w10, asr #31 +# CHECK-NEXT: 1 1 1.00 sub x3, x5, x7 +# CHECK-NEXT: 1 1 1.00 sub xzr, x3, x5 +# CHECK-NEXT: 1 1 1.00 sub x4, x6, xzr +# CHECK-NEXT: 1 1 1.00 sub x11, x13, x15 +# CHECK-NEXT: 1 2 1.00 sub x9, x3, xzr, lsl #10 +# CHECK-NEXT: 1 2 1.00 sub x17, x29, x20, lsl #63 +# CHECK-NEXT: 1 2 1.00 sub x21, x22, x23, lsr #0 +# CHECK-NEXT: 1 2 1.00 sub x24, x25, x26, lsr #18 +# CHECK-NEXT: 1 2 1.00 sub x27, x28, x29, lsr #63 +# CHECK-NEXT: 1 2 1.00 sub x2, x3, 
x4, asr #0 +# CHECK-NEXT: 1 2 1.00 sub x5, x6, x7, asr #21 +# CHECK-NEXT: 1 2 1.00 sub x8, x9, x10, asr #63 +# CHECK-NEXT: 1 1 1.00 subs w3, w5, w7 +# CHECK-NEXT: 1 1 1.00 cmp w3, w5 +# CHECK-NEXT: 1 1 1.00 subs w4, w6, wzr +# CHECK-NEXT: 1 1 1.00 subs w11, w13, w15 +# CHECK-NEXT: 1 2 1.00 subs w9, w3, wzr, lsl #10 +# CHECK-NEXT: 1 2 1.00 subs w17, w29, w20, lsl #31 +# CHECK-NEXT: 1 2 1.00 subs w21, w22, w23, lsr #0 +# CHECK-NEXT: 1 2 1.00 subs w24, w25, w26, lsr #18 +# CHECK-NEXT: 1 2 1.00 subs w27, w28, w29, lsr #31 +# CHECK-NEXT: 1 2 1.00 subs w2, w3, w4, asr #0 +# CHECK-NEXT: 1 2 1.00 subs w5, w6, w7, asr #21 +# CHECK-NEXT: 1 2 1.00 subs w8, w9, w10, asr #31 +# CHECK-NEXT: 1 1 1.00 subs x3, x5, x7 +# CHECK-NEXT: 1 1 1.00 cmp x3, x5 +# CHECK-NEXT: 1 1 1.00 subs x4, x6, xzr +# CHECK-NEXT: 1 1 1.00 subs x11, x13, x15 +# CHECK-NEXT: 1 2 1.00 subs x9, x3, xzr, lsl #10 +# CHECK-NEXT: 1 2 1.00 subs x17, x29, x20, lsl #63 +# CHECK-NEXT: 1 2 1.00 subs x21, x22, x23, lsr #0 +# CHECK-NEXT: 1 2 1.00 subs x24, x25, x26, lsr #18 +# CHECK-NEXT: 1 2 1.00 subs x27, x28, x29, lsr #63 +# CHECK-NEXT: 1 2 1.00 subs x2, x3, x4, asr #0 +# CHECK-NEXT: 1 2 1.00 subs x5, x6, x7, asr #21 +# CHECK-NEXT: 1 2 1.00 subs x8, x9, x10, asr #63 +# CHECK-NEXT: 1 1 1.00 cmn wzr, w4 +# CHECK-NEXT: 1 1 1.00 cmn w5, wzr +# CHECK-NEXT: 1 1 1.00 cmn w6, w7 +# CHECK-NEXT: 1 2 1.00 cmn w8, w9, lsl #15 +# CHECK-NEXT: 1 2 1.00 cmn w10, w11, lsl #31 +# CHECK-NEXT: 1 2 1.00 cmn w12, w13, lsr #0 +# CHECK-NEXT: 1 2 1.00 cmn w14, w15, lsr #21 +# CHECK-NEXT: 1 2 1.00 cmn w16, w17, lsr #31 +# CHECK-NEXT: 1 2 1.00 cmn w18, w19, asr #0 +# CHECK-NEXT: 1 2 1.00 cmn w20, w21, asr #22 +# CHECK-NEXT: 1 2 1.00 cmn w22, w23, asr #31 +# CHECK-NEXT: 1 1 1.00 cmn x0, x3 +# CHECK-NEXT: 1 1 1.00 cmn xzr, x4 +# CHECK-NEXT: 1 1 1.00 cmn x5, xzr +# CHECK-NEXT: 1 1 1.00 cmn x6, x7 +# CHECK-NEXT: 1 2 1.00 cmn x8, x9, lsl #15 +# CHECK-NEXT: 1 2 1.00 cmn x10, x11, lsl #63 +# CHECK-NEXT: 1 2 1.00 cmn x12, x13, lsr #0 +# CHECK-NEXT: 1 
2 1.00 cmn x14, x15, lsr #41 +# CHECK-NEXT: 1 2 1.00 cmn x16, x17, lsr #63 +# CHECK-NEXT: 1 2 1.00 cmn x18, x19, asr #0 +# CHECK-NEXT: 1 2 1.00 cmn x20, x21, asr #55 +# CHECK-NEXT: 1 2 1.00 cmn x22, x23, asr #63 +# CHECK-NEXT: 1 1 1.00 cmp w0, w3 +# CHECK-NEXT: 1 1 1.00 cmp wzr, w4 +# CHECK-NEXT: 1 1 1.00 cmp w5, wzr +# CHECK-NEXT: 1 1 1.00 cmp w6, w7 +# CHECK-NEXT: 1 2 1.00 cmp w8, w9, lsl #15 +# CHECK-NEXT: 1 2 1.00 cmp w10, w11, lsl #31 +# CHECK-NEXT: 1 2 1.00 cmp w12, w13, lsr #0 +# CHECK-NEXT: 1 2 1.00 cmp w14, w15, lsr #21 +# CHECK-NEXT: 1 2 1.00 cmp w18, w19, asr #0 +# CHECK-NEXT: 1 2 1.00 cmp w20, w21, asr #22 +# CHECK-NEXT: 1 2 1.00 cmp w22, w23, asr #31 +# CHECK-NEXT: 1 1 1.00 cmp x0, x3 +# CHECK-NEXT: 1 1 1.00 cmp xzr, x4 +# CHECK-NEXT: 1 1 1.00 cmp x5, xzr +# CHECK-NEXT: 1 1 1.00 cmp x6, x7 +# CHECK-NEXT: 1 2 1.00 cmp x8, x9, lsl #15 +# CHECK-NEXT: 1 2 1.00 cmp x10, x11, lsl #63 +# CHECK-NEXT: 1 2 1.00 cmp x12, x13, lsr #0 +# CHECK-NEXT: 1 2 1.00 cmp x14, x15, lsr #41 +# CHECK-NEXT: 1 2 1.00 cmp x16, x17, lsr #63 +# CHECK-NEXT: 1 2 1.00 cmp x18, x19, asr #0 +# CHECK-NEXT: 1 2 1.00 cmp x20, x21, asr #55 +# CHECK-NEXT: 1 2 1.00 cmp x22, x23, asr #63 +# CHECK-NEXT: 1 1 1.00 cmp wzr, w0 +# CHECK-NEXT: 1 1 1.00 cmp xzr, x0 +# CHECK-NEXT: 1 1 1.00 adc w29, w27, w25 +# CHECK-NEXT: 1 1 1.00 adc wzr, w3, w4 +# CHECK-NEXT: 1 1 1.00 adc w9, wzr, w10 +# CHECK-NEXT: 1 1 1.00 adc w20, w0, wzr +# CHECK-NEXT: 1 1 1.00 adc x29, x27, x25 +# CHECK-NEXT: 1 1 1.00 adc xzr, x3, x4 +# CHECK-NEXT: 1 1 1.00 adc x9, xzr, x10 +# CHECK-NEXT: 1 1 1.00 adc x20, x0, xzr +# CHECK-NEXT: 1 1 1.00 adcs w29, w27, w25 +# CHECK-NEXT: 1 1 1.00 adcs wzr, w3, w4 +# CHECK-NEXT: 1 1 1.00 adcs w9, wzr, w10 +# CHECK-NEXT: 1 1 1.00 adcs w20, w0, wzr +# CHECK-NEXT: 1 1 1.00 adcs x29, x27, x25 +# CHECK-NEXT: 1 1 1.00 adcs xzr, x3, x4 +# CHECK-NEXT: 1 1 1.00 adcs x9, xzr, x10 +# CHECK-NEXT: 1 1 1.00 adcs x20, x0, xzr +# CHECK-NEXT: 1 1 1.00 sbc w29, w27, w25 +# CHECK-NEXT: 1 1 1.00 sbc wzr, w3, w4 +# 
CHECK-NEXT: 1 1 1.00 ngc w9, w10 +# CHECK-NEXT: 1 1 1.00 sbc w20, w0, wzr +# CHECK-NEXT: 1 1 1.00 sbc x29, x27, x25 +# CHECK-NEXT: 1 1 1.00 sbc xzr, x3, x4 +# CHECK-NEXT: 1 1 1.00 ngc x9, x10 +# CHECK-NEXT: 1 1 1.00 sbc x20, x0, xzr +# CHECK-NEXT: 1 1 1.00 sbcs w29, w27, w25 +# CHECK-NEXT: 1 1 1.00 sbcs wzr, w3, w4 +# CHECK-NEXT: 1 1 1.00 ngcs w9, w10 +# CHECK-NEXT: 1 1 1.00 sbcs w20, w0, wzr +# CHECK-NEXT: 1 1 1.00 sbcs x29, x27, x25 +# CHECK-NEXT: 1 1 1.00 sbcs xzr, x3, x4 +# CHECK-NEXT: 1 1 1.00 ngcs x9, x10 +# CHECK-NEXT: 1 1 1.00 sbcs x20, x0, xzr +# CHECK-NEXT: 1 1 1.00 ngc w3, w12 +# CHECK-NEXT: 1 1 1.00 ngc wzr, w9 +# CHECK-NEXT: 1 1 1.00 ngc w23, wzr +# CHECK-NEXT: 1 1 1.00 ngc x29, x30 +# CHECK-NEXT: 1 1 1.00 ngc xzr, x0 +# CHECK-NEXT: 1 1 1.00 ngc x0, xzr +# CHECK-NEXT: 1 1 1.00 ngcs w3, w12 +# CHECK-NEXT: 1 1 1.00 ngcs wzr, w9 +# CHECK-NEXT: 1 1 1.00 ngcs w23, wzr +# CHECK-NEXT: 1 1 1.00 ngcs x29, x30 +# CHECK-NEXT: 1 1 1.00 ngcs xzr, x0 +# CHECK-NEXT: 1 1 1.00 ngcs x0, xzr +# CHECK-NEXT: 1 2 1.00 sbfx x1, x2, #3, #2 +# CHECK-NEXT: 1 2 1.00 asr x3, x4, #63 +# CHECK-NEXT: 1 2 1.00 asr wzr, wzr, #31 +# CHECK-NEXT: 1 2 1.00 sbfx w12, w9, #0, #1 +# CHECK-NEXT: 1 2 1.00 ubfiz x4, x5, #52, #11 +# CHECK-NEXT: 1 2 1.00 ubfx xzr, x4, #0, #1 +# CHECK-NEXT: 1 2 1.00 ubfiz x4, xzr, #1, #6 +# CHECK-NEXT: 1 2 1.00 lsr x5, x6, #12 +# CHECK-NEXT: 1 2 1.00 bfi x4, x5, #52, #11 +# CHECK-NEXT: 1 2 1.00 bfxil xzr, x4, #0, #1 +# CHECK-NEXT: 1 2 1.00 bfc x4, #1, #6 +# CHECK-NEXT: 1 2 1.00 bfxil x5, x6, #12, #52 +# CHECK-NEXT: 1 2 1.00 sxtb w1, w2 +# CHECK-NEXT: 1 2 1.00 sxtb xzr, w3 +# CHECK-NEXT: 1 2 1.00 sxth w9, w10 +# CHECK-NEXT: 1 2 1.00 sxth x0, w1 +# CHECK-NEXT: 1 2 1.00 sxtw x3, w30 +# CHECK-NEXT: 1 2 1.00 uxtb w1, w2 +# CHECK-NEXT: 1 2 1.00 uxth w9, w10 +# CHECK-NEXT: 1 2 1.00 ubfx x3, x30, #0, #32 +# CHECK-NEXT: 1 2 1.00 asr w3, w2, #0 +# CHECK-NEXT: 1 2 1.00 asr w9, w10, #31 +# CHECK-NEXT: 1 2 1.00 asr x20, x21, #63 +# CHECK-NEXT: 1 2 1.00 asr w1, wzr, #3 +# 
CHECK-NEXT: 1 2 1.00 lsr w3, w2, #0 +# CHECK-NEXT: 1 2 1.00 lsr w9, w10, #31 +# CHECK-NEXT: 1 2 1.00 lsr x20, x21, #63 +# CHECK-NEXT: 1 2 1.00 lsr wzr, wzr, #3 +# CHECK-NEXT: 1 2 1.00 lsr w3, w2, #0 +# CHECK-NEXT: 1 2 1.00 lsl w9, w10, #31 +# CHECK-NEXT: 1 2 1.00 lsl x20, x21, #63 +# CHECK-NEXT: 1 2 1.00 lsl w1, wzr, #3 +# CHECK-NEXT: 1 2 1.00 sbfx w9, w10, #0, #1 +# CHECK-NEXT: 1 2 1.00 sbfiz x2, x3, #63, #1 +# CHECK-NEXT: 1 2 1.00 asr x19, x20, #0 +# CHECK-NEXT: 1 2 1.00 sbfiz x9, x10, #5, #59 +# CHECK-NEXT: 1 2 1.00 asr w9, w10, #0 +# CHECK-NEXT: 1 2 1.00 sbfiz w11, w12, #31, #1 +# CHECK-NEXT: 1 2 1.00 sbfiz w13, w14, #29, #3 +# CHECK-NEXT: 1 2 1.00 sbfiz xzr, xzr, #10, #11 +# CHECK-NEXT: 1 2 1.00 sbfx w9, w10, #0, #1 +# CHECK-NEXT: 1 2 1.00 asr x2, x3, #63 +# CHECK-NEXT: 1 2 1.00 asr x19, x20, #0 +# CHECK-NEXT: 1 2 1.00 asr x9, x10, #5 +# CHECK-NEXT: 1 2 1.00 asr w9, w10, #0 +# CHECK-NEXT: 1 2 1.00 asr w11, w12, #31 +# CHECK-NEXT: 1 2 1.00 asr w13, w14, #29 +# CHECK-NEXT: 1 2 1.00 sbfx xzr, xzr, #10, #11 +# CHECK-NEXT: 1 2 1.00 bfxil w9, w10, #0, #1 +# CHECK-NEXT: 1 2 1.00 bfi x2, x3, #63, #1 +# CHECK-NEXT: 1 2 1.00 bfxil x19, x20, #0, #64 +# CHECK-NEXT: 1 2 1.00 bfi x9, x10, #5, #59 +# CHECK-NEXT: 1 2 1.00 bfxil w9, w10, #0, #32 +# CHECK-NEXT: 1 2 1.00 bfi w11, w12, #31, #1 +# CHECK-NEXT: 1 2 1.00 bfi w13, w14, #29, #3 +# CHECK-NEXT: 1 2 1.00 bfc xzr, #10, #11 +# CHECK-NEXT: 1 2 1.00 bfxil w9, w10, #0, #1 +# CHECK-NEXT: 1 2 1.00 bfxil x2, x3, #63, #1 +# CHECK-NEXT: 1 2 1.00 bfxil x19, x20, #0, #64 +# CHECK-NEXT: 1 2 1.00 bfxil x9, x10, #5, #59 +# CHECK-NEXT: 1 2 1.00 bfxil w9, w10, #0, #32 +# CHECK-NEXT: 1 2 1.00 bfxil w11, w12, #31, #1 +# CHECK-NEXT: 1 2 1.00 bfxil w13, w14, #29, #3 +# CHECK-NEXT: 1 2 1.00 bfxil xzr, xzr, #10, #11 +# CHECK-NEXT: 1 2 1.00 ubfx w9, w10, #0, #1 +# CHECK-NEXT: 1 2 1.00 lsl x2, x3, #63 +# CHECK-NEXT: 1 2 1.00 lsr x19, x20, #0 +# CHECK-NEXT: 1 2 1.00 lsl x9, x10, #5 +# CHECK-NEXT: 1 2 1.00 lsr w9, w10, #0 +# CHECK-NEXT: 1 2 1.00 
lsl w11, w12, #31 +# CHECK-NEXT: 1 2 1.00 lsl w13, w14, #29 +# CHECK-NEXT: 1 2 1.00 ubfiz xzr, xzr, #10, #11 +# CHECK-NEXT: 1 2 1.00 ubfx w9, w10, #0, #1 +# CHECK-NEXT: 1 2 1.00 lsr x2, x3, #63 +# CHECK-NEXT: 1 2 1.00 lsr x19, x20, #0 +# CHECK-NEXT: 1 2 1.00 lsr x9, x10, #5 +# CHECK-NEXT: 1 2 1.00 lsr w9, w10, #0 +# CHECK-NEXT: 1 2 1.00 lsr w11, w12, #31 +# CHECK-NEXT: 1 2 1.00 lsr w13, w14, #29 +# CHECK-NEXT: 1 2 1.00 ubfx xzr, xzr, #10, #11 +# CHECK-NEXT: 1 1 1.00 cbz w5, #4 +# CHECK-NEXT: 1 1 1.00 cbz x5, #0 +# CHECK-NEXT: 1 1 1.00 cbnz x2, #-4 +# CHECK-NEXT: 1 1 1.00 cbnz x26, #1048572 +# CHECK-NEXT: 1 1 1.00 cbz wzr, #0 +# CHECK-NEXT: 1 1 1.00 cbnz xzr, #0 +# CHECK-NEXT: 1 1 1.00 b.ne #4 +# CHECK-NEXT: 1 1 1.00 b.ge #1048572 +# CHECK-NEXT: 1 1 1.00 b.ge #-4 +# CHECK-NEXT: 1 1 1.00 ccmp w1, #31, #0, eq +# CHECK-NEXT: 1 1 1.00 ccmp w3, #0, #15, hs +# CHECK-NEXT: 1 1 1.00 ccmp wzr, #15, #13, hs +# CHECK-NEXT: 1 1 1.00 ccmp x9, #31, #0, le +# CHECK-NEXT: 1 1 1.00 ccmp x3, #0, #15, gt +# CHECK-NEXT: 1 1 1.00 ccmp xzr, #5, #7, ne +# CHECK-NEXT: 1 1 1.00 ccmn w1, #31, #0, eq +# CHECK-NEXT: 1 1 1.00 ccmn w3, #0, #15, hs +# CHECK-NEXT: 1 1 1.00 ccmn wzr, #15, #13, hs +# CHECK-NEXT: 1 1 1.00 ccmn x9, #31, #0, le +# CHECK-NEXT: 1 1 1.00 ccmn x3, #0, #15, gt +# CHECK-NEXT: 1 1 1.00 ccmn xzr, #5, #7, ne +# CHECK-NEXT: 1 1 1.00 ccmp w1, wzr, #0, eq +# CHECK-NEXT: 1 1 1.00 ccmp w3, w0, #15, hs +# CHECK-NEXT: 1 1 1.00 ccmp wzr, w15, #13, hs +# CHECK-NEXT: 1 1 1.00 ccmp x9, xzr, #0, le +# CHECK-NEXT: 1 1 1.00 ccmp x3, x0, #15, gt +# CHECK-NEXT: 1 1 1.00 ccmp xzr, x5, #7, ne +# CHECK-NEXT: 1 1 1.00 ccmn w1, wzr, #0, eq +# CHECK-NEXT: 1 1 1.00 ccmn w3, w0, #15, hs +# CHECK-NEXT: 1 1 1.00 ccmn wzr, w15, #13, hs +# CHECK-NEXT: 1 1 1.00 ccmn x9, xzr, #0, le +# CHECK-NEXT: 1 1 1.00 ccmn x3, x0, #15, gt +# CHECK-NEXT: 1 1 1.00 ccmn xzr, x5, #7, ne +# CHECK-NEXT: 1 1 1.00 csel w1, w0, w19, ne +# CHECK-NEXT: 1 1 1.00 csel wzr, w5, w9, eq +# CHECK-NEXT: 1 1 1.00 csel w9, wzr, w30, gt +# 
CHECK-NEXT: 1 1 1.00 csel w1, w28, wzr, mi +# CHECK-NEXT: 1 1 1.00 csel x19, x23, x29, lt +# CHECK-NEXT: 1 1 1.00 csel xzr, x3, x4, ge +# CHECK-NEXT: 1 1 1.00 csel x5, xzr, x6, hs +# CHECK-NEXT: 1 1 1.00 csel x7, x8, xzr, lo +# CHECK-NEXT: 1 1 1.00 csinc w1, w0, w19, ne +# CHECK-NEXT: 1 1 1.00 csinc wzr, w5, w9, eq +# CHECK-NEXT: 1 1 1.00 csinc w9, wzr, w30, gt +# CHECK-NEXT: 1 1 1.00 csinc w1, w28, wzr, mi +# CHECK-NEXT: 1 1 1.00 csinc x19, x23, x29, lt +# CHECK-NEXT: 1 1 1.00 csinc xzr, x3, x4, ge +# CHECK-NEXT: 1 1 1.00 csinc x5, xzr, x6, hs +# CHECK-NEXT: 1 1 1.00 csinc x7, x8, xzr, lo +# CHECK-NEXT: 1 1 1.00 csinv w1, w0, w19, ne +# CHECK-NEXT: 1 1 1.00 csinv wzr, w5, w9, eq +# CHECK-NEXT: 1 1 1.00 csinv w9, wzr, w30, gt +# CHECK-NEXT: 1 1 1.00 csinv w1, w28, wzr, mi +# CHECK-NEXT: 1 1 1.00 csinv x19, x23, x29, lt +# CHECK-NEXT: 1 1 1.00 csinv xzr, x3, x4, ge +# CHECK-NEXT: 1 1 1.00 csinv x5, xzr, x6, hs +# CHECK-NEXT: 1 1 1.00 csinv x7, x8, xzr, lo +# CHECK-NEXT: 1 1 1.00 csneg w1, w0, w19, ne +# CHECK-NEXT: 1 1 1.00 csneg wzr, w5, w9, eq +# CHECK-NEXT: 1 1 1.00 csneg w9, wzr, w30, gt +# CHECK-NEXT: 1 1 1.00 csneg w1, w28, wzr, mi +# CHECK-NEXT: 1 1 1.00 csneg x19, x23, x29, lt +# CHECK-NEXT: 1 1 1.00 csneg xzr, x3, x4, ge +# CHECK-NEXT: 1 1 1.00 csneg x5, xzr, x6, hs +# CHECK-NEXT: 1 1 1.00 csneg x7, x8, xzr, lo +# CHECK-NEXT: 1 1 1.00 cset w3, eq +# CHECK-NEXT: 1 1 1.00 cset x9, pl +# CHECK-NEXT: 1 1 1.00 csetm w20, ne +# CHECK-NEXT: 1 1 1.00 csetm x30, ge +# CHECK-NEXT: 1 1 1.00 csinc w2, wzr, wzr, al +# CHECK-NEXT: 1 1 1.00 csinv x3, xzr, xzr, nv +# CHECK-NEXT: 1 1 1.00 cinc w3, w5, gt +# CHECK-NEXT: 1 1 1.00 cinc wzr, w4, le +# CHECK-NEXT: 1 1 1.00 cset w9, lt +# CHECK-NEXT: 1 1 1.00 cinc x3, x5, gt +# CHECK-NEXT: 1 1 1.00 cinc xzr, x4, le +# CHECK-NEXT: 1 1 1.00 cset x9, lt +# CHECK-NEXT: 1 1 1.00 csinc w5, w6, w6, nv +# CHECK-NEXT: 1 1 1.00 csinc x1, x2, x2, al +# CHECK-NEXT: 1 1 1.00 cinv w3, w5, gt +# CHECK-NEXT: 1 1 1.00 cinv wzr, w4, le +# 
CHECK-NEXT: 1 1 1.00 csetm w9, lt +# CHECK-NEXT: 1 1 1.00 cinv x3, x5, gt +# CHECK-NEXT: 1 1 1.00 cinv xzr, x4, le +# CHECK-NEXT: 1 1 1.00 csetm x9, lt +# CHECK-NEXT: 1 1 1.00 csinv x1, x0, x0, al +# CHECK-NEXT: 1 1 1.00 csinv w9, w8, w8, nv +# CHECK-NEXT: 1 1 1.00 cneg w3, w5, gt +# CHECK-NEXT: 1 1 1.00 cneg wzr, w4, le +# CHECK-NEXT: 1 1 1.00 cneg w9, wzr, lt +# CHECK-NEXT: 1 1 1.00 cneg x3, x5, gt +# CHECK-NEXT: 1 1 1.00 cneg xzr, x4, le +# CHECK-NEXT: 1 1 1.00 cneg x9, xzr, lt +# CHECK-NEXT: 1 1 1.00 csneg x4, x8, x8, al +# CHECK-NEXT: 1 1 1.00 csinv w9, w8, w8, nv +# CHECK-NEXT: 1 2 1.00 rbit w0, w7 +# CHECK-NEXT: 1 2 1.00 rbit x18, x3 +# CHECK-NEXT: 1 1 1.00 rev16 w17, w1 +# CHECK-NEXT: 1 1 1.00 rev16 x5, x2 +# CHECK-NEXT: 1 1 1.00 rev w18, w0 +# CHECK-NEXT: 1 1 1.00 rev32 x20, x1 +# CHECK-NEXT: 1 1 1.00 rev x22, x2 +# CHECK-NEXT: 1 1 1.00 clz w24, w3 +# CHECK-NEXT: 1 1 1.00 clz x26, x4 +# CHECK-NEXT: 1 1 1.00 cls w3, w5 +# CHECK-NEXT: 1 1 1.00 cls x20, x5 +# CHECK-NEXT: 1 12 12.00 udiv w0, w7, w10 +# CHECK-NEXT: 1 20 20.00 udiv x9, x22, x4 +# CHECK-NEXT: 1 12 12.00 sdiv w12, w21, w0 +# CHECK-NEXT: 1 20 20.00 sdiv x13, x2, x1 +# CHECK-NEXT: 1 2 1.00 lsl w11, w12, w13 +# CHECK-NEXT: 1 2 1.00 lsl x14, x15, x16 +# CHECK-NEXT: 1 2 1.00 lsr w17, w18, w19 +# CHECK-NEXT: 1 2 1.00 lsr x20, x21, x22 +# CHECK-NEXT: 1 2 1.00 asr w23, w24, w25 +# CHECK-NEXT: 1 2 1.00 asr x26, x27, x28 +# CHECK-NEXT: 1 2 1.00 ror w0, w1, w2 +# CHECK-NEXT: 1 2 1.00 ror x3, x4, x5 +# CHECK-NEXT: 1 2 1.00 lsl w6, w7, w8 +# CHECK-NEXT: 1 2 1.00 lsl x9, x10, x11 +# CHECK-NEXT: 1 2 1.00 lsr w12, w13, w14 +# CHECK-NEXT: 1 2 1.00 lsr x15, x16, x17 +# CHECK-NEXT: 1 2 1.00 asr w18, w19, w20 +# CHECK-NEXT: 1 2 1.00 asr x21, x22, x23 +# CHECK-NEXT: 1 2 1.00 ror w24, w25, w26 +# CHECK-NEXT: 1 2 1.00 ror x27, x28, x29 +# CHECK-NEXT: 1 5 2.00 smulh x30, x29, x28 +# CHECK-NEXT: 1 5 2.00 smulh xzr, x27, x26 +# CHECK-NEXT: 1 5 2.00 umulh x30, x29, x28 +# CHECK-NEXT: 1 5 2.00 umulh x23, x30, xzr +# 
CHECK-NEXT: 1 3 1.00 madd w1, w3, w7, w4 +# CHECK-NEXT: 1 3 1.00 madd wzr, w0, w9, w11 +# CHECK-NEXT: 1 3 1.00 madd w13, wzr, w4, w4 +# CHECK-NEXT: 1 3 1.00 madd w19, w30, wzr, w29 +# CHECK-NEXT: 1 3 1.00 mul w4, w5, w6 +# CHECK-NEXT: 1 5 2.00 madd x1, x3, x7, x4 +# CHECK-NEXT: 1 5 2.00 madd xzr, x0, x9, x11 +# CHECK-NEXT: 1 5 2.00 madd x13, xzr, x4, x4 +# CHECK-NEXT: 1 5 2.00 madd x19, x30, xzr, x29 +# CHECK-NEXT: 1 5 2.00 mul x4, x5, x6 +# CHECK-NEXT: 1 3 1.00 msub w1, w3, w7, w4 +# CHECK-NEXT: 1 3 1.00 msub wzr, w0, w9, w11 +# CHECK-NEXT: 1 3 1.00 msub w13, wzr, w4, w4 +# CHECK-NEXT: 1 3 1.00 msub w19, w30, wzr, w29 +# CHECK-NEXT: 1 3 1.00 mneg w4, w5, w6 +# CHECK-NEXT: 1 5 2.00 msub x1, x3, x7, x4 +# CHECK-NEXT: 1 5 2.00 msub xzr, x0, x9, x11 +# CHECK-NEXT: 1 5 2.00 msub x13, xzr, x4, x4 +# CHECK-NEXT: 1 5 2.00 msub x19, x30, xzr, x29 +# CHECK-NEXT: 1 5 2.00 mneg x4, x5, x6 +# CHECK-NEXT: 1 3 1.00 smaddl x3, w5, w2, x9 +# CHECK-NEXT: 1 3 1.00 smaddl xzr, w10, w11, x12 +# CHECK-NEXT: 1 3 1.00 smaddl x13, wzr, w14, x15 +# CHECK-NEXT: 1 3 1.00 smaddl x16, w17, wzr, x18 +# CHECK-NEXT: 1 3 1.00 smull x19, w20, w21 +# CHECK-NEXT: 1 3 1.00 smsubl x3, w5, w2, x9 +# CHECK-NEXT: 1 3 1.00 smsubl xzr, w10, w11, x12 +# CHECK-NEXT: 1 3 1.00 smsubl x13, wzr, w14, x15 +# CHECK-NEXT: 1 3 1.00 smsubl x16, w17, wzr, x18 +# CHECK-NEXT: 1 3 1.00 smnegl x19, w20, w21 +# CHECK-NEXT: 1 3 1.00 umaddl x3, w5, w2, x9 +# CHECK-NEXT: 1 3 1.00 umaddl xzr, w10, w11, x12 +# CHECK-NEXT: 1 3 1.00 umaddl x13, wzr, w14, x15 +# CHECK-NEXT: 1 3 1.00 umaddl x16, w17, wzr, x18 +# CHECK-NEXT: 1 3 1.00 umull x19, w20, w21 +# CHECK-NEXT: 1 3 1.00 umsubl x3, w5, w2, x9 +# CHECK-NEXT: 1 3 1.00 umsubl x16, w17, wzr, x18 +# CHECK-NEXT: 1 3 1.00 umnegl x19, w20, w21 +# CHECK-NEXT: 1 5 2.00 smulh x30, x29, x28 +# CHECK-NEXT: 1 5 2.00 smulh x23, x22, xzr +# CHECK-NEXT: 1 5 2.00 umulh x23, x22, xzr +# CHECK-NEXT: 1 5 2.00 mul x19, x20, xzr +# CHECK-NEXT: 1 3 1.00 mneg w21, w22, w23 +# CHECK-NEXT: 1 3 1.00 smull 
x11, w13, w17 +# CHECK-NEXT: 1 3 1.00 umull x11, w13, w17 +# CHECK-NEXT: 1 3 1.00 smnegl x11, w13, w17 +# CHECK-NEXT: 1 3 1.00 umnegl x11, w13, w17 +# CHECK-NEXT: 1 2 1.00 extr w3, w5, w7, #0 +# CHECK-NEXT: 1 2 1.00 extr w11, w13, w17, #31 +# CHECK-NEXT: 1 2 1.00 extr x3, x5, x7, #15 +# CHECK-NEXT: 1 2 1.00 extr x11, x13, x17, #63 +# CHECK-NEXT: 1 2 1.00 ror x19, x23, #24 +# CHECK-NEXT: 1 2 1.00 ror x29, xzr, #63 +# CHECK-NEXT: 1 2 1.00 ror w9, w13, #31 +# CHECK-NEXT: 1 3 1.00 fcmp s3, s5 +# CHECK-NEXT: 1 3 1.00 fcmp s31, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmp s31, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmpe s29, s30 +# CHECK-NEXT: 1 3 1.00 fcmpe s15, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmpe s15, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmp d4, d12 +# CHECK-NEXT: 1 3 1.00 fcmp d23, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmp d23, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmpe d26, d22 +# CHECK-NEXT: 1 3 1.00 fcmpe d29, #0.0 +# CHECK-NEXT: 1 3 1.00 fcmpe d29, #0.0 +# CHECK-NEXT: 1 3 1.00 fccmp s1, s31, #0, eq +# CHECK-NEXT: 1 3 1.00 fccmp s3, s0, #15, hs +# CHECK-NEXT: 1 3 1.00 fccmp s31, s15, #13, hs +# CHECK-NEXT: 1 3 1.00 fccmp d9, d31, #0, le +# CHECK-NEXT: 1 3 1.00 fccmp d3, d0, #15, gt +# CHECK-NEXT: 1 3 1.00 fccmp d31, d5, #7, ne +# CHECK-NEXT: 1 3 1.00 fccmpe s1, s31, #0, eq +# CHECK-NEXT: 1 3 1.00 fccmpe s3, s0, #15, hs +# CHECK-NEXT: 1 3 1.00 fccmpe s31, s15, #13, hs +# CHECK-NEXT: 1 3 1.00 fccmpe d9, d31, #0, le +# CHECK-NEXT: 1 3 1.00 fccmpe d3, d0, #15, gt +# CHECK-NEXT: 1 3 1.00 fccmpe d31, d5, #7, ne +# CHECK-NEXT: 1 3 1.00 fcsel s3, s20, s9, pl +# CHECK-NEXT: 1 3 1.00 fcsel d9, d10, d11, mi +# CHECK-NEXT: 1 4 1.00 fmov s0, s1 +# CHECK-NEXT: 1 4 1.00 fabs s2, s3 +# CHECK-NEXT: 1 4 1.00 fneg s4, s5 +# CHECK-NEXT: 1 22 29.00 fsqrt s6, s7 +# CHECK-NEXT: 1 4 1.00 fcvt d8, s9 +# CHECK-NEXT: 1 4 1.00 fcvt h10, s11 +# CHECK-NEXT: 1 4 1.00 frintn s12, s13 +# CHECK-NEXT: 1 4 1.00 frintp s14, s15 +# CHECK-NEXT: 1 4 1.00 frintm s16, s17 +# CHECK-NEXT: 1 4 1.00 frintz s18, s19 +# CHECK-NEXT: 1 4 1.00 frinta s20, 
s21 +# CHECK-NEXT: 1 4 1.00 frintx s22, s23 +# CHECK-NEXT: 1 4 1.00 frinti s24, s25 +# CHECK-NEXT: 1 4 1.00 fmov d0, d1 +# CHECK-NEXT: 1 4 1.00 fabs d2, d3 +# CHECK-NEXT: 1 4 1.00 fneg d4, d5 +# CHECK-NEXT: 1 22 29.00 fsqrt d6, d7 +# CHECK-NEXT: 1 4 1.00 fcvt s8, d9 +# CHECK-NEXT: 1 4 1.00 fcvt h10, d11 +# CHECK-NEXT: 1 4 1.00 frintn d12, d13 +# CHECK-NEXT: 1 4 1.00 frintp d14, d15 +# CHECK-NEXT: 1 4 1.00 frintm d16, d17 +# CHECK-NEXT: 1 4 1.00 frintz d18, d19 +# CHECK-NEXT: 1 4 1.00 frinta d20, d21 +# CHECK-NEXT: 1 4 1.00 frintx d22, d23 +# CHECK-NEXT: 1 4 1.00 frinti d24, d25 +# CHECK-NEXT: 1 4 1.00 fcvt s26, h27 +# CHECK-NEXT: 1 4 1.00 fcvt d28, h29 +# CHECK-NEXT: 1 4 1.00 fmul s20, s19, s17 +# CHECK-NEXT: 1 13 10.00 fdiv s1, s2, s3 +# CHECK-NEXT: 1 4 1.00 fadd s4, s5, s6 +# CHECK-NEXT: 1 4 1.00 fsub s7, s8, s9 +# CHECK-NEXT: 1 4 1.00 fmax s10, s11, s12 +# CHECK-NEXT: 1 4 1.00 fmin s13, s14, s15 +# CHECK-NEXT: 1 4 1.00 fmaxnm s16, s17, s18 +# CHECK-NEXT: 1 4 1.00 fminnm s19, s20, s21 +# CHECK-NEXT: 1 4 1.00 fnmul s22, s23, s2 +# CHECK-NEXT: 1 4 1.00 fmul d20, d19, d17 +# CHECK-NEXT: 1 22 19.00 fdiv d1, d2, d3 +# CHECK-NEXT: 1 4 1.00 fadd d4, d5, d6 +# CHECK-NEXT: 1 4 1.00 fsub d7, d8, d9 +# CHECK-NEXT: 1 4 1.00 fmax d10, d11, d12 +# CHECK-NEXT: 1 4 1.00 fmin d13, d14, d15 +# CHECK-NEXT: 1 4 1.00 fmaxnm d16, d17, d18 +# CHECK-NEXT: 1 4 1.00 fminnm d19, d20, d21 +# CHECK-NEXT: 1 4 1.00 fnmul d22, d23, d24 +# CHECK-NEXT: 1 4 1.00 fmadd s3, s5, s6, s31 +# CHECK-NEXT: 1 4 1.00 fmadd d3, d13, d0, d23 +# CHECK-NEXT: 1 4 1.00 fmsub s3, s5, s6, s31 +# CHECK-NEXT: 1 4 1.00 fmsub d3, d13, d0, d23 +# CHECK-NEXT: 1 4 1.00 fnmadd s3, s5, s6, s31 +# CHECK-NEXT: 1 4 1.00 fnmadd d3, d13, d0, d23 +# CHECK-NEXT: 1 4 1.00 fnmsub s3, s5, s6, s31 +# CHECK-NEXT: 1 4 1.00 fnmsub d3, d13, d0, d23 +# CHECK-NEXT: 1 4 1.00 fcvtzs w3, h5, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzs wzr, h20, #13 +# CHECK-NEXT: 1 4 1.00 fcvtzs w19, h0, #32 +# CHECK-NEXT: 1 4 1.00 fcvtzs x3, h5, #1 +# CHECK-NEXT: 1 4 
1.00 fcvtzs x12, h30, #45 +# CHECK-NEXT: 1 4 1.00 fcvtzs x19, h0, #64 +# CHECK-NEXT: 1 4 1.00 fcvtzs w3, s5, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzs wzr, s20, #13 +# CHECK-NEXT: 1 4 1.00 fcvtzs w19, s0, #32 +# CHECK-NEXT: 1 4 1.00 fcvtzs x3, s5, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzs x12, s30, #45 +# CHECK-NEXT: 1 4 1.00 fcvtzs x19, s0, #64 +# CHECK-NEXT: 1 4 1.00 fcvtzs w3, d5, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzs wzr, d20, #13 +# CHECK-NEXT: 1 4 1.00 fcvtzs w19, d0, #32 +# CHECK-NEXT: 1 4 1.00 fcvtzs x3, d5, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzs x12, d30, #45 +# CHECK-NEXT: 1 4 1.00 fcvtzs x19, d0, #64 +# CHECK-NEXT: 1 4 1.00 fcvtzu w3, h5, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzu wzr, h20, #13 +# CHECK-NEXT: 1 4 1.00 fcvtzu w19, h0, #32 +# CHECK-NEXT: 1 4 1.00 fcvtzu x3, h5, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzu x12, h30, #45 +# CHECK-NEXT: 1 4 1.00 fcvtzu x19, h0, #64 +# CHECK-NEXT: 1 4 1.00 fcvtzu w3, s5, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzu wzr, s20, #13 +# CHECK-NEXT: 1 4 1.00 fcvtzu w19, s0, #32 +# CHECK-NEXT: 1 4 1.00 fcvtzu x3, s5, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzu x12, s30, #45 +# CHECK-NEXT: 1 4 1.00 fcvtzu x19, s0, #64 +# CHECK-NEXT: 1 4 1.00 fcvtzu w3, d5, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzu wzr, d20, #13 +# CHECK-NEXT: 1 4 1.00 fcvtzu w19, d0, #32 +# CHECK-NEXT: 1 4 1.00 fcvtzu x3, d5, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzu x12, d30, #45 +# CHECK-NEXT: 1 4 1.00 fcvtzu x19, d0, #64 +# CHECK-NEXT: 1 4 1.00 scvtf h23, w19, #1 +# CHECK-NEXT: 1 4 1.00 scvtf h31, wzr, #20 +# CHECK-NEXT: 1 4 1.00 scvtf h14, w0, #32 +# CHECK-NEXT: 1 4 1.00 scvtf h23, x19, #1 +# CHECK-NEXT: 1 4 1.00 scvtf h31, xzr, #20 +# CHECK-NEXT: 1 4 1.00 scvtf h14, x0, #64 +# CHECK-NEXT: 1 4 1.00 scvtf s23, w19, #1 +# CHECK-NEXT: 1 4 1.00 scvtf s31, wzr, #20 +# CHECK-NEXT: 1 4 1.00 scvtf s14, w0, #32 +# CHECK-NEXT: 1 4 1.00 scvtf s23, x19, #1 +# CHECK-NEXT: 1 4 1.00 scvtf s31, xzr, #20 +# CHECK-NEXT: 1 4 1.00 scvtf s14, x0, #64 +# CHECK-NEXT: 1 4 1.00 scvtf d23, w19, #1 +# CHECK-NEXT: 1 4 1.00 scvtf d31, wzr, #20 +# 
CHECK-NEXT: 1 4 1.00 scvtf d14, w0, #32 +# CHECK-NEXT: 1 4 1.00 scvtf d23, x19, #1 +# CHECK-NEXT: 1 4 1.00 scvtf d31, xzr, #20 +# CHECK-NEXT: 1 4 1.00 scvtf d14, x0, #64 +# CHECK-NEXT: 1 4 1.00 ucvtf h23, w19, #1 +# CHECK-NEXT: 1 4 1.00 ucvtf h31, wzr, #20 +# CHECK-NEXT: 1 4 1.00 ucvtf h14, w0, #32 +# CHECK-NEXT: 1 4 1.00 ucvtf h23, x19, #1 +# CHECK-NEXT: 1 4 1.00 ucvtf h31, xzr, #20 +# CHECK-NEXT: 1 4 1.00 ucvtf h14, x0, #64 +# CHECK-NEXT: 1 4 1.00 ucvtf s23, w19, #1 +# CHECK-NEXT: 1 4 1.00 ucvtf s31, wzr, #20 +# CHECK-NEXT: 1 4 1.00 ucvtf s14, w0, #32 +# CHECK-NEXT: 1 4 1.00 ucvtf s23, x19, #1 +# CHECK-NEXT: 1 4 1.00 ucvtf s31, xzr, #20 +# CHECK-NEXT: 1 4 1.00 ucvtf s14, x0, #64 +# CHECK-NEXT: 1 4 1.00 ucvtf d23, w19, #1 +# CHECK-NEXT: 1 4 1.00 ucvtf d31, wzr, #20 +# CHECK-NEXT: 1 4 1.00 ucvtf d14, w0, #32 +# CHECK-NEXT: 1 4 1.00 ucvtf d23, x19, #1 +# CHECK-NEXT: 1 4 1.00 ucvtf d31, xzr, #20 +# CHECK-NEXT: 1 4 1.00 ucvtf d14, x0, #64 +# CHECK-NEXT: 1 4 1.00 fcvtns w3, h31 +# CHECK-NEXT: 1 4 1.00 fcvtns xzr, h12 +# CHECK-NEXT: 1 4 1.00 fcvtnu wzr, h12 +# CHECK-NEXT: 1 4 1.00 fcvtnu x0, h0 +# CHECK-NEXT: 1 4 1.00 fcvtps wzr, h9 +# CHECK-NEXT: 1 4 1.00 fcvtps x12, h20 +# CHECK-NEXT: 1 4 1.00 fcvtpu w30, h23 +# CHECK-NEXT: 1 4 1.00 fcvtpu x29, h3 +# CHECK-NEXT: 1 4 1.00 fcvtms w2, h3 +# CHECK-NEXT: 1 4 1.00 fcvtms x4, h5 +# CHECK-NEXT: 1 4 1.00 fcvtmu w6, h7 +# CHECK-NEXT: 1 4 1.00 fcvtmu x8, h9 +# CHECK-NEXT: 1 4 1.00 fcvtzs w10, h11 +# CHECK-NEXT: 1 4 1.00 fcvtzs x12, h13 +# CHECK-NEXT: 1 4 1.00 fcvtzu w14, h15 +# CHECK-NEXT: 1 4 1.00 fcvtzu x15, h16 +# CHECK-NEXT: 1 4 1.00 scvtf h17, w18 +# CHECK-NEXT: 1 4 1.00 scvtf h19, x20 +# CHECK-NEXT: 1 4 1.00 ucvtf h21, w22 +# CHECK-NEXT: 1 4 1.00 scvtf h23, x24 +# CHECK-NEXT: 1 4 1.00 fcvtas w25, h26 +# CHECK-NEXT: 1 4 1.00 fcvtas x27, h28 +# CHECK-NEXT: 1 4 1.00 fcvtau w29, h30 +# CHECK-NEXT: 1 4 1.00 fcvtau xzr, h0 +# CHECK-NEXT: 1 4 1.00 fcvtns w3, s31 +# CHECK-NEXT: 1 4 1.00 fcvtns xzr, s12 +# CHECK-NEXT: 1 4 1.00 
fcvtnu wzr, s12 +# CHECK-NEXT: 1 4 1.00 fcvtnu x0, s0 +# CHECK-NEXT: 1 4 1.00 fcvtps wzr, s9 +# CHECK-NEXT: 1 4 1.00 fcvtps x12, s20 +# CHECK-NEXT: 1 4 1.00 fcvtpu w30, s23 +# CHECK-NEXT: 1 4 1.00 fcvtpu x29, s3 +# CHECK-NEXT: 1 4 1.00 fcvtms w2, s3 +# CHECK-NEXT: 1 4 1.00 fcvtms x4, s5 +# CHECK-NEXT: 1 4 1.00 fcvtmu w6, s7 +# CHECK-NEXT: 1 4 1.00 fcvtmu x8, s9 +# CHECK-NEXT: 1 4 1.00 fcvtzs w10, s11 +# CHECK-NEXT: 1 4 1.00 fcvtzs x12, s13 +# CHECK-NEXT: 1 4 1.00 fcvtzu w14, s15 +# CHECK-NEXT: 1 4 1.00 fcvtzu x15, s16 +# CHECK-NEXT: 1 4 1.00 scvtf s17, w18 +# CHECK-NEXT: 1 4 1.00 scvtf s19, x20 +# CHECK-NEXT: 1 4 1.00 ucvtf s21, w22 +# CHECK-NEXT: 1 4 1.00 scvtf s23, x24 +# CHECK-NEXT: 1 4 1.00 fcvtas w25, s26 +# CHECK-NEXT: 1 4 1.00 fcvtas x27, s28 +# CHECK-NEXT: 1 4 1.00 fcvtau w29, s30 +# CHECK-NEXT: 1 4 1.00 fcvtau xzr, s0 +# CHECK-NEXT: 1 4 1.00 fcvtns w3, d31 +# CHECK-NEXT: 1 4 1.00 fcvtns xzr, d12 +# CHECK-NEXT: 1 4 1.00 fcvtnu wzr, d12 +# CHECK-NEXT: 1 4 1.00 fcvtnu x0, d0 +# CHECK-NEXT: 1 4 1.00 fcvtps wzr, d9 +# CHECK-NEXT: 1 4 1.00 fcvtps x12, d20 +# CHECK-NEXT: 1 4 1.00 fcvtpu w30, d23 +# CHECK-NEXT: 1 4 1.00 fcvtpu x29, d3 +# CHECK-NEXT: 1 4 1.00 fcvtms w2, d3 +# CHECK-NEXT: 1 4 1.00 fcvtms x4, d5 +# CHECK-NEXT: 1 4 1.00 fcvtmu w6, d7 +# CHECK-NEXT: 1 4 1.00 fcvtmu x8, d9 +# CHECK-NEXT: 1 4 1.00 fcvtzs w10, d11 +# CHECK-NEXT: 1 4 1.00 fcvtzs x12, d13 +# CHECK-NEXT: 1 4 1.00 fcvtzu w14, d15 +# CHECK-NEXT: 1 4 1.00 fcvtzu x15, d16 +# CHECK-NEXT: 1 4 1.00 scvtf d17, w18 +# CHECK-NEXT: 1 4 1.00 scvtf d19, x20 +# CHECK-NEXT: 1 4 1.00 ucvtf d21, w22 +# CHECK-NEXT: 1 4 1.00 ucvtf d23, x24 +# CHECK-NEXT: 1 4 1.00 fcvtas w25, d26 +# CHECK-NEXT: 1 4 1.00 fcvtas x27, d28 +# CHECK-NEXT: 1 4 1.00 fcvtau w29, d30 +# CHECK-NEXT: 1 4 1.00 fcvtau xzr, d0 +# CHECK-NEXT: 1 3 1.00 fmov w3, s9 +# CHECK-NEXT: 1 3 1.00 fmov s9, w3 +# CHECK-NEXT: 1 3 1.00 fmov x20, d31 +# CHECK-NEXT: 1 3 1.00 fmov d1, x15 +# CHECK-NEXT: 1 3 1.00 fmov x3, v12.d[1] +# CHECK-NEXT: 1 3 1.00 fmov 
v1.d[1], x19 +# CHECK-NEXT: 1 3 1.00 fmov s2, #0.12500000 +# CHECK-NEXT: 1 3 1.00 fmov s3, #1.00000000 +# CHECK-NEXT: 1 3 1.00 fmov d30, #16.00000000 +# CHECK-NEXT: 1 3 1.00 fmov s4, #1.06250000 +# CHECK-NEXT: 1 3 1.00 fmov d10, #1.93750000 +# CHECK-NEXT: 1 3 1.00 fmov s12, #-1.00000000 +# CHECK-NEXT: 1 3 1.00 fmov d16, #8.50000000 +# CHECK-NEXT: 1 4 1.00 * ldr w3, #0 +# CHECK-NEXT: 1 4 1.00 * ldr x29, #4 +# CHECK-NEXT: 1 4 1.00 * ldrsw xzr, #-4 +# CHECK-NEXT: 1 4 1.00 * ldr s0, #8 +# CHECK-NEXT: 1 4 1.00 * ldr d0, #1048572 +# CHECK-NEXT: 1 4 1.00 * ldr q0, #-1048576 +# CHECK-NEXT: 1 4 1.00 U prfm pldl1strm, #0 +# CHECK-NEXT: 1 4 1.00 U prfm #22, #0 +# CHECK-NEXT: 2 5 2.00 * * U stxrb w18, w8, [sp] +# CHECK-NEXT: 2 5 2.00 * * U stxrh w24, w15, [x16] +# CHECK-NEXT: 2 5 2.00 * * U stxr w5, w6, [x17] +# CHECK-NEXT: 2 5 2.00 * * U stxr w1, x10, [x21] +# CHECK-NEXT: 1 4 1.00 * * U ldxrb w30, [x0] +# CHECK-NEXT: 1 4 1.00 * * U ldxrh w17, [x4] +# CHECK-NEXT: 1 4 1.00 * * U ldxr w22, [sp] +# CHECK-NEXT: 1 4 1.00 * * U ldxr x11, [x29] +# CHECK-NEXT: 1 4 1.00 * * U ldxr x11, [x29] +# CHECK-NEXT: 1 4 1.00 * * U ldxr x11, [x29] +# CHECK-NEXT: 2 5 2.00 * * U stxp w12, w11, w10, [sp] +# CHECK-NEXT: 2 5 2.00 * * U stxp wzr, x27, x9, [x12] +# CHECK-NEXT: 2 4 2.00 * * U ldxp w0, wzr, [sp] +# CHECK-NEXT: 2 4 2.00 * * U ldxp x17, x0, [x18] +# CHECK-NEXT: 2 4 2.00 * * U ldxp x17, x0, [x18] +# CHECK-NEXT: 2 5 2.00 * * U stlxrb w12, w22, [x0] +# CHECK-NEXT: 2 5 2.00 * * U stlxrh w10, w1, [x1] +# CHECK-NEXT: 2 5 2.00 * * U stlxr w9, w2, [x2] +# CHECK-NEXT: 2 5 2.00 * * U stlxr w9, x3, [sp] +# CHECK-NEXT: 1 4 1.00 * * U ldaxrb w8, [x4] +# CHECK-NEXT: 1 4 1.00 * * U ldaxrh w7, [x5] +# CHECK-NEXT: 1 4 1.00 * * U ldaxr w6, [sp] +# CHECK-NEXT: 1 4 1.00 * * U ldaxr x5, [x6] +# CHECK-NEXT: 1 4 1.00 * * U ldaxr x5, [x6] +# CHECK-NEXT: 1 4 1.00 * * U ldaxr x5, [x6] +# CHECK-NEXT: 2 5 2.00 * * U stlxp w4, w5, w6, [sp] +# CHECK-NEXT: 2 5 2.00 * * U stlxp wzr, x6, x7, [x1] +# CHECK-NEXT: 2 4 2.00 * 
* U ldaxp w5, w18, [sp] +# CHECK-NEXT: 2 4 2.00 * * U ldaxp x6, x19, [x22] +# CHECK-NEXT: 2 4 2.00 * * U ldaxp x6, x19, [x22] +# CHECK-NEXT: 1 1 1.00 * U stlrb w24, [sp] +# CHECK-NEXT: 1 1 1.00 * U stlrh w25, [x30] +# CHECK-NEXT: 1 1 1.00 * U stlr w26, [x29] +# CHECK-NEXT: 1 1 1.00 * U stlr x27, [x28] +# CHECK-NEXT: 1 1 1.00 * U stlr x27, [x28] +# CHECK-NEXT: 1 1 1.00 * U stlr x27, [x28] +# CHECK-NEXT: 1 4 1.00 * U ldarb w23, [sp] +# CHECK-NEXT: 1 4 1.00 * U ldarh w22, [x30] +# CHECK-NEXT: 1 4 1.00 * U ldar wzr, [x29] +# CHECK-NEXT: 1 4 1.00 * U ldar x21, [x28] +# CHECK-NEXT: 1 4 1.00 * U ldar x21, [x28] +# CHECK-NEXT: 1 4 1.00 * U ldar x21, [x28] +# CHECK-NEXT: 1 1 1.00 * sturb w9, [sp] +# CHECK-NEXT: 1 1 1.00 * sturh wzr, [x12, #255] +# CHECK-NEXT: 1 1 1.00 * stur w16, [x0, #-256] +# CHECK-NEXT: 1 1 1.00 * stur x28, [x14, #1] +# CHECK-NEXT: 1 4 1.00 * ldurb w1, [x20, #255] +# CHECK-NEXT: 1 4 1.00 * ldurh w20, [x1, #255] +# CHECK-NEXT: 1 4 1.00 * ldur w12, [sp, #255] +# CHECK-NEXT: 1 4 1.00 * ldur xzr, [x12, #255] +# CHECK-NEXT: 1 4 1.00 * ldursb x9, [x7, #-256] +# CHECK-NEXT: 1 4 1.00 * ldursh x17, [x19, #-256] +# CHECK-NEXT: 1 4 1.00 * ldursw x20, [x15, #-256] +# CHECK-NEXT: 1 4 1.00 U prfum pldl2keep, [sp, #-256] +# CHECK-NEXT: 1 4 1.00 * ldursb w19, [x1, #-256] +# CHECK-NEXT: 1 4 1.00 * ldursh w15, [x21, #-256] +# CHECK-NEXT: 1 1 1.00 * stur b0, [sp, #1] +# CHECK-NEXT: 1 1 1.00 * stur h12, [x12, #-1] +# CHECK-NEXT: 1 1 1.00 * stur s15, [x0, #255] +# CHECK-NEXT: 1 1 1.00 * stur d31, [x5, #25] +# CHECK-NEXT: 1 1 1.00 * stur q9, [x5] +# CHECK-NEXT: 1 4 1.00 * ldur b3, [sp] +# CHECK-NEXT: 1 4 1.00 * ldur h5, [x4, #-256] +# CHECK-NEXT: 1 4 1.00 * ldur s7, [x12, #-1] +# CHECK-NEXT: 1 4 1.00 * ldur d11, [x19, #4] +# CHECK-NEXT: 1 4 1.00 * ldur q13, [x1, #2] +# CHECK-NEXT: 2 1 1.00 * strb w9, [x2], #255 +# CHECK-NEXT: 2 1 1.00 * strb w10, [x3], #1 +# CHECK-NEXT: 2 1 1.00 * strb w10, [x3], #-256 +# CHECK-NEXT: 2 1 1.00 * strh w9, [x2], #255 +# CHECK-NEXT: 2 1 1.00 * 
strh w9, [x2], #1 +# CHECK-NEXT: 2 1 1.00 * strh w10, [x3], #-256 +# CHECK-NEXT: 2 1 1.00 * str w19, [sp], #255 +# CHECK-NEXT: 2 1 1.00 * str w20, [x30], #1 +# CHECK-NEXT: 2 1 1.00 * str w21, [x12], #-256 +# CHECK-NEXT: 2 1 1.00 * str xzr, [x9], #255 +# CHECK-NEXT: 2 1 1.00 * str x2, [x3], #1 +# CHECK-NEXT: 2 1 1.00 * str x19, [x12], #-256 +# CHECK-NEXT: 2 4 1.00 * ldrb w9, [x2], #255 +# CHECK-NEXT: 2 4 1.00 * ldrb w10, [x3], #1 +# CHECK-NEXT: 2 4 1.00 * ldrb w10, [x3], #-256 +# CHECK-NEXT: 2 4 1.00 * ldrh w9, [x2], #255 +# CHECK-NEXT: 2 4 1.00 * ldrh w9, [x2], #1 +# CHECK-NEXT: 2 4 1.00 * ldrh w10, [x3], #-256 +# CHECK-NEXT: 2 4 1.00 * ldr w19, [sp], #255 +# CHECK-NEXT: 2 4 1.00 * ldr w20, [x30], #1 +# CHECK-NEXT: 2 4 1.00 * ldr w21, [x12], #-256 +# CHECK-NEXT: 2 4 1.00 * ldr xzr, [x9], #255 +# CHECK-NEXT: 2 4 1.00 * ldr x2, [x3], #1 +# CHECK-NEXT: 2 4 1.00 * ldr x19, [x12], #-256 +# CHECK-NEXT: 2 4 1.00 * ldrsb xzr, [x9], #255 +# CHECK-NEXT: 2 4 1.00 * ldrsb x2, [x3], #1 +# CHECK-NEXT: 2 4 1.00 * ldrsb x19, [x12], #-256 +# CHECK-NEXT: 2 4 1.00 * ldrsh xzr, [x9], #255 +# CHECK-NEXT: 2 4 1.00 * ldrsh x2, [x3], #1 +# CHECK-NEXT: 2 4 1.00 * ldrsh x19, [x12], #-256 +# CHECK-NEXT: 2 4 1.00 * ldrsw xzr, [x9], #255 +# CHECK-NEXT: 2 4 1.00 * ldrsw x2, [x3], #1 +# CHECK-NEXT: 2 4 1.00 * ldrsw x19, [x12], #-256 +# CHECK-NEXT: 2 4 1.00 * ldrsb wzr, [x9], #255 +# CHECK-NEXT: 2 4 1.00 * ldrsb w2, [x3], #1 +# CHECK-NEXT: 2 4 1.00 * ldrsb w19, [x12], #-256 +# CHECK-NEXT: 2 4 1.00 * ldrsh wzr, [x9], #255 +# CHECK-NEXT: 2 4 1.00 * ldrsh w2, [x3], #1 +# CHECK-NEXT: 2 4 1.00 * ldrsh w19, [x12], #-256 +# CHECK-NEXT: 2 1 1.00 * str b0, [x0], #255 +# CHECK-NEXT: 2 1 1.00 * str b3, [x3], #1 +# CHECK-NEXT: 2 1 1.00 * str b5, [sp], #-256 +# CHECK-NEXT: 2 1 1.00 * str h10, [x10], #255 +# CHECK-NEXT: 2 1 1.00 * str h13, [x23], #1 +# CHECK-NEXT: 2 1 1.00 * str h15, [sp], #-256 +# CHECK-NEXT: 2 1 1.00 * str s20, [x20], #255 +# CHECK-NEXT: 2 1 1.00 * str s23, [x23], #1 +# CHECK-NEXT: 2 1 1.00 
* str s25, [x0], #-256 +# CHECK-NEXT: 2 1 1.00 * str d20, [x20], #255 +# CHECK-NEXT: 2 1 1.00 * str d23, [x23], #1 +# CHECK-NEXT: 2 1 1.00 * str d25, [x0], #-256 +# CHECK-NEXT: 2 4 1.00 * ldr b0, [x0], #255 +# CHECK-NEXT: 2 4 1.00 * ldr b3, [x3], #1 +# CHECK-NEXT: 2 4 1.00 * ldr b5, [sp], #-256 +# CHECK-NEXT: 2 4 1.00 * ldr h10, [x10], #255 +# CHECK-NEXT: 2 4 1.00 * ldr h13, [x23], #1 +# CHECK-NEXT: 2 4 1.00 * ldr h15, [sp], #-256 +# CHECK-NEXT: 2 4 1.00 * ldr s20, [x20], #255 +# CHECK-NEXT: 2 4 1.00 * ldr s23, [x23], #1 +# CHECK-NEXT: 2 4 1.00 * ldr s25, [x0], #-256 +# CHECK-NEXT: 2 4 1.00 * ldr d20, [x20], #255 +# CHECK-NEXT: 2 4 1.00 * ldr d23, [x23], #1 +# CHECK-NEXT: 2 4 1.00 * ldr d25, [x0], #-256 +# CHECK-NEXT: 2 4 1.00 * ldr q20, [x1], #255 +# CHECK-NEXT: 2 4 1.00 * ldr q23, [x9], #1 +# CHECK-NEXT: 2 4 1.00 * ldr q25, [x20], #-256 +# CHECK-NEXT: 2 1 1.00 * str q10, [x1], #255 +# CHECK-NEXT: 2 1 1.00 * str q22, [sp], #1 +# CHECK-NEXT: 2 1 1.00 * str q21, [x20], #-256 +# CHECK-NEXT: 2 4 1.00 * ldr x3, [x4, #0]! +# CHECK-NEXT: 2 1 1.00 * strb w9, [x2, #255]! +# CHECK-NEXT: 2 1 1.00 * strb w10, [x3, #1]! +# CHECK-NEXT: 2 1 1.00 * strb w10, [x3, #-256]! +# CHECK-NEXT: 2 1 1.00 * strh w9, [x2, #255]! +# CHECK-NEXT: 2 1 1.00 * strh w9, [x2, #1]! +# CHECK-NEXT: 2 1 1.00 * strh w10, [x3, #-256]! +# CHECK-NEXT: 2 1 1.00 * str w19, [sp, #255]! +# CHECK-NEXT: 2 1 1.00 * str w20, [x30, #1]! +# CHECK-NEXT: 2 1 1.00 * str w21, [x12, #-256]! +# CHECK-NEXT: 2 1 1.00 * str xzr, [x9, #255]! +# CHECK-NEXT: 2 1 1.00 * str x2, [x3, #1]! +# CHECK-NEXT: 2 1 1.00 * str x19, [x12, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldrb w9, [x2, #255]! +# CHECK-NEXT: 2 4 1.00 * ldrb w10, [x3, #1]! +# CHECK-NEXT: 2 4 1.00 * ldrb w10, [x3, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldrh w9, [x2, #255]! +# CHECK-NEXT: 2 4 1.00 * ldrh w9, [x2, #1]! +# CHECK-NEXT: 2 4 1.00 * ldrh w10, [x3, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldr w19, [sp, #255]! +# CHECK-NEXT: 2 4 1.00 * ldr w20, [x30, #1]! 
+# CHECK-NEXT: 2 4 1.00 * ldr w21, [x12, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldr xzr, [x9, #255]! +# CHECK-NEXT: 2 4 1.00 * ldr x2, [x3, #1]! +# CHECK-NEXT: 2 4 1.00 * ldr x19, [x12, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldrsb xzr, [x9, #255]! +# CHECK-NEXT: 2 4 1.00 * ldrsb x2, [x3, #1]! +# CHECK-NEXT: 2 4 1.00 * ldrsb x19, [x12, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldrsh xzr, [x9, #255]! +# CHECK-NEXT: 2 4 1.00 * ldrsh x2, [x3, #1]! +# CHECK-NEXT: 2 4 1.00 * ldrsh x19, [x12, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldrsw xzr, [x9, #255]! +# CHECK-NEXT: 2 4 1.00 * ldrsw x2, [x3, #1]! +# CHECK-NEXT: 2 4 1.00 * ldrsw x19, [x12, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldrsb wzr, [x9, #255]! +# CHECK-NEXT: 2 4 1.00 * ldrsb w2, [x3, #1]! +# CHECK-NEXT: 2 4 1.00 * ldrsb w19, [x12, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldrsh wzr, [x9, #255]! +# CHECK-NEXT: 2 4 1.00 * ldrsh w2, [x3, #1]! +# CHECK-NEXT: 2 4 1.00 * ldrsh w19, [x12, #-256]! +# CHECK-NEXT: 2 1 1.00 * str b0, [x0, #255]! +# CHECK-NEXT: 2 1 1.00 * str b3, [x3, #1]! +# CHECK-NEXT: 2 1 1.00 * str b5, [sp, #-256]! +# CHECK-NEXT: 2 1 1.00 * str h10, [x10, #255]! +# CHECK-NEXT: 2 1 1.00 * str h13, [x23, #1]! +# CHECK-NEXT: 2 1 1.00 * str h15, [sp, #-256]! +# CHECK-NEXT: 2 1 1.00 * str s20, [x20, #255]! +# CHECK-NEXT: 2 1 1.00 * str s23, [x23, #1]! +# CHECK-NEXT: 2 1 1.00 * str s25, [x0, #-256]! +# CHECK-NEXT: 2 1 1.00 * str d20, [x20, #255]! +# CHECK-NEXT: 2 1 1.00 * str d23, [x23, #1]! +# CHECK-NEXT: 2 1 1.00 * str d25, [x0, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldr b0, [x0, #255]! +# CHECK-NEXT: 2 4 1.00 * ldr b3, [x3, #1]! +# CHECK-NEXT: 2 4 1.00 * ldr b5, [sp, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldr h10, [x10, #255]! +# CHECK-NEXT: 2 4 1.00 * ldr h13, [x23, #1]! +# CHECK-NEXT: 2 4 1.00 * ldr h15, [sp, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldr s20, [x20, #255]! +# CHECK-NEXT: 2 4 1.00 * ldr s23, [x23, #1]! +# CHECK-NEXT: 2 4 1.00 * ldr s25, [x0, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldr d20, [x20, #255]! +# CHECK-NEXT: 2 4 1.00 * ldr d23, [x23, #1]! 
+# CHECK-NEXT: 2 4 1.00 * ldr d25, [x0, #-256]! +# CHECK-NEXT: 2 4 1.00 * ldr q20, [x1, #255]! +# CHECK-NEXT: 2 4 1.00 * ldr q23, [x9, #1]! +# CHECK-NEXT: 2 4 1.00 * ldr q25, [x20, #-256]! +# CHECK-NEXT: 2 1 1.00 * str q10, [x1, #255]! +# CHECK-NEXT: 2 1 1.00 * str q22, [sp, #1]! +# CHECK-NEXT: 2 1 1.00 * str q21, [x20, #-256]! +# CHECK-NEXT: 1 1 1.00 * sttrb w9, [sp] +# CHECK-NEXT: 1 1 1.00 * sttrh wzr, [x12, #255] +# CHECK-NEXT: 1 1 1.00 * sttr w16, [x0, #-256] +# CHECK-NEXT: 1 1 1.00 * sttr x28, [x14, #1] +# CHECK-NEXT: 1 4 1.00 * ldtrb w1, [x20, #255] +# CHECK-NEXT: 1 4 1.00 * ldtrh w20, [x1, #255] +# CHECK-NEXT: 1 4 1.00 * ldtr w12, [sp, #255] +# CHECK-NEXT: 1 4 1.00 * ldtr xzr, [x12, #255] +# CHECK-NEXT: 1 4 1.00 * ldtrsb x9, [x7, #-256] +# CHECK-NEXT: 1 4 1.00 * ldtrsh x17, [x19, #-256] +# CHECK-NEXT: 1 4 1.00 * ldtrsw x20, [x15, #-256] +# CHECK-NEXT: 1 4 1.00 * ldtrsb w19, [x1, #-256] +# CHECK-NEXT: 1 4 1.00 * ldtrsh w15, [x21, #-256] +# CHECK-NEXT: 1 4 1.00 * ldr x4, [x29] +# CHECK-NEXT: 1 4 1.00 * ldr x30, [x12, #32760] +# CHECK-NEXT: 1 4 1.00 * ldr x20, [sp, #8] +# CHECK-NEXT: 1 4 1.00 * ldr xzr, [sp] +# CHECK-NEXT: 1 4 1.00 * ldr w2, [sp] +# CHECK-NEXT: 1 4 1.00 * ldr w17, [sp, #16380] +# CHECK-NEXT: 1 4 1.00 * ldr w13, [x2, #4] +# CHECK-NEXT: 1 4 1.00 * ldrsw x2, [x5, #4] +# CHECK-NEXT: 1 4 1.00 * ldrsw x23, [sp, #16380] +# CHECK-NEXT: 1 4 1.00 * ldrh w2, [x4] +# CHECK-NEXT: 1 4 1.00 * ldrsh w23, [x6, #8190] +# CHECK-NEXT: 1 4 1.00 * ldrsh wzr, [sp, #2] +# CHECK-NEXT: 1 4 1.00 * ldrsh x29, [x2, #2] +# CHECK-NEXT: 1 4 1.00 * ldrb w26, [x3, #121] +# CHECK-NEXT: 1 4 1.00 * ldrb w12, [x2] +# CHECK-NEXT: 1 4 1.00 * ldrsb w27, [sp, #4095] +# CHECK-NEXT: 1 4 1.00 * ldrsb xzr, [x15] +# CHECK-NEXT: 1 1 1.00 * str x30, [sp] +# CHECK-NEXT: 1 1 1.00 * str w20, [x4, #16380] +# CHECK-NEXT: 1 1 1.00 * strh w17, [sp, #8190] +# CHECK-NEXT: 1 1 1.00 * strb w23, [x3, #4095] +# CHECK-NEXT: 1 1 1.00 * strb wzr, [x2] +# CHECK-NEXT: 1 4 1.00 * ldr b31, [sp, #4095] +# 
CHECK-NEXT: 1 4 1.00 * ldr h20, [x2, #8190] +# CHECK-NEXT: 1 4 1.00 * ldr s10, [x19, #16380] +# CHECK-NEXT: 1 4 1.00 * ldr d3, [x10, #32760] +# CHECK-NEXT: 1 1 1.00 * str q12, [sp, #65520] +# CHECK-NEXT: 1 4 1.00 * ldrb w3, [sp, x5] +# CHECK-NEXT: 1 4 1.00 * ldrb w9, [x27, x6] +# CHECK-NEXT: 1 4 1.00 * ldrsb w10, [x30, x7] +# CHECK-NEXT: 1 4 1.00 * ldrb w11, [x29, x3, sxtx] +# CHECK-NEXT: 1 1 1.00 * strb w12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 4 1.00 * ldrb w14, [x26, w6, uxtw] +# CHECK-NEXT: 1 4 1.00 * ldrsb w15, [x25, w7, uxtw] +# CHECK-NEXT: 1 4 1.00 * ldrb w17, [x23, w9, sxtw] +# CHECK-NEXT: 1 4 1.00 * ldrsb x18, [x22, w10, sxtw] +# CHECK-NEXT: 1 4 1.00 * ldrsh w3, [sp, x5] +# CHECK-NEXT: 1 4 1.00 * ldrsh w9, [x27, x6] +# CHECK-NEXT: 1 4 1.00 * ldrh w10, [x30, x7, lsl #1] +# CHECK-NEXT: 1 1 1.00 * strh w11, [x29, x3, sxtx] +# CHECK-NEXT: 1 4 1.00 * ldrh w12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 4 1.00 * ldrsh x13, [x27, x5, sxtx #1] +# CHECK-NEXT: 1 4 1.00 * ldrh w14, [x26, w6, uxtw] +# CHECK-NEXT: 1 4 1.00 * ldrh w15, [x25, w7, uxtw] +# CHECK-NEXT: 1 4 1.00 * ldrsh w16, [x24, w8, uxtw #1] +# CHECK-NEXT: 1 4 1.00 * ldrh w17, [x23, w9, sxtw] +# CHECK-NEXT: 1 4 1.00 * ldrh w18, [x22, w10, sxtw] +# CHECK-NEXT: 1 1 1.00 * strh w19, [x21, wzr, sxtw #1] +# CHECK-NEXT: 1 4 1.00 * ldr w3, [sp, x5] +# CHECK-NEXT: 1 4 1.00 * ldr s9, [x27, x6] +# CHECK-NEXT: 1 4 1.00 * ldr w10, [x30, x7, lsl #2] +# CHECK-NEXT: 1 4 1.00 * ldr w11, [x29, x3, sxtx] +# CHECK-NEXT: 1 1 1.00 * str s12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 1 1.00 * str w13, [x27, x5, sxtx #2] +# CHECK-NEXT: 1 1 1.00 * str w14, [x26, w6, uxtw] +# CHECK-NEXT: 1 4 1.00 * ldr w15, [x25, w7, uxtw] +# CHECK-NEXT: 1 4 1.00 * ldr w16, [x24, w8, uxtw #2] +# CHECK-NEXT: 1 4 1.00 * ldrsw x17, [x23, w9, sxtw] +# CHECK-NEXT: 1 4 1.00 * ldr w18, [x22, w10, sxtw] +# CHECK-NEXT: 1 4 1.00 * ldrsw x19, [x21, wzr, sxtw #2] +# CHECK-NEXT: 1 4 1.00 * ldr x3, [sp, x5] +# CHECK-NEXT: 1 1 1.00 * str x9, [x27, x6] +# CHECK-NEXT: 1 4 1.00 * ldr 
d10, [x30, x7, lsl #3] +# CHECK-NEXT: 1 1 1.00 * str x11, [x29, x3, sxtx] +# CHECK-NEXT: 1 4 1.00 * ldr x12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 4 1.00 * ldr x13, [x27, x5, sxtx #3] +# CHECK-NEXT: 1 4 1.00 U prfm pldl1keep, [x26, w6, uxtw] +# CHECK-NEXT: 1 4 1.00 * ldr x15, [x25, w7, uxtw] +# CHECK-NEXT: 1 4 1.00 * ldr x16, [x24, w8, uxtw #3] +# CHECK-NEXT: 1 4 1.00 * ldr x17, [x23, w9, sxtw] +# CHECK-NEXT: 1 4 1.00 * ldr x18, [x22, w10, sxtw] +# CHECK-NEXT: 1 1 1.00 * str d19, [x21, wzr, sxtw #3] +# CHECK-NEXT: 1 4 1.00 * ldr q3, [sp, x5] +# CHECK-NEXT: 1 4 1.00 * ldr q9, [x27, x6] +# CHECK-NEXT: 1 4 1.00 * ldr q10, [x30, x7, lsl #4] +# CHECK-NEXT: 1 1 1.00 * str q11, [x29, x3, sxtx] +# CHECK-NEXT: 1 1 1.00 * str q12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 1 1.00 * str q13, [x27, x5, sxtx #4] +# CHECK-NEXT: 1 4 1.00 * ldr q14, [x26, w6, uxtw] +# CHECK-NEXT: 1 4 1.00 * ldr q15, [x25, w7, uxtw] +# CHECK-NEXT: 1 4 1.00 * ldr q16, [x24, w8, uxtw #4] +# CHECK-NEXT: 1 4 1.00 * ldr q17, [x23, w9, sxtw] +# CHECK-NEXT: 1 1 1.00 * str q18, [x22, w10, sxtw] +# CHECK-NEXT: 1 4 1.00 * ldr q19, [x21, wzr, sxtw #4] +# CHECK-NEXT: 2 4 2.00 * ldp w3, w5, [sp] +# CHECK-NEXT: 1 1 1.00 * stp wzr, w9, [sp, #252] +# CHECK-NEXT: 2 4 2.00 * ldp w2, wzr, [sp, #-256] +# CHECK-NEXT: 2 4 2.00 * ldp w9, w10, [sp, #4] +# CHECK-NEXT: 2 4 2.00 * ldpsw x9, x10, [sp, #4] +# CHECK-NEXT: 2 4 2.00 * ldpsw x9, x10, [x2, #-256] +# CHECK-NEXT: 2 4 2.00 * ldpsw x20, x30, [sp, #252] +# CHECK-NEXT: 2 4 2.00 * ldp x21, x29, [x2, #504] +# CHECK-NEXT: 2 4 2.00 * ldp x22, x23, [x3, #-512] +# CHECK-NEXT: 2 4 2.00 * ldp x24, x25, [x4, #8] +# CHECK-NEXT: 2 4 2.00 * ldp s29, s28, [sp, #252] +# CHECK-NEXT: 1 1 1.00 * stp s27, s26, [sp, #-256] +# CHECK-NEXT: 2 4 2.00 * ldp s1, s2, [x3, #44] +# CHECK-NEXT: 1 1 1.00 * stp d3, d5, [x9, #504] +# CHECK-NEXT: 1 1 1.00 * stp d7, d11, [x10, #-512] +# CHECK-NEXT: 2 4 2.00 * ldp d2, d3, [x30, #-8] +# CHECK-NEXT: 1 1 1.00 * stp q3, q5, [sp] +# CHECK-NEXT: 1 1 1.00 * stp q17, q19, 
[sp, #1008] +# CHECK-NEXT: 2 4 2.00 * ldp q23, q29, [x1, #-1024] +# CHECK-NEXT: 3 4 2.00 * ldp w3, w5, [sp], #0 +# CHECK-NEXT: 2 1 1.00 * stp wzr, w9, [sp], #252 +# CHECK-NEXT: 3 4 2.00 * ldp w2, wzr, [sp], #-256 +# CHECK-NEXT: 3 4 2.00 * ldp w9, w10, [sp], #4 +# CHECK-NEXT: 3 4 2.00 * ldpsw x9, x10, [sp], #4 +# CHECK-NEXT: 3 4 2.00 * ldpsw x9, x10, [x2], #-256 +# CHECK-NEXT: 3 4 2.00 * ldpsw x20, x30, [sp], #252 +# CHECK-NEXT: 3 4 2.00 * ldp x21, x29, [x2], #504 +# CHECK-NEXT: 3 4 2.00 * ldp x22, x23, [x3], #-512 +# CHECK-NEXT: 3 4 2.00 * ldp x24, x25, [x4], #8 +# CHECK-NEXT: 3 4 2.00 * ldp s29, s28, [sp], #252 +# CHECK-NEXT: 2 1 1.00 * stp s27, s26, [sp], #-256 +# CHECK-NEXT: 3 4 2.00 * ldp s1, s2, [x3], #44 +# CHECK-NEXT: 2 1 1.00 * stp d3, d5, [x9], #504 +# CHECK-NEXT: 2 1 1.00 * stp d7, d11, [x10], #-512 +# CHECK-NEXT: 3 4 2.00 * ldp d2, d3, [x30], #-8 +# CHECK-NEXT: 2 1 1.00 * stp q3, q5, [sp], #0 +# CHECK-NEXT: 2 1 1.00 * stp q17, q19, [sp], #1008 +# CHECK-NEXT: 3 4 2.00 * ldp q23, q29, [x1], #-1024 +# CHECK-NEXT: 3 4 2.00 * ldp w3, w5, [sp, #0]! +# CHECK-NEXT: 2 1 1.00 * stp wzr, w9, [sp, #252]! +# CHECK-NEXT: 3 4 2.00 * ldp w2, wzr, [sp, #-256]! +# CHECK-NEXT: 3 4 2.00 * ldp w9, w10, [sp, #4]! +# CHECK-NEXT: 3 4 2.00 * ldpsw x9, x10, [sp, #4]! +# CHECK-NEXT: 3 4 2.00 * ldpsw x9, x10, [x2, #-256]! +# CHECK-NEXT: 3 4 2.00 * ldpsw x20, x30, [sp, #252]! +# CHECK-NEXT: 3 4 2.00 * ldp x21, x29, [x2, #504]! +# CHECK-NEXT: 3 4 2.00 * ldp x22, x23, [x3, #-512]! +# CHECK-NEXT: 3 4 2.00 * ldp x24, x25, [x4, #8]! +# CHECK-NEXT: 3 4 2.00 * ldp s29, s28, [sp, #252]! +# CHECK-NEXT: 2 1 1.00 * stp s27, s26, [sp, #-256]! +# CHECK-NEXT: 3 4 2.00 * ldp s1, s2, [x3, #44]! +# CHECK-NEXT: 2 1 1.00 * stp d3, d5, [x9, #504]! +# CHECK-NEXT: 2 1 1.00 * stp d7, d11, [x10, #-512]! +# CHECK-NEXT: 3 4 2.00 * ldp d2, d3, [x30, #-8]! +# CHECK-NEXT: 2 1 1.00 * stp q3, q5, [sp, #0]! +# CHECK-NEXT: 2 1 1.00 * stp q17, q19, [sp, #1008]! +# CHECK-NEXT: 3 4 2.00 * ldp q23, q29, [x1, #-1024]! 
+# CHECK-NEXT: 2 4 2.00 * ldnp w3, w5, [sp] +# CHECK-NEXT: 1 1 1.00 * stnp wzr, w9, [sp, #252] +# CHECK-NEXT: 2 4 2.00 * ldnp w2, wzr, [sp, #-256] +# CHECK-NEXT: 2 4 2.00 * ldnp w9, w10, [sp, #4] +# CHECK-NEXT: 2 4 2.00 * ldnp x21, x29, [x2, #504] +# CHECK-NEXT: 2 4 2.00 * ldnp x22, x23, [x3, #-512] +# CHECK-NEXT: 2 4 2.00 * ldnp x24, x25, [x4, #8] +# CHECK-NEXT: 2 4 2.00 * ldnp s29, s28, [sp, #252] +# CHECK-NEXT: 1 1 1.00 * stnp s27, s26, [sp, #-256] +# CHECK-NEXT: 2 4 2.00 * ldnp s1, s2, [x3, #44] +# CHECK-NEXT: 1 1 1.00 * stnp d3, d5, [x9, #504] +# CHECK-NEXT: 1 1 1.00 * stnp d7, d11, [x10, #-512] +# CHECK-NEXT: 2 4 2.00 * ldnp d2, d3, [x30, #-8] +# CHECK-NEXT: 1 1 1.00 * stnp q3, q5, [sp] +# CHECK-NEXT: 1 1 1.00 * stnp q17, q19, [sp, #1008] +# CHECK-NEXT: 2 4 2.00 * ldnp q23, q29, [x1, #-1024] +# CHECK-NEXT: 1 1 1.00 mov w3, #983055 +# CHECK-NEXT: 1 1 1.00 mov x10, #-6148914691236517206 +# CHECK-NEXT: 1 1 1.00 and w12, w23, w21 +# CHECK-NEXT: 1 2 1.00 and w16, w15, w1, lsl #1 +# CHECK-NEXT: 1 2 1.00 and w9, w4, w10, lsl #31 +# CHECK-NEXT: 1 1 1.00 and w3, w30, w11 +# CHECK-NEXT: 1 2 1.00 and x3, x5, x7, lsl #63 +# CHECK-NEXT: 1 2 1.00 and x5, x14, x19, asr #4 +# CHECK-NEXT: 1 2 1.00 and w3, w17, w19, ror #31 +# CHECK-NEXT: 1 2 1.00 and w0, w2, wzr, lsr #17 +# CHECK-NEXT: 1 2 1.00 and w3, w30, w11, asr #2 +# CHECK-NEXT: 1 1 1.00 and xzr, x4, x26 +# CHECK-NEXT: 1 2 1.00 and w3, wzr, w20, ror #2 +# CHECK-NEXT: 1 2 1.00 and x7, x20, xzr, asr #63 +# CHECK-NEXT: 1 2 1.00 bic x13, x20, x14, lsl #47 +# CHECK-NEXT: 1 1 1.00 bic w2, w7, w9 +# CHECK-NEXT: 1 2 1.00 orr w2, w7, w0, asr #31 +# CHECK-NEXT: 1 2 1.00 orr x8, x9, x10, lsl #12 +# CHECK-NEXT: 1 2 1.00 orn x3, x5, x7, asr #2 +# CHECK-NEXT: 1 1 1.00 orn w2, w5, w29 +# CHECK-NEXT: 1 2 1.00 ands w7, wzr, w9, lsl #1 +# CHECK-NEXT: 1 2 1.00 ands x3, x5, x20, ror #63 +# CHECK-NEXT: 1 1 1.00 bics w3, w5, w7 +# CHECK-NEXT: 1 2 1.00 bics x3, xzr, x3, lsl #1 +# CHECK-NEXT: 1 2 1.00 tst w3, w7, lsl #31 +# CHECK-NEXT: 1 2 1.00 
tst x2, x20, asr #2 +# CHECK-NEXT: 1 1 1.00 mov x3, x6 +# CHECK-NEXT: 1 1 1.00 mov x3, xzr +# CHECK-NEXT: 1 1 1.00 mov wzr, w2 +# CHECK-NEXT: 1 1 1.00 mov w3, w5 +# CHECK-NEXT: 1 1 1.00 movz w2, #0, lsl #16 +# CHECK-NEXT: 1 1 1.00 mov w2, #-1235 +# CHECK-NEXT: 1 1 1.00 mov x2, #5299989643264 +# CHECK-NEXT: 1 1 1.00 mov x2, #0 +# CHECK-NEXT: 1 1 1.00 movk w3, #0 +# CHECK-NEXT: 1 1 1.00 movz x4, #0, lsl #16 +# CHECK-NEXT: 1 1 1.00 movk w5, #0, lsl #16 +# CHECK-NEXT: 1 1 1.00 movz x6, #0, lsl #32 +# CHECK-NEXT: 1 1 1.00 movk x7, #0, lsl #32 +# CHECK-NEXT: 1 1 1.00 movz x8, #0, lsl #48 +# CHECK-NEXT: 1 1 1.00 movk x9, #0, lsl #48 +# CHECK-NEXT: 1 1 1.00 adr x2, #1600 +# CHECK-NEXT: 1 1 1.00 adrp x21, #6553600 +# CHECK-NEXT: 1 1 1.00 adr x0, #262144 +# CHECK-NEXT: 1 1 1.00 tbz x12, #62, #0 +# CHECK-NEXT: 1 1 1.00 tbz x12, #62, #4 +# CHECK-NEXT: 1 1 1.00 tbz x12, #62, #-32768 +# CHECK-NEXT: 1 1 1.00 tbnz x12, #60, #32764 +# CHECK-NEXT: 1 1 1.00 b #4 +# CHECK-NEXT: 1 1 1.00 b #-4 +# CHECK-NEXT: 1 1 1.00 b #134217724 +# CHECK-NEXT: 1 1 1.00 br x20 +# CHECK-NEXT: 1 1 1.00 blr xzr +# CHECK-NEXT: 1 1 1.00 U ret x10 +# CHECK-NEXT: 1 1 1.00 U ret +# CHECK-NEXT: 1 1 1.00 U eret +# CHECK-NEXT: 1 1 1.00 U drps + +# CHECK: Resources: +# CHECK-NEXT: [0] - CortexA320UnitALU +# CHECK-NEXT: [1] - CortexA320UnitB +# CHECK-NEXT: [2] - CortexA320UnitDiv +# CHECK-NEXT: [3] - CortexA320UnitLdSt +# CHECK-NEXT: [4] - CortexA320UnitMAC +# CHECK-NEXT: [5] - CortexA320UnitPAC +# CHECK-NEXT: [6] - CortexA320UnitVALU +# CHECK-NEXT: [7] - CortexA320UnitVMAC +# CHECK-NEXT: [8] - CortexA320UnitVMC + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] +# CHECK-NEXT: 465.00 22.00 64.00 450.00 69.00 - 221.00 12.00 87.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] Instructions: +# CHECK-NEXT: 1.00 - - - - - - - - add w2, w3, #4095 +# CHECK-NEXT: 1.00 - - - - - - - - add w30, w29, #1, lsl #12 +# CHECK-NEXT: 1.00 
- - - - - - - - add w13, w5, #4095, lsl #12 +# CHECK-NEXT: 1.00 - - - - - - - - add x5, x7, #1638 +# CHECK-NEXT: 1.00 - - - - - - - - add w20, wsp, #801 +# CHECK-NEXT: 1.00 - - - - - - - - add wsp, wsp, #1104 +# CHECK-NEXT: 1.00 - - - - - - - - add wsp, w30, #4084 +# CHECK-NEXT: 1.00 - - - - - - - - add x0, x24, #291 +# CHECK-NEXT: 1.00 - - - - - - - - add x3, x24, #4095, lsl #12 +# CHECK-NEXT: 1.00 - - - - - - - - add x8, sp, #1074 +# CHECK-NEXT: 1.00 - - - - - - - - add sp, x29, #3816 +# CHECK-NEXT: 1.00 - - - - - - - - sub w0, wsp, #4077 +# CHECK-NEXT: 1.00 - - - - - - - - sub w4, w20, #546, lsl #12 +# CHECK-NEXT: 1.00 - - - - - - - - sub sp, sp, #288 +# CHECK-NEXT: 1.00 - - - - - - - - sub wsp, w19, #16 +# CHECK-NEXT: 1.00 - - - - - - - - adds w13, w23, #291, lsl #12 +# CHECK-NEXT: 1.00 - - - - - - - - cmn w2, #4095 +# CHECK-NEXT: 1.00 - - - - - - - - adds w20, wsp, #0 +# CHECK-NEXT: 1.00 - - - - - - - - cmn x3, #1, lsl #12 +# CHECK-NEXT: 1.00 - - - - - - - - cmp sp, #20, lsl #12 +# CHECK-NEXT: 1.00 - - - - - - - - cmp x30, #4095 +# CHECK-NEXT: 1.00 - - - - - - - - subs x4, sp, #3822 +# CHECK-NEXT: 1.00 - - - - - - - - cmn w3, #291, lsl #12 +# CHECK-NEXT: 1.00 - - - - - - - - cmn wsp, #1365 +# CHECK-NEXT: 1.00 - - - - - - - - cmn sp, #1092, lsl #12 +# CHECK-NEXT: 1.00 - - - - - - - - mov sp, x30 +# CHECK-NEXT: 1.00 - - - - - - - - mov wsp, w20 +# CHECK-NEXT: 1.00 - - - - - - - - mov x11, sp +# CHECK-NEXT: 1.00 - - - - - - - - mov w24, wsp +# CHECK-NEXT: 1.00 - - - - - - - - add w3, w5, w7 +# CHECK-NEXT: 1.00 - - - - - - - - add wzr, w3, w5 +# CHECK-NEXT: 1.00 - - - - - - - - add w20, wzr, w4 +# CHECK-NEXT: 1.00 - - - - - - - - add w4, w6, wzr +# CHECK-NEXT: 1.00 - - - - - - - - add w11, w13, w15 +# CHECK-NEXT: 1.00 - - - - - - - - add w9, w3, wzr, lsl #10 +# CHECK-NEXT: 1.00 - - - - - - - - add w17, w29, w20, lsl #31 +# CHECK-NEXT: 1.00 - - - - - - - - add w21, w22, w23, lsr #0 +# CHECK-NEXT: 1.00 - - - - - - - - add w24, w25, w26, lsr #18 +# CHECK-NEXT: 1.00 - 
- - - - - - - add w27, w28, w29, lsr #31 +# CHECK-NEXT: 1.00 - - - - - - - - add w2, w3, w4, asr #0 +# CHECK-NEXT: 1.00 - - - - - - - - add w5, w6, w7, asr #21 +# CHECK-NEXT: 1.00 - - - - - - - - add w8, w9, w10, asr #31 +# CHECK-NEXT: 1.00 - - - - - - - - add x3, x5, x7 +# CHECK-NEXT: 1.00 - - - - - - - - add xzr, x3, x5 +# CHECK-NEXT: 1.00 - - - - - - - - add x20, xzr, x4 +# CHECK-NEXT: 1.00 - - - - - - - - add x4, x6, xzr +# CHECK-NEXT: 1.00 - - - - - - - - add x11, x13, x15 +# CHECK-NEXT: 1.00 - - - - - - - - add x9, x3, xzr, lsl #10 +# CHECK-NEXT: 1.00 - - - - - - - - add x17, x29, x20, lsl #63 +# CHECK-NEXT: 1.00 - - - - - - - - add x21, x22, x23, lsr #0 +# CHECK-NEXT: 1.00 - - - - - - - - add x24, x25, x26, lsr #18 +# CHECK-NEXT: 1.00 - - - - - - - - add x27, x28, x29, lsr #63 +# CHECK-NEXT: 1.00 - - - - - - - - add x2, x3, x4, asr #0 +# CHECK-NEXT: 1.00 - - - - - - - - add x5, x6, x7, asr #21 +# CHECK-NEXT: 1.00 - - - - - - - - add x8, x9, x10, asr #63 +# CHECK-NEXT: 1.00 - - - - - - - - adds w3, w5, w7 +# CHECK-NEXT: 1.00 - - - - - - - - cmn w3, w5 +# CHECK-NEXT: 1.00 - - - - - - - - adds w20, wzr, w4 +# CHECK-NEXT: 1.00 - - - - - - - - adds w4, w6, wzr +# CHECK-NEXT: 1.00 - - - - - - - - adds w11, w13, w15 +# CHECK-NEXT: 1.00 - - - - - - - - adds w9, w3, wzr, lsl #10 +# CHECK-NEXT: 1.00 - - - - - - - - adds w17, w29, w20, lsl #31 +# CHECK-NEXT: 1.00 - - - - - - - - adds w21, w22, w23, lsr #0 +# CHECK-NEXT: 1.00 - - - - - - - - adds w24, w25, w26, lsr #18 +# CHECK-NEXT: 1.00 - - - - - - - - adds w27, w28, w29, lsr #31 +# CHECK-NEXT: 1.00 - - - - - - - - adds w2, w3, w4, asr #0 +# CHECK-NEXT: 1.00 - - - - - - - - adds w5, w6, w7, asr #21 +# CHECK-NEXT: 1.00 - - - - - - - - adds w8, w9, w10, asr #31 +# CHECK-NEXT: 1.00 - - - - - - - - adds x3, x5, x7 +# CHECK-NEXT: 1.00 - - - - - - - - cmn x3, x5 +# CHECK-NEXT: 1.00 - - - - - - - - adds x20, xzr, x4 +# CHECK-NEXT: 1.00 - - - - - - - - adds x4, x6, xzr +# CHECK-NEXT: 1.00 - - - - - - - - adds x11, x13, x15 +# 
CHECK-NEXT: 1.00 - - - - - - - - adds x9, x3, xzr, lsl #10 +# CHECK-NEXT: 1.00 - - - - - - - - adds x17, x29, x20, lsl #63 +# CHECK-NEXT: 1.00 - - - - - - - - adds x21, x22, x23, lsr #0 +# CHECK-NEXT: 1.00 - - - - - - - - adds x24, x25, x26, lsr #18 +# CHECK-NEXT: 1.00 - - - - - - - - adds x27, x28, x29, lsr #63 +# CHECK-NEXT: 1.00 - - - - - - - - adds x2, x3, x4, asr #0 +# CHECK-NEXT: 1.00 - - - - - - - - adds x5, x6, x7, asr #21 +# CHECK-NEXT: 1.00 - - - - - - - - adds x8, x9, x10, asr #63 +# CHECK-NEXT: 1.00 - - - - - - - - sub w3, w5, w7 +# CHECK-NEXT: 1.00 - - - - - - - - sub wzr, w3, w5 +# CHECK-NEXT: 1.00 - - - - - - - - sub w4, w6, wzr +# CHECK-NEXT: 1.00 - - - - - - - - sub w11, w13, w15 +# CHECK-NEXT: 1.00 - - - - - - - - sub w9, w3, wzr, lsl #10 +# CHECK-NEXT: 1.00 - - - - - - - - sub w17, w29, w20, lsl #31 +# CHECK-NEXT: 1.00 - - - - - - - - sub w21, w22, w23, lsr #0 +# CHECK-NEXT: 1.00 - - - - - - - - sub w24, w25, w26, lsr #18 +# CHECK-NEXT: 1.00 - - - - - - - - sub w27, w28, w29, lsr #31 +# CHECK-NEXT: 1.00 - - - - - - - - sub w2, w3, w4, asr #0 +# CHECK-NEXT: 1.00 - - - - - - - - sub w5, w6, w7, asr #21 +# CHECK-NEXT: 1.00 - - - - - - - - sub w8, w9, w10, asr #31 +# CHECK-NEXT: 1.00 - - - - - - - - sub x3, x5, x7 +# CHECK-NEXT: 1.00 - - - - - - - - sub xzr, x3, x5 +# CHECK-NEXT: 1.00 - - - - - - - - sub x4, x6, xzr +# CHECK-NEXT: 1.00 - - - - - - - - sub x11, x13, x15 +# CHECK-NEXT: 1.00 - - - - - - - - sub x9, x3, xzr, lsl #10 +# CHECK-NEXT: 1.00 - - - - - - - - sub x17, x29, x20, lsl #63 +# CHECK-NEXT: 1.00 - - - - - - - - sub x21, x22, x23, lsr #0 +# CHECK-NEXT: 1.00 - - - - - - - - sub x24, x25, x26, lsr #18 +# CHECK-NEXT: 1.00 - - - - - - - - sub x27, x28, x29, lsr #63 +# CHECK-NEXT: 1.00 - - - - - - - - sub x2, x3, x4, asr #0 +# CHECK-NEXT: 1.00 - - - - - - - - sub x5, x6, x7, asr #21 +# CHECK-NEXT: 1.00 - - - - - - - - sub x8, x9, x10, asr #63 +# CHECK-NEXT: 1.00 - - - - - - - - subs w3, w5, w7 +# CHECK-NEXT: 1.00 - - - - - - - - cmp w3, w5 
+# CHECK-NEXT: 1.00 - - - - - - - - subs w4, w6, wzr +# CHECK-NEXT: 1.00 - - - - - - - - subs w11, w13, w15 +# CHECK-NEXT: 1.00 - - - - - - - - subs w9, w3, wzr, lsl #10 +# CHECK-NEXT: 1.00 - - - - - - - - subs w17, w29, w20, lsl #31 +# CHECK-NEXT: 1.00 - - - - - - - - subs w21, w22, w23, lsr #0 +# CHECK-NEXT: 1.00 - - - - - - - - subs w24, w25, w26, lsr #18 +# CHECK-NEXT: 1.00 - - - - - - - - subs w27, w28, w29, lsr #31 +# CHECK-NEXT: 1.00 - - - - - - - - subs w2, w3, w4, asr #0 +# CHECK-NEXT: 1.00 - - - - - - - - subs w5, w6, w7, asr #21 +# CHECK-NEXT: 1.00 - - - - - - - - subs w8, w9, w10, asr #31 +# CHECK-NEXT: 1.00 - - - - - - - - subs x3, x5, x7 +# CHECK-NEXT: 1.00 - - - - - - - - cmp x3, x5 +# CHECK-NEXT: 1.00 - - - - - - - - subs x4, x6, xzr +# CHECK-NEXT: 1.00 - - - - - - - - subs x11, x13, x15 +# CHECK-NEXT: 1.00 - - - - - - - - subs x9, x3, xzr, lsl #10 +# CHECK-NEXT: 1.00 - - - - - - - - subs x17, x29, x20, lsl #63 +# CHECK-NEXT: 1.00 - - - - - - - - subs x21, x22, x23, lsr #0 +# CHECK-NEXT: 1.00 - - - - - - - - subs x24, x25, x26, lsr #18 +# CHECK-NEXT: 1.00 - - - - - - - - subs x27, x28, x29, lsr #63 +# CHECK-NEXT: 1.00 - - - - - - - - subs x2, x3, x4, asr #0 +# CHECK-NEXT: 1.00 - - - - - - - - subs x5, x6, x7, asr #21 +# CHECK-NEXT: 1.00 - - - - - - - - subs x8, x9, x10, asr #63 +# CHECK-NEXT: 1.00 - - - - - - - - cmn wzr, w4 +# CHECK-NEXT: 1.00 - - - - - - - - cmn w5, wzr +# CHECK-NEXT: 1.00 - - - - - - - - cmn w6, w7 +# CHECK-NEXT: 1.00 - - - - - - - - cmn w8, w9, lsl #15 +# CHECK-NEXT: 1.00 - - - - - - - - cmn w10, w11, lsl #31 +# CHECK-NEXT: 1.00 - - - - - - - - cmn w12, w13, lsr #0 +# CHECK-NEXT: 1.00 - - - - - - - - cmn w14, w15, lsr #21 +# CHECK-NEXT: 1.00 - - - - - - - - cmn w16, w17, lsr #31 +# CHECK-NEXT: 1.00 - - - - - - - - cmn w18, w19, asr #0 +# CHECK-NEXT: 1.00 - - - - - - - - cmn w20, w21, asr #22 +# CHECK-NEXT: 1.00 - - - - - - - - cmn w22, w23, asr #31 +# CHECK-NEXT: 1.00 - - - - - - - - cmn x0, x3 +# CHECK-NEXT: 1.00 - - - - - - - 
- cmn xzr, x4 +# CHECK-NEXT: 1.00 - - - - - - - - cmn x5, xzr +# CHECK-NEXT: 1.00 - - - - - - - - cmn x6, x7 +# CHECK-NEXT: 1.00 - - - - - - - - cmn x8, x9, lsl #15 +# CHECK-NEXT: 1.00 - - - - - - - - cmn x10, x11, lsl #63 +# CHECK-NEXT: 1.00 - - - - - - - - cmn x12, x13, lsr #0 +# CHECK-NEXT: 1.00 - - - - - - - - cmn x14, x15, lsr #41 +# CHECK-NEXT: 1.00 - - - - - - - - cmn x16, x17, lsr #63 +# CHECK-NEXT: 1.00 - - - - - - - - cmn x18, x19, asr #0 +# CHECK-NEXT: 1.00 - - - - - - - - cmn x20, x21, asr #55 +# CHECK-NEXT: 1.00 - - - - - - - - cmn x22, x23, asr #63 +# CHECK-NEXT: 1.00 - - - - - - - - cmp w0, w3 +# CHECK-NEXT: 1.00 - - - - - - - - cmp wzr, w4 +# CHECK-NEXT: 1.00 - - - - - - - - cmp w5, wzr +# CHECK-NEXT: 1.00 - - - - - - - - cmp w6, w7 +# CHECK-NEXT: 1.00 - - - - - - - - cmp w8, w9, lsl #15 +# CHECK-NEXT: 1.00 - - - - - - - - cmp w10, w11, lsl #31 +# CHECK-NEXT: 1.00 - - - - - - - - cmp w12, w13, lsr #0 +# CHECK-NEXT: 1.00 - - - - - - - - cmp w14, w15, lsr #21 +# CHECK-NEXT: 1.00 - - - - - - - - cmp w18, w19, asr #0 +# CHECK-NEXT: 1.00 - - - - - - - - cmp w20, w21, asr #22 +# CHECK-NEXT: 1.00 - - - - - - - - cmp w22, w23, asr #31 +# CHECK-NEXT: 1.00 - - - - - - - - cmp x0, x3 +# CHECK-NEXT: 1.00 - - - - - - - - cmp xzr, x4 +# CHECK-NEXT: 1.00 - - - - - - - - cmp x5, xzr +# CHECK-NEXT: 1.00 - - - - - - - - cmp x6, x7 +# CHECK-NEXT: 1.00 - - - - - - - - cmp x8, x9, lsl #15 +# CHECK-NEXT: 1.00 - - - - - - - - cmp x10, x11, lsl #63 +# CHECK-NEXT: 1.00 - - - - - - - - cmp x12, x13, lsr #0 +# CHECK-NEXT: 1.00 - - - - - - - - cmp x14, x15, lsr #41 +# CHECK-NEXT: 1.00 - - - - - - - - cmp x16, x17, lsr #63 +# CHECK-NEXT: 1.00 - - - - - - - - cmp x18, x19, asr #0 +# CHECK-NEXT: 1.00 - - - - - - - - cmp x20, x21, asr #55 +# CHECK-NEXT: 1.00 - - - - - - - - cmp x22, x23, asr #63 +# CHECK-NEXT: 1.00 - - - - - - - - cmp wzr, w0 +# CHECK-NEXT: 1.00 - - - - - - - - cmp xzr, x0 +# CHECK-NEXT: 1.00 - - - - - - - - adc w29, w27, w25 +# CHECK-NEXT: 1.00 - - - - - - - - 
adc wzr, w3, w4 +# CHECK-NEXT: 1.00 - - - - - - - - adc w9, wzr, w10 +# CHECK-NEXT: 1.00 - - - - - - - - adc w20, w0, wzr +# CHECK-NEXT: 1.00 - - - - - - - - adc x29, x27, x25 +# CHECK-NEXT: 1.00 - - - - - - - - adc xzr, x3, x4 +# CHECK-NEXT: 1.00 - - - - - - - - adc x9, xzr, x10 +# CHECK-NEXT: 1.00 - - - - - - - - adc x20, x0, xzr +# CHECK-NEXT: 1.00 - - - - - - - - adcs w29, w27, w25 +# CHECK-NEXT: 1.00 - - - - - - - - adcs wzr, w3, w4 +# CHECK-NEXT: 1.00 - - - - - - - - adcs w9, wzr, w10 +# CHECK-NEXT: 1.00 - - - - - - - - adcs w20, w0, wzr +# CHECK-NEXT: 1.00 - - - - - - - - adcs x29, x27, x25 +# CHECK-NEXT: 1.00 - - - - - - - - adcs xzr, x3, x4 +# CHECK-NEXT: 1.00 - - - - - - - - adcs x9, xzr, x10 +# CHECK-NEXT: 1.00 - - - - - - - - adcs x20, x0, xzr +# CHECK-NEXT: 1.00 - - - - - - - - sbc w29, w27, w25 +# CHECK-NEXT: 1.00 - - - - - - - - sbc wzr, w3, w4 +# CHECK-NEXT: 1.00 - - - - - - - - ngc w9, w10 +# CHECK-NEXT: 1.00 - - - - - - - - sbc w20, w0, wzr +# CHECK-NEXT: 1.00 - - - - - - - - sbc x29, x27, x25 +# CHECK-NEXT: 1.00 - - - - - - - - sbc xzr, x3, x4 +# CHECK-NEXT: 1.00 - - - - - - - - ngc x9, x10 +# CHECK-NEXT: 1.00 - - - - - - - - sbc x20, x0, xzr +# CHECK-NEXT: 1.00 - - - - - - - - sbcs w29, w27, w25 +# CHECK-NEXT: 1.00 - - - - - - - - sbcs wzr, w3, w4 +# CHECK-NEXT: 1.00 - - - - - - - - ngcs w9, w10 +# CHECK-NEXT: 1.00 - - - - - - - - sbcs w20, w0, wzr +# CHECK-NEXT: 1.00 - - - - - - - - sbcs x29, x27, x25 +# CHECK-NEXT: 1.00 - - - - - - - - sbcs xzr, x3, x4 +# CHECK-NEXT: 1.00 - - - - - - - - ngcs x9, x10 +# CHECK-NEXT: 1.00 - - - - - - - - sbcs x20, x0, xzr +# CHECK-NEXT: 1.00 - - - - - - - - ngc w3, w12 +# CHECK-NEXT: 1.00 - - - - - - - - ngc wzr, w9 +# CHECK-NEXT: 1.00 - - - - - - - - ngc w23, wzr +# CHECK-NEXT: 1.00 - - - - - - - - ngc x29, x30 +# CHECK-NEXT: 1.00 - - - - - - - - ngc xzr, x0 +# CHECK-NEXT: 1.00 - - - - - - - - ngc x0, xzr +# CHECK-NEXT: 1.00 - - - - - - - - ngcs w3, w12 +# CHECK-NEXT: 1.00 - - - - - - - - ngcs wzr, w9 +# 
CHECK-NEXT: 1.00 - - - - - - - - ngcs w23, wzr +# CHECK-NEXT: 1.00 - - - - - - - - ngcs x29, x30 +# CHECK-NEXT: 1.00 - - - - - - - - ngcs xzr, x0 +# CHECK-NEXT: 1.00 - - - - - - - - ngcs x0, xzr +# CHECK-NEXT: 1.00 - - - - - - - - sbfx x1, x2, #3, #2 +# CHECK-NEXT: 1.00 - - - - - - - - asr x3, x4, #63 +# CHECK-NEXT: 1.00 - - - - - - - - asr wzr, wzr, #31 +# CHECK-NEXT: 1.00 - - - - - - - - sbfx w12, w9, #0, #1 +# CHECK-NEXT: 1.00 - - - - - - - - ubfiz x4, x5, #52, #11 +# CHECK-NEXT: 1.00 - - - - - - - - ubfx xzr, x4, #0, #1 +# CHECK-NEXT: 1.00 - - - - - - - - ubfiz x4, xzr, #1, #6 +# CHECK-NEXT: 1.00 - - - - - - - - lsr x5, x6, #12 +# CHECK-NEXT: 1.00 - - - - - - - - bfi x4, x5, #52, #11 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil xzr, x4, #0, #1 +# CHECK-NEXT: 1.00 - - - - - - - - bfc x4, #1, #6 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil x5, x6, #12, #52 +# CHECK-NEXT: 1.00 - - - - - - - - sxtb w1, w2 +# CHECK-NEXT: 1.00 - - - - - - - - sxtb xzr, w3 +# CHECK-NEXT: 1.00 - - - - - - - - sxth w9, w10 +# CHECK-NEXT: 1.00 - - - - - - - - sxth x0, w1 +# CHECK-NEXT: 1.00 - - - - - - - - sxtw x3, w30 +# CHECK-NEXT: 1.00 - - - - - - - - uxtb w1, w2 +# CHECK-NEXT: 1.00 - - - - - - - - uxth w9, w10 +# CHECK-NEXT: 1.00 - - - - - - - - ubfx x3, x30, #0, #32 +# CHECK-NEXT: 1.00 - - - - - - - - asr w3, w2, #0 +# CHECK-NEXT: 1.00 - - - - - - - - asr w9, w10, #31 +# CHECK-NEXT: 1.00 - - - - - - - - asr x20, x21, #63 +# CHECK-NEXT: 1.00 - - - - - - - - asr w1, wzr, #3 +# CHECK-NEXT: 1.00 - - - - - - - - lsr w3, w2, #0 +# CHECK-NEXT: 1.00 - - - - - - - - lsr w9, w10, #31 +# CHECK-NEXT: 1.00 - - - - - - - - lsr x20, x21, #63 +# CHECK-NEXT: 1.00 - - - - - - - - lsr wzr, wzr, #3 +# CHECK-NEXT: 1.00 - - - - - - - - lsr w3, w2, #0 +# CHECK-NEXT: 1.00 - - - - - - - - lsl w9, w10, #31 +# CHECK-NEXT: 1.00 - - - - - - - - lsl x20, x21, #63 +# CHECK-NEXT: 1.00 - - - - - - - - lsl w1, wzr, #3 +# CHECK-NEXT: 1.00 - - - - - - - - sbfx w9, w10, #0, #1 +# CHECK-NEXT: 1.00 - - - - - - - - sbfiz x2, 
x3, #63, #1 +# CHECK-NEXT: 1.00 - - - - - - - - asr x19, x20, #0 +# CHECK-NEXT: 1.00 - - - - - - - - sbfiz x9, x10, #5, #59 +# CHECK-NEXT: 1.00 - - - - - - - - asr w9, w10, #0 +# CHECK-NEXT: 1.00 - - - - - - - - sbfiz w11, w12, #31, #1 +# CHECK-NEXT: 1.00 - - - - - - - - sbfiz w13, w14, #29, #3 +# CHECK-NEXT: 1.00 - - - - - - - - sbfiz xzr, xzr, #10, #11 +# CHECK-NEXT: 1.00 - - - - - - - - sbfx w9, w10, #0, #1 +# CHECK-NEXT: 1.00 - - - - - - - - asr x2, x3, #63 +# CHECK-NEXT: 1.00 - - - - - - - - asr x19, x20, #0 +# CHECK-NEXT: 1.00 - - - - - - - - asr x9, x10, #5 +# CHECK-NEXT: 1.00 - - - - - - - - asr w9, w10, #0 +# CHECK-NEXT: 1.00 - - - - - - - - asr w11, w12, #31 +# CHECK-NEXT: 1.00 - - - - - - - - asr w13, w14, #29 +# CHECK-NEXT: 1.00 - - - - - - - - sbfx xzr, xzr, #10, #11 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil w9, w10, #0, #1 +# CHECK-NEXT: 1.00 - - - - - - - - bfi x2, x3, #63, #1 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil x19, x20, #0, #64 +# CHECK-NEXT: 1.00 - - - - - - - - bfi x9, x10, #5, #59 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil w9, w10, #0, #32 +# CHECK-NEXT: 1.00 - - - - - - - - bfi w11, w12, #31, #1 +# CHECK-NEXT: 1.00 - - - - - - - - bfi w13, w14, #29, #3 +# CHECK-NEXT: 1.00 - - - - - - - - bfc xzr, #10, #11 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil w9, w10, #0, #1 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil x2, x3, #63, #1 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil x19, x20, #0, #64 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil x9, x10, #5, #59 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil w9, w10, #0, #32 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil w11, w12, #31, #1 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil w13, w14, #29, #3 +# CHECK-NEXT: 1.00 - - - - - - - - bfxil xzr, xzr, #10, #11 +# CHECK-NEXT: 1.00 - - - - - - - - ubfx w9, w10, #0, #1 +# CHECK-NEXT: 1.00 - - - - - - - - lsl x2, x3, #63 +# CHECK-NEXT: 1.00 - - - - - - - - lsr x19, x20, #0 +# CHECK-NEXT: 1.00 - - - - - - - - lsl x9, x10, #5 +# CHECK-NEXT: 1.00 - - - - - - - - lsr w9, w10, #0 +# 
CHECK-NEXT: 1.00 - - - - - - - - lsl w11, w12, #31 +# CHECK-NEXT: 1.00 - - - - - - - - lsl w13, w14, #29 +# CHECK-NEXT: 1.00 - - - - - - - - ubfiz xzr, xzr, #10, #11 +# CHECK-NEXT: 1.00 - - - - - - - - ubfx w9, w10, #0, #1 +# CHECK-NEXT: 1.00 - - - - - - - - lsr x2, x3, #63 +# CHECK-NEXT: 1.00 - - - - - - - - lsr x19, x20, #0 +# CHECK-NEXT: 1.00 - - - - - - - - lsr x9, x10, #5 +# CHECK-NEXT: 1.00 - - - - - - - - lsr w9, w10, #0 +# CHECK-NEXT: 1.00 - - - - - - - - lsr w11, w12, #31 +# CHECK-NEXT: 1.00 - - - - - - - - lsr w13, w14, #29 +# CHECK-NEXT: 1.00 - - - - - - - - ubfx xzr, xzr, #10, #11 +# CHECK-NEXT: - 1.00 - - - - - - - cbz w5, #4 +# CHECK-NEXT: - 1.00 - - - - - - - cbz x5, #0 +# CHECK-NEXT: - 1.00 - - - - - - - cbnz x2, #-4 +# CHECK-NEXT: - 1.00 - - - - - - - cbnz x26, #1048572 +# CHECK-NEXT: - 1.00 - - - - - - - cbz wzr, #0 +# CHECK-NEXT: - 1.00 - - - - - - - cbnz xzr, #0 +# CHECK-NEXT: - 1.00 - - - - - - - b.ne #4 +# CHECK-NEXT: - 1.00 - - - - - - - b.ge #1048572 +# CHECK-NEXT: - 1.00 - - - - - - - b.ge #-4 +# CHECK-NEXT: 1.00 - - - - - - - - ccmp w1, #31, #0, eq +# CHECK-NEXT: 1.00 - - - - - - - - ccmp w3, #0, #15, hs +# CHECK-NEXT: 1.00 - - - - - - - - ccmp wzr, #15, #13, hs +# CHECK-NEXT: 1.00 - - - - - - - - ccmp x9, #31, #0, le +# CHECK-NEXT: 1.00 - - - - - - - - ccmp x3, #0, #15, gt +# CHECK-NEXT: 1.00 - - - - - - - - ccmp xzr, #5, #7, ne +# CHECK-NEXT: 1.00 - - - - - - - - ccmn w1, #31, #0, eq +# CHECK-NEXT: 1.00 - - - - - - - - ccmn w3, #0, #15, hs +# CHECK-NEXT: 1.00 - - - - - - - - ccmn wzr, #15, #13, hs +# CHECK-NEXT: 1.00 - - - - - - - - ccmn x9, #31, #0, le +# CHECK-NEXT: 1.00 - - - - - - - - ccmn x3, #0, #15, gt +# CHECK-NEXT: 1.00 - - - - - - - - ccmn xzr, #5, #7, ne +# CHECK-NEXT: 1.00 - - - - - - - - ccmp w1, wzr, #0, eq +# CHECK-NEXT: 1.00 - - - - - - - - ccmp w3, w0, #15, hs +# CHECK-NEXT: 1.00 - - - - - - - - ccmp wzr, w15, #13, hs +# CHECK-NEXT: 1.00 - - - - - - - - ccmp x9, xzr, #0, le +# CHECK-NEXT: 1.00 - - - - - - - - ccmp x3, 
x0, #15, gt +# CHECK-NEXT: 1.00 - - - - - - - - ccmp xzr, x5, #7, ne +# CHECK-NEXT: 1.00 - - - - - - - - ccmn w1, wzr, #0, eq +# CHECK-NEXT: 1.00 - - - - - - - - ccmn w3, w0, #15, hs +# CHECK-NEXT: 1.00 - - - - - - - - ccmn wzr, w15, #13, hs +# CHECK-NEXT: 1.00 - - - - - - - - ccmn x9, xzr, #0, le +# CHECK-NEXT: 1.00 - - - - - - - - ccmn x3, x0, #15, gt +# CHECK-NEXT: 1.00 - - - - - - - - ccmn xzr, x5, #7, ne +# CHECK-NEXT: 1.00 - - - - - - - - csel w1, w0, w19, ne +# CHECK-NEXT: 1.00 - - - - - - - - csel wzr, w5, w9, eq +# CHECK-NEXT: 1.00 - - - - - - - - csel w9, wzr, w30, gt +# CHECK-NEXT: 1.00 - - - - - - - - csel w1, w28, wzr, mi +# CHECK-NEXT: 1.00 - - - - - - - - csel x19, x23, x29, lt +# CHECK-NEXT: 1.00 - - - - - - - - csel xzr, x3, x4, ge +# CHECK-NEXT: 1.00 - - - - - - - - csel x5, xzr, x6, hs +# CHECK-NEXT: 1.00 - - - - - - - - csel x7, x8, xzr, lo +# CHECK-NEXT: 1.00 - - - - - - - - csinc w1, w0, w19, ne +# CHECK-NEXT: 1.00 - - - - - - - - csinc wzr, w5, w9, eq +# CHECK-NEXT: 1.00 - - - - - - - - csinc w9, wzr, w30, gt +# CHECK-NEXT: 1.00 - - - - - - - - csinc w1, w28, wzr, mi +# CHECK-NEXT: 1.00 - - - - - - - - csinc x19, x23, x29, lt +# CHECK-NEXT: 1.00 - - - - - - - - csinc xzr, x3, x4, ge +# CHECK-NEXT: 1.00 - - - - - - - - csinc x5, xzr, x6, hs +# CHECK-NEXT: 1.00 - - - - - - - - csinc x7, x8, xzr, lo +# CHECK-NEXT: 1.00 - - - - - - - - csinv w1, w0, w19, ne +# CHECK-NEXT: 1.00 - - - - - - - - csinv wzr, w5, w9, eq +# CHECK-NEXT: 1.00 - - - - - - - - csinv w9, wzr, w30, gt +# CHECK-NEXT: 1.00 - - - - - - - - csinv w1, w28, wzr, mi +# CHECK-NEXT: 1.00 - - - - - - - - csinv x19, x23, x29, lt +# CHECK-NEXT: 1.00 - - - - - - - - csinv xzr, x3, x4, ge +# CHECK-NEXT: 1.00 - - - - - - - - csinv x5, xzr, x6, hs +# CHECK-NEXT: 1.00 - - - - - - - - csinv x7, x8, xzr, lo +# CHECK-NEXT: 1.00 - - - - - - - - csneg w1, w0, w19, ne +# CHECK-NEXT: 1.00 - - - - - - - - csneg wzr, w5, w9, eq +# CHECK-NEXT: 1.00 - - - - - - - - csneg w9, wzr, w30, gt +# CHECK-NEXT: 
1.00 - - - - - - - - csneg w1, w28, wzr, mi +# CHECK-NEXT: 1.00 - - - - - - - - csneg x19, x23, x29, lt +# CHECK-NEXT: 1.00 - - - - - - - - csneg xzr, x3, x4, ge +# CHECK-NEXT: 1.00 - - - - - - - - csneg x5, xzr, x6, hs +# CHECK-NEXT: 1.00 - - - - - - - - csneg x7, x8, xzr, lo +# CHECK-NEXT: 1.00 - - - - - - - - cset w3, eq +# CHECK-NEXT: 1.00 - - - - - - - - cset x9, pl +# CHECK-NEXT: 1.00 - - - - - - - - csetm w20, ne +# CHECK-NEXT: 1.00 - - - - - - - - csetm x30, ge +# CHECK-NEXT: 1.00 - - - - - - - - csinc w2, wzr, wzr, al +# CHECK-NEXT: 1.00 - - - - - - - - csinv x3, xzr, xzr, nv +# CHECK-NEXT: 1.00 - - - - - - - - cinc w3, w5, gt +# CHECK-NEXT: 1.00 - - - - - - - - cinc wzr, w4, le +# CHECK-NEXT: 1.00 - - - - - - - - cset w9, lt +# CHECK-NEXT: 1.00 - - - - - - - - cinc x3, x5, gt +# CHECK-NEXT: 1.00 - - - - - - - - cinc xzr, x4, le +# CHECK-NEXT: 1.00 - - - - - - - - cset x9, lt +# CHECK-NEXT: 1.00 - - - - - - - - csinc w5, w6, w6, nv +# CHECK-NEXT: 1.00 - - - - - - - - csinc x1, x2, x2, al +# CHECK-NEXT: 1.00 - - - - - - - - cinv w3, w5, gt +# CHECK-NEXT: 1.00 - - - - - - - - cinv wzr, w4, le +# CHECK-NEXT: 1.00 - - - - - - - - csetm w9, lt +# CHECK-NEXT: 1.00 - - - - - - - - cinv x3, x5, gt +# CHECK-NEXT: 1.00 - - - - - - - - cinv xzr, x4, le +# CHECK-NEXT: 1.00 - - - - - - - - csetm x9, lt +# CHECK-NEXT: 1.00 - - - - - - - - csinv x1, x0, x0, al +# CHECK-NEXT: 1.00 - - - - - - - - csinv w9, w8, w8, nv +# CHECK-NEXT: 1.00 - - - - - - - - cneg w3, w5, gt +# CHECK-NEXT: 1.00 - - - - - - - - cneg wzr, w4, le +# CHECK-NEXT: 1.00 - - - - - - - - cneg w9, wzr, lt +# CHECK-NEXT: 1.00 - - - - - - - - cneg x3, x5, gt +# CHECK-NEXT: 1.00 - - - - - - - - cneg xzr, x4, le +# CHECK-NEXT: 1.00 - - - - - - - - cneg x9, xzr, lt +# CHECK-NEXT: 1.00 - - - - - - - - csneg x4, x8, x8, al +# CHECK-NEXT: 1.00 - - - - - - - - csinv w9, w8, w8, nv +# CHECK-NEXT: 1.00 - - - - - - - - rbit w0, w7 +# CHECK-NEXT: 1.00 - - - - - - - - rbit x18, x3 +# CHECK-NEXT: 1.00 - - - - - - - - 
rev16 w17, w1 +# CHECK-NEXT: 1.00 - - - - - - - - rev16 x5, x2 +# CHECK-NEXT: 1.00 - - - - - - - - rev w18, w0 +# CHECK-NEXT: 1.00 - - - - - - - - rev32 x20, x1 +# CHECK-NEXT: 1.00 - - - - - - - - rev x22, x2 +# CHECK-NEXT: 1.00 - - - - - - - - clz w24, w3 +# CHECK-NEXT: 1.00 - - - - - - - - clz x26, x4 +# CHECK-NEXT: 1.00 - - - - - - - - cls w3, w5 +# CHECK-NEXT: 1.00 - - - - - - - - cls x20, x5 +# CHECK-NEXT: - - 12.00 - - - - - - udiv w0, w7, w10 +# CHECK-NEXT: - - 20.00 - - - - - - udiv x9, x22, x4 +# CHECK-NEXT: - - 12.00 - - - - - - sdiv w12, w21, w0 +# CHECK-NEXT: - - 20.00 - - - - - - sdiv x13, x2, x1 +# CHECK-NEXT: 1.00 - - - - - - - - lsl w11, w12, w13 +# CHECK-NEXT: 1.00 - - - - - - - - lsl x14, x15, x16 +# CHECK-NEXT: 1.00 - - - - - - - - lsr w17, w18, w19 +# CHECK-NEXT: 1.00 - - - - - - - - lsr x20, x21, x22 +# CHECK-NEXT: 1.00 - - - - - - - - asr w23, w24, w25 +# CHECK-NEXT: 1.00 - - - - - - - - asr x26, x27, x28 +# CHECK-NEXT: 1.00 - - - - - - - - ror w0, w1, w2 +# CHECK-NEXT: 1.00 - - - - - - - - ror x3, x4, x5 +# CHECK-NEXT: 1.00 - - - - - - - - lsl w6, w7, w8 +# CHECK-NEXT: 1.00 - - - - - - - - lsl x9, x10, x11 +# CHECK-NEXT: 1.00 - - - - - - - - lsr w12, w13, w14 +# CHECK-NEXT: 1.00 - - - - - - - - lsr x15, x16, x17 +# CHECK-NEXT: 1.00 - - - - - - - - asr w18, w19, w20 +# CHECK-NEXT: 1.00 - - - - - - - - asr x21, x22, x23 +# CHECK-NEXT: 1.00 - - - - - - - - ror w24, w25, w26 +# CHECK-NEXT: 1.00 - - - - - - - - ror x27, x28, x29 +# CHECK-NEXT: - - - - 2.00 - - - - smulh x30, x29, x28 +# CHECK-NEXT: - - - - 2.00 - - - - smulh xzr, x27, x26 +# CHECK-NEXT: - - - - 2.00 - - - - umulh x30, x29, x28 +# CHECK-NEXT: - - - - 2.00 - - - - umulh x23, x30, xzr +# CHECK-NEXT: - - - - 1.00 - - - - madd w1, w3, w7, w4 +# CHECK-NEXT: - - - - 1.00 - - - - madd wzr, w0, w9, w11 +# CHECK-NEXT: - - - - 1.00 - - - - madd w13, wzr, w4, w4 +# CHECK-NEXT: - - - - 1.00 - - - - madd w19, w30, wzr, w29 +# CHECK-NEXT: - - - - 1.00 - - - - mul w4, w5, w6 +# CHECK-NEXT: - - - 
- 2.00 - - - - madd x1, x3, x7, x4 +# CHECK-NEXT: - - - - 2.00 - - - - madd xzr, x0, x9, x11 +# CHECK-NEXT: - - - - 2.00 - - - - madd x13, xzr, x4, x4 +# CHECK-NEXT: - - - - 2.00 - - - - madd x19, x30, xzr, x29 +# CHECK-NEXT: - - - - 2.00 - - - - mul x4, x5, x6 +# CHECK-NEXT: - - - - 1.00 - - - - msub w1, w3, w7, w4 +# CHECK-NEXT: - - - - 1.00 - - - - msub wzr, w0, w9, w11 +# CHECK-NEXT: - - - - 1.00 - - - - msub w13, wzr, w4, w4 +# CHECK-NEXT: - - - - 1.00 - - - - msub w19, w30, wzr, w29 +# CHECK-NEXT: - - - - 1.00 - - - - mneg w4, w5, w6 +# CHECK-NEXT: - - - - 2.00 - - - - msub x1, x3, x7, x4 +# CHECK-NEXT: - - - - 2.00 - - - - msub xzr, x0, x9, x11 +# CHECK-NEXT: - - - - 2.00 - - - - msub x13, xzr, x4, x4 +# CHECK-NEXT: - - - - 2.00 - - - - msub x19, x30, xzr, x29 +# CHECK-NEXT: - - - - 2.00 - - - - mneg x4, x5, x6 +# CHECK-NEXT: - - - - 1.00 - - - - smaddl x3, w5, w2, x9 +# CHECK-NEXT: - - - - 1.00 - - - - smaddl xzr, w10, w11, x12 +# CHECK-NEXT: - - - - 1.00 - - - - smaddl x13, wzr, w14, x15 +# CHECK-NEXT: - - - - 1.00 - - - - smaddl x16, w17, wzr, x18 +# CHECK-NEXT: - - - - 1.00 - - - - smull x19, w20, w21 +# CHECK-NEXT: - - - - 1.00 - - - - smsubl x3, w5, w2, x9 +# CHECK-NEXT: - - - - 1.00 - - - - smsubl xzr, w10, w11, x12 +# CHECK-NEXT: - - - - 1.00 - - - - smsubl x13, wzr, w14, x15 +# CHECK-NEXT: - - - - 1.00 - - - - smsubl x16, w17, wzr, x18 +# CHECK-NEXT: - - - - 1.00 - - - - smnegl x19, w20, w21 +# CHECK-NEXT: - - - - 1.00 - - - - umaddl x3, w5, w2, x9 +# CHECK-NEXT: - - - - 1.00 - - - - umaddl xzr, w10, w11, x12 +# CHECK-NEXT: - - - - 1.00 - - - - umaddl x13, wzr, w14, x15 +# CHECK-NEXT: - - - - 1.00 - - - - umaddl x16, w17, wzr, x18 +# CHECK-NEXT: - - - - 1.00 - - - - umull x19, w20, w21 +# CHECK-NEXT: - - - - 1.00 - - - - umsubl x3, w5, w2, x9 +# CHECK-NEXT: - - - - 1.00 - - - - umsubl x16, w17, wzr, x18 +# CHECK-NEXT: - - - - 1.00 - - - - umnegl x19, w20, w21 +# CHECK-NEXT: - - - - 2.00 - - - - smulh x30, x29, x28 +# CHECK-NEXT: - - - - 2.00 - - - - 
smulh x23, x22, xzr +# CHECK-NEXT: - - - - 2.00 - - - - umulh x23, x22, xzr +# CHECK-NEXT: - - - - 2.00 - - - - mul x19, x20, xzr +# CHECK-NEXT: - - - - 1.00 - - - - mneg w21, w22, w23 +# CHECK-NEXT: - - - - 1.00 - - - - smull x11, w13, w17 +# CHECK-NEXT: - - - - 1.00 - - - - umull x11, w13, w17 +# CHECK-NEXT: - - - - 1.00 - - - - smnegl x11, w13, w17 +# CHECK-NEXT: - - - - 1.00 - - - - umnegl x11, w13, w17 +# CHECK-NEXT: 1.00 - - - - - - - - extr w3, w5, w7, #0 +# CHECK-NEXT: 1.00 - - - - - - - - extr w11, w13, w17, #31 +# CHECK-NEXT: 1.00 - - - - - - - - extr x3, x5, x7, #15 +# CHECK-NEXT: 1.00 - - - - - - - - extr x11, x13, x17, #63 +# CHECK-NEXT: 1.00 - - - - - - - - ror x19, x23, #24 +# CHECK-NEXT: 1.00 - - - - - - - - ror x29, xzr, #63 +# CHECK-NEXT: 1.00 - - - - - - - - ror w9, w13, #31 +# CHECK-NEXT: - - - - - - 1.00 - - fcmp s3, s5 +# CHECK-NEXT: - - - - - - 1.00 - - fcmp s31, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmp s31, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmpe s29, s30 +# CHECK-NEXT: - - - - - - 1.00 - - fcmpe s15, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmpe s15, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmp d4, d12 +# CHECK-NEXT: - - - - - - 1.00 - - fcmp d23, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmp d23, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmpe d26, d22 +# CHECK-NEXT: - - - - - - 1.00 - - fcmpe d29, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmpe d29, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fccmp s1, s31, #0, eq +# CHECK-NEXT: - - - - - - 1.00 - - fccmp s3, s0, #15, hs +# CHECK-NEXT: - - - - - - 1.00 - - fccmp s31, s15, #13, hs +# CHECK-NEXT: - - - - - - 1.00 - - fccmp d9, d31, #0, le +# CHECK-NEXT: - - - - - - 1.00 - - fccmp d3, d0, #15, gt +# CHECK-NEXT: - - - - - - 1.00 - - fccmp d31, d5, #7, ne +# CHECK-NEXT: - - - - - - 1.00 - - fccmpe s1, s31, #0, eq +# CHECK-NEXT: - - - - - - 1.00 - - fccmpe s3, s0, #15, hs +# CHECK-NEXT: - - - - - - 1.00 - - fccmpe s31, s15, #13, hs +# CHECK-NEXT: - - - - - - 1.00 - - fccmpe d9, d31, 
#0, le +# CHECK-NEXT: - - - - - - 1.00 - - fccmpe d3, d0, #15, gt +# CHECK-NEXT: - - - - - - 1.00 - - fccmpe d31, d5, #7, ne +# CHECK-NEXT: - - - - - - 1.00 - - fcsel s3, s20, s9, pl +# CHECK-NEXT: - - - - - - 1.00 - - fcsel d9, d10, d11, mi +# CHECK-NEXT: - - - - - - 1.00 - - fmov s0, s1 +# CHECK-NEXT: - - - - - - 1.00 - - fabs s2, s3 +# CHECK-NEXT: - - - - - - 1.00 - - fneg s4, s5 +# CHECK-NEXT: - - - - - - - - 29.00 fsqrt s6, s7 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt d8, s9 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt h10, s11 +# CHECK-NEXT: - - - - - - 1.00 - - frintn s12, s13 +# CHECK-NEXT: - - - - - - 1.00 - - frintp s14, s15 +# CHECK-NEXT: - - - - - - 1.00 - - frintm s16, s17 +# CHECK-NEXT: - - - - - - 1.00 - - frintz s18, s19 +# CHECK-NEXT: - - - - - - 1.00 - - frinta s20, s21 +# CHECK-NEXT: - - - - - - 1.00 - - frintx s22, s23 +# CHECK-NEXT: - - - - - - 1.00 - - frinti s24, s25 +# CHECK-NEXT: - - - - - - 1.00 - - fmov d0, d1 +# CHECK-NEXT: - - - - - - 1.00 - - fabs d2, d3 +# CHECK-NEXT: - - - - - - 1.00 - - fneg d4, d5 +# CHECK-NEXT: - - - - - - - - 29.00 fsqrt d6, d7 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt s8, d9 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt h10, d11 +# CHECK-NEXT: - - - - - - 1.00 - - frintn d12, d13 +# CHECK-NEXT: - - - - - - 1.00 - - frintp d14, d15 +# CHECK-NEXT: - - - - - - 1.00 - - frintm d16, d17 +# CHECK-NEXT: - - - - - - 1.00 - - frintz d18, d19 +# CHECK-NEXT: - - - - - - 1.00 - - frinta d20, d21 +# CHECK-NEXT: - - - - - - 1.00 - - frintx d22, d23 +# CHECK-NEXT: - - - - - - 1.00 - - frinti d24, d25 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt s26, h27 +# CHECK-NEXT: - - - - - - 1.00 - - fcvt d28, h29 +# CHECK-NEXT: - - - - - - - 1.00 - fmul s20, s19, s17 +# CHECK-NEXT: - - - - - - - - 10.00 fdiv s1, s2, s3 +# CHECK-NEXT: - - - - - - 1.00 - - fadd s4, s5, s6 +# CHECK-NEXT: - - - - - - 1.00 - - fsub s7, s8, s9 +# CHECK-NEXT: - - - - - - 1.00 - - fmax s10, s11, s12 +# CHECK-NEXT: - - - - - - 1.00 - - fmin s13, s14, s15 +# CHECK-NEXT: - - - - 
- - 1.00 - - fmaxnm s16, s17, s18 +# CHECK-NEXT: - - - - - - 1.00 - - fminnm s19, s20, s21 +# CHECK-NEXT: - - - - - - - 1.00 - fnmul s22, s23, s2 +# CHECK-NEXT: - - - - - - - 1.00 - fmul d20, d19, d17 +# CHECK-NEXT: - - - - - - - - 19.00 fdiv d1, d2, d3 +# CHECK-NEXT: - - - - - - 1.00 - - fadd d4, d5, d6 +# CHECK-NEXT: - - - - - - 1.00 - - fsub d7, d8, d9 +# CHECK-NEXT: - - - - - - 1.00 - - fmax d10, d11, d12 +# CHECK-NEXT: - - - - - - 1.00 - - fmin d13, d14, d15 +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm d16, d17, d18 +# CHECK-NEXT: - - - - - - 1.00 - - fminnm d19, d20, d21 +# CHECK-NEXT: - - - - - - - 1.00 - fnmul d22, d23, d24 +# CHECK-NEXT: - - - - - - - 1.00 - fmadd s3, s5, s6, s31 +# CHECK-NEXT: - - - - - - - 1.00 - fmadd d3, d13, d0, d23 +# CHECK-NEXT: - - - - - - - 1.00 - fmsub s3, s5, s6, s31 +# CHECK-NEXT: - - - - - - - 1.00 - fmsub d3, d13, d0, d23 +# CHECK-NEXT: - - - - - - - 1.00 - fnmadd s3, s5, s6, s31 +# CHECK-NEXT: - - - - - - - 1.00 - fnmadd d3, d13, d0, d23 +# CHECK-NEXT: - - - - - - - 1.00 - fnmsub s3, s5, s6, s31 +# CHECK-NEXT: - - - - - - - 1.00 - fnmsub d3, d13, d0, d23 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs w3, h5, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs wzr, h20, #13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs w19, h0, #32 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs x3, h5, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs x12, h30, #45 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs x19, h0, #64 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs w3, s5, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs wzr, s20, #13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs w19, s0, #32 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs x3, s5, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs x12, s30, #45 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs x19, s0, #64 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs w3, d5, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs wzr, d20, #13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs w19, d0, #32 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs x3, d5, #1 
+# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs x12, d30, #45 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs x19, d0, #64 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu w3, h5, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu wzr, h20, #13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu w19, h0, #32 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu x3, h5, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu x12, h30, #45 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu x19, h0, #64 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu w3, s5, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu wzr, s20, #13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu w19, s0, #32 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu x3, s5, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu x12, s30, #45 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu x19, s0, #64 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu w3, d5, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu wzr, d20, #13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu w19, d0, #32 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu x3, d5, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu x12, d30, #45 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu x19, d0, #64 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf h23, w19, #1 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf h31, wzr, #20 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf h14, w0, #32 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf h23, x19, #1 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf h31, xzr, #20 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf h14, x0, #64 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf s23, w19, #1 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf s31, wzr, #20 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf s14, w0, #32 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf s23, x19, #1 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf s31, xzr, #20 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf s14, x0, #64 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf d23, w19, #1 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf d31, wzr, #20 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf d14, w0, #32 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf d23, x19, #1 
+# CHECK-NEXT: - - - - - - 1.00 - - scvtf d31, xzr, #20 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf d14, x0, #64 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf h23, w19, #1 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf h31, wzr, #20 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf h14, w0, #32 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf h23, x19, #1 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf h31, xzr, #20 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf h14, x0, #64 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf s23, w19, #1 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf s31, wzr, #20 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf s14, w0, #32 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf s23, x19, #1 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf s31, xzr, #20 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf s14, x0, #64 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf d23, w19, #1 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf d31, wzr, #20 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf d14, w0, #32 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf d23, x19, #1 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf d31, xzr, #20 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf d14, x0, #64 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns w3, h31 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns xzr, h12 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu wzr, h12 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu x0, h0 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps wzr, h9 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps x12, h20 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu w30, h23 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu x29, h3 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtms w2, h3 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtms x4, h5 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu w6, h7 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu x8, h9 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs w10, h11 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs x12, h13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu w14, h15 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu x15, h16 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf h17, w18 +# CHECK-NEXT: - - - - 
- - 1.00 - - scvtf h19, x20 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf h21, w22 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf h23, x24 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas w25, h26 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas x27, h28 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau w29, h30 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau xzr, h0 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns w3, s31 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns xzr, s12 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu wzr, s12 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu x0, s0 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps wzr, s9 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps x12, s20 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu w30, s23 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu x29, s3 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtms w2, s3 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtms x4, s5 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu w6, s7 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu x8, s9 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs w10, s11 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs x12, s13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu w14, s15 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu x15, s16 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf s17, w18 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf s19, x20 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf s21, w22 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf s23, x24 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas w25, s26 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas x27, s28 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau w29, s30 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau xzr, s0 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns w3, d31 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns xzr, d12 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu wzr, d12 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu x0, d0 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps wzr, d9 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps x12, d20 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu w30, d23 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu x29, d3 +# CHECK-NEXT: - - - 
- - - 1.00 - - fcvtms w2, d3 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtms x4, d5 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu w6, d7 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu x8, d9 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs w10, d11 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs x12, d13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu w14, d15 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu x15, d16 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf d17, w18 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf d19, x20 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf d21, w22 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf d23, x24 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas w25, d26 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas x27, d28 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau w29, d30 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau xzr, d0 +# CHECK-NEXT: - - - - - - 1.00 - - fmov w3, s9 +# CHECK-NEXT: - - - - - - 1.00 - - fmov s9, w3 +# CHECK-NEXT: - - - - - - 1.00 - - fmov x20, d31 +# CHECK-NEXT: - - - - - - 1.00 - - fmov d1, x15 +# CHECK-NEXT: - - - - - - 1.00 - - fmov x3, v12.d[1] +# CHECK-NEXT: - - - - - - 1.00 - - fmov v1.d[1], x19 +# CHECK-NEXT: - - - - - - 1.00 - - fmov s2, #0.12500000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov s3, #1.00000000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov d30, #16.00000000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov s4, #1.06250000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov d10, #1.93750000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov s12, #-1.00000000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov d16, #8.50000000 +# CHECK-NEXT: - - - 1.00 - - - - - ldr w3, #0 +# CHECK-NEXT: - - - 1.00 - - - - - ldr x29, #4 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsw xzr, #-4 +# CHECK-NEXT: - - - 1.00 - - - - - ldr s0, #8 +# CHECK-NEXT: - - - 1.00 - - - - - ldr d0, #1048572 +# CHECK-NEXT: - - - 1.00 - - - - - ldr q0, #-1048576 +# CHECK-NEXT: - - - 1.00 - - - - - prfm pldl1strm, #0 +# CHECK-NEXT: - - - 1.00 - - - - - prfm #22, #0 +# CHECK-NEXT: - - - 2.00 - - - - - stxrb w18, w8, [sp] +# CHECK-NEXT: - - - 2.00 - - - - 
- stxrh w24, w15, [x16] +# CHECK-NEXT: - - - 2.00 - - - - - stxr w5, w6, [x17] +# CHECK-NEXT: - - - 2.00 - - - - - stxr w1, x10, [x21] +# CHECK-NEXT: - - - 1.00 - - - - - ldxrb w30, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldxrh w17, [x4] +# CHECK-NEXT: - - - 1.00 - - - - - ldxr w22, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - ldxr x11, [x29] +# CHECK-NEXT: - - - 1.00 - - - - - ldxr x11, [x29] +# CHECK-NEXT: - - - 1.00 - - - - - ldxr x11, [x29] +# CHECK-NEXT: - - - 2.00 - - - - - stxp w12, w11, w10, [sp] +# CHECK-NEXT: - - - 2.00 - - - - - stxp wzr, x27, x9, [x12] +# CHECK-NEXT: - - - 2.00 - - - - - ldxp w0, wzr, [sp] +# CHECK-NEXT: - - - 2.00 - - - - - ldxp x17, x0, [x18] +# CHECK-NEXT: - - - 2.00 - - - - - ldxp x17, x0, [x18] +# CHECK-NEXT: - - - 2.00 - - - - - stlxrb w12, w22, [x0] +# CHECK-NEXT: - - - 2.00 - - - - - stlxrh w10, w1, [x1] +# CHECK-NEXT: - - - 2.00 - - - - - stlxr w9, w2, [x2] +# CHECK-NEXT: - - - 2.00 - - - - - stlxr w9, x3, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - ldaxrb w8, [x4] +# CHECK-NEXT: - - - 1.00 - - - - - ldaxrh w7, [x5] +# CHECK-NEXT: - - - 1.00 - - - - - ldaxr w6, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - ldaxr x5, [x6] +# CHECK-NEXT: - - - 1.00 - - - - - ldaxr x5, [x6] +# CHECK-NEXT: - - - 1.00 - - - - - ldaxr x5, [x6] +# CHECK-NEXT: - - - 2.00 - - - - - stlxp w4, w5, w6, [sp] +# CHECK-NEXT: - - - 2.00 - - - - - stlxp wzr, x6, x7, [x1] +# CHECK-NEXT: - - - 2.00 - - - - - ldaxp w5, w18, [sp] +# CHECK-NEXT: - - - 2.00 - - - - - ldaxp x6, x19, [x22] +# CHECK-NEXT: - - - 2.00 - - - - - ldaxp x6, x19, [x22] +# CHECK-NEXT: - - - 1.00 - - - - - stlrb w24, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - stlrh w25, [x30] +# CHECK-NEXT: - - - 1.00 - - - - - stlr w26, [x29] +# CHECK-NEXT: - - - 1.00 - - - - - stlr x27, [x28] +# CHECK-NEXT: - - - 1.00 - - - - - stlr x27, [x28] +# CHECK-NEXT: - - - 1.00 - - - - - stlr x27, [x28] +# CHECK-NEXT: - - - 1.00 - - - - - ldarb w23, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - ldarh w22, [x30] +# CHECK-NEXT: - - - 
1.00 - - - - - ldar wzr, [x29] +# CHECK-NEXT: - - - 1.00 - - - - - ldar x21, [x28] +# CHECK-NEXT: - - - 1.00 - - - - - ldar x21, [x28] +# CHECK-NEXT: - - - 1.00 - - - - - ldar x21, [x28] +# CHECK-NEXT: - - - 1.00 - - - - - sturb w9, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - sturh wzr, [x12, #255] +# CHECK-NEXT: - - - 1.00 - - - - - stur w16, [x0, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - stur x28, [x14, #1] +# CHECK-NEXT: - - - 1.00 - - - - - ldurb w1, [x20, #255] +# CHECK-NEXT: - - - 1.00 - - - - - ldurh w20, [x1, #255] +# CHECK-NEXT: - - - 1.00 - - - - - ldur w12, [sp, #255] +# CHECK-NEXT: - - - 1.00 - - - - - ldur xzr, [x12, #255] +# CHECK-NEXT: - - - 1.00 - - - - - ldursb x9, [x7, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - ldursh x17, [x19, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - ldursw x20, [x15, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - prfum pldl2keep, [sp, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - ldursb w19, [x1, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - ldursh w15, [x21, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - stur b0, [sp, #1] +# CHECK-NEXT: - - - 1.00 - - - - - stur h12, [x12, #-1] +# CHECK-NEXT: - - - 1.00 - - - - - stur s15, [x0, #255] +# CHECK-NEXT: - - - 1.00 - - - - - stur d31, [x5, #25] +# CHECK-NEXT: - - - 1.00 - - - - - stur q9, [x5] +# CHECK-NEXT: - - - 1.00 - - - - - ldur b3, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - ldur h5, [x4, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - ldur s7, [x12, #-1] +# CHECK-NEXT: - - - 1.00 - - - - - ldur d11, [x19, #4] +# CHECK-NEXT: - - - 1.00 - - - - - ldur q13, [x1, #2] +# CHECK-NEXT: - - - 1.00 - - - - - strb w9, [x2], #255 +# CHECK-NEXT: - - - 1.00 - - - - - strb w10, [x3], #1 +# CHECK-NEXT: - - - 1.00 - - - - - strb w10, [x3], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - strh w9, [x2], #255 +# CHECK-NEXT: - - - 1.00 - - - - - strh w9, [x2], #1 +# CHECK-NEXT: - - - 1.00 - - - - - strh w10, [x3], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - str w19, [sp], #255 +# CHECK-NEXT: - - - 1.00 - - - - - str w20, 
[x30], #1 +# CHECK-NEXT: - - - 1.00 - - - - - str w21, [x12], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - str xzr, [x9], #255 +# CHECK-NEXT: - - - 1.00 - - - - - str x2, [x3], #1 +# CHECK-NEXT: - - - 1.00 - - - - - str x19, [x12], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w9, [x2], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w10, [x3], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w10, [x3], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w9, [x2], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w9, [x2], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w10, [x3], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldr w19, [sp], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldr w20, [x30], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldr w21, [x12], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldr xzr, [x9], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldr x2, [x3], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldr x19, [x12], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb xzr, [x9], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb x2, [x3], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb x19, [x12], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh xzr, [x9], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh x2, [x3], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh x19, [x12], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsw xzr, [x9], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsw x2, [x3], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsw x19, [x12], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb wzr, [x9], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb w2, [x3], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb w19, [x12], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh wzr, [x9], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh w2, [x3], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh w19, [x12], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - str b0, [x0], #255 +# CHECK-NEXT: - - - 1.00 - - - - - str b3, [x3], #1 +# CHECK-NEXT: - - - 1.00 - - - - - str b5, [sp], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - str h10, [x10], #255 +# 
CHECK-NEXT: - - - 1.00 - - - - - str h13, [x23], #1 +# CHECK-NEXT: - - - 1.00 - - - - - str h15, [sp], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - str s20, [x20], #255 +# CHECK-NEXT: - - - 1.00 - - - - - str s23, [x23], #1 +# CHECK-NEXT: - - - 1.00 - - - - - str s25, [x0], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - str d20, [x20], #255 +# CHECK-NEXT: - - - 1.00 - - - - - str d23, [x23], #1 +# CHECK-NEXT: - - - 1.00 - - - - - str d25, [x0], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldr b0, [x0], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldr b3, [x3], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldr b5, [sp], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldr h10, [x10], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldr h13, [x23], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldr h15, [sp], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldr s20, [x20], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldr s23, [x23], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldr s25, [x0], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldr d20, [x20], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldr d23, [x23], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldr d25, [x0], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldr q20, [x1], #255 +# CHECK-NEXT: - - - 1.00 - - - - - ldr q23, [x9], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ldr q25, [x20], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - str q10, [x1], #255 +# CHECK-NEXT: - - - 1.00 - - - - - str q22, [sp], #1 +# CHECK-NEXT: - - - 1.00 - - - - - str q21, [x20], #-256 +# CHECK-NEXT: - - - 1.00 - - - - - ldr x3, [x4, #0]! +# CHECK-NEXT: - - - 1.00 - - - - - strb w9, [x2, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - strb w10, [x3, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - strb w10, [x3, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - strh w9, [x2, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - strh w9, [x2, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - strh w10, [x3, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - str w19, [sp, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - str w20, [x30, #1]! 
+# CHECK-NEXT: - - - 1.00 - - - - - str w21, [x12, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - str xzr, [x9, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - str x2, [x3, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - str x19, [x12, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w9, [x2, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w10, [x3, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w10, [x3, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w9, [x2, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w9, [x2, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w10, [x3, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr w19, [sp, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr w20, [x30, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr w21, [x12, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr xzr, [x9, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr x2, [x3, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr x19, [x12, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb xzr, [x9, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb x2, [x3, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb x19, [x12, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh xzr, [x9, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh x2, [x3, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh x19, [x12, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsw xzr, [x9, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsw x2, [x3, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsw x19, [x12, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb wzr, [x9, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb w2, [x3, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb w19, [x12, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh wzr, [x9, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh w2, [x3, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh w19, [x12, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - str b0, [x0, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - str b3, [x3, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - str b5, [sp, #-256]! 
+# CHECK-NEXT: - - - 1.00 - - - - - str h10, [x10, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - str h13, [x23, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - str h15, [sp, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - str s20, [x20, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - str s23, [x23, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - str s25, [x0, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - str d20, [x20, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - str d23, [x23, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - str d25, [x0, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr b0, [x0, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr b3, [x3, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr b5, [sp, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr h10, [x10, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr h13, [x23, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr h15, [sp, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr s20, [x20, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr s23, [x23, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr s25, [x0, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr d20, [x20, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr d23, [x23, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr d25, [x0, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr q20, [x1, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr q23, [x9, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - ldr q25, [x20, #-256]! +# CHECK-NEXT: - - - 1.00 - - - - - str q10, [x1, #255]! +# CHECK-NEXT: - - - 1.00 - - - - - str q22, [sp, #1]! +# CHECK-NEXT: - - - 1.00 - - - - - str q21, [x20, #-256]! 
+# CHECK-NEXT: - - - 1.00 - - - - - sttrb w9, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - sttrh wzr, [x12, #255] +# CHECK-NEXT: - - - 1.00 - - - - - sttr w16, [x0, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - sttr x28, [x14, #1] +# CHECK-NEXT: - - - 1.00 - - - - - ldtrb w1, [x20, #255] +# CHECK-NEXT: - - - 1.00 - - - - - ldtrh w20, [x1, #255] +# CHECK-NEXT: - - - 1.00 - - - - - ldtr w12, [sp, #255] +# CHECK-NEXT: - - - 1.00 - - - - - ldtr xzr, [x12, #255] +# CHECK-NEXT: - - - 1.00 - - - - - ldtrsb x9, [x7, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - ldtrsh x17, [x19, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - ldtrsw x20, [x15, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - ldtrsb w19, [x1, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - ldtrsh w15, [x21, #-256] +# CHECK-NEXT: - - - 1.00 - - - - - ldr x4, [x29] +# CHECK-NEXT: - - - 1.00 - - - - - ldr x30, [x12, #32760] +# CHECK-NEXT: - - - 1.00 - - - - - ldr x20, [sp, #8] +# CHECK-NEXT: - - - 1.00 - - - - - ldr xzr, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - ldr w2, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - ldr w17, [sp, #16380] +# CHECK-NEXT: - - - 1.00 - - - - - ldr w13, [x2, #4] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsw x2, [x5, #4] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsw x23, [sp, #16380] +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w2, [x4] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh w23, [x6, #8190] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh wzr, [sp, #2] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh x29, [x2, #2] +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w26, [x3, #121] +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w12, [x2] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb w27, [sp, #4095] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb xzr, [x15] +# CHECK-NEXT: - - - 1.00 - - - - - str x30, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - str w20, [x4, #16380] +# CHECK-NEXT: - - - 1.00 - - - - - strh w17, [sp, #8190] +# CHECK-NEXT: - - - 1.00 - - - - - strb w23, [x3, #4095] +# CHECK-NEXT: - - - 1.00 - - - - - strb wzr, [x2] +# CHECK-NEXT: - - - 1.00 - - - - - 
ldr b31, [sp, #4095] +# CHECK-NEXT: - - - 1.00 - - - - - ldr h20, [x2, #8190] +# CHECK-NEXT: - - - 1.00 - - - - - ldr s10, [x19, #16380] +# CHECK-NEXT: - - - 1.00 - - - - - ldr d3, [x10, #32760] +# CHECK-NEXT: - - - 1.00 - - - - - str q12, [sp, #65520] +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w3, [sp, x5] +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w9, [x27, x6] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb w10, [x30, x7] +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w11, [x29, x3, sxtx] +# CHECK-NEXT: - - - 1.00 - - - - - strb w12, [x28, xzr, sxtx] +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w14, [x26, w6, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb w15, [x25, w7, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldrb w17, [x23, w9, sxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsb x18, [x22, w10, sxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh w3, [sp, x5] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh w9, [x27, x6] +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w10, [x30, x7, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - strh w11, [x29, x3, sxtx] +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w12, [x28, xzr, sxtx] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh x13, [x27, x5, sxtx #1] +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w14, [x26, w6, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w15, [x25, w7, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsh w16, [x24, w8, uxtw #1] +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w17, [x23, w9, sxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldrh w18, [x22, w10, sxtw] +# CHECK-NEXT: - - - 1.00 - - - - - strh w19, [x21, wzr, sxtw #1] +# CHECK-NEXT: - - - 1.00 - - - - - ldr w3, [sp, x5] +# CHECK-NEXT: - - - 1.00 - - - - - ldr s9, [x27, x6] +# CHECK-NEXT: - - - 1.00 - - - - - ldr w10, [x30, x7, lsl #2] +# CHECK-NEXT: - - - 1.00 - - - - - ldr w11, [x29, x3, sxtx] +# CHECK-NEXT: - - - 1.00 - - - - - str s12, [x28, xzr, sxtx] +# CHECK-NEXT: - - - 1.00 - - - - - str w13, [x27, x5, sxtx #2] +# CHECK-NEXT: - - - 1.00 - - - - - str w14, [x26, w6, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldr 
w15, [x25, w7, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldr w16, [x24, w8, uxtw #2] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsw x17, [x23, w9, sxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldr w18, [x22, w10, sxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldrsw x19, [x21, wzr, sxtw #2] +# CHECK-NEXT: - - - 1.00 - - - - - ldr x3, [sp, x5] +# CHECK-NEXT: - - - 1.00 - - - - - str x9, [x27, x6] +# CHECK-NEXT: - - - 1.00 - - - - - ldr d10, [x30, x7, lsl #3] +# CHECK-NEXT: - - - 1.00 - - - - - str x11, [x29, x3, sxtx] +# CHECK-NEXT: - - - 1.00 - - - - - ldr x12, [x28, xzr, sxtx] +# CHECK-NEXT: - - - 1.00 - - - - - ldr x13, [x27, x5, sxtx #3] +# CHECK-NEXT: - - - 1.00 - - - - - prfm pldl1keep, [x26, w6, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldr x15, [x25, w7, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldr x16, [x24, w8, uxtw #3] +# CHECK-NEXT: - - - 1.00 - - - - - ldr x17, [x23, w9, sxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldr x18, [x22, w10, sxtw] +# CHECK-NEXT: - - - 1.00 - - - - - str d19, [x21, wzr, sxtw #3] +# CHECK-NEXT: - - - 1.00 - - - - - ldr q3, [sp, x5] +# CHECK-NEXT: - - - 1.00 - - - - - ldr q9, [x27, x6] +# CHECK-NEXT: - - - 1.00 - - - - - ldr q10, [x30, x7, lsl #4] +# CHECK-NEXT: - - - 1.00 - - - - - str q11, [x29, x3, sxtx] +# CHECK-NEXT: - - - 1.00 - - - - - str q12, [x28, xzr, sxtx] +# CHECK-NEXT: - - - 1.00 - - - - - str q13, [x27, x5, sxtx #4] +# CHECK-NEXT: - - - 1.00 - - - - - ldr q14, [x26, w6, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldr q15, [x25, w7, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldr q16, [x24, w8, uxtw #4] +# CHECK-NEXT: - - - 1.00 - - - - - ldr q17, [x23, w9, sxtw] +# CHECK-NEXT: - - - 1.00 - - - - - str q18, [x22, w10, sxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldr q19, [x21, wzr, sxtw #4] +# CHECK-NEXT: - - - 2.00 - - - - - ldp w3, w5, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - stp wzr, w9, [sp, #252] +# CHECK-NEXT: - - - 2.00 - - - - - ldp w2, wzr, [sp, #-256] +# CHECK-NEXT: - - - 2.00 - - - - - ldp w9, w10, [sp, #4] +# CHECK-NEXT: - - - 
2.00 - - - - - ldpsw x9, x10, [sp, #4] +# CHECK-NEXT: - - - 2.00 - - - - - ldpsw x9, x10, [x2, #-256] +# CHECK-NEXT: - - - 2.00 - - - - - ldpsw x20, x30, [sp, #252] +# CHECK-NEXT: - - - 2.00 - - - - - ldp x21, x29, [x2, #504] +# CHECK-NEXT: - - - 2.00 - - - - - ldp x22, x23, [x3, #-512] +# CHECK-NEXT: - - - 2.00 - - - - - ldp x24, x25, [x4, #8] +# CHECK-NEXT: - - - 2.00 - - - - - ldp s29, s28, [sp, #252] +# CHECK-NEXT: - - - 1.00 - - - - - stp s27, s26, [sp, #-256] +# CHECK-NEXT: - - - 2.00 - - - - - ldp s1, s2, [x3, #44] +# CHECK-NEXT: - - - 1.00 - - - - - stp d3, d5, [x9, #504] +# CHECK-NEXT: - - - 1.00 - - - - - stp d7, d11, [x10, #-512] +# CHECK-NEXT: - - - 2.00 - - - - - ldp d2, d3, [x30, #-8] +# CHECK-NEXT: - - - 1.00 - - - - - stp q3, q5, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - stp q17, q19, [sp, #1008] +# CHECK-NEXT: - - - 2.00 - - - - - ldp q23, q29, [x1, #-1024] +# CHECK-NEXT: - - - 2.00 - - - - - ldp w3, w5, [sp], #0 +# CHECK-NEXT: - - - 1.00 - - - - - stp wzr, w9, [sp], #252 +# CHECK-NEXT: - - - 2.00 - - - - - ldp w2, wzr, [sp], #-256 +# CHECK-NEXT: - - - 2.00 - - - - - ldp w9, w10, [sp], #4 +# CHECK-NEXT: - - - 2.00 - - - - - ldpsw x9, x10, [sp], #4 +# CHECK-NEXT: - - - 2.00 - - - - - ldpsw x9, x10, [x2], #-256 +# CHECK-NEXT: - - - 2.00 - - - - - ldpsw x20, x30, [sp], #252 +# CHECK-NEXT: - - - 2.00 - - - - - ldp x21, x29, [x2], #504 +# CHECK-NEXT: - - - 2.00 - - - - - ldp x22, x23, [x3], #-512 +# CHECK-NEXT: - - - 2.00 - - - - - ldp x24, x25, [x4], #8 +# CHECK-NEXT: - - - 2.00 - - - - - ldp s29, s28, [sp], #252 +# CHECK-NEXT: - - - 1.00 - - - - - stp s27, s26, [sp], #-256 +# CHECK-NEXT: - - - 2.00 - - - - - ldp s1, s2, [x3], #44 +# CHECK-NEXT: - - - 1.00 - - - - - stp d3, d5, [x9], #504 +# CHECK-NEXT: - - - 1.00 - - - - - stp d7, d11, [x10], #-512 +# CHECK-NEXT: - - - 2.00 - - - - - ldp d2, d3, [x30], #-8 +# CHECK-NEXT: - - - 1.00 - - - - - stp q3, q5, [sp], #0 +# CHECK-NEXT: - - - 1.00 - - - - - stp q17, q19, [sp], #1008 +# CHECK-NEXT: - - - 2.00 - - 
- - - ldp q23, q29, [x1], #-1024 +# CHECK-NEXT: - - - 2.00 - - - - - ldp w3, w5, [sp, #0]! +# CHECK-NEXT: - - - 1.00 - - - - - stp wzr, w9, [sp, #252]! +# CHECK-NEXT: - - - 2.00 - - - - - ldp w2, wzr, [sp, #-256]! +# CHECK-NEXT: - - - 2.00 - - - - - ldp w9, w10, [sp, #4]! +# CHECK-NEXT: - - - 2.00 - - - - - ldpsw x9, x10, [sp, #4]! +# CHECK-NEXT: - - - 2.00 - - - - - ldpsw x9, x10, [x2, #-256]! +# CHECK-NEXT: - - - 2.00 - - - - - ldpsw x20, x30, [sp, #252]! +# CHECK-NEXT: - - - 2.00 - - - - - ldp x21, x29, [x2, #504]! +# CHECK-NEXT: - - - 2.00 - - - - - ldp x22, x23, [x3, #-512]! +# CHECK-NEXT: - - - 2.00 - - - - - ldp x24, x25, [x4, #8]! +# CHECK-NEXT: - - - 2.00 - - - - - ldp s29, s28, [sp, #252]! +# CHECK-NEXT: - - - 1.00 - - - - - stp s27, s26, [sp, #-256]! +# CHECK-NEXT: - - - 2.00 - - - - - ldp s1, s2, [x3, #44]! +# CHECK-NEXT: - - - 1.00 - - - - - stp d3, d5, [x9, #504]! +# CHECK-NEXT: - - - 1.00 - - - - - stp d7, d11, [x10, #-512]! +# CHECK-NEXT: - - - 2.00 - - - - - ldp d2, d3, [x30, #-8]! +# CHECK-NEXT: - - - 1.00 - - - - - stp q3, q5, [sp, #0]! +# CHECK-NEXT: - - - 1.00 - - - - - stp q17, q19, [sp, #1008]! +# CHECK-NEXT: - - - 2.00 - - - - - ldp q23, q29, [x1, #-1024]! 
+# CHECK-NEXT: - - - 2.00 - - - - - ldnp w3, w5, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - stnp wzr, w9, [sp, #252] +# CHECK-NEXT: - - - 2.00 - - - - - ldnp w2, wzr, [sp, #-256] +# CHECK-NEXT: - - - 2.00 - - - - - ldnp w9, w10, [sp, #4] +# CHECK-NEXT: - - - 2.00 - - - - - ldnp x21, x29, [x2, #504] +# CHECK-NEXT: - - - 2.00 - - - - - ldnp x22, x23, [x3, #-512] +# CHECK-NEXT: - - - 2.00 - - - - - ldnp x24, x25, [x4, #8] +# CHECK-NEXT: - - - 2.00 - - - - - ldnp s29, s28, [sp, #252] +# CHECK-NEXT: - - - 1.00 - - - - - stnp s27, s26, [sp, #-256] +# CHECK-NEXT: - - - 2.00 - - - - - ldnp s1, s2, [x3, #44] +# CHECK-NEXT: - - - 1.00 - - - - - stnp d3, d5, [x9, #504] +# CHECK-NEXT: - - - 1.00 - - - - - stnp d7, d11, [x10, #-512] +# CHECK-NEXT: - - - 2.00 - - - - - ldnp d2, d3, [x30, #-8] +# CHECK-NEXT: - - - 1.00 - - - - - stnp q3, q5, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - stnp q17, q19, [sp, #1008] +# CHECK-NEXT: - - - 2.00 - - - - - ldnp q23, q29, [x1, #-1024] +# CHECK-NEXT: 1.00 - - - - - - - - mov w3, #983055 +# CHECK-NEXT: 1.00 - - - - - - - - mov x10, #-6148914691236517206 +# CHECK-NEXT: 1.00 - - - - - - - - and w12, w23, w21 +# CHECK-NEXT: 1.00 - - - - - - - - and w16, w15, w1, lsl #1 +# CHECK-NEXT: 1.00 - - - - - - - - and w9, w4, w10, lsl #31 +# CHECK-NEXT: 1.00 - - - - - - - - and w3, w30, w11 +# CHECK-NEXT: 1.00 - - - - - - - - and x3, x5, x7, lsl #63 +# CHECK-NEXT: 1.00 - - - - - - - - and x5, x14, x19, asr #4 +# CHECK-NEXT: 1.00 - - - - - - - - and w3, w17, w19, ror #31 +# CHECK-NEXT: 1.00 - - - - - - - - and w0, w2, wzr, lsr #17 +# CHECK-NEXT: 1.00 - - - - - - - - and w3, w30, w11, asr #2 +# CHECK-NEXT: 1.00 - - - - - - - - and xzr, x4, x26 +# CHECK-NEXT: 1.00 - - - - - - - - and w3, wzr, w20, ror #2 +# CHECK-NEXT: 1.00 - - - - - - - - and x7, x20, xzr, asr #63 +# CHECK-NEXT: 1.00 - - - - - - - - bic x13, x20, x14, lsl #47 +# CHECK-NEXT: 1.00 - - - - - - - - bic w2, w7, w9 +# CHECK-NEXT: 1.00 - - - - - - - - orr w2, w7, w0, asr #31 +# CHECK-NEXT: 1.00 - - - - 
- - - - orr x8, x9, x10, lsl #12 +# CHECK-NEXT: 1.00 - - - - - - - - orn x3, x5, x7, asr #2 +# CHECK-NEXT: 1.00 - - - - - - - - orn w2, w5, w29 +# CHECK-NEXT: 1.00 - - - - - - - - ands w7, wzr, w9, lsl #1 +# CHECK-NEXT: 1.00 - - - - - - - - ands x3, x5, x20, ror #63 +# CHECK-NEXT: 1.00 - - - - - - - - bics w3, w5, w7 +# CHECK-NEXT: 1.00 - - - - - - - - bics x3, xzr, x3, lsl #1 +# CHECK-NEXT: 1.00 - - - - - - - - tst w3, w7, lsl #31 +# CHECK-NEXT: 1.00 - - - - - - - - tst x2, x20, asr #2 +# CHECK-NEXT: 1.00 - - - - - - - - mov x3, x6 +# CHECK-NEXT: 1.00 - - - - - - - - mov x3, xzr +# CHECK-NEXT: 1.00 - - - - - - - - mov wzr, w2 +# CHECK-NEXT: 1.00 - - - - - - - - mov w3, w5 +# CHECK-NEXT: 1.00 - - - - - - - - movz w2, #0, lsl #16 +# CHECK-NEXT: 1.00 - - - - - - - - mov w2, #-1235 +# CHECK-NEXT: 1.00 - - - - - - - - mov x2, #5299989643264 +# CHECK-NEXT: 1.00 - - - - - - - - mov x2, #0 +# CHECK-NEXT: 1.00 - - - - - - - - movk w3, #0 +# CHECK-NEXT: 1.00 - - - - - - - - movz x4, #0, lsl #16 +# CHECK-NEXT: 1.00 - - - - - - - - movk w5, #0, lsl #16 +# CHECK-NEXT: 1.00 - - - - - - - - movz x6, #0, lsl #32 +# CHECK-NEXT: 1.00 - - - - - - - - movk x7, #0, lsl #32 +# CHECK-NEXT: 1.00 - - - - - - - - movz x8, #0, lsl #48 +# CHECK-NEXT: 1.00 - - - - - - - - movk x9, #0, lsl #48 +# CHECK-NEXT: 1.00 - - - - - - - - adr x2, #1600 +# CHECK-NEXT: 1.00 - - - - - - - - adrp x21, #6553600 +# CHECK-NEXT: 1.00 - - - - - - - - adr x0, #262144 +# CHECK-NEXT: - 1.00 - - - - - - - tbz x12, #62, #0 +# CHECK-NEXT: - 1.00 - - - - - - - tbz x12, #62, #4 +# CHECK-NEXT: - 1.00 - - - - - - - tbz x12, #62, #-32768 +# CHECK-NEXT: - 1.00 - - - - - - - tbnz x12, #60, #32764 +# CHECK-NEXT: - 1.00 - - - - - - - b #4 +# CHECK-NEXT: - 1.00 - - - - - - - b #-4 +# CHECK-NEXT: - 1.00 - - - - - - - b #134217724 +# CHECK-NEXT: - 1.00 - - - - - - - br x20 +# CHECK-NEXT: - 1.00 - - - - - - - blr xzr +# CHECK-NEXT: - 1.00 - - - - - - - ret x10 +# CHECK-NEXT: - 1.00 - - - - - - - ret +# CHECK-NEXT: - 1.00 - - - - - 
- - eret +# CHECK-NEXT: - 1.00 - - - - - - - drps diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A320-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A320-neon-instructions.s new file mode 100644 index 000000000000..147da4d2ef07 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A320-neon-instructions.s @@ -0,0 +1,3208 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a320 -instruction-tables < %s | FileCheck %s + +abs d29, d24 +abs v0.16b, v0.16b +abs v0.2d, v0.2d +abs v0.2s, v0.2s +abs v0.4h, v0.4h +abs v0.4s, v0.4s +abs v0.8b, v0.8b +abs v0.8h, v0.8h +add d17, d31, d29 +add v0.8b, v0.8b, v0.8b +addhn v0.2s, v0.2d, v0.2d +addhn v0.4h, v0.4s, v0.4s +addhn v0.8b, v0.8h, v0.8h +addhn2 v0.16b, v0.8h, v0.8h +addhn2 v0.4s, v0.2d, v0.2d +addhn2 v0.8h, v0.4s, v0.4s +addp v0.2d, v0.2d, v0.2d +addp v0.8b, v0.8b, v0.8b +and v0.8b, v0.8b, v0.8b +bic v0.4h, #15, lsl #8 +bic v0.8b, v0.8b, v0.8b +bif v0.16b, v0.16b, v0.16b +bit v0.16b, v0.16b, v0.16b +bsl v0.8b, v0.8b, v0.8b +cls v0.16b, v0.16b +cls v0.2s, v0.2s +cls v0.4h, v0.4h +cls v0.4s, v0.4s +cls v0.8b, v0.8b +cls v0.8h, v0.8h +clz v0.16b, v0.16b +clz v0.2s, v0.2s +clz v0.4h, v0.4h +clz v0.4s, v0.4s +clz v0.8b, v0.8b +clz v0.8h, v0.8h +cmeq d20, d21, 0 +cmeq d20, d21, d22 +cmeq v0.16b, v0.16b, 0 +cmeq v0.16b, v0.16b, v0.16b +cmge d20, d21, 0 +cmge d20, d21, d22 +cmge v0.4h, v0.4h, v0.4h +cmge v0.8b, v0.8b, 0 +cmgt d20, d21, 0 +cmgt d20, d21, d22 +cmgt v0.2s, v0.2s, 0 +cmgt v0.4s, v0.4s, v0.4s +cmhi d20, d21, d22 +cmhi v0.8h, v0.8h, v0.8h +cmhs d20, d21, d22 +cmhs v0.8b, v0.8b, v0.8b +cmle d20, d21, 0 +cmle v0.2d, v0.2d, 0 +cmlt d20, d21, 0 +cmlt v0.8h, v0.8h, 0 +cmtst d20, d21, d22 +cmtst v0.2s, v0.2s, v0.2s +cnt v0.16b, v0.16b +cnt v0.8b, v0.8b +dup v0.16b,w28 +dup v0.2d,x28 +dup v0.2s,w28 +dup v0.4h,w28 +dup v0.4s,w28 +dup v0.8b,w28 +dup v0.8h,w28 +eor v0.16b, v0.16b, v0.16b +ext v0.16b, v0.16b, v0.16b, #3 +ext 
v0.8b, v0.8b, v0.8b, #3 +fabd d29, d24, d20 +fabd s29, s24, s20 +fabd v0.4s, v0.4s, v0.4s +fabs v0.2d, v0.2d +fabs v0.2s, v0.2s +fabs v0.4h, v0.4h +fabs v0.4s, v0.4s +fabs v0.8h, v0.8h +facge d20, d21, d22 +facge s10, s11, s12 +facge v0.4s, v0.4s, v0.4s +facgt d20, d21, d22 +facgt s10, s11, s12 +facgt v0.2d, v0.2d, v0.2d +fadd v0.4s, v0.4s, v0.4s +faddp v0.2s, v0.2s, v0.2s +faddp v0.4s, v0.4s, v0.4s +fcmeq d20, d21, #0.0 +fcmeq d20, d21, d22 +fcmeq s10, s11, #0.0 +fcmeq s10, s11, s12 +fcmeq v0.2s, v0.2s, #0.0 +fcmeq v0.2s, v0.2s, v0.2s +fcmge d20, d21, #0.0 +fcmge d20, d21, d22 +fcmge s10, s11, #0.0 +fcmge s10, s11, s12 +fcmge v0.2d, v0.2d, #0.0 +fcmge v0.4s, v0.4s, v0.4s +fcmgt d20, d21, #0.0 +fcmgt d20, d21, d22 +fcmgt s10, s11, #0.0 +fcmgt s10, s11, s12 +fcmgt v0.4s, v0.4s, #0.0 +fcmgt v0.4s, v0.4s, v0.4s +fcmle d20, d21, #0.0 +fcmle s10, s11, #0.0 +fcmle v0.2d, v0.2d, #0.0 +fcmlt d20, d21, #0.0 +fcmlt s10, s11, #0.0 +fcmlt v0.4s, v0.4s, #0.0 +fcvtas d21, d14 +fcvtas s12, s13 +fcvtas v0.2d, v0.2d +fcvtas v0.2s, v0.2s +fcvtas v0.4h, v0.4h +fcvtas v0.4s, v0.4s +fcvtas v0.8h, v0.8h +fcvtau d21, d14 +fcvtau s12, s13 +fcvtau v0.2d, v0.2d +fcvtau v0.2s, v0.2s +fcvtau v0.4h, v0.4h +fcvtau v0.4s, v0.4s +fcvtau v0.8h, v0.8h +fcvtl v0.2d, v0.2s +fcvtl v0.4s, v0.4h +fcvtl2 v0.2d, v0.4s +fcvtl2 v0.4s, v0.8h +fcvtms d21, d14 +fcvtms s22, s13 +fcvtms v0.2d, v0.2d +fcvtms v0.2s, v0.2s +fcvtms v0.4h, v0.4h +fcvtms v0.4s, v0.4s +fcvtms v0.8h, v0.8h +fcvtmu d21, d14 +fcvtmu s12, s13 +fcvtmu v0.2d, v0.2d +fcvtmu v0.2s, v0.2s +fcvtmu v0.4h, v0.4h +fcvtmu v0.4s, v0.4s +fcvtmu v0.8h, v0.8h +fcvtn v0.2s, v0.2d +fcvtn v0.4h, v0.4s +fcvtn2 v0.4s, v0.2d +fcvtn2 v0.8h, v0.4s +fcvtns d21, d14 +fcvtns s22, s13 +fcvtns v0.2d, v0.2d +fcvtns v0.2s, v0.2s +fcvtns v0.4h, v0.4h +fcvtns v0.4s, v0.4s +fcvtns v0.8h, v0.8h +fcvtnu d21, d14 +fcvtnu s12, s13 +fcvtnu v0.2d, v0.2d +fcvtnu v0.2s, v0.2s +fcvtnu v0.4h, v0.4h +fcvtnu v0.4s, v0.4s +fcvtnu v0.8h, v0.8h +fcvtps d21, d14 +fcvtps s22, s13 +fcvtps 
v0.2d, v0.2d +fcvtps v0.2s, v0.2s +fcvtps v0.4h, v0.4h +fcvtps v0.4s, v0.4s +fcvtps v0.8h, v0.8h +fcvtpu d21, d14 +fcvtpu s12, s13 +fcvtpu v0.2d, v0.2d +fcvtpu v0.2s, v0.2s +fcvtpu v0.4h, v0.4h +fcvtpu v0.4s, v0.4s +fcvtpu v0.8h, v0.8h +fcvtxn s22, d13 +fcvtxn v0.2s, v0.2d +fcvtxn2 v0.4s, v0.2d +fcvtzs d21, d12, #1 +fcvtzs d21, d14 +fcvtzs s12, s13 +fcvtzs s21, s12, #1 +fcvtzs v0.2d, v0.2d +fcvtzs v0.2d, v0.2d, #3 +fcvtzs v0.2s, v0.2s +fcvtzs v0.2s, v0.2s, #3 +fcvtzs v0.4h, v0.4h +fcvtzs v0.4s, v0.4s +fcvtzs v0.4s, v0.4s, #3 +fcvtzs v0.8h, v0.8h +fcvtzu d21, d12, #1 +fcvtzu d21, d14 +fcvtzu s12, s13 +fcvtzu s21, s12, #1 +fcvtzu v0.2d, v0.2d +fcvtzu v0.2d, v0.2d, #3 +fcvtzu v0.2s, v0.2s +fcvtzu v0.2s, v0.2s, #3 +fcvtzu v0.4h, v0.4h +fcvtzu v0.4s, v0.4s +fcvtzu v0.4s, v0.4s, #3 +fcvtzu v0.8h, v0.8h +fdiv v0.2s, v0.2s, v0.2s +fmax v0.2d, v0.2d, v0.2d +fmax v0.2s, v0.2s, v0.2s +fmax v0.4s, v0.4s, v0.4s +fmaxnm v0.2d, v0.2d, v0.2d +fmaxnm v0.2s, v0.2s, v0.2s +fmaxnm v0.4s, v0.4s, v0.4s +fmaxnmp v0.2d, v0.2d, v0.2d +fmaxnmp v0.2s, v0.2s, v0.2s +fmaxnmp v0.4s, v0.4s, v0.4s +fmaxp v0.2d, v0.2d, v0.2d +fmaxp v0.2s, v0.2s, v0.2s +fmaxp v0.4s, v0.4s, v0.4s +fmin v0.2d, v0.2d, v0.2d +fmin v0.2s, v0.2s, v0.2s +fmin v0.4s, v0.4s, v0.4s +fminnm v0.2d, v0.2d, v0.2d +fminnm v0.2s, v0.2s, v0.2s +fminnm v0.4s, v0.4s, v0.4s +fminnmp v0.2d, v0.2d, v0.2d +fminnmp v0.2s, v0.2s, v0.2s +fminnmp v0.4s, v0.4s, v0.4s +fminp v0.2d, v0.2d, v0.2d +fminp v0.2s, v0.2s, v0.2s +fminp v0.4s, v0.4s, v0.4s +fmla d0, d1, v0.d[1] +fmla s0, s1, v0.s[3] +fmla v0.2s, v0.2s, v0.2s +fmls d0, d4, v0.d[1] +fmls s3, s5, v0.s[3] +fmls v0.2s, v0.2s, v0.2s +fmov v0.2d, #-1.25 +fmov v0.2s, #13.0 +fmov v0.4s, #1.0 +fmul d0, d1, v0.d[1] +fmul s0, s1, v0.s[3] +fmul v0.2s, v0.2s, v0.2s +fmulx d0, d4, v0.d[1] +fmulx d23, d11, d1 +fmulx s20, s22, s15 +fmulx s3, s5, v0.s[3] +fmulx v0.2d, v0.2d, v0.2d +fmulx v0.2s, v0.2s, v0.2s +fmulx v0.4s, v0.4s, v0.4s +fneg v0.2d, v0.2d +fneg v0.2s, v0.2s +fneg v0.4h, v0.4h +fneg v0.4s, 
v0.4s +fneg v0.8h, v0.8h +frecpe d13, d13 +frecpe s19, s14 +frecpe v0.2d, v0.2d +frecpe v0.2s, v0.2s +frecpe v0.4h, v0.4h +frecpe v0.4s, v0.4s +frecpe v0.8h, v0.8h +frecps v0.4s, v0.4s, v0.4s +frecps d22, d30, d21 +frecps s21, s16, s13 +frecpx d16, d19 +frecpx s18, s10 +frinta v0.2d, v0.2d +frinta v0.2s, v0.2s +frinta v0.4h, v0.4h +frinta v0.4s, v0.4s +frinta v0.8h, v0.8h +frinti v0.2d, v0.2d +frinti v0.2s, v0.2s +frinti v0.4h, v0.4h +frinti v0.4s, v0.4s +frinti v0.8h, v0.8h +frintm v0.2d, v0.2d +frintm v0.2s, v0.2s +frintm v0.4h, v0.4h +frintm v0.4s, v0.4s +frintm v0.8h, v0.8h +frintn v0.2d, v0.2d +frintn v0.2s, v0.2s +frintn v0.4h, v0.4h +frintn v0.4s, v0.4s +frintn v0.8h, v0.8h +frintp v0.2d, v0.2d +frintp v0.2s, v0.2s +frintp v0.4h, v0.4h +frintp v0.4s, v0.4s +frintp v0.8h, v0.8h +frintx v0.2d, v0.2d +frintx v0.2s, v0.2s +frintx v0.4h, v0.4h +frintx v0.4s, v0.4s +frintx v0.8h, v0.8h +frintz v0.2d, v0.2d +frintz v0.2s, v0.2s +frintz v0.4h, v0.4h +frintz v0.4s, v0.4s +frintz v0.8h, v0.8h +frsqrte d21, d12 +frsqrte s22, s13 +frsqrte v0.2d, v0.2d +frsqrte v0.2s, v0.2s +frsqrte v0.4h, v0.4h +frsqrte v0.4s, v0.4s +frsqrte v0.8h, v0.8h +frsqrts d8, d22, d18 +frsqrts s21, s5, s12 +frsqrts v0.2d, v0.2d, v0.2d +fsqrt v0.2d, v0.2d +fsqrt v0.2s, v0.2s +fsqrt v0.4h, v0.4h +fsqrt v0.4s, v0.4s +fsqrt v0.8h, v0.8h +fsub v0.2s, v0.2s, v0.2s +ld1 { v0.16b }, [x0] +ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +ld1 { v0.4s, v1.4s }, [sp], #32 +ld1 { v0.4s, v1.4s, v2.4s }, [sp] +ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +ld1 { v0.8h }, [x15], x2 +ld1 { v0.8h, v1.8h }, [x15] +ld1 { v0.b }[9], [x0] +ld1 { v0.b }[9], [x0], #1 +ld1r { v0.16b }, [x0] +ld1r { v0.16b }, [x0], #1 +ld1r { v0.8h }, [x15] +ld1r { v0.8h }, [x15], #2 +ld2 { v0.16b, v1.16b }, [x0], x1 +ld2 { v0.8b, v1.8b }, [x0] +ld2 { v0.h, v1.h }[7], [x15] +ld2 { v0.h, v1.h }[7], [x15], #4 +ld2r { v0.2d, v1.2d }, [x0] +ld2r { v0.2d, v1.2d }, [x0], #16 +ld2r { v0.4s, v1.4s }, [sp] +ld2r { 
v0.4s, v1.4s }, [sp], #8 +ld3 { v0.4h, v1.4h, v2.4h }, [x15] +ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2 +ld3 { v0.s, v1.s, v2.s }[3], [sp] +ld3 { v0.s, v1.s, v2.s }[3], [sp], x3 +ld3r { v0.4h, v1.4h, v2.4h }, [x15] +ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6 +ld3r { v0.8b, v1.8b, v2.8b }, [x0] +ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 +ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] +ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 +ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 +ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp] +ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7 +ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30 +mla v0.8b, v0.8b, v0.8b +mls v0.4h, v0.4h, v0.4h +mov b0, v0.b[15] +mov d6, v0.d[1] +mov h2, v0.h[5] +mov s17, v0.s[2] +mov v0.16b, v0.16b +mov v0.8b, v0.8b +movi d15, #0xff00ff00ff00ff +movi v0.16b, #31 +movi v0.2d, #0xff0000ff0000ffff +movi v0.2s, #8, msl #8 +movi v0.4s, #255, lsl #24 +movi v0.8b, #255 +mul v0.8b, v0.8b, v0.8b +mvni v0.2s, 0 +mvni v0.4s, #16, msl #16 +neg d29, d24 +neg v0.16b, v0.16b +neg v0.2d, v0.2d +neg v0.2s, v0.2s +neg v0.4h, v0.4h +neg v0.4s, v0.4s +neg v0.8b, v0.8b +neg v0.8h, v0.8h +not v0.16b, v0.16b +not v0.8b, v0.8b +orn v0.16b, v0.16b, v0.16b +orr v0.16b, v0.16b, v0.16b +orr v0.8h, #31 +pmul v0.16b, v0.16b, v0.16b +pmul v0.8b, v0.8b, v0.8b +pmull v0.8h, v0.8b, v0.8b +pmull2 v0.8h, v0.16b, v0.16b +raddhn v0.2s, v0.2d, v0.2d +raddhn v0.4h, v0.4s, v0.4s +raddhn v0.8b, v0.8h, v0.8h +raddhn2 v0.16b, v0.8h, v0.8h +raddhn2 v0.4s, v0.2d, v0.2d +raddhn2 v0.8h, v0.4s, v0.4s +rbit v0.16b, v0.16b +rbit v0.8b, v0.8b +rev16 v21.8b, v1.8b +rev16 v30.16b, v31.16b +rev32 v0.4h, v9.4h +rev32 v21.8b, v1.8b +rev32 v30.16b, v31.16b +rev32 v4.8h, v7.8h +rev64 v0.16b, v31.16b +rev64 v1.8b, v9.8b +rev64 v13.4h, v21.4h +rev64 v2.8h, v4.8h +rev64 v4.2s, v0.2s +rev64 v6.4s, v8.4s +rshrn v0.2s, v0.2d, #3 +rshrn v0.4h, v0.4s, #3 +rshrn v0.8b, 
v0.8h, #3 +rshrn2 v0.16b, v0.8h, #3 +rshrn2 v0.4s, v0.2d, #3 +rshrn2 v0.8h, v0.4s, #3 +rsubhn v0.2s, v0.2d, v0.2d +rsubhn v0.4h, v0.4s, v0.4s +rsubhn v0.8b, v0.8h, v0.8h +rsubhn2 v0.16b, v0.8h, v0.8h +rsubhn2 v0.4s, v0.2d, v0.2d +rsubhn2 v0.8h, v0.4s, v0.4s +saba v0.16b, v0.16b, v0.16b +sabal v0.2d, v0.2s, v0.2s +sabal v0.4s, v0.4h, v0.4h +sabal v0.8h, v0.8b, v0.8b +sabal2 v0.2d, v0.4s, v0.4s +sabal2 v0.4s, v0.8h, v0.8h +sabal2 v0.8h, v0.16b, v0.16b +sabd v0.4h, v0.4h, v0.4h +sabdl v0.2d, v0.2s, v0.2s +sabdl v0.4s, v0.4h, v0.4h +sabdl v0.8h, v0.8b, v0.8b +sabdl2 v0.2d, v0.4s, v0.4s +sabdl2 v0.4s, v0.8h, v0.8h +sabdl2 v0.8h, v0.16b, v0.16b +sadalp v0.1d, v0.2s +sadalp v0.2d, v0.4s +sadalp v0.2s, v0.4h +sadalp v0.4h, v0.8b +sadalp v0.4s, v0.8h +sadalp v0.8h, v0.16b +saddl v0.2d, v0.2s, v0.2s +saddl v0.4s, v0.4h, v0.4h +saddl v0.8h, v0.8b, v0.8b +saddl2 v0.2d, v0.4s, v0.4s +saddl2 v0.4s, v0.8h, v0.8h +saddl2 v0.8h, v0.16b, v0.16b +saddlp v0.1d, v0.2s +saddlp v0.2d, v0.4s +saddlp v0.2s, v0.4h +saddlp v0.4h, v0.8b +saddlp v0.4s, v0.8h +saddlp v0.8h, v0.16b +saddw v0.2d, v0.2d, v0.2s +saddw v0.4s, v0.4s, v0.4h +saddw v0.8h, v0.8h, v0.8b +saddw2 v0.2d, v0.2d, v0.4s +saddw2 v0.4s, v0.4s, v0.8h +saddw2 v0.8h, v0.8h, v0.16b +scvtf d21, d12 +scvtf d21, d12, #64 +scvtf s22, s13 +scvtf s22, s13, #32 +scvtf v0.2d, v0.2d +scvtf v0.2d, v0.2d, #3 +scvtf v0.2s, v0.2s +scvtf v0.2s, v0.2s, #3 +scvtf v0.4h, v0.4h +scvtf v0.4s, v0.4s +scvtf v0.4s, v0.4s, #3 +scvtf v0.8h, v0.8h +shadd v0.8b, v0.8b, v0.8b +shl d7, d10, #12 +shl v0.16b, v0.16b, #3 +shl v0.2d, v0.2d, #3 +shl v0.4h, v0.4h, #3 +shl v0.4s, v0.4s, #3 +shll v0.2d, v0.2s, #32 +shll v0.4s, v0.4h, #16 +shll v0.8h, v0.8b, #8 +shll v0.2d, v0.2s, #32 +shll v0.4s, v0.4h, #16 +shll v0.8h, v0.8b, #8 +shll2 v0.2d, v0.4s, #32 +shll2 v0.4s, v0.8h, #16 +shll2 v0.8h, v0.16b, #8 +shll2 v0.2d, v0.4s, #32 +shll2 v0.4s, v0.8h, #16 +shll2 v0.8h, v0.16b, #8 +shrn v0.2s, v0.2d, #3 +shrn v0.4h, v0.4s, #3 +shrn v0.8b, v0.8h, #3 +shrn2 v0.16b, v0.8h, 
#3 +shrn2 v0.4s, v0.2d, #3 +shrn2 v0.8h, v0.4s, #3 +shsub v0.2s, v0.2s, v0.2s +shsub v0.4h, v0.4h, v0.4h +sli d10, d14, #12 +sli v0.16b, v0.16b, #3 +sli v0.2d, v0.2d, #3 +sli v0.2s, v0.2s, #3 +sli v0.4h, v0.4h, #3 +sli v0.4s, v0.4s, #3 +sli v0.8b, v0.8b, #3 +sli v0.8h, v0.8h, #3 +smax v0.2s, v0.2s, v0.2s +smax v0.4h, v0.4h, v0.4h +smax v0.8b, v0.8b, v0.8b +smaxp v0.2s, v0.2s, v0.2s +smaxp v0.4h, v0.4h, v0.4h +smaxp v0.8b, v0.8b, v0.8b +smin v0.16b, v0.16b, v0.16b +smin v0.4s, v0.4s, v0.4s +smin v0.8h, v0.8h, v0.8h +sminp v0.16b, v0.16b, v0.16b +sminp v0.4s, v0.4s, v0.4s +sminp v0.8h, v0.8h, v0.8h +smlal v0.2d, v0.2s, v0.2s +smlal v0.4s, v0.4h, v0.4h +smlal v0.8h, v0.8b, v0.8b +smlal2 v0.2d, v0.4s, v0.4s +smlal2 v0.4s, v0.8h, v0.8h +smlal2 v0.8h, v0.16b, v0.16b +smlsl v0.2d, v0.2s, v0.2s +smlsl v0.4s, v0.4h, v0.4h +smlsl v0.8h, v0.8b, v0.8b +smlsl2 v0.2d, v0.4s, v0.4s +smlsl2 v0.4s, v0.8h, v0.8h +smlsl2 v0.8h, v0.16b, v0.16b +smull v0.2d, v0.2s, v0.2s +smull v0.4s, v0.4h, v0.4h +smull v0.8h, v0.8b, v0.8b +smull2 v0.2d, v0.4s, v0.4s +smull2 v0.4s, v0.8h, v0.8h +smull2 v0.8h, v0.16b, v0.16b +sqabs b19, b14 +sqabs d18, d12 +sqabs h21, h15 +sqabs s20, s12 +sqabs v0.16b, v0.16b +sqabs v0.2d, v0.2d +sqabs v0.2s, v0.2s +sqabs v0.4h, v0.4h +sqabs v0.4s, v0.4s +sqabs v0.8b, v0.8b +sqabs v0.8h, v0.8h +sqadd b20, b11, b15 +sqadd v0.16b, v0.16b, v0.16b +sqadd v0.2s, v0.2s, v0.2s +sqdmlal d19, s24, s12 +sqdmlal d8, s9, v0.s[1] +sqdmlal s0, h0, v0.h[3] +sqdmlal s17, h27, h12 +sqdmlal v0.2d, v0.2s, v0.2s +sqdmlal v0.4s, v0.4h, v0.4h +sqdmlal2 v0.2d, v0.4s, v0.4s +sqdmlal2 v0.4s, v0.8h, v0.8h +sqdmlsl d12, s23, s13 +sqdmlsl d8, s9, v0.s[1] +sqdmlsl s0, h0, v0.h[3] +sqdmlsl s14, h12, h25 +sqdmlsl v0.2d, v0.2s, v0.2s +sqdmlsl v0.4s, v0.4h, v0.4h +sqdmlsl2 v0.2d, v0.4s, v0.4s +sqdmlsl2 v0.4s, v0.8h, v0.8h +sqdmulh h10, h11, h12 +sqdmulh h7, h15, v0.h[3] +sqdmulh s15, s14, v0.s[1] +sqdmulh s20, s21, s2 +sqdmulh v0.2s, v0.2s, v0.2s +sqdmulh v0.4s, v0.4s, v0.4s +sqdmull d1, s1, v0.s[1] 
+sqdmull d15, s22, s12 +sqdmull s1, h1, v0.h[3] +sqdmull s12, h22, h12 +sqdmull v0.2d, v0.2s, v0.2s +sqdmull v0.4s, v0.4h, v0.4h +sqdmull2 v0.2d, v0.4s, v0.4s +sqdmull2 v0.4s, v0.8h, v0.8h +sqneg b19, b14 +sqneg d18, d12 +sqneg h21, h15 +sqneg s20, s12 +sqneg v0.16b, v0.16b +sqneg v0.2d, v0.2d +sqneg v0.2s, v0.2s +sqneg v0.4h, v0.4h +sqneg v0.4s, v0.4s +sqneg v0.8b, v0.8b +sqneg v0.8h, v0.8h +sqrdmulh h10, h11, h12 +sqrdmulh h7, h15, v0.h[3] +sqrdmulh s15, s14, v0.s[1] +sqrdmulh s20, s21, s2 +sqrdmulh v0.4h, v0.4h, v0.4h +sqrdmulh v0.8h, v0.8h, v0.8h +sqrshl d31, d31, d31 +sqrshl h3, h4, h15 +sqrshl v0.2s, v0.2s, v0.2s +sqrshl v0.4h, v0.4h, v0.4h +sqrshl v0.8b, v0.8b, v0.8b +sqrshrn b10, h13, #2 +sqrshrn h15, s10, #6 +sqrshrn s15, d12, #9 +sqrshrn v0.2s, v0.2d, #3 +sqrshrn v0.4h, v0.4s, #3 +sqrshrn v0.8b, v0.8h, #3 +sqrshrn2 v0.16b, v0.8h, #3 +sqrshrn2 v0.4s, v0.2d, #3 +sqrshrn2 v0.8h, v0.4s, #3 +sqrshrun b17, h10, #6 +sqrshrun h10, s13, #15 +sqrshrun s22, d16, #31 +sqrshrun v0.2s, v0.2d, #3 +sqrshrun v0.4h, v0.4s, #3 +sqrshrun v0.8b, v0.8h, #3 +sqrshrun2 v0.16b, v0.8h, #3 +sqrshrun2 v0.4s, v0.2d, #3 +sqrshrun2 v0.8h, v0.4s, #3 +sqshl b11, b19, #7 +sqshl d15, d16, #51 +sqshl d31, d31, d31 +sqshl h13, h18, #11 +sqshl h3, h4, h15 +sqshl s14, s17, #22 +sqshl v0.16b, v0.16b, #3 +sqshl v0.2d, v0.2d, #3 +sqshl v0.2s, v0.2s, #3 +sqshl v0.2s, v0.2s, v0.2s +sqshl v0.4h, v0.4h, #3 +sqshl v0.4h, v0.4h, v0.4h +sqshl v0.4s, v0.4s, #3 +sqshl v0.8b, v0.8b, #3 +sqshl v0.8b, v0.8b, v0.8b +sqshl v0.8h, v0.8h, #3 +sqshlu b15, b18, #6 +sqshlu d11, d13, #32 +sqshlu h19, h17, #6 +sqshlu s16, s14, #25 +sqshlu v0.16b, v0.16b, #3 +sqshlu v0.2d, v0.2d, #3 +sqshlu v0.2s, v0.2s, #3 +sqshlu v0.4h, v0.4h, #3 +sqshlu v0.4s, v0.4s, #3 +sqshlu v0.8b, v0.8b, #3 +sqshlu v0.8h, v0.8h, #3 +sqshrn b10, h15, #5 +sqshrn h17, s10, #4 +sqshrn s18, d10, #31 +sqshrn v0.2s, v0.2d, #3 +sqshrn v0.4h, v0.4s, #3 +sqshrn v0.8b, v0.8h, #3 +sqshrn2 v0.16b, v0.8h, #3 +sqshrn2 v0.4s, v0.2d, #3 +sqshrn2 v0.8h, v0.4s, 
#3 +sqshrun b15, h10, #7 +sqshrun h20, s14, #3 +sqshrun s10, d15, #15 +sqshrun v0.2s, v0.2d, #3 +sqshrun v0.4h, v0.4s, #3 +sqshrun v0.8b, v0.8h, #3 +sqshrun2 v0.16b, v0.8h, #3 +sqshrun2 v0.4s, v0.2d, #3 +sqshrun2 v0.8h, v0.4s, #3 +sqsub s20, s10, s7 +sqsub v0.2d, v0.2d, v0.2d +sqsub v0.4s, v0.4s, v0.4s +sqsub v0.8b, v0.8b, v0.8b +sqxtn b18, h18 +sqxtn h20, s17 +sqxtn s19, d14 +sqxtn v0.2s, v0.2d +sqxtn v0.4h, v0.4s +sqxtn v0.8b, v0.8h +sqxtn2 v0.16b, v0.8h +sqxtn2 v0.4s, v0.2d +sqxtn2 v0.8h, v0.4s +sqxtun b19, h14 +sqxtun h21, s15 +sqxtun s20, d12 +sqxtun v0.2s, v0.2d +sqxtun v0.4h, v0.4s +sqxtun v0.8b, v0.8h +sqxtun2 v0.16b, v0.8h +sqxtun2 v0.4s, v0.2d +sqxtun2 v0.8h, v0.4s +srhadd v0.2s, v0.2s, v0.2s +srhadd v0.4h, v0.4h, v0.4h +srhadd v0.8b, v0.8b, v0.8b +sri d10, d12, #14 +sri v0.16b, v0.16b, #3 +sri v0.2d, v0.2d, #3 +sri v0.2s, v0.2s, #3 +sri v0.4h, v0.4h, #3 +sri v0.4s, v0.4s, #3 +sri v0.8b, v0.8b, #3 +sri v0.8h, v0.8h, #3 +srshl d16, d16, d16 +srshl v0.2s, v0.2s, v0.2s +srshl v0.4h, v0.4h, v0.4h +srshl v0.8b, v0.8b, v0.8b +srshr d19, d18, #7 +srshr v0.16b, v0.16b, #3 +srshr v0.2d, v0.2d, #3 +srshr v0.2s, v0.2s, #3 +srshr v0.4h, v0.4h, #3 +srshr v0.4s, v0.4s, #3 +srshr v0.8b, v0.8b, #3 +srshr v0.8h, v0.8h, #3 +srsra d15, d11, #19 +srsra v0.16b, v0.16b, #3 +srsra v0.2d, v0.2d, #3 +srsra v0.2s, v0.2s, #3 +srsra v0.4h, v0.4h, #3 +srsra v0.4s, v0.4s, #3 +srsra v0.8b, v0.8b, #3 +srsra v0.8h, v0.8h, #3 +sshl d31, d31, d31 +sshl v0.2d, v0.2d, v0.2d +sshl v0.2s, v0.2s, v0.2s +sshl v0.4h, v0.4h, v0.4h +sshl v0.8b, v0.8b, v0.8b +sshll v0.2d, v0.2s, #3 +sshll2 v0.4s, v0.8h, #3 +sshr d15, d16, #12 +sshr v0.16b, v0.16b, #3 +sshr v0.2d, v0.2d, #3 +sshr v0.2s, v0.2s, #3 +sshr v0.4h, v0.4h, #3 +sshr v0.4s, v0.4s, #3 +sshr v0.8b, v0.8b, #3 +sshr v0.8h, v0.8h, #3 +ssra d18, d12, #21 +ssra v0.16b, v0.16b, #3 +ssra v0.2d, v0.2d, #3 +ssra v0.2s, v0.2s, #3 +ssra v0.4h, v0.4h, #3 +ssra v0.4s, v0.4s, #3 +ssra v0.8b, v0.8b, #3 +ssra v0.8h, v0.8h, #3 +ssubl v0.2d, v0.2s, v0.2s +ssubl 
v0.4s, v0.4h, v0.4h +ssubl v0.8h, v0.8b, v0.8b +ssubl2 v0.2d, v0.4s, v0.4s +ssubl2 v0.4s, v0.8h, v0.8h +ssubl2 v0.8h, v0.16b, v0.16b +ssubw v0.2d, v0.2d, v0.2s +ssubw v0.4s, v0.4s, v0.4h +ssubw v0.8h, v0.8h, v0.8b +ssubw2 v0.2d, v0.2d, v0.4s +ssubw2 v0.4s, v0.4s, v0.8h +ssubw2 v0.8h, v0.8h, v0.16b +st1 { v0.16b }, [x0] +st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +st1 { v0.4s, v1.4s }, [sp], #32 +st1 { v0.4s, v1.4s, v2.4s }, [sp] +st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +st1 { v0.8h }, [x15], x2 +st1 { v0.8h, v1.8h }, [x15] +st1 { v0.d }[1], [x0] +st1 { v0.d }[1], [x0], #8 +st2 { v0.16b, v1.16b }, [x0], x1 +st2 { v0.8b, v1.8b }, [x0] +st2 { v0.s, v1.s }[3], [sp] +st2 { v0.s, v1.s }[3], [sp], #8 +st3 { v0.4h, v1.4h, v2.4h }, [x15] +st3 { v0.8h, v1.8h, v2.8h }, [x15], x2 +st3 { v0.h, v1.h, v2.h }[7], [x15] +st3 { v0.h, v1.h, v2.h }[7], [x15], #6 +st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] +st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 +sub d15, d5, d16 +sub v0.2d, v0.2d, v0.2d +suqadd b19, b14 +suqadd d18, d22 +suqadd h20, h15 +suqadd s21, s12 +suqadd v0.16b, v0.16b +suqadd v0.2d, v0.2d +suqadd v0.2s, v0.2s +suqadd v0.4h, v0.4h +suqadd v0.4s, v0.4s +suqadd v0.8b, v0.8b +suqadd v0.8h, v0.8h +tbl v0.16b, { v0.16b }, v0.16b +tbl v0.16b, { v0.16b, v1.16b }, v0.16b +tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +tbl v0.8b, { v0.16b }, v0.8b +tbl v0.8b, { v0.16b, v1.16b }, v0.8b +tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +tbx v0.16b, { v0.16b }, v0.16b +tbx v0.16b, { v0.16b, v1.16b }, v0.16b +tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +tbx v0.8b, { v0.16b }, v0.8b +tbx v0.8b, { v0.16b, v1.16b }, v0.8b +tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +tbx v0.8b, { v0.16b, v1.16b, 
v2.16b, v3.16b }, v0.8b +trn1 v0.16b, v0.16b, v0.16b +trn1 v0.2d, v0.2d, v0.2d +trn1 v0.2s, v0.2s, v0.2s +trn1 v0.4h, v0.4h, v0.4h +trn1 v0.4s, v0.4s, v0.4s +trn1 v0.8b, v0.8b, v0.8b +trn1 v0.8h, v0.8h, v0.8h +trn2 v0.16b, v0.16b, v0.16b +trn2 v0.2d, v0.2d, v0.2d +trn2 v0.2s, v0.2s, v0.2s +trn2 v0.4h, v0.4h, v0.4h +trn2 v0.4s, v0.4s, v0.4s +trn2 v0.8b, v0.8b, v0.8b +trn2 v0.8h, v0.8h, v0.8h +uaba v0.8b, v0.8b, v0.8b +uabal v0.2d, v0.2s, v0.2s +uabal v0.4s, v0.4h, v0.4h +uabal v0.8h, v0.8b, v0.8b +uabal2 v0.2d, v0.4s, v0.4s +uabal2 v0.4s, v0.8h, v0.8h +uabal2 v0.8h, v0.16b, v0.16b +uabd v0.4h, v0.4h, v0.4h +uabdl v0.2d, v0.2s, v0.2s +uabdl v0.4s, v0.4h, v0.4h +uabdl v0.8h, v0.8b, v0.8b +uabdl2 v0.2d, v0.4s, v0.4s +uabdl2 v0.4s, v0.8h, v0.8h +uabdl2 v0.8h, v0.16b, v0.16b +uadalp v0.1d, v0.2s +uadalp v0.2d, v0.4s +uadalp v0.2s, v0.4h +uadalp v0.4h, v0.8b +uadalp v0.4s, v0.8h +uadalp v0.8h, v0.16b +uaddl v0.2d, v0.2s, v0.2s +uaddl v0.4s, v0.4h, v0.4h +uaddl v0.8h, v0.8b, v0.8b +uaddl2 v0.2d, v0.4s, v0.4s +uaddl2 v0.4s, v0.8h, v0.8h +uaddl2 v0.8h, v0.16b, v0.16b +uaddlp v0.1d, v0.2s +uaddlp v0.2d, v0.4s +uaddlp v0.2s, v0.4h +uaddlp v0.4h, v0.8b +uaddlp v0.4s, v0.8h +uaddlp v0.8h, v0.16b +uaddw v0.2d, v0.2d, v0.2s +uaddw v0.4s, v0.4s, v0.4h +uaddw v0.8h, v0.8h, v0.8b +uaddw2 v0.2d, v0.2d, v0.4s +uaddw2 v0.4s, v0.4s, v0.8h +uaddw2 v0.8h, v0.8h, v0.16b +ucvtf d21, d14 +ucvtf d21, d14, #64 +ucvtf s22, s13 +ucvtf s22, s13, #32 +ucvtf v0.2d, v0.2d +ucvtf v0.2d, v0.2d, #3 +ucvtf v0.2s, v0.2s +ucvtf v0.2s, v0.2s, #3 +ucvtf v0.4h, v0.4h +ucvtf v0.4s, v0.4s +ucvtf v0.4s, v0.4s, #3 +ucvtf v0.8h, v0.8h +uhadd v0.16b, v0.16b, v0.16b +uhadd v0.8h, v0.8h, v0.8h +uhsub v0.4s, v0.4s, v0.4s +umax v0.16b, v0.16b, v0.16b +umax v0.4s, v0.4s, v0.4s +umax v0.8h, v0.8h, v0.8h +umaxp v0.16b, v0.16b, v0.16b +umaxp v0.4s, v0.4s, v0.4s +umaxp v0.8h, v0.8h, v0.8h +umin v0.2s, v0.2s, v0.2s +umin v0.4h, v0.4h, v0.4h +umin v0.8b, v0.8b, v0.8b +uminp v0.2s, v0.2s, v0.2s +uminp v0.4h, v0.4h, v0.4h 
+uminp v0.8b, v0.8b, v0.8b +umlal v0.2d, v0.2s, v0.2s +umlal v0.4s, v0.4h, v0.4h +umlal v0.8h, v0.8b, v0.8b +umlal2 v0.2d, v0.4s, v0.4s +umlal2 v0.4s, v0.8h, v0.8h +umlal2 v0.8h, v0.16b, v0.16b +umlsl v0.2d, v0.2s, v0.2s +umlsl v0.4s, v0.4h, v0.4h +umlsl v0.8h, v0.8b, v0.8b +umlsl2 v0.2d, v0.4s, v0.4s +umlsl2 v0.4s, v0.8h, v0.8h +umlsl2 v0.8h, v0.16b, v0.16b +umull v0.2d, v0.2s, v0.2s +umull v0.4s, v0.4h, v0.4h +umull v0.8h, v0.8b, v0.8b +umull2 v0.2d, v0.4s, v0.4s +umull2 v0.4s, v0.8h, v0.8h +umull2 v0.8h, v0.16b, v0.16b +uqadd h0, h1, h5 +uqadd v0.8h, v0.8h, v0.8h +uqrshl b11, b20, b30 +uqrshl s23, s20, s16 +uqrshl v0.16b, v0.16b, v0.16b +uqrshl v0.4s, v0.4s, v0.4s +uqrshl v0.4s, v0.4s, v0.4s +uqrshl v0.8h, v0.8h, v0.8h +uqrshrn b10, h12, #5 +uqrshrn h12, s10, #14 +uqrshrn s10, d10, #25 +uqrshrn v0.2s, v0.2d, #3 +uqrshrn v0.4h, v0.4s, #3 +uqrshrn v0.8b, v0.8h, #3 +uqrshrn2 v0.16b, v0.8h, #3 +uqrshrn2 v0.4s, v0.2d, #3 +uqrshrn2 v0.8h, v0.4s, #3 +uqshl b11, b20, b30 +uqshl b18, b15, #6 +uqshl d15, d12, #19 +uqshl h11, h18, #7 +uqshl s14, s19, #18 +uqshl s23, s20, s16 +uqshl v0.16b, v0.16b, #3 +uqshl v0.16b, v0.16b, v0.16b +uqshl v0.2d, v0.2d, #3 +uqshl v0.2d, v0.2d, v0.2d +uqshl v0.2s, v0.2s, #3 +uqshl v0.4h, v0.4h, #3 +uqshl v0.4s, v0.4s, #3 +uqshl v0.4s, v0.4s, v0.4s +uqshl v0.8b, v0.8b, #3 +uqshl v0.8h, v0.8h, #3 +uqshl v0.8h, v0.8h, v0.8h +uqshrn b12, h10, #7 +uqshrn h10, s14, #5 +uqshrn s10, d12, #13 +uqshrn v0.2s, v0.2d, #3 +uqshrn v0.4h, v0.4s, #3 +uqshrn v0.8b, v0.8h, #3 +uqshrn2 v0.16b, v0.8h, #3 +uqshrn2 v0.4s, v0.2d, #3 +uqshrn2 v0.8h, v0.4s, #3 +uqsub d16, d16, d16 +uqsub v0.4h, v0.4h, v0.4h +uqxtn b18, h18 +uqxtn h20, s17 +uqxtn s19, d14 +uqxtn v0.2s, v0.2d +uqxtn v0.4h, v0.4s +uqxtn v0.8b, v0.8h +uqxtn2 v0.16b, v0.8h +uqxtn2 v0.4s, v0.2d +uqxtn2 v0.8h, v0.4s +urecpe v0.2s, v0.2s +urecpe v0.4s, v0.4s +urhadd v0.16b, v0.16b, v0.16b +urhadd v0.4s, v0.4s, v0.4s +urhadd v0.8h, v0.8h, v0.8h +urshl d8, d7, d4 +urshl v0.16b, v0.16b, v0.16b +urshl v0.2d, 
v0.2d, v0.2d +urshl v0.4s, v0.4s, v0.4s +urshl v0.8h, v0.8h, v0.8h +urshr d20, d23, #31 +urshr v0.16b, v0.16b, #3 +urshr v0.2d, v0.2d, #3 +urshr v0.2s, v0.2s, #3 +urshr v0.4h, v0.4h, #3 +urshr v0.4s, v0.4s, #3 +urshr v0.8b, v0.8b, #3 +urshr v0.8h, v0.8h, #3 +ursqrte v0.2s, v0.2s +ursqrte v0.4s, v0.4s +ursra d18, d10, #13 +ursra v0.16b, v0.16b, #3 +ursra v0.2d, v0.2d, #3 +ursra v0.2s, v0.2s, #3 +ursra v0.4h, v0.4h, #3 +ursra v0.4s, v0.4s, #3 +ursra v0.8b, v0.8b, #3 +ursra v0.8h, v0.8h, #3 +ushl d0, d0, d0 +ushl v0.16b, v0.16b, v0.16b +ushl v0.4s, v0.4s, v0.4s +ushl v0.8h, v0.8h, v0.8h +ushll v0.4s, v0.4h, #3 +ushll2 v0.8h, v0.16b, #3 +ushr d10, d17, #18 +ushr v0.16b, v0.16b, #3 +ushr v0.2d, v0.2d, #3 +ushr v0.2s, v0.2s, #3 +ushr v0.4h, v0.4h, #3 +ushr v0.4s, v0.4s, #3 +ushr v0.8b, v0.8b, #3 +ushr v0.8h, v0.8h, #3 +usqadd b19, b14 +usqadd d18, d22 +usqadd h20, h15 +usqadd s21, s12 +usqadd v0.16b, v0.16b +usqadd v0.2d, v0.2d +usqadd v0.2s, v0.2s +usqadd v0.4h, v0.4h +usqadd v0.4s, v0.4s +usqadd v0.8b, v0.8b +usqadd v0.8h, v0.8h +usra d20, d13, #61 +usra v0.16b, v0.16b, #3 +usra v0.2d, v0.2d, #3 +usra v0.2s, v0.2s, #3 +usra v0.4h, v0.4h, #3 +usra v0.4s, v0.4s, #3 +usra v0.8b, v0.8b, #3 +usra v0.8h, v0.8h, #3 +usubl v0.2d, v0.2s, v0.2s +usubl v0.4s, v0.4h, v0.4h +usubl v0.8h, v0.8b, v0.8b +usubl2 v0.2d, v0.4s, v0.4s +usubl2 v0.4s, v0.8h, v0.8h +usubl2 v0.8h, v0.16b, v0.16b +usubw v0.2d, v0.2d, v0.2s +usubw v0.4s, v0.4s, v0.4h +usubw v0.8h, v0.8h, v0.8b +usubw2 v0.2d, v0.2d, v0.4s +usubw2 v0.4s, v0.4s, v0.8h +usubw2 v0.8h, v0.8h, v0.16b +uzp1 v0.16b, v0.16b, v0.16b +uzp1 v0.2d, v0.2d, v0.2d +uzp1 v0.2s, v0.2s, v0.2s +uzp1 v0.4h, v0.4h, v0.4h +uzp1 v0.4s, v0.4s, v0.4s +uzp1 v0.8b, v0.8b, v0.8b +uzp1 v0.8h, v0.8h, v0.8h +uzp2 v0.16b, v0.16b, v0.16b +uzp2 v0.2d, v0.2d, v0.2d +uzp2 v0.2s, v0.2s, v0.2s +uzp2 v0.4h, v0.4h, v0.4h +uzp2 v0.4s, v0.4s, v0.4s +uzp2 v0.8b, v0.8b, v0.8b +uzp2 v0.8h, v0.8h, v0.8h +xtn v0.2s, v0.2d +xtn v0.4h, v0.4s +xtn v0.8b, v0.8h +xtn2 v0.16b, 
v0.8h +xtn2 v0.4s, v0.2d +xtn2 v0.8h, v0.4s +zip1 v0.16b, v0.16b, v0.16b +zip1 v0.2d, v0.2d, v0.2d +zip1 v0.2s, v0.2s, v0.2s +zip1 v0.4h, v0.4h, v0.4h +zip1 v0.4s, v0.4s, v0.4s +zip1 v0.8b, v0.8b, v0.8b +zip1 v0.8h, v0.8h, v0.8h +zip2 v0.16b, v0.16b, v0.16b +zip2 v0.2d, v0.2d, v0.2d +zip2 v0.2s, v0.2s, v0.2s +zip2 v0.4h, v0.4h, v0.4h +zip2 v0.4s, v0.4s, v0.4s +zip2 v0.8b, v0.8b, v0.8b +zip2 v0.8h, v0.8h, v0.8h + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 abs d29, d24 +# CHECK-NEXT: 1 3 1.00 abs v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 abs v0.2d, v0.2d +# CHECK-NEXT: 1 3 1.00 abs v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 abs v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 abs v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 abs v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 abs v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 add d17, d31, d29 +# CHECK-NEXT: 1 3 1.00 add v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 addhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 addhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 addhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 addhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 addhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 addhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 addp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 1.00 addp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 and v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 bic v0.4h, #15, lsl #8 +# CHECK-NEXT: 1 3 1.00 bic v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 bif v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 bit v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 bsl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 cls v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 cls v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 cls v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 cls v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 cls v0.8b, v0.8b +# 
CHECK-NEXT: 1 4 1.00 cls v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 clz v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 clz v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 clz v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 clz v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 clz v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 clz v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 cmeq d20, d21, #0 +# CHECK-NEXT: 1 3 1.00 cmeq d20, d21, d22 +# CHECK-NEXT: 1 3 1.00 cmeq v0.16b, v0.16b, #0 +# CHECK-NEXT: 1 3 1.00 cmeq v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 cmge d20, d21, #0 +# CHECK-NEXT: 1 3 1.00 cmge d20, d21, d22 +# CHECK-NEXT: 1 3 1.00 cmge v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 cmge v0.8b, v0.8b, #0 +# CHECK-NEXT: 1 3 1.00 cmgt d20, d21, #0 +# CHECK-NEXT: 1 3 1.00 cmgt d20, d21, d22 +# CHECK-NEXT: 1 3 1.00 cmgt v0.2s, v0.2s, #0 +# CHECK-NEXT: 1 3 1.00 cmgt v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 cmhi d20, d21, d22 +# CHECK-NEXT: 1 3 1.00 cmhi v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 cmhs d20, d21, d22 +# CHECK-NEXT: 1 3 1.00 cmhs v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 cmle d20, d21, #0 +# CHECK-NEXT: 1 3 1.00 cmle v0.2d, v0.2d, #0 +# CHECK-NEXT: 1 3 1.00 cmlt d20, d21, #0 +# CHECK-NEXT: 1 3 1.00 cmlt v0.8h, v0.8h, #0 +# CHECK-NEXT: 1 4 1.00 cmtst d20, d21, d22 +# CHECK-NEXT: 1 4 1.00 cmtst v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 cnt v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 cnt v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 dup v0.16b, w28 +# CHECK-NEXT: 1 3 1.00 dup v0.2d, x28 +# CHECK-NEXT: 1 4 1.00 dup v0.2s, w28 +# CHECK-NEXT: 1 4 1.00 dup v0.4h, w28 +# CHECK-NEXT: 1 3 1.00 dup v0.4s, w28 +# CHECK-NEXT: 1 4 1.00 dup v0.8b, w28 +# CHECK-NEXT: 1 3 1.00 dup v0.8h, w28 +# CHECK-NEXT: 1 3 1.00 eor v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 ext v0.16b, v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 4 1.00 ext v0.8b, v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 4 1.00 fabd d29, d24, d20 +# CHECK-NEXT: 1 4 1.00 fabd s29, s24, s20 +# CHECK-NEXT: 1 4 1.00 fabd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fabs v0.2d, v0.2d +# CHECK-NEXT: 1 4 
1.00 fabs v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fabs v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 fabs v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fabs v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 facge d20, d21, d22 +# CHECK-NEXT: 1 4 1.00 facge s10, s11, s12 +# CHECK-NEXT: 1 4 1.00 facge v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 facgt d20, d21, d22 +# CHECK-NEXT: 1 4 1.00 facgt s10, s11, s12 +# CHECK-NEXT: 1 4 1.00 facgt v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fadd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 faddp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 faddp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcmeq d20, d21, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmeq d20, d21, d22 +# CHECK-NEXT: 1 4 1.00 fcmeq s10, s11, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmeq s10, s11, s12 +# CHECK-NEXT: 1 4 1.00 fcmeq v0.2s, v0.2s, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmeq v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fcmge d20, d21, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmge d20, d21, d22 +# CHECK-NEXT: 1 4 1.00 fcmge s10, s11, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmge s10, s11, s12 +# CHECK-NEXT: 1 4 1.00 fcmge v0.2d, v0.2d, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmge v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcmgt d20, d21, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmgt d20, d21, d22 +# CHECK-NEXT: 1 4 1.00 fcmgt s10, s11, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmgt s10, s11, s12 +# CHECK-NEXT: 1 4 1.00 fcmgt v0.4s, v0.4s, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmgt v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcmle d20, d21, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmle s10, s11, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmle v0.2d, v0.2d, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmlt d20, d21, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmlt s10, s11, #0.0 +# CHECK-NEXT: 1 4 1.00 fcmlt v0.4s, v0.4s, #0.0 +# CHECK-NEXT: 1 4 1.00 fcvtas d21, d14 +# CHECK-NEXT: 1 4 1.00 fcvtas s12, s13 +# CHECK-NEXT: 1 4 1.00 fcvtas v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtas v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fcvtas v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 fcvtas v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtas v0.8h, v0.8h +# CHECK-NEXT: 1 
4 1.00 fcvtau d21, d14 +# CHECK-NEXT: 1 4 1.00 fcvtau s12, s13 +# CHECK-NEXT: 1 4 1.00 fcvtau v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtau v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fcvtau v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 fcvtau v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtau v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 fcvtl v0.2d, v0.2s +# CHECK-NEXT: 1 4 1.00 fcvtl v0.4s, v0.4h +# CHECK-NEXT: 1 4 1.00 fcvtl2 v0.2d, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtl2 v0.4s, v0.8h +# CHECK-NEXT: 1 4 1.00 fcvtms d21, d14 +# CHECK-NEXT: 1 4 1.00 fcvtms s22, s13 +# CHECK-NEXT: 1 4 1.00 fcvtms v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtms v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fcvtms v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 fcvtms v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtms v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 fcvtmu d21, d14 +# CHECK-NEXT: 1 4 1.00 fcvtmu s12, s13 +# CHECK-NEXT: 1 4 1.00 fcvtmu v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtmu v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fcvtmu v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 fcvtmu v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtmu v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 fcvtn v0.2s, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtn v0.4h, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtn2 v0.4s, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtn2 v0.8h, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtns d21, d14 +# CHECK-NEXT: 1 4 1.00 fcvtns s22, s13 +# CHECK-NEXT: 1 4 1.00 fcvtns v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtns v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fcvtns v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 fcvtns v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtns v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 fcvtnu d21, d14 +# CHECK-NEXT: 1 4 1.00 fcvtnu s12, s13 +# CHECK-NEXT: 1 4 1.00 fcvtnu v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtnu v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fcvtnu v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 fcvtnu v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtnu v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 fcvtps d21, d14 +# CHECK-NEXT: 1 4 1.00 fcvtps s22, s13 +# CHECK-NEXT: 1 4 1.00 fcvtps v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtps v0.2s, v0.2s 
+# CHECK-NEXT: 1 4 1.00 fcvtps v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 fcvtps v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtps v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 fcvtpu d21, d14 +# CHECK-NEXT: 1 4 1.00 fcvtpu s12, s13 +# CHECK-NEXT: 1 4 1.00 fcvtpu v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtpu v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fcvtpu v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 fcvtpu v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtpu v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 fcvtxn s22, d13 +# CHECK-NEXT: 1 4 1.00 fcvtxn v0.2s, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtxn2 v0.4s, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtzs d21, d12, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzs d21, d14 +# CHECK-NEXT: 1 4 1.00 fcvtzs s12, s13 +# CHECK-NEXT: 1 4 1.00 fcvtzs s21, s12, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzs v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtzs v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 fcvtzs v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fcvtzs v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 4 1.00 fcvtzs v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 fcvtzs v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtzs v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 fcvtzs v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 fcvtzu d21, d12, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzu d21, d14 +# CHECK-NEXT: 1 4 1.00 fcvtzu s12, s13 +# CHECK-NEXT: 1 4 1.00 fcvtzu s21, s12, #1 +# CHECK-NEXT: 1 4 1.00 fcvtzu v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fcvtzu v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 fcvtzu v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fcvtzu v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 4 1.00 fcvtzu v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 fcvtzu v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fcvtzu v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 fcvtzu v0.8h, v0.8h +# CHECK-NEXT: 1 13 10.00 fdiv v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fmax v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fmax v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fmax v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fmaxnm v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fmaxnm v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fmaxnm v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 
fmaxnmp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fmaxnmp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fmaxnmp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fmaxp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fmaxp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fmaxp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fmin v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fmin v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fmin v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fminnm v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fminnm v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fminnm v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fminnmp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fminnmp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fminnmp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fminp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fminp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fminp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fmla d0, d1, v0.d[1] +# CHECK-NEXT: 1 4 1.00 fmla s0, s1, v0.s[3] +# CHECK-NEXT: 1 4 1.00 fmla v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fmls d0, d4, v0.d[1] +# CHECK-NEXT: 1 4 1.00 fmls s3, s5, v0.s[3] +# CHECK-NEXT: 1 4 1.00 fmls v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fmov v0.2d, #-1.25000000 +# CHECK-NEXT: 1 4 1.00 fmov v0.2s, #13.00000000 +# CHECK-NEXT: 1 4 1.00 fmov v0.4s, #1.00000000 +# CHECK-NEXT: 1 4 1.00 fmul d0, d1, v0.d[1] +# CHECK-NEXT: 1 4 1.00 fmul s0, s1, v0.s[3] +# CHECK-NEXT: 1 4 1.00 fmul v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fmulx d0, d4, v0.d[1] +# CHECK-NEXT: 1 4 1.00 fmulx d23, d11, d1 +# CHECK-NEXT: 1 4 1.00 fmulx s20, s22, s15 +# CHECK-NEXT: 1 4 1.00 fmulx s3, s5, v0.s[3] +# CHECK-NEXT: 1 4 1.00 fmulx v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fmulx v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fmulx v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fneg v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 fneg v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 fneg v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 fneg v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 fneg v0.8h, v0.8h +# 
CHECK-NEXT: 1 4 1.00 frecpe d13, d13 +# CHECK-NEXT: 1 4 1.00 frecpe s19, s14 +# CHECK-NEXT: 1 4 1.00 frecpe v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 frecpe v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 frecpe v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 frecpe v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 frecpe v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 frecps v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 frecps d22, d30, d21 +# CHECK-NEXT: 1 4 1.00 frecps s21, s16, s13 +# CHECK-NEXT: 1 4 1.00 frecpx d16, d19 +# CHECK-NEXT: 1 4 1.00 frecpx s18, s10 +# CHECK-NEXT: 1 4 1.00 frinta v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 frinta v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 frinta v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 frinta v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 frinta v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 frinti v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 frinti v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 frinti v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 frinti v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 frinti v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 frintm v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 frintm v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 frintm v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 frintm v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 frintm v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 frintn v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 frintn v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 frintn v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 frintn v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 frintn v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 frintp v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 frintp v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 frintp v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 frintp v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 frintp v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 frintx v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 frintx v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 frintx v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 frintx v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 frintx v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 frintz v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 frintz v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 frintz v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 
frintz v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 frintz v0.8h, v0.8h +# CHECK-NEXT: 1 22 19.00 frsqrte d21, d12 +# CHECK-NEXT: 1 12 9.00 frsqrte s22, s13 +# CHECK-NEXT: 1 22 19.00 frsqrte v0.2d, v0.2d +# CHECK-NEXT: 1 12 9.00 frsqrte v0.2s, v0.2s +# CHECK-NEXT: 1 8 5.00 frsqrte v0.4h, v0.4h +# CHECK-NEXT: 1 12 9.00 frsqrte v0.4s, v0.4s +# CHECK-NEXT: 1 8 5.00 frsqrte v0.8h, v0.8h +# CHECK-NEXT: 1 22 19.00 frsqrts d8, d22, d18 +# CHECK-NEXT: 1 12 9.00 frsqrts s21, s5, s12 +# CHECK-NEXT: 1 22 19.00 frsqrts v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 22 19.00 fsqrt v0.2d, v0.2d +# CHECK-NEXT: 1 12 9.00 fsqrt v0.2s, v0.2s +# CHECK-NEXT: 1 8 5.00 fsqrt v0.4h, v0.4h +# CHECK-NEXT: 1 12 9.00 fsqrt v0.4s, v0.4s +# CHECK-NEXT: 1 8 5.00 fsqrt v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 fsub v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 * ld1 { v0.16b }, [x0] +# CHECK-NEXT: 2 5 3.00 * ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: 1 6 4.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +# CHECK-NEXT: 2 4 2.00 * ld1 { v0.4s, v1.4s }, [sp], #32 +# CHECK-NEXT: 1 5 3.00 * ld1 { v0.4s, v1.4s, v2.4s }, [sp] +# CHECK-NEXT: 2 4 2.00 * ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +# CHECK-NEXT: 2 3 1.00 * ld1 { v0.8h }, [x15], x2 +# CHECK-NEXT: 1 4 2.00 * ld1 { v0.8h, v1.8h }, [x15] +# CHECK-NEXT: 1 3 1.00 * ld1 { v0.b }[9], [x0] +# CHECK-NEXT: 2 3 1.00 * ld1 { v0.b }[9], [x0], #1 +# CHECK-NEXT: 1 3 1.00 * ld1r { v0.16b }, [x0] +# CHECK-NEXT: 2 3 1.00 * ld1r { v0.16b }, [x0], #1 +# CHECK-NEXT: 1 3 1.00 * ld1r { v0.8h }, [x15] +# CHECK-NEXT: 2 3 1.00 * ld1r { v0.8h }, [x15], #2 +# CHECK-NEXT: 2 4 2.00 * ld2 { v0.16b, v1.16b }, [x0], x1 +# CHECK-NEXT: 1 4 1.00 * ld2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: 1 4 6.00 * ld2 { v0.h, v1.h }[7], [x15] +# CHECK-NEXT: 2 4 6.00 * ld2 { v0.h, v1.h }[7], [x15], #4 +# CHECK-NEXT: 1 3 2.00 * ld2r { v0.2d, v1.2d }, [x0] +# CHECK-NEXT: 2 3 2.00 * ld2r { v0.2d, v1.2d }, [x0], #16 +# CHECK-NEXT: 1 3 2.00 * ld2r { v0.4s, v1.4s }, [sp] +# CHECK-NEXT: 2 3 2.00 * ld2r { v0.4s, 
v1.4s }, [sp], #8 +# CHECK-NEXT: 1 5 6.00 * ld3 { v0.4h, v1.4h, v2.4h }, [x15] +# CHECK-NEXT: 2 5 6.00 * ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2 +# CHECK-NEXT: 1 5 7.00 * ld3 { v0.s, v1.s, v2.s }[3], [sp] +# CHECK-NEXT: 2 5 7.00 * ld3 { v0.s, v1.s, v2.s }[3], [sp], x3 +# CHECK-NEXT: 1 4 3.00 * ld3r { v0.4h, v1.4h, v2.4h }, [x15] +# CHECK-NEXT: 2 4 3.00 * ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6 +# CHECK-NEXT: 1 4 3.00 * ld3r { v0.8b, v1.8b, v2.8b }, [x0] +# CHECK-NEXT: 2 4 3.00 * ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 +# CHECK-NEXT: 1 5 7.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# CHECK-NEXT: 2 5 8.00 * ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +# CHECK-NEXT: 1 6 7.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] +# CHECK-NEXT: 2 6 7.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 +# CHECK-NEXT: 2 6 7.00 * ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 +# CHECK-NEXT: 1 4 4.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp] +# CHECK-NEXT: 2 4 4.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7 +# CHECK-NEXT: 1 4 4.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# CHECK-NEXT: 2 4 4.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30 +# CHECK-NEXT: 1 4 1.00 mla v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 mls v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 mov b0, v0.b[15] +# CHECK-NEXT: 1 4 1.00 mov d6, v0.d[1] +# CHECK-NEXT: 1 4 1.00 mov h2, v0.h[5] +# CHECK-NEXT: 1 4 1.00 mov s17, v0.s[2] +# CHECK-NEXT: 1 3 1.00 mov v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 mov v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 movi d15, #0xff00ff00ff00ff +# CHECK-NEXT: 1 4 1.00 movi v0.16b, #31 +# CHECK-NEXT: 1 4 1.00 movi v0.2d, #0xff0000ff0000ffff +# CHECK-NEXT: 1 4 1.00 movi v0.2s, #8, msl #8 +# CHECK-NEXT: 1 4 1.00 movi v0.4s, #255, lsl #24 +# CHECK-NEXT: 1 4 1.00 movi v0.8b, #255 +# CHECK-NEXT: 1 4 1.00 mul v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 mvni v0.2s, #0 +# CHECK-NEXT: 1 3 1.00 mvni v0.4s, #16, msl #16 +# CHECK-NEXT: 1 3 1.00 neg d29, d24 +# CHECK-NEXT: 1 3 1.00 neg v0.16b, v0.16b +# 
CHECK-NEXT: 1 3 1.00 neg v0.2d, v0.2d +# CHECK-NEXT: 1 3 1.00 neg v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 neg v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 neg v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 neg v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 neg v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 mvn v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 mvn v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 orn v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 mov v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 orr v0.8h, #31 +# CHECK-NEXT: 1 4 1.00 pmul v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 pmul v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 pmull v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 pmull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 8 1.00 raddhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: 1 8 1.00 raddhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: 1 8 1.00 raddhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: 1 8 1.00 raddhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: 1 8 1.00 raddhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: 1 8 1.00 raddhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 rbit v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 rbit v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 rev16 v21.8b, v1.8b +# CHECK-NEXT: 1 4 1.00 rev16 v30.16b, v31.16b +# CHECK-NEXT: 1 4 1.00 rev32 v0.4h, v9.4h +# CHECK-NEXT: 1 4 1.00 rev32 v21.8b, v1.8b +# CHECK-NEXT: 1 4 1.00 rev32 v30.16b, v31.16b +# CHECK-NEXT: 1 4 1.00 rev32 v4.8h, v7.8h +# CHECK-NEXT: 1 4 1.00 rev64 v0.16b, v31.16b +# CHECK-NEXT: 1 4 1.00 rev64 v1.8b, v9.8b +# CHECK-NEXT: 1 4 1.00 rev64 v13.4h, v21.4h +# CHECK-NEXT: 1 4 1.00 rev64 v2.8h, v4.8h +# CHECK-NEXT: 1 4 1.00 rev64 v4.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 rev64 v6.4s, v8.4s +# CHECK-NEXT: 1 4 1.00 rshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 rshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 rshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 rshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 rshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 rshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 8 1.00 rsubhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: 1 8 1.00 rsubhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: 1 8 
1.00 rsubhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: 1 8 1.00 rsubhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: 1 8 1.00 rsubhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: 1 8 1.00 rsubhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: 1 6 1.00 saba v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 6 1.00 sabal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 6 1.00 sabal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 6 1.00 sabal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 6 1.00 sabal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 6 1.00 sabal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 6 1.00 sabal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 sabd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 sabdl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 sabdl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 sabdl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 sabdl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 sabdl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 sabdl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 7 2.00 sadalp v0.1d, v0.2s +# CHECK-NEXT: 1 7 2.00 sadalp v0.2d, v0.4s +# CHECK-NEXT: 1 7 2.00 sadalp v0.2s, v0.4h +# CHECK-NEXT: 1 7 2.00 sadalp v0.4h, v0.8b +# CHECK-NEXT: 1 7 2.00 sadalp v0.4s, v0.8h +# CHECK-NEXT: 1 7 2.00 sadalp v0.8h, v0.16b +# CHECK-NEXT: 1 3 1.00 saddl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 saddl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 saddl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 saddl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 saddl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 saddl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 saddlp v0.1d, v0.2s +# CHECK-NEXT: 1 3 1.00 saddlp v0.2d, v0.4s +# CHECK-NEXT: 1 3 1.00 saddlp v0.2s, v0.4h +# CHECK-NEXT: 1 3 1.00 saddlp v0.4h, v0.8b +# CHECK-NEXT: 1 3 1.00 saddlp v0.4s, v0.8h +# CHECK-NEXT: 1 3 1.00 saddlp v0.8h, v0.16b +# CHECK-NEXT: 1 3 1.00 saddw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: 1 3 1.00 saddw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: 1 3 1.00 saddw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: 1 3 1.00 saddw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: 1 3 1.00 saddw2 v0.4s, v0.4s, v0.8h +# 
CHECK-NEXT: 1 3 1.00 saddw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 4 1.00 scvtf d21, d12 +# CHECK-NEXT: 1 4 1.00 scvtf d21, d12, #64 +# CHECK-NEXT: 1 4 1.00 scvtf s22, s13 +# CHECK-NEXT: 1 4 1.00 scvtf s22, s13, #32 +# CHECK-NEXT: 1 4 1.00 scvtf v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 scvtf v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 scvtf v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 scvtf v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 4 1.00 scvtf v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 scvtf v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 scvtf v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 scvtf v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 shadd v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 shl d7, d10, #12 +# CHECK-NEXT: 1 3 1.00 shl v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 1.00 shl v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 1.00 shl v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 1.00 shl v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 1.00 shll v0.2d, v0.2s, #32 +# CHECK-NEXT: 1 3 1.00 shll v0.4s, v0.4h, #16 +# CHECK-NEXT: 1 3 1.00 shll v0.8h, v0.8b, #8 +# CHECK-NEXT: 1 3 1.00 shll v0.2d, v0.2s, #32 +# CHECK-NEXT: 1 3 1.00 shll v0.4s, v0.4h, #16 +# CHECK-NEXT: 1 3 1.00 shll v0.8h, v0.8b, #8 +# CHECK-NEXT: 1 3 1.00 shll2 v0.2d, v0.4s, #32 +# CHECK-NEXT: 1 3 1.00 shll2 v0.4s, v0.8h, #16 +# CHECK-NEXT: 1 3 1.00 shll2 v0.8h, v0.16b, #8 +# CHECK-NEXT: 1 3 1.00 shll2 v0.2d, v0.4s, #32 +# CHECK-NEXT: 1 3 1.00 shll2 v0.4s, v0.8h, #16 +# CHECK-NEXT: 1 3 1.00 shll2 v0.8h, v0.16b, #8 +# CHECK-NEXT: 1 3 1.00 shrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 3 1.00 shrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 3 1.00 shrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 shrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 shrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 3 1.00 shrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 3 1.00 shsub v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 shsub v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 sli d10, d14, #12 +# CHECK-NEXT: 1 4 1.00 sli v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 4 1.00 sli v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 sli v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 4 1.00 sli 
v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 4 1.00 sli v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 sli v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 4 1.00 sli v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 smax v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 smax v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 smax v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 smaxp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 smaxp v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 smaxp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 smin v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 smin v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 smin v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 sminp v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 sminp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 sminp v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 smlal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 smlal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 smlal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 smlal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 smlal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 smlal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 smlsl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 smlsl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 smlsl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 smlsl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 smlsl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 smlsl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 smull v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 smull v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 smull v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 smull2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 smull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 smull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 sqabs b19, b14 +# CHECK-NEXT: 1 4 1.00 sqabs d18, d12 +# CHECK-NEXT: 1 4 1.00 sqabs h21, h15 +# CHECK-NEXT: 1 4 1.00 sqabs s20, s12 +# CHECK-NEXT: 1 4 1.00 sqabs v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 sqabs v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 sqabs v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 sqabs v0.4h, v0.4h +# 
CHECK-NEXT: 1 4 1.00 sqabs v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 sqabs v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 sqabs v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 sqadd b20, b11, b15 +# CHECK-NEXT: 1 4 1.00 sqadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 sqadd v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 sqdmlal d19, s24, s12 +# CHECK-NEXT: 1 4 1.00 sqdmlal d8, s9, v0.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmlal s17, h27, h12 +# CHECK-NEXT: 1 4 1.00 sqdmlal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 sqdmlal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 sqdmlsl d12, s23, s13 +# CHECK-NEXT: 1 4 1.00 sqdmlsl d8, s9, v0.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s14, h12, h25 +# CHECK-NEXT: 1 4 1.00 sqdmlsl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 sqdmlsl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 sqdmulh h10, h11, h12 +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmulh s15, s14, v0.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmulh s20, s21, s2 +# CHECK-NEXT: 1 4 1.00 sqdmulh v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 sqdmulh v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 sqdmull d1, s1, v0.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmull d15, s22, s12 +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmull s12, h22, h12 +# CHECK-NEXT: 1 4 1.00 sqdmull v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 sqdmull v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 sqdmull2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 sqdmull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 sqneg b19, b14 +# CHECK-NEXT: 1 4 1.00 sqneg d18, d12 +# CHECK-NEXT: 1 4 1.00 sqneg h21, h15 +# CHECK-NEXT: 1 4 1.00 sqneg s20, s12 +# CHECK-NEXT: 1 4 1.00 sqneg v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 sqneg v0.2d, 
v0.2d +# CHECK-NEXT: 1 4 1.00 sqneg v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 sqneg v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 sqneg v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 sqneg v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 sqneg v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 sqrdmulh h10, h11, h12 +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqrdmulh s15, s14, v0.s[1] +# CHECK-NEXT: 1 4 1.00 sqrdmulh s20, s21, s2 +# CHECK-NEXT: 1 4 1.00 sqrdmulh v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 sqrdmulh v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 sqrshl d31, d31, d31 +# CHECK-NEXT: 1 4 1.00 sqrshl h3, h4, h15 +# CHECK-NEXT: 1 4 1.00 sqrshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 sqrshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 sqrshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 sqrshrn b10, h13, #2 +# CHECK-NEXT: 1 4 1.00 sqrshrn h15, s10, #6 +# CHECK-NEXT: 1 4 1.00 sqrshrn s15, d12, #9 +# CHECK-NEXT: 1 4 1.00 sqrshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrun b17, h10, #6 +# CHECK-NEXT: 1 4 1.00 sqrshrun h10, s13, #15 +# CHECK-NEXT: 1 4 1.00 sqrshrun s22, d16, #31 +# CHECK-NEXT: 1 4 1.00 sqrshrun v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrun v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrun v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshl b11, b19, #7 +# CHECK-NEXT: 1 4 1.00 sqshl d15, d16, #51 +# CHECK-NEXT: 1 4 1.00 sqshl d31, d31, d31 +# CHECK-NEXT: 1 4 1.00 sqshl h13, h18, #11 +# CHECK-NEXT: 1 4 1.00 sqshl h3, h4, h15 +# CHECK-NEXT: 1 4 1.00 sqshl s14, s17, #22 +# CHECK-NEXT: 1 4 1.00 sqshl v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.2d, v0.2d, #3 +# 
CHECK-NEXT: 1 4 1.00 sqshl v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 sqshl v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 sqshl v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 sqshl v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu b15, b18, #6 +# CHECK-NEXT: 1 4 1.00 sqshlu d11, d13, #32 +# CHECK-NEXT: 1 4 1.00 sqshlu h19, h17, #6 +# CHECK-NEXT: 1 4 1.00 sqshlu s16, s14, #25 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshrn b10, h15, #5 +# CHECK-NEXT: 1 4 1.00 sqshrn h17, s10, #4 +# CHECK-NEXT: 1 4 1.00 sqshrn s18, d10, #31 +# CHECK-NEXT: 1 4 1.00 sqshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun b15, h10, #7 +# CHECK-NEXT: 1 4 1.00 sqshrun h20, s14, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun s10, d15, #15 +# CHECK-NEXT: 1 4 1.00 sqshrun v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqsub s20, s10, s7 +# CHECK-NEXT: 1 4 1.00 sqsub v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 sqsub v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 sqsub v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 sqxtn b18, h18 +# 
CHECK-NEXT: 1 4 1.00 sqxtn h20, s17 +# CHECK-NEXT: 1 4 1.00 sqxtn s19, d14 +# CHECK-NEXT: 1 4 1.00 sqxtn v0.2s, v0.2d +# CHECK-NEXT: 1 4 1.00 sqxtn v0.4h, v0.4s +# CHECK-NEXT: 1 4 1.00 sqxtn v0.8b, v0.8h +# CHECK-NEXT: 1 4 1.00 sqxtn2 v0.16b, v0.8h +# CHECK-NEXT: 1 4 1.00 sqxtn2 v0.4s, v0.2d +# CHECK-NEXT: 1 4 1.00 sqxtn2 v0.8h, v0.4s +# CHECK-NEXT: 1 4 1.00 sqxtun b19, h14 +# CHECK-NEXT: 1 4 1.00 sqxtun h21, s15 +# CHECK-NEXT: 1 4 1.00 sqxtun s20, d12 +# CHECK-NEXT: 1 4 1.00 sqxtun v0.2s, v0.2d +# CHECK-NEXT: 1 4 1.00 sqxtun v0.4h, v0.4s +# CHECK-NEXT: 1 4 1.00 sqxtun v0.8b, v0.8h +# CHECK-NEXT: 1 4 1.00 sqxtun2 v0.16b, v0.8h +# CHECK-NEXT: 1 4 1.00 sqxtun2 v0.4s, v0.2d +# CHECK-NEXT: 1 4 1.00 sqxtun2 v0.8h, v0.4s +# CHECK-NEXT: 1 3 1.00 srhadd v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 srhadd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 srhadd v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 sri d10, d12, #14 +# CHECK-NEXT: 1 4 1.00 sri v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 4 1.00 sri v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 sri v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 4 1.00 sri v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 4 1.00 sri v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 sri v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 4 1.00 sri v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 srshl d16, d16, d16 +# CHECK-NEXT: 1 3 1.00 srshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 srshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 srshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 srshr d19, d18, #7 +# CHECK-NEXT: 1 3 1.00 srshr v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 7 2.00 srsra d15, d11, #19 +# CHECK-NEXT: 1 7 2.00 srsra v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 7 2.00 srsra v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 7 2.00 srsra v0.2s, v0.2s, #3 +# 
CHECK-NEXT: 1 7 2.00 srsra v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 7 2.00 srsra v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 7 2.00 srsra v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 7 2.00 srsra v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 sshl d31, d31, d31 +# CHECK-NEXT: 1 3 1.00 sshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 1.00 sshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 sshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 sshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 sshll v0.2d, v0.2s, #3 +# CHECK-NEXT: 1 3 1.00 sshll2 v0.4s, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 sshr d15, d16, #12 +# CHECK-NEXT: 1 3 1.00 sshr v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 1.00 sshr v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 1.00 sshr v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 1.00 sshr v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 1.00 sshr v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 1.00 sshr v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 1.00 sshr v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 ssra d18, d12, #21 +# CHECK-NEXT: 1 3 1.00 ssra v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 ssubl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 ssubl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 ssubl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 ssubl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 ssubl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 ssubl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 ssubw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: 1 3 1.00 ssubw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: 1 3 1.00 ssubw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: 1 3 1.00 ssubw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: 1 3 1.00 ssubw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: 1 3 1.00 ssubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 5 2.00 * st1 { v0.16b }, [x0] +# CHECK-NEXT: 2 5 2.00 * st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: 1 5 4.00 * 
st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +# CHECK-NEXT: 2 5 2.00 * st1 { v0.4s, v1.4s }, [sp], #32 +# CHECK-NEXT: 1 5 2.00 * st1 { v0.4s, v1.4s, v2.4s }, [sp] +# CHECK-NEXT: 2 5 4.00 * st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +# CHECK-NEXT: 2 5 2.00 * st1 { v0.8h }, [x15], x2 +# CHECK-NEXT: 1 5 2.00 * st1 { v0.8h, v1.8h }, [x15] +# CHECK-NEXT: 1 4 1.00 * st1 { v0.d }[1], [x0] +# CHECK-NEXT: 2 4 1.00 * st1 { v0.d }[1], [x0], #8 +# CHECK-NEXT: 2 5 4.00 * st2 { v0.16b, v1.16b }, [x0], x1 +# CHECK-NEXT: 1 5 2.00 * st2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: 1 5 2.00 * st2 { v0.s, v1.s }[3], [sp] +# CHECK-NEXT: 2 5 2.00 * st2 { v0.s, v1.s }[3], [sp], #8 +# CHECK-NEXT: 1 5 4.00 * st3 { v0.4h, v1.4h, v2.4h }, [x15] +# CHECK-NEXT: 2 5 4.00 * st3 { v0.8h, v1.8h, v2.8h }, [x15], x2 +# CHECK-NEXT: 1 5 2.00 * st3 { v0.h, v1.h, v2.h }[7], [x15] +# CHECK-NEXT: 2 5 2.00 * st3 { v0.h, v1.h, v2.h }[7], [x15], #6 +# CHECK-NEXT: 1 5 4.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# CHECK-NEXT: 2 5 4.00 * st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +# CHECK-NEXT: 1 5 2.00 * st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] +# CHECK-NEXT: 2 5 2.00 * st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 +# CHECK-NEXT: 1 3 1.00 sub d15, d5, d16 +# CHECK-NEXT: 1 3 1.00 sub v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 suqadd b19, b14 +# CHECK-NEXT: 1 4 1.00 suqadd d18, d22 +# CHECK-NEXT: 1 4 1.00 suqadd h20, h15 +# CHECK-NEXT: 1 4 1.00 suqadd s21, s12 +# CHECK-NEXT: 1 4 1.00 suqadd v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 suqadd v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 suqadd v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 suqadd v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 suqadd v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 suqadd v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 suqadd v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b }, v0.16b +# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b }, v0.16b +# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b, 
v3.16b }, v0.16b +# CHECK-NEXT: 1 4 1.00 tbl v0.8b, { v0.16b }, v0.8b +# CHECK-NEXT: 1 4 1.00 tbl v0.8b, { v0.16b, v1.16b }, v0.8b +# CHECK-NEXT: 1 4 1.00 tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +# CHECK-NEXT: 1 4 1.00 tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b }, v0.16b +# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b }, v0.16b +# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +# CHECK-NEXT: 1 4 1.00 tbx v0.8b, { v0.16b }, v0.8b +# CHECK-NEXT: 1 4 1.00 tbx v0.8b, { v0.16b, v1.16b }, v0.8b +# CHECK-NEXT: 1 4 1.00 tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +# CHECK-NEXT: 1 4 1.00 tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +# CHECK-NEXT: 1 4 1.00 trn1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 trn1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 trn1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 trn1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 trn1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 trn1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 trn1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 trn2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 trn2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 trn2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 trn2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 trn2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 trn2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 trn2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 6 1.00 uaba v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 6 1.00 uabal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 6 1.00 uabal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 6 1.00 uabal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 6 1.00 uabal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 6 1.00 uabal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 6 1.00 uabal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 uabd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 uabdl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 uabdl v0.4s, v0.4h, v0.4h 
+# CHECK-NEXT: 1 3 1.00 uabdl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 uabdl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 uabdl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 uabdl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 7 2.00 uadalp v0.1d, v0.2s +# CHECK-NEXT: 1 7 2.00 uadalp v0.2d, v0.4s +# CHECK-NEXT: 1 7 2.00 uadalp v0.2s, v0.4h +# CHECK-NEXT: 1 7 2.00 uadalp v0.4h, v0.8b +# CHECK-NEXT: 1 7 2.00 uadalp v0.4s, v0.8h +# CHECK-NEXT: 1 7 2.00 uadalp v0.8h, v0.16b +# CHECK-NEXT: 1 3 1.00 uaddl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 uaddl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 uaddl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 uaddl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 uaddl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 uaddl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 uaddlp v0.1d, v0.2s +# CHECK-NEXT: 1 3 1.00 uaddlp v0.2d, v0.4s +# CHECK-NEXT: 1 3 1.00 uaddlp v0.2s, v0.4h +# CHECK-NEXT: 1 3 1.00 uaddlp v0.4h, v0.8b +# CHECK-NEXT: 1 3 1.00 uaddlp v0.4s, v0.8h +# CHECK-NEXT: 1 3 1.00 uaddlp v0.8h, v0.16b +# CHECK-NEXT: 1 3 1.00 uaddw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: 1 3 1.00 uaddw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: 1 3 1.00 uaddw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: 1 3 1.00 uaddw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: 1 3 1.00 uaddw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: 1 3 1.00 uaddw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 4 1.00 ucvtf d21, d14 +# CHECK-NEXT: 1 4 1.00 ucvtf d21, d14, #64 +# CHECK-NEXT: 1 4 1.00 ucvtf s22, s13 +# CHECK-NEXT: 1 4 1.00 ucvtf s22, s13, #32 +# CHECK-NEXT: 1 4 1.00 ucvtf v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 ucvtf v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 ucvtf v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 ucvtf v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 4 1.00 ucvtf v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 ucvtf v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 ucvtf v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 ucvtf v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 uhadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 uhadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 uhsub 
v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 umax v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 umax v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 umax v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 umaxp v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 umaxp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 umaxp v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 umin v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 umin v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 umin v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 uminp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 uminp v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 uminp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 umlal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 umlal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 umlal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 umlal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 umlal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 umlal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 umlsl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 umlsl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 umlsl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 umlsl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 umlsl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 umlsl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 umull v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 umull v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 umull v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 umull2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 umull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 umull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 uqadd h0, h1, h5 +# CHECK-NEXT: 1 4 1.00 uqadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 uqrshl b11, b20, b30 +# CHECK-NEXT: 1 4 1.00 uqrshl s23, s20, s16 +# CHECK-NEXT: 1 4 1.00 uqrshl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 uqrshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 uqrshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 uqrshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 uqrshrn b10, h12, #5 +# CHECK-NEXT: 1 4 1.00 uqrshrn 
h12, s10, #14 +# CHECK-NEXT: 1 4 1.00 uqrshrn s10, d10, #25 +# CHECK-NEXT: 1 4 1.00 uqrshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 uqrshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 uqrshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 uqshl b11, b20, b30 +# CHECK-NEXT: 1 4 1.00 uqshl b18, b15, #6 +# CHECK-NEXT: 1 4 1.00 uqshl d15, d12, #19 +# CHECK-NEXT: 1 4 1.00 uqshl h11, h18, #7 +# CHECK-NEXT: 1 4 1.00 uqshl s14, s19, #18 +# CHECK-NEXT: 1 4 1.00 uqshl s23, s20, s16 +# CHECK-NEXT: 1 4 1.00 uqshl v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 uqshl v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 uqshl v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 uqshl v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 uqshrn b12, h10, #7 +# CHECK-NEXT: 1 4 1.00 uqshrn h10, s14, #5 +# CHECK-NEXT: 1 4 1.00 uqshrn s10, d12, #13 +# CHECK-NEXT: 1 4 1.00 uqshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 uqshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 uqshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 4 1.00 uqsub d16, d16, d16 +# CHECK-NEXT: 1 4 1.00 uqsub v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 uqxtn b18, h18 +# CHECK-NEXT: 1 4 1.00 uqxtn h20, s17 +# CHECK-NEXT: 1 4 1.00 uqxtn s19, d14 +# CHECK-NEXT: 1 4 1.00 uqxtn v0.2s, v0.2d +# CHECK-NEXT: 1 4 1.00 uqxtn v0.4h, v0.4s +# CHECK-NEXT: 1 4 1.00 uqxtn v0.8b, v0.8h +# CHECK-NEXT: 1 4 1.00 uqxtn2 v0.16b, v0.8h +# CHECK-NEXT: 1 4 1.00 uqxtn2 
v0.4s, v0.2d +# CHECK-NEXT: 1 4 1.00 uqxtn2 v0.8h, v0.4s +# CHECK-NEXT: 1 4 1.00 urecpe v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 urecpe v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 urhadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 urhadd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 urhadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 urshl d8, d7, d4 +# CHECK-NEXT: 1 3 1.00 urshl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 urshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 1.00 urshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 urshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 urshr d20, d23, #31 +# CHECK-NEXT: 1 3 1.00 urshr v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 12 9.00 ursqrte v0.2s, v0.2s +# CHECK-NEXT: 1 12 9.00 ursqrte v0.4s, v0.4s +# CHECK-NEXT: 1 7 2.00 ursra d18, d10, #13 +# CHECK-NEXT: 1 7 2.00 ursra v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 7 2.00 ursra v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 7 2.00 ursra v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 7 2.00 ursra v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 7 2.00 ursra v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 7 2.00 ursra v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 7 2.00 ursra v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 ushl d0, d0, d0 +# CHECK-NEXT: 1 3 1.00 ushl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 ushl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 ushl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 ushll v0.4s, v0.4h, #3 +# CHECK-NEXT: 1 3 1.00 ushll2 v0.8h, v0.16b, #3 +# CHECK-NEXT: 1 3 1.00 ushr d10, d17, #18 +# CHECK-NEXT: 1 3 1.00 ushr v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 1.00 ushr v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 1.00 ushr v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 1.00 ushr v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 1.00 ushr v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 1.00 ushr v0.8b, v0.8b, #3 +# 
CHECK-NEXT: 1 3 1.00 ushr v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 4 1.00 usqadd b19, b14 +# CHECK-NEXT: 1 4 1.00 usqadd d18, d22 +# CHECK-NEXT: 1 4 1.00 usqadd h20, h15 +# CHECK-NEXT: 1 4 1.00 usqadd s21, s12 +# CHECK-NEXT: 1 4 1.00 usqadd v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 usqadd v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 usqadd v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 usqadd v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 usqadd v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 usqadd v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 usqadd v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 usra d20, d13, #61 +# CHECK-NEXT: 1 3 1.00 usra v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 usubl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 usubl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 usubl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 usubl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 usubl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 usubl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 usubw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: 1 3 1.00 usubw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: 1 3 1.00 usubw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: 1 3 1.00 usubw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: 1 3 1.00 usubw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: 1 3 1.00 usubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 4 1.00 uzp1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 uzp1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 uzp1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 uzp1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 uzp1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 uzp1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 uzp1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 uzp2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 uzp2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 uzp2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 
1 4 1.00 uzp2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 uzp2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 uzp2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 uzp2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 xtn v0.2s, v0.2d +# CHECK-NEXT: 1 4 1.00 xtn v0.4h, v0.4s +# CHECK-NEXT: 1 4 1.00 xtn v0.8b, v0.8h +# CHECK-NEXT: 1 4 1.00 xtn2 v0.16b, v0.8h +# CHECK-NEXT: 1 4 1.00 xtn2 v0.4s, v0.2d +# CHECK-NEXT: 1 4 1.00 xtn2 v0.8h, v0.4s +# CHECK-NEXT: 1 4 1.00 zip1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 zip1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 zip1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 zip1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 zip1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 zip1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 zip1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 4 1.00 zip2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 1.00 zip2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 4 1.00 zip2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 zip2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 4 1.00 zip2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 4 1.00 zip2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 1.00 zip2 v0.8h, v0.8h, v0.8h + +# CHECK: Resources: +# CHECK-NEXT: [0] - CortexA320UnitALU +# CHECK-NEXT: [1] - CortexA320UnitB +# CHECK-NEXT: [2] - CortexA320UnitDiv +# CHECK-NEXT: [3] - CortexA320UnitLdSt +# CHECK-NEXT: [4] - CortexA320UnitMAC +# CHECK-NEXT: [5] - CortexA320UnitPAC +# CHECK-NEXT: [6] - CortexA320UnitVALU +# CHECK-NEXT: [7] - CortexA320UnitVMAC +# CHECK-NEXT: [8] - CortexA320UnitVMC + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] +# CHECK-NEXT: - - - 193.00 - - 1002.00 6.00 197.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] Instructions: +# CHECK-NEXT: - - - - - - 1.00 - - abs d29, d24 +# CHECK-NEXT: - - - - - - 1.00 - - abs v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - abs v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - abs v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 
- - abs v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - abs v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - abs v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - abs v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - add d17, d31, d29 +# CHECK-NEXT: - - - - - - 1.00 - - add v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - addhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - addhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - addhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - addhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - addhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - addhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - addp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - addp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - and v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - bic v0.4h, #15, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - bic v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - bif v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - bit v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - bsl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - cls v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - cls v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - cls v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - cls v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - cls v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - cls v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - clz v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - clz v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - clz v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - clz v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - clz v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - clz v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - cmeq d20, d21, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmeq d20, d21, d22 +# CHECK-NEXT: - - - - - - 1.00 - - cmeq v0.16b, v0.16b, #0 +# CHECK-NEXT: - - - - - - 1.00 - 
- cmeq v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - cmge d20, d21, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmge d20, d21, d22 +# CHECK-NEXT: - - - - - - 1.00 - - cmge v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - cmge v0.8b, v0.8b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmgt d20, d21, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmgt d20, d21, d22 +# CHECK-NEXT: - - - - - - 1.00 - - cmgt v0.2s, v0.2s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmgt v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - cmhi d20, d21, d22 +# CHECK-NEXT: - - - - - - 1.00 - - cmhi v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - cmhs d20, d21, d22 +# CHECK-NEXT: - - - - - - 1.00 - - cmhs v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - cmle d20, d21, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmle v0.2d, v0.2d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmlt d20, d21, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmlt v0.8h, v0.8h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmtst d20, d21, d22 +# CHECK-NEXT: - - - - - - 1.00 - - cmtst v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - cnt v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - cnt v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - dup v0.16b, w28 +# CHECK-NEXT: - - - - - - 1.00 - - dup v0.2d, x28 +# CHECK-NEXT: - - - - - - 1.00 - - dup v0.2s, w28 +# CHECK-NEXT: - - - - - - 1.00 - - dup v0.4h, w28 +# CHECK-NEXT: - - - - - - 1.00 - - dup v0.4s, w28 +# CHECK-NEXT: - - - - - - 1.00 - - dup v0.8b, w28 +# CHECK-NEXT: - - - - - - 1.00 - - dup v0.8h, w28 +# CHECK-NEXT: - - - - - - 1.00 - - eor v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - ext v0.16b, v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ext v0.8b, v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - fabd d29, d24, d20 +# CHECK-NEXT: - - - - - - 1.00 - - fabd s29, s24, s20 +# CHECK-NEXT: - - - - - - 1.00 - - fabd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fabs v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fabs v0.2s, v0.2s 
+# CHECK-NEXT: - - - - - - 1.00 - - fabs v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - fabs v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fabs v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - facge d20, d21, d22 +# CHECK-NEXT: - - - - - - 1.00 - - facge s10, s11, s12 +# CHECK-NEXT: - - - - - - 1.00 - - facge v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - facgt d20, d21, d22 +# CHECK-NEXT: - - - - - - 1.00 - - facgt s10, s11, s12 +# CHECK-NEXT: - - - - - - 1.00 - - facgt v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fadd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - faddp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - faddp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcmeq d20, d21, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmeq d20, d21, d22 +# CHECK-NEXT: - - - - - - 1.00 - - fcmeq s10, s11, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmeq s10, s11, s12 +# CHECK-NEXT: - - - - - - 1.00 - - fcmeq v0.2s, v0.2s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmeq v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fcmge d20, d21, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmge d20, d21, d22 +# CHECK-NEXT: - - - - - - 1.00 - - fcmge s10, s11, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmge s10, s11, s12 +# CHECK-NEXT: - - - - - - 1.00 - - fcmge v0.2d, v0.2d, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmge v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt d20, d21, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt d20, d21, d22 +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt s10, s11, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt s10, s11, s12 +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt v0.4s, v0.4s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcmle d20, d21, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmle s10, s11, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmle v0.2d, v0.2d, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmlt d20, d21, #0.0 +# CHECK-NEXT: - - - - - 
- 1.00 - - fcmlt s10, s11, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmlt v0.4s, v0.4s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas d21, d14 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas s12, s13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtas v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau d21, d14 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau s12, s13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtau v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtl v0.2d, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtl v0.4s, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtl2 v0.2d, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtl2 v0.4s, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtms d21, d14 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtms s22, s13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtms v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtms v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtms v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtms v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtms v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu d21, d14 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu s12, s13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtmu v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtn v0.2s, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtn v0.4h, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - 
fcvtn2 v0.4s, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtn2 v0.8h, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns d21, d14 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns s22, s13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtns v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu d21, d14 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu s12, s13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnu v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps d21, d14 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps s22, s13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtps v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu d21, d14 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu s12, s13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtpu v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtxn s22, d13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtxn v0.2s, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtxn2 v0.4s, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs d21, d12, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs d21, d14 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs s12, s13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs s21, s12, #1 +# 
CHECK-NEXT: - - - - - - 1.00 - - fcvtzs v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu d21, d12, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu d21, d14 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu s12, s13 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu s21, s12, #1 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - 10.00 fdiv v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fmax v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fmax v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fmax v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnmp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnmp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnmp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fmaxp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fmaxp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fmaxp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fmin v0.2d, v0.2d, 
v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fmin v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fmin v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fminnm v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fminnm v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fminnm v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fminnmp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fminnmp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fminnmp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fminp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fminp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fminp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - 1.00 - fmla d0, d1, v0.d[1] +# CHECK-NEXT: - - - - - - - 1.00 - fmla s0, s1, v0.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - fmla v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - 1.00 - fmls d0, d4, v0.d[1] +# CHECK-NEXT: - - - - - - - 1.00 - fmls s3, s5, v0.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - fmls v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fmov v0.2d, #-1.25000000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov v0.2s, #13.00000000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov v0.4s, #1.00000000 +# CHECK-NEXT: - - - - - - 1.00 - - fmul d0, d1, v0.d[1] +# CHECK-NEXT: - - - - - - 1.00 - - fmul s0, s1, v0.s[3] +# CHECK-NEXT: - - - - - - 1.00 - - fmul v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fmulx d0, d4, v0.d[1] +# CHECK-NEXT: - - - - - - 1.00 - - fmulx d23, d11, d1 +# CHECK-NEXT: - - - - - - 1.00 - - fmulx s20, s22, s15 +# CHECK-NEXT: - - - - - - 1.00 - - fmulx s3, s5, v0.s[3] +# CHECK-NEXT: - - - - - - 1.00 - - fmulx v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fmulx v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fmulx v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fneg v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - fneg v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - fneg v0.4h, v0.4h +# CHECK-NEXT: - - - - 
- - 1.00 - - fneg v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - fneg v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - frecpe d13, d13 +# CHECK-NEXT: - - - - - - 1.00 - - frecpe s19, s14 +# CHECK-NEXT: - - - - - - 1.00 - - frecpe v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - frecpe v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - frecpe v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - frecpe v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - frecpe v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - frecps v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - frecps d22, d30, d21 +# CHECK-NEXT: - - - - - - 1.00 - - frecps s21, s16, s13 +# CHECK-NEXT: - - - - - - 1.00 - - frecpx d16, d19 +# CHECK-NEXT: - - - - - - 1.00 - - frecpx s18, s10 +# CHECK-NEXT: - - - - - - 1.00 - - frinta v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - frinta v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - frinta v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - frinta v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - frinta v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - frinti v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - frinti v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - frinti v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - frinti v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - frinti v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - frintm v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - frintm v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - frintm v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - frintm v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - frintm v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - frintn v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - frintn v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - frintn v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - frintn v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - frintn v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - frintp v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - frintp v0.2s, v0.2s +# CHECK-NEXT: - 
- - - - - 1.00 - - frintp v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - frintp v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - frintp v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - frintx v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - frintx v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - frintx v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - frintx v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - frintx v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - frintz v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - frintz v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - frintz v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - frintz v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - frintz v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - 19.00 frsqrte d21, d12 +# CHECK-NEXT: - - - - - - - - 9.00 frsqrte s22, s13 +# CHECK-NEXT: - - - - - - - - 19.00 frsqrte v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - 9.00 frsqrte v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - 5.00 frsqrte v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - 9.00 frsqrte v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - 5.00 frsqrte v0.8h, v0.8h +# CHECK-NEXT: - - - - - - - - 19.00 frsqrts d8, d22, d18 +# CHECK-NEXT: - - - - - - - - 9.00 frsqrts s21, s5, s12 +# CHECK-NEXT: - - - - - - - - 19.00 frsqrts v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - 19.00 fsqrt v0.2d, v0.2d +# CHECK-NEXT: - - - - - - - - 9.00 fsqrt v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - 5.00 fsqrt v0.4h, v0.4h +# CHECK-NEXT: - - - - - - - - 9.00 fsqrt v0.4s, v0.4s +# CHECK-NEXT: - - - - - - - - 5.00 fsqrt v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - fsub v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - 1.00 - - - - - ld1 { v0.16b }, [x0] +# CHECK-NEXT: - - - 3.00 - - - - - ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: - - - 4.00 - - - - - ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +# CHECK-NEXT: - - - 2.00 - - - - - ld1 { v0.4s, v1.4s }, [sp], #32 +# CHECK-NEXT: - - - 3.00 - - - - - ld1 { v0.4s, v1.4s, v2.4s }, [sp] +# CHECK-NEXT: - - - 2.00 - - - 
- - ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +# CHECK-NEXT: - - - 1.00 - - - - - ld1 { v0.8h }, [x15], x2 +# CHECK-NEXT: - - - 2.00 - - - - - ld1 { v0.8h, v1.8h }, [x15] +# CHECK-NEXT: - - - 1.00 - - - - - ld1 { v0.b }[9], [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1 { v0.b }[9], [x0], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ld1r { v0.16b }, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1r { v0.16b }, [x0], #1 +# CHECK-NEXT: - - - 1.00 - - - - - ld1r { v0.8h }, [x15] +# CHECK-NEXT: - - - 1.00 - - - - - ld1r { v0.8h }, [x15], #2 +# CHECK-NEXT: - - - 2.00 - - - - - ld2 { v0.16b, v1.16b }, [x0], x1 +# CHECK-NEXT: - - - 1.00 - - - - - ld2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: - - - 6.00 - - - - - ld2 { v0.h, v1.h }[7], [x15] +# CHECK-NEXT: - - - 6.00 - - - - - ld2 { v0.h, v1.h }[7], [x15], #4 +# CHECK-NEXT: - - - 2.00 - - - - - ld2r { v0.2d, v1.2d }, [x0] +# CHECK-NEXT: - - - 2.00 - - - - - ld2r { v0.2d, v1.2d }, [x0], #16 +# CHECK-NEXT: - - - 2.00 - - - - - ld2r { v0.4s, v1.4s }, [sp] +# CHECK-NEXT: - - - 2.00 - - - - - ld2r { v0.4s, v1.4s }, [sp], #8 +# CHECK-NEXT: - - - 6.00 - - - - - ld3 { v0.4h, v1.4h, v2.4h }, [x15] +# CHECK-NEXT: - - - 6.00 - - - - - ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2 +# CHECK-NEXT: - - - 7.00 - - - - - ld3 { v0.s, v1.s, v2.s }[3], [sp] +# CHECK-NEXT: - - - 7.00 - - - - - ld3 { v0.s, v1.s, v2.s }[3], [sp], x3 +# CHECK-NEXT: - - - 3.00 - - - - - ld3r { v0.4h, v1.4h, v2.4h }, [x15] +# CHECK-NEXT: - - - 3.00 - - - - - ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6 +# CHECK-NEXT: - - - 3.00 - - - - - ld3r { v0.8b, v1.8b, v2.8b }, [x0] +# CHECK-NEXT: - - - 3.00 - - - - - ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 +# CHECK-NEXT: - - - 7.00 - - - - - ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# CHECK-NEXT: - - - 8.00 - - - - - ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +# CHECK-NEXT: - - - 7.00 - - - - - ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 +# CHECK-NEXT: - - - 7.00 - - - - 
- ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 +# CHECK-NEXT: - - - 4.00 - - - - - ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp] +# CHECK-NEXT: - - - 4.00 - - - - - ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7 +# CHECK-NEXT: - - - 4.00 - - - - - ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# CHECK-NEXT: - - - 4.00 - - - - - ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30 +# CHECK-NEXT: - - - - - - 1.00 - - mla v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - mls v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - mov b0, v0.b[15] +# CHECK-NEXT: - - - - - - 1.00 - - mov d6, v0.d[1] +# CHECK-NEXT: - - - - - - 1.00 - - mov h2, v0.h[5] +# CHECK-NEXT: - - - - - - 1.00 - - mov s17, v0.s[2] +# CHECK-NEXT: - - - - - - 1.00 - - mov v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - mov v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - movi d15, #0xff00ff00ff00ff +# CHECK-NEXT: - - - - - - 1.00 - - movi v0.16b, #31 +# CHECK-NEXT: - - - - - - 1.00 - - movi v0.2d, #0xff0000ff0000ffff +# CHECK-NEXT: - - - - - - 1.00 - - movi v0.2s, #8, msl #8 +# CHECK-NEXT: - - - - - - 1.00 - - movi v0.4s, #255, lsl #24 +# CHECK-NEXT: - - - - - - 1.00 - - movi v0.8b, #255 +# CHECK-NEXT: - - - - - - 1.00 - - mul v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - mvni v0.2s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - mvni v0.4s, #16, msl #16 +# CHECK-NEXT: - - - - - - 1.00 - - neg d29, d24 +# CHECK-NEXT: - - - - - - 1.00 - - neg v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - neg v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - neg v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - neg v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - neg v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - neg v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - neg v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - mvn v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - mvn v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - orn v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - mov v0.16b, v0.16b +# CHECK-NEXT: 
- - - - - - 1.00 - - orr v0.8h, #31 +# CHECK-NEXT: - - - - - - 1.00 - - pmul v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - pmul v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - pmull v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - pmull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - raddhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - raddhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - raddhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - raddhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - raddhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - raddhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - rbit v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - rbit v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - rev16 v21.8b, v1.8b +# CHECK-NEXT: - - - - - - 1.00 - - rev16 v30.16b, v31.16b +# CHECK-NEXT: - - - - - - 1.00 - - rev32 v0.4h, v9.4h +# CHECK-NEXT: - - - - - - 1.00 - - rev32 v21.8b, v1.8b +# CHECK-NEXT: - - - - - - 1.00 - - rev32 v30.16b, v31.16b +# CHECK-NEXT: - - - - - - 1.00 - - rev32 v4.8h, v7.8h +# CHECK-NEXT: - - - - - - 1.00 - - rev64 v0.16b, v31.16b +# CHECK-NEXT: - - - - - - 1.00 - - rev64 v1.8b, v9.8b +# CHECK-NEXT: - - - - - - 1.00 - - rev64 v13.4h, v21.4h +# CHECK-NEXT: - - - - - - 1.00 - - rev64 v2.8h, v4.8h +# CHECK-NEXT: - - - - - - 1.00 - - rev64 v4.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - rev64 v6.4s, v8.4s +# CHECK-NEXT: - - - - - - 1.00 - - rshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - rshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - rshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - rshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - rshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - rshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - rsubhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - rsubhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - rsubhn v0.8b, v0.8h, 
v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - rsubhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - rsubhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - rsubhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - saba v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - sabal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - sabal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - sabal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - sabal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sabal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sabal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - sabd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - sabdl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - sabdl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - sabdl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - sabdl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sabdl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sabdl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 2.00 - - sadalp v0.1d, v0.2s +# CHECK-NEXT: - - - - - - 2.00 - - sadalp v0.2d, v0.4s +# CHECK-NEXT: - - - - - - 2.00 - - sadalp v0.2s, v0.4h +# CHECK-NEXT: - - - - - - 2.00 - - sadalp v0.4h, v0.8b +# CHECK-NEXT: - - - - - - 2.00 - - sadalp v0.4s, v0.8h +# CHECK-NEXT: - - - - - - 2.00 - - sadalp v0.8h, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - saddl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - saddl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - saddl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - saddl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - saddl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - saddl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - saddlp v0.1d, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - saddlp v0.2d, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - saddlp v0.2s, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - 
saddlp v0.4h, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - saddlp v0.4s, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - saddlp v0.8h, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - saddw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - saddw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - saddw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - saddw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - saddw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - saddw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - scvtf d21, d12 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf d21, d12, #64 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf s22, s13 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf s22, s13, #32 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - scvtf v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - scvtf v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - scvtf v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - scvtf v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - scvtf v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - shadd v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - shl d7, d10, #12 +# CHECK-NEXT: - - - - - - 1.00 - - shl v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - shl v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - shl v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - shl v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - shll v0.2d, v0.2s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - shll v0.4s, v0.4h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - shll v0.8h, v0.8b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - shll v0.2d, v0.2s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - shll v0.4s, v0.4h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - shll v0.8h, v0.8b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - shll2 v0.2d, v0.4s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - shll2 v0.4s, v0.8h, #16 +# 
CHECK-NEXT: - - - - - - 1.00 - - shll2 v0.8h, v0.16b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - shll2 v0.2d, v0.4s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - shll2 v0.4s, v0.8h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - shll2 v0.8h, v0.16b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - shrn v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - shrn v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - shrn v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - shrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - shrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - shrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - shsub v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - shsub v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - sli d10, d14, #12 +# CHECK-NEXT: - - - - - - 1.00 - - sli v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sli v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sli v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sli v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sli v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sli v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sli v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - smax v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - smax v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - smax v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - smaxp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - smaxp v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - smaxp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - smin v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - smin v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - smin v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sminp v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - sminp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sminp v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - smlal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - 
- 1.00 - - smlal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - smlal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - smlal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - smlal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - smlal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - smlsl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - smlsl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - smlsl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - smlsl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - smlsl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - smlsl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - smull v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - smull v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - smull v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - smull2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - smull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - smull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - sqabs b19, b14 +# CHECK-NEXT: - - - - - - 1.00 - - sqabs d18, d12 +# CHECK-NEXT: - - - - - - 1.00 - - sqabs h21, h15 +# CHECK-NEXT: - - - - - - 1.00 - - sqabs s20, s12 +# CHECK-NEXT: - - - - - - 1.00 - - sqabs v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - sqabs v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - sqabs v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - sqabs v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - sqabs v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sqabs v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - sqabs v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sqadd b20, b11, b15 +# CHECK-NEXT: - - - - - - 1.00 - - sqadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - sqadd v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlal d19, s24, s12 +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlal d8, s9, v0.s[1] +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlal s0, h0, v0.h[3] +# 
CHECK-NEXT: - - - - - - 1.00 - - sqdmlal s17, h27, h12 +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlsl d12, s23, s13 +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlsl d8, s9, v0.s[1] +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlsl s0, h0, v0.h[3] +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlsl s14, h12, h25 +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlsl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlsl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlsl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sqdmlsl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sqdmulh h10, h11, h12 +# CHECK-NEXT: - - - - - - 1.00 - - sqdmulh h7, h15, v0.h[3] +# CHECK-NEXT: - - - - - - 1.00 - - sqdmulh s15, s14, v0.s[1] +# CHECK-NEXT: - - - - - - 1.00 - - sqdmulh s20, s21, s2 +# CHECK-NEXT: - - - - - - 1.00 - - sqdmulh v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - sqdmulh v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sqdmull d1, s1, v0.s[1] +# CHECK-NEXT: - - - - - - 1.00 - - sqdmull d15, s22, s12 +# CHECK-NEXT: - - - - - - 1.00 - - sqdmull s1, h1, v0.h[3] +# CHECK-NEXT: - - - - - - 1.00 - - sqdmull s12, h22, h12 +# CHECK-NEXT: - - - - - - 1.00 - - sqdmull v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - sqdmull v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - sqdmull2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sqdmull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sqneg b19, b14 +# CHECK-NEXT: - - - - - - 1.00 - - sqneg d18, d12 +# CHECK-NEXT: - - - - - - 1.00 - - sqneg h21, h15 +# CHECK-NEXT: - - - - - - 1.00 - - sqneg s20, s12 +# CHECK-NEXT: - - - - - - 1.00 - - sqneg v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - sqneg v0.2d, v0.2d +# CHECK-NEXT: - 
- - - - - 1.00 - - sqneg v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - sqneg v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - sqneg v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sqneg v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - sqneg v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sqrdmulh h10, h11, h12 +# CHECK-NEXT: - - - - - - 1.00 - - sqrdmulh h7, h15, v0.h[3] +# CHECK-NEXT: - - - - - - 1.00 - - sqrdmulh s15, s14, v0.s[1] +# CHECK-NEXT: - - - - - - 1.00 - - sqrdmulh s20, s21, s2 +# CHECK-NEXT: - - - - - - 1.00 - - sqrdmulh v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - sqrdmulh v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sqrshl d31, d31, d31 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshl h3, h4, h15 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - sqrshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - sqrshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrn b10, h13, #2 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrn h15, s10, #6 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrn s15, d12, #9 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrun b17, h10, #6 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrun h10, s13, #15 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrun s22, d16, #31 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrun v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrun v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrun v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrun2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrun2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - 
sqrshrun2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl b11, b19, #7 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl d15, d16, #51 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl d31, d31, d31 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl h13, h18, #11 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl h3, h4, h15 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl s14, s17, #22 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - sqshl v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - sqshl v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - sqshl v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu b15, b18, #6 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu d11, d13, #32 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu h19, h17, #6 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu s16, s14, #25 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrn b10, h15, #5 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrn h17, s10, #4 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrn s18, d10, #31 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - 
sqshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrun b15, h10, #7 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrun h20, s14, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrun s10, d15, #15 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrun v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrun v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrun v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrun2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrun2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrun2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sqsub s20, s10, s7 +# CHECK-NEXT: - - - - - - 1.00 - - sqsub v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - sqsub v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sqsub v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - sqxtn b18, h18 +# CHECK-NEXT: - - - - - - 1.00 - - sqxtn h20, s17 +# CHECK-NEXT: - - - - - - 1.00 - - sqxtn s19, d14 +# CHECK-NEXT: - - - - - - 1.00 - - sqxtn v0.2s, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - sqxtn v0.4h, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sqxtn v0.8b, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sqxtn2 v0.16b, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sqxtn2 v0.4s, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - sqxtn2 v0.8h, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sqxtun b19, h14 +# CHECK-NEXT: - - - - - - 1.00 - - sqxtun h21, s15 +# CHECK-NEXT: - - - - - - 1.00 - - sqxtun s20, d12 +# CHECK-NEXT: - - - - - - 1.00 - - sqxtun v0.2s, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - sqxtun v0.4h, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - sqxtun v0.8b, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sqxtun2 v0.16b, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - sqxtun2 v0.4s, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - sqxtun2 v0.8h, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - srhadd v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - 
- - 1.00 - - srhadd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - srhadd v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - sri d10, d12, #14 +# CHECK-NEXT: - - - - - - 1.00 - - sri v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sri v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sri v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sri v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sri v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sri v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sri v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - srshl d16, d16, d16 +# CHECK-NEXT: - - - - - - 1.00 - - srshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - srshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - srshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - srshr d19, d18, #7 +# CHECK-NEXT: - - - - - - 1.00 - - srshr v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - srshr v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - srshr v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - srshr v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - srshr v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - srshr v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - srshr v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - 2.00 - - srsra d15, d11, #19 +# CHECK-NEXT: - - - - - - 2.00 - - srsra v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 2.00 - - srsra v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 2.00 - - srsra v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 2.00 - - srsra v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 2.00 - - srsra v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 2.00 - - srsra v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 2.00 - - srsra v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sshl d31, d31, d31 +# CHECK-NEXT: - - - - - - 1.00 - - sshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - sshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - sshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - 
- - - 1.00 - - sshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - sshll v0.2d, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sshll2 v0.4s, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sshr d15, d16, #12 +# CHECK-NEXT: - - - - - - 1.00 - - sshr v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sshr v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sshr v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sshr v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sshr v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sshr v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - sshr v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ssra d18, d12, #21 +# CHECK-NEXT: - - - - - - 1.00 - - ssra v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ssra v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ssra v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ssra v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ssra v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ssra v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ssra v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ssubl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - ssubl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - ssubl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - ssubl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - ssubl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - ssubl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - ssubw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - ssubw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - ssubw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - ssubw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - ssubw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - ssubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: - - - 2.00 - - - - - st1 { v0.16b }, [x0] +# CHECK-NEXT: - - - 2.00 - - - - - st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: - - - 4.00 - - 
- - - st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +# CHECK-NEXT: - - - 2.00 - - - - - st1 { v0.4s, v1.4s }, [sp], #32 +# CHECK-NEXT: - - - 2.00 - - - - - st1 { v0.4s, v1.4s, v2.4s }, [sp] +# CHECK-NEXT: - - - 4.00 - - - - - st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +# CHECK-NEXT: - - - 2.00 - - - - - st1 { v0.8h }, [x15], x2 +# CHECK-NEXT: - - - 2.00 - - - - - st1 { v0.8h, v1.8h }, [x15] +# CHECK-NEXT: - - - 1.00 - - - - - st1 { v0.d }[1], [x0] +# CHECK-NEXT: - - - 1.00 - - - - - st1 { v0.d }[1], [x0], #8 +# CHECK-NEXT: - - - 4.00 - - - - - st2 { v0.16b, v1.16b }, [x0], x1 +# CHECK-NEXT: - - - 2.00 - - - - - st2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: - - - 2.00 - - - - - st2 { v0.s, v1.s }[3], [sp] +# CHECK-NEXT: - - - 2.00 - - - - - st2 { v0.s, v1.s }[3], [sp], #8 +# CHECK-NEXT: - - - 4.00 - - - - - st3 { v0.4h, v1.4h, v2.4h }, [x15] +# CHECK-NEXT: - - - 4.00 - - - - - st3 { v0.8h, v1.8h, v2.8h }, [x15], x2 +# CHECK-NEXT: - - - 2.00 - - - - - st3 { v0.h, v1.h, v2.h }[7], [x15] +# CHECK-NEXT: - - - 2.00 - - - - - st3 { v0.h, v1.h, v2.h }[7], [x15], #6 +# CHECK-NEXT: - - - 4.00 - - - - - st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] +# CHECK-NEXT: - - - 4.00 - - - - - st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +# CHECK-NEXT: - - - 2.00 - - - - - st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] +# CHECK-NEXT: - - - 2.00 - - - - - st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 +# CHECK-NEXT: - - - - - - 1.00 - - sub d15, d5, d16 +# CHECK-NEXT: - - - - - - 1.00 - - sub v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - suqadd b19, b14 +# CHECK-NEXT: - - - - - - 1.00 - - suqadd d18, d22 +# CHECK-NEXT: - - - - - - 1.00 - - suqadd h20, h15 +# CHECK-NEXT: - - - - - - 1.00 - - suqadd s21, s12 +# CHECK-NEXT: - - - - - - 1.00 - - suqadd v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - suqadd v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - suqadd v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - suqadd v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - suqadd v0.4s, v0.4s +# CHECK-NEXT: 
- - - - - - 1.00 - - suqadd v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - suqadd v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - tbl v0.16b, { v0.16b }, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - tbl v0.16b, { v0.16b, v1.16b }, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - tbl v0.8b, { v0.16b }, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - tbl v0.8b, { v0.16b, v1.16b }, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - tbx v0.16b, { v0.16b }, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - tbx v0.16b, { v0.16b, v1.16b }, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - tbx v0.8b, { v0.16b }, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - tbx v0.8b, { v0.16b, v1.16b }, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - trn1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - trn1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - trn1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - trn1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - trn1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - trn1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - trn1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - trn2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - trn2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - trn2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - 
trn2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - trn2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - trn2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - trn2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uaba v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - uabal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - uabal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - uabal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - uabal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - uabal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uabal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - uabd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - uabdl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - uabdl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - uabdl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - uabdl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - uabdl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uabdl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 2.00 - - uadalp v0.1d, v0.2s +# CHECK-NEXT: - - - - - - 2.00 - - uadalp v0.2d, v0.4s +# CHECK-NEXT: - - - - - - 2.00 - - uadalp v0.2s, v0.4h +# CHECK-NEXT: - - - - - - 2.00 - - uadalp v0.4h, v0.8b +# CHECK-NEXT: - - - - - - 2.00 - - uadalp v0.4s, v0.8h +# CHECK-NEXT: - - - - - - 2.00 - - uadalp v0.8h, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - uaddl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - uaddl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - uaddl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - uaddl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - uaddl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uaddl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - uaddlp v0.1d, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - uaddlp v0.2d, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - uaddlp v0.2s, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - 
- uaddlp v0.4h, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - uaddlp v0.4s, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uaddlp v0.8h, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - uaddw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - uaddw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - uaddw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - uaddw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - uaddw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uaddw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf d21, d14 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf d21, d14, #64 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf s22, s13 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf s22, s13, #32 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uhadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - uhadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uhsub v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - umax v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - umax v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - umax v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - umaxp v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - umaxp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - umaxp v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - umin v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - umin v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - umin v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - uminp v0.2s, v0.2s, v0.2s +# 
CHECK-NEXT: - - - - - - 1.00 - - uminp v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - uminp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - umlal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - umlal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - umlal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - umlal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - umlal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - umlal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - umlsl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - umlsl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - umlsl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - umlsl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - umlsl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - umlsl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - umull v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - umull v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - umull v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - umull2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - umull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - umull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - uqadd h0, h1, h5 +# CHECK-NEXT: - - - - - - 1.00 - - uqadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uqrshl b11, b20, b30 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshl s23, s20, s16 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - uqrshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - uqrshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - uqrshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrn b10, h12, #5 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrn h12, s10, #14 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrn s10, d10, #25 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 
1.00 - - uqrshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl b11, b20, b30 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl b18, b15, #6 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl d15, d12, #19 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl h11, h18, #7 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl s14, s19, #18 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl s23, s20, s16 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - uqshl v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - uqshl v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - uqshl v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uqshrn b12, h10, #7 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrn h10, s14, #5 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrn s10, d12, #13 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub d16, d16, d16 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - uqxtn b18, h18 +# CHECK-NEXT: 
- - - - - - 1.00 - - uqxtn h20, s17 +# CHECK-NEXT: - - - - - - 1.00 - - uqxtn s19, d14 +# CHECK-NEXT: - - - - - - 1.00 - - uqxtn v0.2s, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - uqxtn v0.4h, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - uqxtn v0.8b, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uqxtn2 v0.16b, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uqxtn2 v0.4s, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - uqxtn2 v0.8h, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - urecpe v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - urecpe v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - urhadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - urhadd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - urhadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - urshl d8, d7, d4 +# CHECK-NEXT: - - - - - - 1.00 - - urshl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - urshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - urshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - urshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - urshr d20, d23, #31 +# CHECK-NEXT: - - - - - - 1.00 - - urshr v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - urshr v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - urshr v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - urshr v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - urshr v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - urshr v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - urshr v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - - - 9.00 ursqrte v0.2s, v0.2s +# CHECK-NEXT: - - - - - - - - 9.00 ursqrte v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 2.00 - - ursra d18, d10, #13 +# CHECK-NEXT: - - - - - - 2.00 - - ursra v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 2.00 - - ursra v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 2.00 - - ursra v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 2.00 - - ursra v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 2.00 - - ursra v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 2.00 
- - ursra v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 2.00 - - ursra v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ushl d0, d0, d0 +# CHECK-NEXT: - - - - - - 1.00 - - ushl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - ushl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - ushl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - ushll v0.4s, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ushll2 v0.8h, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ushr d10, d17, #18 +# CHECK-NEXT: - - - - - - 1.00 - - ushr v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ushr v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ushr v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ushr v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ushr v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ushr v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - ushr v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - usqadd b19, b14 +# CHECK-NEXT: - - - - - - 1.00 - - usqadd d18, d22 +# CHECK-NEXT: - - - - - - 1.00 - - usqadd h20, h15 +# CHECK-NEXT: - - - - - - 1.00 - - usqadd s21, s12 +# CHECK-NEXT: - - - - - - 1.00 - - usqadd v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - usqadd v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - usqadd v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - usqadd v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - usqadd v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - usqadd v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - usqadd v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - usra d20, d13, #61 +# CHECK-NEXT: - - - - - - 1.00 - - usra v0.16b, v0.16b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - usra v0.2d, v0.2d, #3 +# CHECK-NEXT: - - - - - - 1.00 - - usra v0.2s, v0.2s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - usra v0.4h, v0.4h, #3 +# CHECK-NEXT: - - - - - - 1.00 - - usra v0.4s, v0.4s, #3 +# CHECK-NEXT: - - - - - - 1.00 - - usra v0.8b, v0.8b, #3 +# CHECK-NEXT: - - - - - - 1.00 - - usra v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - - - 
1.00 - - usubl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - usubl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - usubl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - usubl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - usubl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - usubl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - usubw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - usubw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - usubw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - usubw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - usubw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - usubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - xtn v0.2s, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - xtn v0.4h, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - xtn v0.8b, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - xtn2 v0.16b, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - xtn2 v0.4s, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - xtn2 v0.8h, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - zip1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 
1.00 - - zip1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - zip1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - zip1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - zip1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - zip1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - zip1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - - - 1.00 - - zip2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - - - 1.00 - - zip2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - - - 1.00 - - zip2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - - - 1.00 - - zip2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: - - - - - - 1.00 - - zip2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - - - 1.00 - - zip2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - - - 1.00 - - zip2 v0.8h, v0.8h, v0.8h diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A320-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A320-sve-instructions.s new file mode 100644 index 000000000000..ad8d57bdb32d --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A320-sve-instructions.s @@ -0,0 +1,10258 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a320 -mattr=+sve2-aes,+sve2-sha3,+sve2-sm4 -instruction-tables < %s | FileCheck %s + +abs z0.b, p0/m, z0.b +abs z0.d, p0/m, z0.d +abs z0.h, p0/m, z0.h +abs z0.s, p0/m, z0.s +abs z31.b, p7/m, z31.b +abs z31.d, p7/m, z31.d +abs z31.h, p7/m, z31.h +abs z31.s, p7/m, z31.s +adclb z0.d, z1.d, z31.d +adclb z0.s, z1.s, z31.s +adclt z0.d, z1.d, z31.d +adclt z0.s, z1.s, z31.s +add z0.b, p0/m, z0.b, z0.b +add z0.b, z0.b, #0 +add z0.b, z0.b, z0.b +add z0.d, p0/m, z0.d, z0.d +add z0.d, z0.d, #0 +add z0.d, z0.d, #0, lsl #8 +add z0.d, z0.d, z0.d +add z0.h, p0/m, z0.h, z0.h +add z0.h, z0.h, #0 +add z0.h, z0.h, #0, lsl #8 +add z0.h, z0.h, z0.h +add z0.s, p0/m, z0.s, z0.s +add z0.s, z0.s, #0 +add z0.s, z0.s, #0, lsl #8 +add z0.s, z0.s, z0.s +add z0.s, z1.s, z2.s +add z21.b, p5/m, z21.b, z10.b +add z21.b, 
z10.b, z21.b +add z21.d, p5/m, z21.d, z10.d +add z21.d, z10.d, z21.d +add z21.h, p5/m, z21.h, z10.h +add z21.h, z10.h, z21.h +add z21.s, p5/m, z21.s, z10.s +add z21.s, z10.s, z21.s +add z23.b, p3/m, z23.b, z13.b +add z23.b, z13.b, z8.b +add z23.d, p3/m, z23.d, z13.d +add z23.d, z13.d, z8.d +add z23.h, p3/m, z23.h, z13.h +add z23.h, z13.h, z8.h +add z23.s, p3/m, z23.s, z13.s +add z23.s, z13.s, z8.s +add z31.b, p7/m, z31.b, z31.b +add z31.b, z31.b, #255 +add z31.b, z31.b, z31.b +add z31.d, p7/m, z31.d, z31.d +add z31.d, z31.d, #65280 +add z31.d, z31.d, z31.d +add z31.h, p7/m, z31.h, z31.h +add z31.h, z31.h, #65280 +add z31.h, z31.h, z31.h +add z31.s, p7/m, z31.s, z31.s +add z31.s, z31.s, #65280 +add z31.s, z31.s, z31.s +addhnb z0.b, z1.h, z31.h +addhnb z0.h, z1.s, z31.s +addhnb z0.s, z1.d, z31.d +addhnt z0.b, z1.h, z31.h +addhnt z0.h, z1.s, z31.s +addhnt z0.s, z1.d, z31.d +addp z0.b, p0/m, z0.b, z1.b +addp z0.h, p0/m, z0.h, z1.h +addp z29.s, p7/m, z29.s, z30.s +addp z31.d, p7/m, z31.d, z30.d +addpl sp, sp, #31 +addpl x0, x0, #-32 +addpl x21, x21, #0 +addpl x23, x8, #-1 +addvl sp, sp, #31 +addvl x0, x0, #-32 +addvl x21, x21, #0 +addvl x23, x8, #-1 +adr z0.d, [z0.d, z0.d, lsl #1] +adr z0.d, [z0.d, z0.d, lsl #2] +adr z0.d, [z0.d, z0.d, lsl #3] +adr z0.d, [z0.d, z0.d, sxtw #1] +adr z0.d, [z0.d, z0.d, sxtw #2] +adr z0.d, [z0.d, z0.d, sxtw #3] +adr z0.d, [z0.d, z0.d, sxtw] +adr z0.d, [z0.d, z0.d, uxtw #1] +adr z0.d, [z0.d, z0.d, uxtw #2] +adr z0.d, [z0.d, z0.d, uxtw #3] +adr z0.d, [z0.d, z0.d, uxtw] +adr z0.d, [z0.d, z0.d] +adr z0.s, [z0.s, z0.s, lsl #1] +adr z0.s, [z0.s, z0.s, lsl #2] +adr z0.s, [z0.s, z0.s, lsl #3] +adr z0.s, [z0.s, z0.s] +aesd z0.b, z0.b, z31.b +aese z0.b, z0.b, z31.b +aesimc z0.b, z0.b +aesimc z31.b, z31.b +aesmc z0.b, z0.b +aesmc z31.b, z31.b +and p0.b, p0/z, p0.b, p1.b +and z0.d, z0.d, #0x6 +and z0.d, z0.d, #0xfffffffffffffff9 +and z0.d, z0.d, z0.d +and z0.s, z0.s, #0x6 +and z0.s, z0.s, #0xfffffff9 +and z23.d, z13.d, z8.d +and z23.h, z23.h, #0x6 +and 
z23.h, z23.h, #0xfff9 +and z31.b, p7/m, z31.b, z31.b +and z31.d, p7/m, z31.d, z31.d +and z31.h, p7/m, z31.h, z31.h +and z31.s, p7/m, z31.s, z31.s +and z5.b, z5.b, #0x6 +and z5.b, z5.b, #0xf9 +ands p0.b, p0/z, p0.b, p1.b +andv b0, p7, z31.b +andv d0, p7, z31.d +andv h0, p7, z31.h +andv s0, p7, z31.s +asr z0.b, p0/m, z0.b, #1 +asr z0.b, p0/m, z0.b, z0.b +asr z0.b, p0/m, z0.b, z1.d +asr z0.b, z0.b, #1 +asr z0.b, z1.b, z2.d +asr z0.d, p0/m, z0.d, #1 +asr z0.d, p0/m, z0.d, z0.d +asr z0.d, z0.d, #1 +asr z0.h, p0/m, z0.h, #1 +asr z0.h, p0/m, z0.h, z0.h +asr z0.h, p0/m, z0.h, z1.d +asr z0.h, z0.h, #1 +asr z0.h, z1.h, z2.d +asr z0.s, p0/m, z0.s, #1 +asr z0.s, p0/m, z0.s, z0.s +asr z0.s, p0/m, z0.s, z1.d +asr z0.s, z0.s, #1 +asr z0.s, z1.s, z2.d +asr z31.b, p0/m, z31.b, #8 +asr z31.b, z31.b, #8 +asr z31.d, p0/m, z31.d, #64 +asr z31.d, z31.d, #64 +asr z31.h, p0/m, z31.h, #16 +asr z31.h, z31.h, #16 +asr z31.s, p0/m, z31.s, #32 +asr z31.s, z31.s, #32 +asrd z0.b, p0/m, z0.b, #1 +asrd z0.d, p0/m, z0.d, #1 +asrd z0.h, p0/m, z0.h, #1 +asrd z0.s, p0/m, z0.s, #1 +asrd z31.b, p0/m, z31.b, #8 +asrd z31.d, p0/m, z31.d, #64 +asrd z31.h, p0/m, z31.h, #16 +asrd z31.s, p0/m, z31.s, #32 +asrr z0.b, p0/m, z0.b, z0.b +asrr z0.d, p0/m, z0.d, z0.d +asrr z0.h, p0/m, z0.h, z0.h +asrr z0.s, p0/m, z0.s, z0.s +bcax z29.d, z29.d, z30.d, z31.d +bdep z0.b, z1.b, z31.b +bdep z0.d, z1.d, z31.d +bdep z0.h, z1.h, z31.h +bdep z0.s, z1.s, z31.s +bext z0.b, z1.b, z31.b +bext z0.d, z1.d, z31.d +bext z0.h, z1.h, z31.h +bext z0.s, z1.s, z31.s +bfcvt z0.h, p0/m, z1.s +bfcvtnt z0.h, p0/m, z1.s +bfdot z0.s, z1.h, z2.h +bfdot z0.s, z1.h, z2.h[0] +bfdot z0.s, z1.h, z2.h[3] +bfmlalb z0.s, z1.h, z2.h +bfmlalb z0.s, z1.h, z2.h[0] +bfmlalb z0.s, z1.h, z2.h[7] +bfmlalb z10.s, z21.h, z14.h +bfmlalb z21.s, z14.h, z3.h[2] +bfmlalt z0.s, z1.h, z2.h +bfmlalt z0.s, z1.h, z2.h[0] +bfmlalt z0.s, z1.h, z2.h[7] +bfmlalt z0.s, z1.h, z7.h[7] +bfmlalt z14.s, z10.h, z21.h +bfmmla z0.s, z1.h, z2.h +bgrp z0.b, z1.b, z31.b +bgrp z0.d, 
z1.d, z31.d +bgrp z0.h, z1.h, z31.h +bgrp z0.s, z1.s, z31.s +bic p0.b, p0/z, p0.b, p0.b +bic p15.b, p15/z, p15.b, p15.b +bic z0.d, z0.d, z0.d +bic z23.d, z13.d, z8.d +bic z31.b, p7/m, z31.b, z31.b +bic z31.d, p7/m, z31.d, z31.d +bic z31.h, p7/m, z31.h, z31.h +bic z31.s, p7/m, z31.s, z31.s +bics p0.b, p0/z, p0.b, p0.b +bics p15.b, p15/z, p15.b, p15.b +brka p0.b, p15/m, p15.b +brka p0.b, p15/z, p15.b +brkas p0.b, p15/z, p15.b +brkb p0.b, p15/m, p15.b +brkb p0.b, p15/z, p15.b +brkbs p0.b, p15/z, p15.b +brkn p0.b, p15/z, p1.b, p0.b +brkn p15.b, p15/z, p15.b, p15.b +brkns p0.b, p15/z, p1.b, p0.b +brkns p15.b, p15/z, p15.b, p15.b +brkpa p0.b, p15/z, p1.b, p2.b +brkpa p15.b, p15/z, p15.b, p15.b +brkpas p0.b, p15/z, p1.b, p2.b +brkpas p15.b, p15/z, p15.b, p15.b +brkpb p0.b, p15/z, p1.b, p2.b +brkpb p15.b, p15/z, p15.b, p15.b +brkpbs p0.b, p15/z, p1.b, p2.b +brkpbs p15.b, p15/z, p15.b, p15.b +bsl z0.d, z0.d, z1.d, z2.d +bsl1n z0.d, z0.d, z1.d, z2.d +bsl2n z0.d, z0.d, z1.d, z2.d +cadd z0.b, z0.b, z0.b, #90 +cadd z0.d, z0.d, z0.d, #90 +cadd z0.h, z0.h, z0.h, #90 +cadd z0.s, z0.s, z0.s, #90 +cadd z31.b, z31.b, z31.b, #270 +cadd z31.d, z31.d, z31.d, #270 +cadd z31.h, z31.h, z31.h, #270 +cadd z31.s, z31.s, z31.s, #270 +cdot z0.d, z1.h, z15.h[1], #0 +cdot z0.d, z1.h, z31.h, #0 +cdot z0.d, z1.h, z31.h, #180 +cdot z0.d, z1.h, z31.h, #270 +cdot z0.d, z1.h, z31.h, #90 +cdot z0.s, z1.b, z31.b, #0 +cdot z0.s, z1.b, z7.b[3], #0 +cdot z29.d, z30.h, z0.h[0], #180 +cdot z31.d, z30.h, z7.h[1], #270 +cdot z5.d, z6.h, z3.h[0], #90 +clasta b0, p7, b0, z31.b +clasta d0, p7, d0, z31.d +clasta h0, p7, h0, z31.h +clasta s0, p7, s0, z31.s +clasta w0, p7, w0, z31.b +clasta w0, p7, w0, z31.h +clasta w0, p7, w0, z31.s +clasta x0, p7, x0, z31.d +clasta z0.b, p7, z0.b, z31.b +clasta z0.d, p7, z0.d, z31.d +clasta z0.h, p7, z0.h, z31.h +clasta z0.s, p7, z0.s, z31.s +clastb b0, p7, b0, z31.b +clastb d0, p7, d0, z31.d +clastb h0, p7, h0, z31.h +clastb s0, p7, s0, z31.s +clastb w0, p7, w0, z31.b +clastb w0, 
p7, w0, z31.h +clastb w0, p7, w0, z31.s +clastb x0, p7, x0, z31.d +clastb z0.b, p7, z0.b, z31.b +clastb z0.d, p7, z0.d, z31.d +clastb z0.h, p7, z0.h, z31.h +clastb z0.s, p7, z0.s, z31.s +cls z31.b, p7/m, z31.b +cls z31.d, p7/m, z31.d +cls z31.h, p7/m, z31.h +cls z31.s, p7/m, z31.s +clz z31.b, p7/m, z31.b +clz z31.d, p7/m, z31.d +clz z31.h, p7/m, z31.h +clz z31.s, p7/m, z31.s +cmla z0.b, z1.b, z2.b, #0 +cmla z0.d, z1.d, z2.d, #0 +cmla z0.h, z1.h, z2.h, #0 +cmla z0.h, z1.h, z2.h[0], #0 +cmla z0.s, z1.s, z2.s, #0 +cmla z0.s, z1.s, z2.s[0], #0 +cmla z15.b, z16.b, z17.b, #270 +cmla z15.d, z16.d, z17.d, #270 +cmla z15.h, z16.h, z17.h, #270 +cmla z15.s, z16.s, z17.s, #270 +cmla z29.b, z30.b, z31.b, #90 +cmla z29.d, z30.d, z31.d, #90 +cmla z29.h, z30.h, z31.h, #90 +cmla z29.s, z30.s, z31.s, #90 +cmla z31.b, z31.b, z31.b, #180 +cmla z31.d, z31.d, z31.d, #180 +cmla z31.h, z30.h, z7.h[0], #180 +cmla z31.h, z31.h, z31.h, #180 +cmla z31.s, z30.s, z7.s[0], #180 +cmla z31.s, z31.s, z31.s, #180 +cmpeq p0.b, p0/z, z0.b, #-16 +cmpeq p0.b, p0/z, z0.b, #15 +cmpeq p0.b, p0/z, z0.b, z0.b +cmpeq p0.b, p0/z, z0.b, z0.d +cmpeq p0.d, p0/z, z0.d, #-16 +cmpeq p0.d, p0/z, z0.d, #15 +cmpeq p0.d, p0/z, z0.d, z0.d +cmpeq p0.h, p0/z, z0.h, #-16 +cmpeq p0.h, p0/z, z0.h, #15 +cmpeq p0.h, p0/z, z0.h, z0.d +cmpeq p0.h, p0/z, z0.h, z0.h +cmpeq p0.s, p0/z, z0.s, #-16 +cmpeq p0.s, p0/z, z0.s, #15 +cmpeq p0.s, p0/z, z0.s, z0.d +cmpeq p0.s, p0/z, z0.s, z0.s +cmpge p0.b, p0/z, z0.b, #-16 +cmpge p0.b, p0/z, z0.b, #15 +cmpge p0.b, p0/z, z0.b, z0.b +cmpge p0.b, p0/z, z0.b, z0.d +cmpge p0.b, p0/z, z1.b, z0.b +cmpge p0.d, p0/z, z0.d, #-16 +cmpge p0.d, p0/z, z0.d, #15 +cmpge p0.d, p0/z, z0.d, z0.d +cmpge p0.d, p0/z, z1.d, z0.d +cmpge p0.h, p0/z, z0.h, #-16 +cmpge p0.h, p0/z, z0.h, #15 +cmpge p0.h, p0/z, z0.h, z0.d +cmpge p0.h, p0/z, z0.h, z0.h +cmpge p0.h, p0/z, z1.h, z0.h +cmpge p0.s, p0/z, z0.s, #-16 +cmpge p0.s, p0/z, z0.s, #15 +cmpge p0.s, p0/z, z0.s, z0.d +cmpge p0.s, p0/z, z0.s, z0.s +cmpge p0.s, p0/z, 
z1.s, z0.s +cmpgt p0.b, p0/z, z0.b, #-16 +cmpgt p0.b, p0/z, z0.b, #15 +cmpgt p0.b, p0/z, z0.b, z0.b +cmpgt p0.b, p0/z, z0.b, z0.d +cmpgt p0.b, p0/z, z1.b, z0.b +cmpgt p0.d, p0/z, z0.d, #-16 +cmpgt p0.d, p0/z, z0.d, #15 +cmpgt p0.d, p0/z, z0.d, z0.d +cmpgt p0.d, p0/z, z1.d, z0.d +cmpgt p0.h, p0/z, z0.h, #-16 +cmpgt p0.h, p0/z, z0.h, #15 +cmpgt p0.h, p0/z, z0.h, z0.d +cmpgt p0.h, p0/z, z0.h, z0.h +cmpgt p0.h, p0/z, z1.h, z0.h +cmpgt p0.s, p0/z, z0.s, #-16 +cmpgt p0.s, p0/z, z0.s, #15 +cmpgt p0.s, p0/z, z0.s, z0.d +cmpgt p0.s, p0/z, z0.s, z0.s +cmpgt p0.s, p0/z, z1.s, z0.s +cmphi p0.b, p0/z, z0.b, #0 +cmphi p0.b, p0/z, z0.b, #127 +cmphi p0.b, p0/z, z0.b, z0.b +cmphi p0.b, p0/z, z0.b, z0.d +cmphi p0.b, p0/z, z1.b, z0.b +cmphi p0.d, p0/z, z0.d, #0 +cmphi p0.d, p0/z, z0.d, #127 +cmphi p0.d, p0/z, z0.d, z0.d +cmphi p0.d, p0/z, z1.d, z0.d +cmphi p0.h, p0/z, z0.h, #0 +cmphi p0.h, p0/z, z0.h, #127 +cmphi p0.h, p0/z, z0.h, z0.d +cmphi p0.h, p0/z, z0.h, z0.h +cmphi p0.h, p0/z, z1.h, z0.h +cmphi p0.s, p0/z, z0.s, #0 +cmphi p0.s, p0/z, z0.s, #127 +cmphi p0.s, p0/z, z0.s, z0.d +cmphi p0.s, p0/z, z0.s, z0.s +cmphi p0.s, p0/z, z1.s, z0.s +cmphs p0.b, p0/z, z0.b, #0 +cmphs p0.b, p0/z, z0.b, #127 +cmphs p0.b, p0/z, z0.b, z0.b +cmphs p0.b, p0/z, z0.b, z0.d +cmphs p0.b, p0/z, z1.b, z0.b +cmphs p0.d, p0/z, z0.d, #0 +cmphs p0.d, p0/z, z0.d, #127 +cmphs p0.d, p0/z, z0.d, z0.d +cmphs p0.d, p0/z, z1.d, z0.d +cmphs p0.h, p0/z, z0.h, #0 +cmphs p0.h, p0/z, z0.h, #127 +cmphs p0.h, p0/z, z0.h, z0.d +cmphs p0.h, p0/z, z0.h, z0.h +cmphs p0.h, p0/z, z1.h, z0.h +cmphs p0.s, p0/z, z0.s, #0 +cmphs p0.s, p0/z, z0.s, #127 +cmphs p0.s, p0/z, z0.s, z0.d +cmphs p0.s, p0/z, z0.s, z0.s +cmphs p0.s, p0/z, z1.s, z0.s +cmple p0.b, p0/z, z0.b, #-16 +cmple p0.b, p0/z, z0.b, #15 +cmple p0.b, p0/z, z0.b, z0.d +cmple p0.d, p0/z, z0.d, #-16 +cmple p0.d, p0/z, z0.d, #15 +cmple p0.h, p0/z, z0.h, #-16 +cmple p0.h, p0/z, z0.h, #15 +cmple p0.h, p0/z, z0.h, z0.d +cmple p0.s, p0/z, z0.s, #-16 +cmple p0.s, p0/z, z0.s, #15 
+cmple p0.s, p0/z, z0.s, z0.d +cmplo p0.b, p0/z, z0.b, #0 +cmplo p0.b, p0/z, z0.b, #127 +cmplo p0.b, p0/z, z0.b, z0.d +cmplo p0.d, p0/z, z0.d, #0 +cmplo p0.d, p0/z, z0.d, #127 +cmplo p0.h, p0/z, z0.h, #0 +cmplo p0.h, p0/z, z0.h, #127 +cmplo p0.h, p0/z, z0.h, z0.d +cmplo p0.s, p0/z, z0.s, #0 +cmplo p0.s, p0/z, z0.s, #127 +cmplo p0.s, p0/z, z0.s, z0.d +cmpls p0.b, p0/z, z0.b, #0 +cmpls p0.b, p0/z, z0.b, #127 +cmpls p0.b, p0/z, z0.b, z0.d +cmpls p0.d, p0/z, z0.d, #0 +cmpls p0.d, p0/z, z0.d, #127 +cmpls p0.h, p0/z, z0.h, #0 +cmpls p0.h, p0/z, z0.h, #127 +cmpls p0.h, p0/z, z0.h, z0.d +cmpls p0.s, p0/z, z0.s, #0 +cmpls p0.s, p0/z, z0.s, #127 +cmpls p0.s, p0/z, z0.s, z0.d +cmplt p0.b, p0/z, z0.b, #-16 +cmplt p0.b, p0/z, z0.b, #15 +cmplt p0.b, p0/z, z0.b, z0.d +cmplt p0.d, p0/z, z0.d, #-16 +cmplt p0.d, p0/z, z0.d, #15 +cmplt p0.h, p0/z, z0.h, #-16 +cmplt p0.h, p0/z, z0.h, #15 +cmplt p0.h, p0/z, z0.h, z0.d +cmplt p0.s, p0/z, z0.s, #-16 +cmplt p0.s, p0/z, z0.s, #15 +cmplt p0.s, p0/z, z0.s, z0.d +cmpne p0.b, p0/z, z0.b, #-16 +cmpne p0.b, p0/z, z0.b, #15 +cmpne p0.b, p0/z, z0.b, z0.b +cmpne p0.b, p0/z, z0.b, z0.d +cmpne p0.d, p0/z, z0.d, #-16 +cmpne p0.d, p0/z, z0.d, #15 +cmpne p0.d, p0/z, z0.d, z0.d +cmpne p0.h, p0/z, z0.h, #-16 +cmpne p0.h, p0/z, z0.h, #15 +cmpne p0.h, p0/z, z0.h, z0.d +cmpne p0.h, p0/z, z0.h, z0.h +cmpne p0.s, p0/z, z0.s, #-16 +cmpne p0.s, p0/z, z0.s, #15 +cmpne p0.s, p0/z, z0.s, z0.d +cmpne p0.s, p0/z, z0.s, z0.s +cnot z31.b, p7/m, z31.b +cnot z31.d, p7/m, z31.d +cnot z31.h, p7/m, z31.h +cnot z31.s, p7/m, z31.s +cnt z31.b, p7/m, z31.b +cnt z31.d, p7/m, z31.d +cnt z31.h, p7/m, z31.h +cnt z31.s, p7/m, z31.s +cntb x0 +cntb x0, #28 +cntb x0, all, mul #16 +cntb x0, pow2 +cntd x0 +cntd x0, #28 +cntd x0, all, mul #16 +cntd x0, pow2 +cnth x0 +cnth x0, #28 +cnth x0, all, mul #16 +cnth x0, pow2 +cntp x0, p15, p0.b +cntp x0, p15, p0.d +cntp x0, p15, p0.h +cntp x0, p15, p0.s +cntw x0 +cntw x0, #28 +cntw x0, all, mul #16 +cntw x0, pow2 +compact z31.d, p7, z31.d 
+compact z31.s, p7, z31.s +ctermeq w30, wzr +ctermeq wzr, w30 +ctermeq x30, xzr +ctermeq xzr, x30 +ctermne w30, wzr +ctermne wzr, w30 +ctermne x30, xzr +ctermne xzr, x30 +decb x0 +decb x0, #14 +decb x0, all, mul #16 +decb x0, pow2 +decb x0, vl1 +decd x0 +decd x0, #14 +decd x0, all, mul #16 +decd x0, pow2 +decd x0, vl1 +dech x0 +dech x0, #14 +dech x0, all, mul #16 +dech x0, pow2 +dech x0, vl1 +decp x0, p0.b +decp x0, p0.d +decp x0, p0.h +decp x0, p0.s +decp xzr, p15.b +decp xzr, p15.d +decp xzr, p15.h +decp xzr, p15.s +decp z31.d, p15.d +decp z31.h, p15.h +decp z31.s, p15.s +decw x0 +decw x0, #14 +decw x0, all, mul #16 +decw x0, pow2 +decw x0, vl1 +dupm z0.d, #0xfffffffffffffff9 +dupm z0.s, #0xfffffff9 +dupm z23.h, #0xfff9 +dupm z5.b, #0xf9 +eor p0.b, p0/z, p0.b, p1.b +eor z0.d, z0.d, #0x6 +eor z0.d, z0.d, #0xfffffffffffffff9 +eor z0.d, z0.d, z0.d +eor z0.s, z0.s, #0x6 +eor z0.s, z0.s, #0xfffffff9 +eor z23.d, z13.d, z8.d +eor z23.h, z23.h, #0x6 +eor z23.h, z23.h, #0xfff9 +eor z31.b, p7/m, z31.b, z31.b +eor z31.d, p7/m, z31.d, z31.d +eor z31.h, p7/m, z31.h, z31.h +eor z31.s, p7/m, z31.s, z31.s +eor z5.b, z5.b, #0x6 +eor z5.b, z5.b, #0xf9 +eor3 z29.d, z29.d, z30.d, z31.d +eorbt z0.b, z1.b, z31.b +eorbt z0.d, z1.d, z31.d +eorbt z0.h, z1.h, z31.h +eorbt z0.s, z1.s, z31.s +eors p0.b, p0/z, p0.b, p1.b +eortb z0.b, z1.b, z31.b +eortb z0.d, z1.d, z31.d +eortb z0.h, z1.h, z31.h +eortb z0.s, z1.s, z31.s +eorv b0, p7, z31.b +eorv d0, p7, z31.d +eorv h0, p7, z31.h +eorv s0, p7, z31.s +ext z0.b, { z1.b, z2.b }, #0 +ext z31.b, z31.b, z0.b, #0 +ext z31.b, z31.b, z0.b, #255 +ext z31.b, { z30.b, z31.b }, #255 +fabd z0.d, p7/m, z0.d, z31.d +fabd z0.h, p7/m, z0.h, z31.h +fabd z0.s, p7/m, z0.s, z31.s +fabs z31.d, p7/m, z31.d +fabs z31.h, p7/m, z31.h +fabs z31.s, p7/m, z31.s +facge p0.d, p0/z, z0.d, z1.d +facge p0.d, p0/z, z1.d, z0.d +facge p0.h, p0/z, z0.h, z1.h +facge p0.h, p0/z, z1.h, z0.h +facge p0.s, p0/z, z0.s, z1.s +facge p0.s, p0/z, z1.s, z0.s +facgt p0.d, p0/z, z0.d, z1.d 
+facgt p0.d, p0/z, z1.d, z0.d +facgt p0.h, p0/z, z0.h, z1.h +facgt p0.h, p0/z, z1.h, z0.h +facgt p0.s, p0/z, z0.s, z1.s +facgt p0.s, p0/z, z1.s, z0.s +fadd z0.d, p0/m, z0.d, #0.5 +fadd z0.d, p7/m, z0.d, z31.d +fadd z0.d, z1.d, z31.d +fadd z0.h, p0/m, z0.h, #0.5 +fadd z0.h, p7/m, z0.h, z31.h +fadd z0.h, z1.h, z31.h +fadd z0.s, p0/m, z0.s, #0.5 +fadd z0.s, p7/m, z0.s, z31.s +fadd z0.s, z1.s, z31.s +fadd z31.d, p7/m, z31.d, #1.0 +fadd z31.h, p7/m, z31.h, #1.0 +fadd z31.s, p7/m, z31.s, #1.0 +fadda d0, p7, d0, z31.d +fadda h0, p7, h0, z31.h +fadda s0, p7, s0, z31.s +faddp z0.h, p0/m, z0.h, z1.h +faddp z29.s, p3/m, z29.s, z30.s +faddp z31.d, p7/m, z31.d, z30.d +faddv d0, p7, z31.d +faddv h0, p7, z31.h +faddv s0, p7, z31.s +fcadd z0.d, p0/m, z0.d, z0.d, #90 +fcadd z0.h, p0/m, z0.h, z0.h, #90 +fcadd z0.s, p0/m, z0.s, z0.s, #90 +fcadd z31.d, p7/m, z31.d, z31.d, #270 +fcadd z31.h, p7/m, z31.h, z31.h, #270 +fcadd z31.s, p7/m, z31.s, z31.s, #270 +fcmeq p0.d, p0/z, z0.d, #0.0 +fcmeq p0.d, p0/z, z0.d, z1.d +fcmeq p0.h, p0/z, z0.h, #0.0 +fcmeq p0.h, p0/z, z0.h, z1.h +fcmeq p0.s, p0/z, z0.s, #0.0 +fcmeq p0.s, p0/z, z0.s, z1.s +fcmge p0.d, p0/z, z0.d, #0.0 +fcmge p0.d, p0/z, z0.d, z1.d +fcmge p0.d, p0/z, z1.d, z0.d +fcmge p0.h, p0/z, z0.h, #0.0 +fcmge p0.h, p0/z, z0.h, z1.h +fcmge p0.h, p0/z, z1.h, z0.h +fcmge p0.s, p0/z, z0.s, #0.0 +fcmge p0.s, p0/z, z0.s, z1.s +fcmge p0.s, p0/z, z1.s, z0.s +fcmgt p0.d, p0/z, z0.d, #0.0 +fcmgt p0.d, p0/z, z0.d, z1.d +fcmgt p0.d, p0/z, z1.d, z0.d +fcmgt p0.h, p0/z, z0.h, #0.0 +fcmgt p0.h, p0/z, z0.h, z1.h +fcmgt p0.h, p0/z, z1.h, z0.h +fcmgt p0.s, p0/z, z0.s, #0.0 +fcmgt p0.s, p0/z, z0.s, z1.s +fcmgt p0.s, p0/z, z1.s, z0.s +fcmla z0.d, p0/m, z0.d, z0.d, #0 +fcmla z0.d, p0/m, z1.d, z2.d, #90 +fcmla z0.h, p0/m, z0.h, z0.h, #0 +fcmla z0.h, p0/m, z1.h, z2.h, #90 +fcmla z0.h, z0.h, z0.h[0], #0 +fcmla z0.s, p0/m, z0.s, z0.s, #0 +fcmla z0.s, p0/m, z1.s, z2.s, #90 +fcmla z21.s, z10.s, z5.s[1], #90 +fcmla z23.s, z13.s, z8.s[0], #270 +fcmla z29.d, p7/m, 
z30.d, z31.d, #180 +fcmla z29.h, p7/m, z30.h, z31.h, #180 +fcmla z29.s, p7/m, z30.s, z31.s, #180 +fcmla z31.d, p7/m, z31.d, z31.d, #270 +fcmla z31.h, p7/m, z31.h, z31.h, #270 +fcmla z31.h, z31.h, z7.h[3], #270 +fcmla z31.s, p7/m, z31.s, z31.s, #270 +fcmle p0.d, p0/z, z0.d, #0.0 +fcmle p0.h, p0/z, z0.h, #0.0 +fcmle p0.s, p0/z, z0.s, #0.0 +fcmlt p0.d, p0/z, z0.d, #0.0 +fcmlt p0.h, p0/z, z0.h, #0.0 +fcmlt p0.s, p0/z, z0.s, #0.0 +fcmne p0.d, p0/z, z0.d, #0.0 +fcmne p0.d, p0/z, z0.d, z1.d +fcmne p0.h, p0/z, z0.h, #0.0 +fcmne p0.h, p0/z, z0.h, z1.h +fcmne p0.s, p0/z, z0.s, #0.0 +fcmne p0.s, p0/z, z0.s, z1.s +fcmuo p0.d, p0/z, z0.d, z1.d +fcmuo p0.h, p0/z, z0.h, z1.h +fcmuo p0.s, p0/z, z0.s, z1.s +fcvt z0.d, p0/m, z0.h +fcvt z0.d, p0/m, z0.s +fcvt z0.h, p0/m, z0.d +fcvt z0.h, p0/m, z0.s +fcvt z0.s, p0/m, z0.d +fcvt z0.s, p0/m, z0.h +fcvtlt z0.s, p0/m, z1.h +fcvtlt z30.d, p7/m, z31.s +fcvtnt z0.h, p0/m, z1.s +fcvtnt z30.s, p7/m, z31.d +fcvtx z0.s, p0/m, z0.d +fcvtx z30.s, p7/m, z31.d +fcvtxnt z0.s, p0/m, z1.d +fcvtxnt z30.s, p7/m, z31.d +fcvtzs z0.d, p0/m, z0.d +fcvtzs z0.d, p0/m, z0.h +fcvtzs z0.d, p0/m, z0.s +fcvtzs z0.h, p0/m, z0.h +fcvtzs z0.s, p0/m, z0.d +fcvtzs z0.s, p0/m, z0.h +fcvtzs z0.s, p0/m, z0.s +fcvtzu z0.d, p0/m, z0.d +fcvtzu z0.d, p0/m, z0.h +fcvtzu z0.d, p0/m, z0.s +fcvtzu z0.h, p0/m, z0.h +fcvtzu z0.s, p0/m, z0.d +fcvtzu z0.s, p0/m, z0.h +fcvtzu z0.s, p0/m, z0.s +fdiv z0.d, p7/m, z0.d, z31.d +fdiv z0.h, p7/m, z0.h, z31.h +fdiv z0.s, p7/m, z0.s, z31.s +fdivr z0.d, p7/m, z0.d, z31.d +fdivr z0.h, p7/m, z0.h, z31.h +fdivr z0.s, p7/m, z0.s, z31.s +fexpa z0.d, z31.d +fexpa z0.h, z31.h +fexpa z0.s, z31.s +flogb z31.d, p7/m, z31.d +flogb z31.h, p7/m, z31.h +flogb z31.s, p7/m, z31.s +fmad z0.d, p7/m, z1.d, z31.d +fmad z0.h, p7/m, z1.h, z31.h +fmad z0.s, p7/m, z1.s, z31.s +fmax z0.d, p0/m, z0.d, #0.0 +fmax z0.d, p7/m, z0.d, z31.d +fmax z0.h, p0/m, z0.h, #0.0 +fmax z0.h, p7/m, z0.h, z31.h +fmax z0.s, p0/m, z0.s, #0.0 +fmax z0.s, p7/m, z0.s, z31.s +fmax z31.d, p7/m, 
z31.d, #1.0 +fmax z31.h, p7/m, z31.h, #1.0 +fmax z31.s, p7/m, z31.s, #1.0 +fmaxnm z0.d, p0/m, z0.d, #0.0 +fmaxnm z0.d, p7/m, z0.d, z31.d +fmaxnm z0.h, p0/m, z0.h, #0.0 +fmaxnm z0.h, p7/m, z0.h, z31.h +fmaxnm z0.s, p0/m, z0.s, #0.0 +fmaxnm z0.s, p7/m, z0.s, z31.s +fmaxnm z31.d, p7/m, z31.d, #1.0 +fmaxnm z31.h, p7/m, z31.h, #1.0 +fmaxnm z31.s, p7/m, z31.s, #1.0 +fmaxnmp z0.h, p0/m, z0.h, z1.h +fmaxnmp z29.s, p3/m, z29.s, z30.s +fmaxnmp z31.d, p7/m, z31.d, z30.d +fmaxnmv d0, p7, z31.d +fmaxnmv h0, p7, z31.h +fmaxnmv s0, p7, z31.s +fmaxp z0.h, p0/m, z0.h, z1.h +fmaxp z29.s, p3/m, z29.s, z30.s +fmaxp z31.d, p7/m, z31.d, z30.d +fmaxv d0, p7, z31.d +fmaxv h0, p7, z31.h +fmaxv s0, p7, z31.s +fmin z0.d, p0/m, z0.d, #0.0 +fmin z0.d, p7/m, z0.d, z31.d +fmin z0.h, p0/m, z0.h, #0.0 +fmin z0.h, p7/m, z0.h, z31.h +fmin z0.s, p0/m, z0.s, #0.0 +fmin z0.s, p7/m, z0.s, z31.s +fmin z31.d, p7/m, z31.d, #1.0 +fmin z31.h, p7/m, z31.h, #1.0 +fmin z31.s, p7/m, z31.s, #1.0 +fminnm z0.d, p0/m, z0.d, #0.0 +fminnm z0.d, p7/m, z0.d, z31.d +fminnm z0.h, p0/m, z0.h, #0.0 +fminnm z0.h, p7/m, z0.h, z31.h +fminnm z0.s, p0/m, z0.s, #0.0 +fminnm z0.s, p7/m, z0.s, z31.s +fminnm z31.d, p7/m, z31.d, #1.0 +fminnm z31.h, p7/m, z31.h, #1.0 +fminnm z31.s, p7/m, z31.s, #1.0 +fminnmp z0.h, p0/m, z0.h, z1.h +fminnmp z29.s, p3/m, z29.s, z30.s +fminnmp z31.d, p7/m, z31.d, z30.d +fminnmv d0, p7, z31.d +fminnmv h0, p7, z31.h +fminnmv s0, p7, z31.s +fminp z0.h, p0/m, z0.h, z1.h +fminp z29.s, p3/m, z29.s, z30.s +fminp z31.d, p7/m, z31.d, z30.d +fminv d0, p7, z31.d +fminv h0, p7, z31.h +fminv s0, p7, z31.s +fmla z0.d, p7/m, z1.d, z31.d +fmla z0.d, z1.d, z7.d[1] +fmla z0.h, p7/m, z1.h, z31.h +fmla z0.h, z1.h, z7.h[7] +fmla z0.s, p7/m, z1.s, z31.s +fmla z0.s, z1.s, z7.s[3] +fmlalb z0.s, z1.h, z7.h[0] +fmlalb z29.s, z30.h, z31.h +fmlalb z30.s, z31.h, z7.h[7] +fmlalt z0.s, z1.h, z7.h[0] +fmlalt z29.s, z30.h, z31.h +fmlalt z30.s, z31.h, z7.h[7] +fmls z0.d, p7/m, z1.d, z31.d +fmls z0.d, z1.d, z7.d[1] +fmls z0.h, p7/m, z1.h, 
z31.h +fmls z0.h, z1.h, z7.h[7] +fmls z0.s, p7/m, z1.s, z31.s +fmls z0.s, z1.s, z7.s[3] +fmlslb z0.s, z1.h, z7.h[0] +fmlslb z29.s, z30.h, z31.h +fmlslb z30.s, z31.h, z7.h[7] +fmlslt z0.s, z1.h, z7.h[0] +fmlslt z29.s, z30.h, z31.h +fmlslt z30.s, z31.h, z7.h[7] +fmov z0.d, #-10.00000000 +fmov z0.d, #0.12500000 +fmov z0.d, p0/m, #-10.00000000 +fmov z0.d, p0/m, #0.12500000 +fmov z0.h, #-0.12500000 +fmov z0.h, p0/m, #-0.12500000 +fmov z0.s, #-0.12500000 +fmov z0.s, p0/m, #-0.12500000 +fmsb z0.d, p7/m, z1.d, z31.d +fmsb z0.h, p7/m, z1.h, z31.h +fmsb z0.s, p7/m, z1.s, z31.s +fmul z0.d, p0/m, z0.d, #0.5 +fmul z0.d, p7/m, z0.d, z31.d +fmul z0.d, z0.d, z0.d[0] +fmul z0.d, z1.d, z31.d +fmul z0.h, p0/m, z0.h, #0.5 +fmul z0.h, p7/m, z0.h, z31.h +fmul z0.h, z0.h, z0.h[0] +fmul z0.h, z1.h, z31.h +fmul z0.s, p0/m, z0.s, #0.5 +fmul z0.s, p7/m, z0.s, z31.s +fmul z0.s, z0.s, z0.s[0] +fmul z0.s, z1.s, z31.s +fmul z31.d, p7/m, z31.d, #2.0 +fmul z31.d, z31.d, z15.d[1] +fmul z31.h, p7/m, z31.h, #2.0 +fmul z31.h, z31.h, z7.h[7] +fmul z31.s, p7/m, z31.s, #2.0 +fmul z31.s, z31.s, z7.s[3] +fmulx z0.d, p7/m, z0.d, z31.d +fmulx z0.h, p7/m, z0.h, z31.h +fmulx z0.s, p7/m, z0.s, z31.s +fneg z31.d, p7/m, z31.d +fneg z31.h, p7/m, z31.h +fneg z31.s, p7/m, z31.s +fnmad z0.d, p7/m, z1.d, z31.d +fnmad z0.h, p7/m, z1.h, z31.h +fnmad z0.s, p7/m, z1.s, z31.s +fnmla z0.d, p7/m, z1.d, z31.d +fnmla z0.h, p7/m, z1.h, z31.h +fnmla z0.s, p7/m, z1.s, z31.s +fnmls z0.d, p7/m, z1.d, z31.d +fnmls z0.h, p7/m, z1.h, z31.h +fnmls z0.s, p7/m, z1.s, z31.s +fnmsb z0.d, p7/m, z1.d, z31.d +fnmsb z0.h, p7/m, z1.h, z31.h +fnmsb z0.s, p7/m, z1.s, z31.s +frecpe z0.d, z31.d +frecpe z0.h, z31.h +frecpe z0.s, z31.s +frecps z0.d, z1.d, z31.d +frecps z0.h, z1.h, z31.h +frecps z0.s, z1.s, z31.s +frecpx z31.d, p7/m, z31.d +frecpx z31.h, p7/m, z31.h +frecpx z31.s, p7/m, z31.s +frinta z31.d, p7/m, z31.d +frinta z31.h, p7/m, z31.h +frinta z31.s, p7/m, z31.s +frinti z31.d, p7/m, z31.d +frinti z31.h, p7/m, z31.h +frinti z31.s, p7/m, z31.s 
+frintm z31.d, p7/m, z31.d +frintm z31.h, p7/m, z31.h +frintm z31.s, p7/m, z31.s +frintn z31.d, p7/m, z31.d +frintn z31.h, p7/m, z31.h +frintn z31.s, p7/m, z31.s +frintp z31.d, p7/m, z31.d +frintp z31.h, p7/m, z31.h +frintp z31.s, p7/m, z31.s +frintx z31.d, p7/m, z31.d +frintx z31.h, p7/m, z31.h +frintx z31.s, p7/m, z31.s +frintz z31.d, p7/m, z31.d +frintz z31.h, p7/m, z31.h +frintz z31.s, p7/m, z31.s +frsqrte z0.d, z31.d +frsqrte z0.h, z31.h +frsqrte z0.s, z31.s +frsqrts z0.d, z1.d, z31.d +frsqrts z0.h, z1.h, z31.h +frsqrts z0.s, z1.s, z31.s +fscale z0.d, p7/m, z0.d, z31.d +fscale z0.h, p7/m, z0.h, z31.h +fscale z0.s, p7/m, z0.s, z31.s +fsqrt z31.d, p7/m, z31.d +fsqrt z31.h, p7/m, z31.h +fsqrt z31.s, p7/m, z31.s +fsub z0.d, p0/m, z0.d, #0.5 +fsub z0.d, p7/m, z0.d, z31.d +fsub z0.d, z1.d, z31.d +fsub z0.h, p0/m, z0.h, #0.5 +fsub z0.h, p7/m, z0.h, z31.h +fsub z0.h, z1.h, z31.h +fsub z0.s, p0/m, z0.s, #0.5 +fsub z0.s, p7/m, z0.s, z31.s +fsub z0.s, z1.s, z31.s +fsub z31.d, p7/m, z31.d, #1.0 +fsub z31.h, p7/m, z31.h, #1.0 +fsub z31.s, p7/m, z31.s, #1.0 +fsubr z0.d, p0/m, z0.d, #0.5 +fsubr z0.d, p7/m, z0.d, z31.d +fsubr z0.h, p0/m, z0.h, #0.5 +fsubr z0.h, p7/m, z0.h, z31.h +fsubr z0.s, p0/m, z0.s, #0.5 +fsubr z0.s, p7/m, z0.s, z31.s +fsubr z31.d, p7/m, z31.d, #1.0 +fsubr z31.h, p7/m, z31.h, #1.0 +fsubr z31.s, p7/m, z31.s, #1.0 +ftmad z0.d, z0.d, z31.d, #7 +ftmad z0.h, z0.h, z31.h, #7 +ftmad z0.s, z0.s, z31.s, #7 +ftsmul z0.d, z1.d, z31.d +ftsmul z0.h, z1.h, z31.h +ftsmul z0.s, z1.s, z31.s +ftssel z0.d, z1.d, z31.d +ftssel z0.h, z1.h, z31.h +ftssel z0.s, z1.s, z31.s +histcnt z0.s, p0/z, z1.s, z2.s +histcnt z29.d, p7/z, z30.d, z31.d +histseg z0.b, z1.b, z31.b +incb x0 +incb x0, #14 +incb x0, all, mul #16 +incb x0, pow2 +incb x0, vl1 +incd x0 +incd x0, #14 +incd x0, all, mul #16 +incd x0, pow2 +incd x0, vl1 +incd z0.d +incd z0.d, all, mul #16 +inch x0 +inch x0, #14 +inch x0, all, mul #16 +inch x0, pow2 +inch x0, vl1 +inch z0.h +inch z0.h, all, mul #16 +incp x0, p0.b +incp 
x0, p0.d +incp x0, p0.h +incp x0, p0.s +incp xzr, p15.b +incp xzr, p15.d +incp xzr, p15.h +incp xzr, p15.s +incp z31.d, p15.d +incp z31.h, p15.h +incp z31.s, p15.s +incw x0 +incw x0, #14 +incw x0, all, mul #16 +incw x0, pow2 +incw x0, vl1 +incw z0.s +incw z0.s, all, mul #16 +index z0.b, #0, #0 +index z0.d, #0, #0 +index z0.h, #0, #0 +index z0.h, w0, w0 +index z0.s, #0, #0 +index z21.b, w10, w21 +index z21.d, x10, x21 +index z21.s, w10, w21 +index z23.b, #13, w8 +index z23.b, w13, #8 +index z23.d, #13, x8 +index z23.d, x13, #8 +index z23.h, #13, w8 +index z23.h, w13, #8 +index z23.s, #13, w8 +index z23.s, w13, #8 +index z31.b, #-1, #-1 +index z31.b, #-1, wzr +index z31.b, wzr, #-1 +index z31.b, wzr, wzr +index z31.d, #-1, #-1 +index z31.d, #-1, xzr +index z31.d, xzr, #-1 +index z31.d, xzr, xzr +index z31.h, #-1, #-1 +index z31.h, #-1, wzr +index z31.h, wzr, #-1 +index z31.h, wzr, wzr +index z31.s, #-1, #-1 +index z31.s, #-1, wzr +index z31.s, wzr, #-1 +index z31.s, wzr, wzr +insr z0.b, w0 +insr z0.d, x0 +insr z0.h, w0 +insr z0.s, w0 +insr z31.b, b31 +insr z31.b, wzr +insr z31.d, d31 +insr z31.d, xzr +insr z31.h, h31 +insr z31.h, wzr +insr z31.s, s31 +insr z31.s, wzr +lasta b0, p7, z31.b +lasta d0, p7, z31.d +lasta h0, p7, z31.h +lasta s0, p7, z31.s +lasta w0, p7, z31.b +lasta w0, p7, z31.h +lasta w0, p7, z31.s +lasta x0, p7, z31.d +lastb b0, p7, z31.b +lastb d0, p7, z31.d +lastb h0, p7, z31.h +lastb s0, p7, z31.s +lastb w0, p7, z31.b +lastb w0, p7, z31.h +lastb w0, p7, z31.s +lastb x0, p7, z31.d +ld1b { z0.b }, p0/z, [sp, x0] +ld1b { z0.b }, p0/z, [x0, x0] +ld1b { z0.b }, p0/z, [x0] +ld1b { z0.d }, p0/z, [x0] +ld1b { z0.d }, p0/z, [z0.d] +ld1b { z0.h }, p0/z, [x0] +ld1b { z0.s }, p0/z, [x0, z0.s, sxtw] +ld1b { z0.s }, p0/z, [x0, z0.s, uxtw] +ld1b { z0.s }, p0/z, [x0] +ld1b { z0.s }, p0/z, [z0.s] +ld1b { z21.b }, p5/z, [x10, #5, mul vl] +ld1b { z21.d }, p5/z, [x10, #5, mul vl] +ld1b { z21.d }, p5/z, [x10, z21.d, sxtw] +ld1b { z21.d }, p5/z, [x10, z21.d, uxtw] +ld1b { 
z21.h }, p5/z, [x10, #5, mul vl] +ld1b { z21.s }, p5/z, [x10, #5, mul vl] +ld1b { z21.s }, p5/z, [x10, x21] +ld1b { z23.d }, p3/z, [x13, x8] +ld1b { z31.b }, p7/z, [sp, #-1, mul vl] +ld1b { z31.d }, p7/z, [sp, #-1, mul vl] +ld1b { z31.d }, p7/z, [sp, z31.d] +ld1b { z31.d }, p7/z, [z31.d, #31] +ld1b { z31.h }, p7/z, [sp, #-1, mul vl] +ld1b { z31.s }, p7/z, [sp, #-1, mul vl] +ld1b { z31.s }, p7/z, [z31.s, #31] +ld1b { z5.h }, p3/z, [x17, x16] +ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3] +ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3] +ld1d { z0.d }, p0/z, [x0] +ld1d { z0.d }, p0/z, [z0.d] +ld1d { z21.d }, p5/z, [x10, #5, mul vl] +ld1d { z21.d }, p5/z, [x10, z21.d, sxtw] +ld1d { z21.d }, p5/z, [x10, z21.d, uxtw] +ld1d { z23.d }, p3/z, [sp, x8, lsl #3] +ld1d { z23.d }, p3/z, [x13, x8, lsl #3] +ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3] +ld1d { z31.d }, p7/z, [sp, #-1, mul vl] +ld1d { z31.d }, p7/z, [sp, z31.d] +ld1d { z31.d }, p7/z, [z31.d, #248] +ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1] +ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1] +ld1h { z0.d }, p0/z, [x0] +ld1h { z0.d }, p0/z, [z0.d] +ld1h { z0.h }, p0/z, [x0] +ld1h { z0.s }, p0/z, [x0, z0.s, sxtw] +ld1h { z0.s }, p0/z, [x0, z0.s, uxtw] +ld1h { z0.s }, p0/z, [x0] +ld1h { z0.s }, p0/z, [z0.s] +ld1h { z21.d }, p5/z, [x10, #5, mul vl] +ld1h { z21.d }, p5/z, [x10, z21.d, sxtw] +ld1h { z21.d }, p5/z, [x10, z21.d, uxtw] +ld1h { z21.h }, p5/z, [x10, #5, mul vl] +ld1h { z21.s }, p5/z, [x10, #5, mul vl] +ld1h { z21.s }, p5/z, [x10, x21, lsl #1] +ld1h { z23.d }, p3/z, [x13, x8, lsl #1] +ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1] +ld1h { z31.d }, p7/z, [sp, #-1, mul vl] +ld1h { z31.d }, p7/z, [sp, z31.d] +ld1h { z31.d }, p7/z, [z31.d, #62] +ld1h { z31.h }, p7/z, [sp, #-1, mul vl] +ld1h { z31.s }, p7/z, [sp, #-1, mul vl] +ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1] +ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1] +ld1h { z31.s }, p7/z, [z31.s, #62] +ld1h { z5.h }, p3/z, [sp, x16, lsl #1] +ld1h { z5.h }, p3/z, [x17, x16, lsl #1] +ld1rb { z0.b }, 
p0/z, [x0] +ld1rb { z0.d }, p0/z, [x0] +ld1rb { z0.h }, p0/z, [x0] +ld1rb { z0.s }, p0/z, [x0] +ld1rb { z31.b }, p7/z, [sp, #63] +ld1rb { z31.d }, p7/z, [sp, #63] +ld1rb { z31.h }, p7/z, [sp, #63] +ld1rb { z31.s }, p7/z, [sp, #63] +ld1rd { z0.d }, p0/z, [x0] +ld1rd { z31.d }, p7/z, [sp, #504] +ld1rh { z0.d }, p0/z, [x0] +ld1rh { z0.h }, p0/z, [x0] +ld1rh { z0.s }, p0/z, [x0] +ld1rh { z31.d }, p7/z, [sp, #126] +ld1rh { z31.h }, p7/z, [sp, #126] +ld1rh { z31.s }, p7/z, [sp, #126] +ld1rqb { z0.b }, p0/z, [x0, x0] +ld1rqb { z0.b }, p0/z, [x0] +ld1rqb { z21.b }, p5/z, [x10, #112] +ld1rqb { z23.b }, p3/z, [x13, #-128] +ld1rqb { z31.b }, p7/z, [sp, #-16] +ld1rqd { z0.d }, p0/z, [x0, x0, lsl #3] +ld1rqd { z0.d }, p0/z, [x0] +ld1rqd { z23.d }, p3/z, [x13, #-128] +ld1rqd { z23.d }, p3/z, [x13, #112] +ld1rqd { z31.d }, p7/z, [sp, #-16] +ld1rqh { z0.h }, p0/z, [x0, x0, lsl #1] +ld1rqh { z0.h }, p0/z, [x0] +ld1rqh { z23.h }, p3/z, [x13, #-128] +ld1rqh { z23.h }, p3/z, [x13, #112] +ld1rqh { z31.h }, p7/z, [sp, #-16] +ld1rqw { z0.s }, p0/z, [x0, x0, lsl #2] +ld1rqw { z0.s }, p0/z, [x0] +ld1rqw { z23.s }, p3/z, [x13, #-128] +ld1rqw { z23.s }, p3/z, [x13, #112] +ld1rqw { z31.s }, p7/z, [sp, #-16] +ld1rsb { z0.d }, p0/z, [x0] +ld1rsb { z0.h }, p0/z, [x0] +ld1rsb { z0.s }, p0/z, [x0] +ld1rsb { z31.d }, p7/z, [sp, #63] +ld1rsb { z31.h }, p7/z, [sp, #63] +ld1rsb { z31.s }, p7/z, [sp, #63] +ld1rsh { z0.d }, p0/z, [x0] +ld1rsh { z0.s }, p0/z, [x0] +ld1rsh { z31.d }, p7/z, [sp, #126] +ld1rsh { z31.s }, p7/z, [sp, #126] +ld1rsw { z0.d }, p0/z, [x0] +ld1rsw { z31.d }, p7/z, [sp, #252] +ld1rw { z0.d }, p0/z, [x0] +ld1rw { z0.s }, p0/z, [x0] +ld1rw { z31.d }, p7/z, [sp, #252] +ld1rw { z31.s }, p7/z, [sp, #252] +ld1sb { z0.d }, p0/z, [x0] +ld1sb { z0.d }, p0/z, [z0.d] +ld1sb { z0.h }, p0/z, [sp, x0] +ld1sb { z0.h }, p0/z, [x0, x0] +ld1sb { z0.h }, p0/z, [x0] +ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw] +ld1sb { z0.s }, p0/z, [x0] +ld1sb { z0.s }, p0/z, [z0.s] +ld1sb { z21.d }, p5/z, [x10, #5, mul 
vl] +ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw] +ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw] +ld1sb { z21.h }, p5/z, [x10, #5, mul vl] +ld1sb { z21.s }, p5/z, [x10, #5, mul vl] +ld1sb { z21.s }, p5/z, [x10, x21] +ld1sb { z23.d }, p3/z, [x13, x8] +ld1sb { z31.d }, p7/z, [sp, #-1, mul vl] +ld1sb { z31.d }, p7/z, [sp, z31.d] +ld1sb { z31.d }, p7/z, [z31.d, #31] +ld1sb { z31.h }, p7/z, [sp, #-1, mul vl] +ld1sb { z31.s }, p7/z, [sp, #-1, mul vl] +ld1sb { z31.s }, p7/z, [z31.s, #31] +ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1] +ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1] +ld1sh { z0.d }, p0/z, [x0] +ld1sh { z0.d }, p0/z, [z0.d] +ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw] +ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw] +ld1sh { z0.s }, p0/z, [x0] +ld1sh { z0.s }, p0/z, [z0.s] +ld1sh { z21.d }, p5/z, [x10, #5, mul vl] +ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw] +ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw] +ld1sh { z21.s }, p5/z, [sp, x21, lsl #1] +ld1sh { z21.s }, p5/z, [x10, #5, mul vl] +ld1sh { z21.s }, p5/z, [x10, x21, lsl #1] +ld1sh { z23.d }, p3/z, [x13, x8, lsl #1] +ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1] +ld1sh { z31.d }, p7/z, [sp, #-1, mul vl] +ld1sh { z31.d }, p7/z, [sp, z31.d] +ld1sh { z31.d }, p7/z, [z31.d, #62] +ld1sh { z31.s }, p7/z, [sp, #-1, mul vl] +ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1] +ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1] +ld1sh { z31.s }, p7/z, [z31.s, #62] +ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2] +ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2] +ld1sw { z0.d }, p0/z, [x0] +ld1sw { z0.d }, p0/z, [z0.d] +ld1sw { z21.d }, p5/z, [x10, #5, mul vl] +ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw] +ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw] +ld1sw { z23.d }, p3/z, [sp, x8, lsl #2] +ld1sw { z23.d }, p3/z, [x13, x8, lsl #2] +ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2] +ld1sw { z31.d }, p7/z, [sp, #-1, mul vl] +ld1sw { z31.d }, p7/z, [sp, z31.d] +ld1sw { z31.d }, p7/z, [z31.d, #124] +ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2] +ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2] +ld1w { z0.d 
}, p0/z, [x0] +ld1w { z0.d }, p0/z, [z0.d] +ld1w { z0.s }, p0/z, [x0, z0.s, sxtw] +ld1w { z0.s }, p0/z, [x0, z0.s, uxtw] +ld1w { z0.s }, p0/z, [x0] +ld1w { z0.s }, p0/z, [z0.s] +ld1w { z21.d }, p5/z, [x10, #5, mul vl] +ld1w { z21.d }, p5/z, [x10, z21.d, sxtw] +ld1w { z21.d }, p5/z, [x10, z21.d, uxtw] +ld1w { z21.s }, p5/z, [sp, x21, lsl #2] +ld1w { z21.s }, p5/z, [x10, #5, mul vl] +ld1w { z21.s }, p5/z, [x10, x21, lsl #2] +ld1w { z23.d }, p3/z, [x13, x8, lsl #2] +ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2] +ld1w { z31.d }, p7/z, [sp, #-1, mul vl] +ld1w { z31.d }, p7/z, [sp, z31.d] +ld1w { z31.d }, p7/z, [z31.d, #124] +ld1w { z31.s }, p7/z, [sp, #-1, mul vl] +ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2] +ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2] +ld1w { z31.s }, p7/z, [z31.s, #124] +ld2b { z0.b, z1.b }, p0/z, [x0, x0] +ld2b { z0.b, z1.b }, p0/z, [x0] +ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl] +ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl] +ld2b { z5.b, z6.b }, p3/z, [x17, x16] +ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3] +ld2d { z0.d, z1.d }, p0/z, [x0] +ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl] +ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl] +ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3] +ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1] +ld2h { z0.h, z1.h }, p0/z, [x0] +ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl] +ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl] +ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1] +ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2] +ld2w { z0.s, z1.s }, p0/z, [x0] +ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl] +ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl] +ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2] +ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0] +ld3b { z0.b, z1.b, z2.b }, p0/z, [x0] +ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl] +ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl] +ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16] +ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3] +ld3d { z0.d, z1.d, z2.d }, p0/z, [x0] 
+ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl] +ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl] +ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3] +ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1] +ld3h { z0.h, z1.h, z2.h }, p0/z, [x0] +ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl] +ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl] +ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1] +ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2] +ld3w { z0.s, z1.s, z2.s }, p0/z, [x0] +ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl] +ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl] +ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2] +ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0] +ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0] +ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl] +ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl] +ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16] +ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3] +ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0] +ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl] +ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl] +ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3] +ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1] +ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0] +ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl] +ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl] +ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1] +ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2] +ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0] +ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl] +ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl] +ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2] +ldff1b { z0.d }, p0/z, [x0, x0] +ldff1b { z0.d }, p0/z, [z0.d] +ldff1b { z0.h }, p0/z, [x0, x0] +ldff1b { z0.s }, p0/z, [x0, x0] +ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw] +ldff1b { z0.s 
}, p0/z, [x0, z0.s, uxtw] +ldff1b { z0.s }, p0/z, [z0.s] +ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw] +ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw] +ldff1b { z31.b }, p7/z, [sp] +ldff1b { z31.d }, p7/z, [sp, z31.d] +ldff1b { z31.d }, p7/z, [sp] +ldff1b { z31.d }, p7/z, [z31.d, #31] +ldff1b { z31.h }, p7/z, [sp] +ldff1b { z31.s }, p7/z, [sp] +ldff1b { z31.s }, p7/z, [z31.s, #31] +ldff1d { z0.d }, p0/z, [x0, x0, lsl #3] +ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3] +ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3] +ldff1d { z0.d }, p0/z, [z0.d] +ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw] +ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw] +ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3] +ldff1d { z31.d }, p7/z, [sp, z31.d] +ldff1d { z31.d }, p7/z, [sp] +ldff1d { z31.d }, p7/z, [z31.d, #248] +ldff1h { z0.d }, p0/z, [x0, x0, lsl #1] +ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1] +ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1] +ldff1h { z0.d }, p0/z, [z0.d] +ldff1h { z0.h }, p0/z, [x0, x0, lsl #1] +ldff1h { z0.s }, p0/z, [x0, x0, lsl #1] +ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw] +ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw] +ldff1h { z0.s }, p0/z, [z0.s] +ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw] +ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw] +ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1] +ldff1h { z31.d }, p7/z, [sp, z31.d] +ldff1h { z31.d }, p7/z, [sp] +ldff1h { z31.d }, p7/z, [z31.d, #62] +ldff1h { z31.h }, p7/z, [sp] +ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1] +ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1] +ldff1h { z31.s }, p7/z, [sp] +ldff1h { z31.s }, p7/z, [z31.s, #62] +ldff1sb { z0.d }, p0/z, [x0, x0] +ldff1sb { z0.d }, p0/z, [z0.d] +ldff1sb { z0.h }, p0/z, [x0, x0] +ldff1sb { z0.s }, p0/z, [x0, x0] +ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw] +ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw] +ldff1sb { z0.s }, p0/z, [z0.s] +ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw] +ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw] +ldff1sb { z31.d }, p7/z, [sp, z31.d] +ldff1sb { z31.d }, p7/z, [sp] +ldff1sb { z31.d }, p7/z, 
[z31.d, #31] +ldff1sb { z31.h }, p7/z, [sp] +ldff1sb { z31.s }, p7/z, [sp] +ldff1sb { z31.s }, p7/z, [z31.s, #31] +ldff1sh { z0.d }, p0/z, [x0, x0, lsl #1] +ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1] +ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1] +ldff1sh { z0.d }, p0/z, [z0.d] +ldff1sh { z0.s }, p0/z, [x0, x0, lsl #1] +ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw] +ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw] +ldff1sh { z0.s }, p0/z, [z0.s] +ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw] +ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw] +ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1] +ldff1sh { z31.d }, p7/z, [sp, z31.d] +ldff1sh { z31.d }, p7/z, [sp] +ldff1sh { z31.d }, p7/z, [z31.d, #62] +ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1] +ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1] +ldff1sh { z31.s }, p7/z, [sp] +ldff1sh { z31.s }, p7/z, [z31.s, #62] +ldff1sw { z0.d }, p0/z, [x0, x0, lsl #2] +ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2] +ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2] +ldff1sw { z0.d }, p0/z, [z0.d] +ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw] +ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw] +ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2] +ldff1sw { z31.d }, p7/z, [sp, z31.d] +ldff1sw { z31.d }, p7/z, [sp] +ldff1sw { z31.d }, p7/z, [z31.d, #124] +ldff1w { z0.d }, p0/z, [x0, x0, lsl #2] +ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2] +ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2] +ldff1w { z0.d }, p0/z, [z0.d] +ldff1w { z0.s }, p0/z, [x0, x0, lsl #2] +ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw] +ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw] +ldff1w { z0.s }, p0/z, [z0.s] +ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw] +ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw] +ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2] +ldff1w { z31.d }, p7/z, [sp, z31.d] +ldff1w { z31.d }, p7/z, [sp] +ldff1w { z31.d }, p7/z, [z31.d, #124] +ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2] +ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2] +ldff1w { z31.s }, p7/z, [sp] +ldff1w { z31.s }, p7/z, [z31.s, #124] +ldnf1b { z0.b }, p0/z, [x0] 
+ldnf1b { z0.d }, p0/z, [x0] +ldnf1b { z0.h }, p0/z, [x0] +ldnf1b { z0.s }, p0/z, [x0] +ldnf1b { z21.b }, p5/z, [x10, #5, mul vl] +ldnf1b { z21.d }, p5/z, [x10, #5, mul vl] +ldnf1b { z21.h }, p5/z, [x10, #5, mul vl] +ldnf1b { z21.s }, p5/z, [x10, #5, mul vl] +ldnf1b { z31.b }, p7/z, [sp, #-1, mul vl] +ldnf1b { z31.d }, p7/z, [sp, #-1, mul vl] +ldnf1b { z31.h }, p7/z, [sp, #-1, mul vl] +ldnf1b { z31.s }, p7/z, [sp, #-1, mul vl] +ldnf1d { z0.d }, p0/z, [x0] +ldnf1d { z21.d }, p5/z, [x10, #5, mul vl] +ldnf1d { z31.d }, p7/z, [sp, #-1, mul vl] +ldnf1h { z0.d }, p0/z, [x0] +ldnf1h { z0.h }, p0/z, [x0] +ldnf1h { z0.s }, p0/z, [x0] +ldnf1h { z21.d }, p5/z, [x10, #5, mul vl] +ldnf1h { z21.h }, p5/z, [x10, #5, mul vl] +ldnf1h { z21.s }, p5/z, [x10, #5, mul vl] +ldnf1h { z31.d }, p7/z, [sp, #-1, mul vl] +ldnf1h { z31.h }, p7/z, [sp, #-1, mul vl] +ldnf1h { z31.s }, p7/z, [sp, #-1, mul vl] +ldnf1sb { z0.d }, p0/z, [x0] +ldnf1sb { z0.h }, p0/z, [x0] +ldnf1sb { z0.s }, p0/z, [x0] +ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl] +ldnf1sb { z21.h }, p5/z, [x10, #5, mul vl] +ldnf1sb { z21.s }, p5/z, [x10, #5, mul vl] +ldnf1sb { z31.d }, p7/z, [sp, #-1, mul vl] +ldnf1sb { z31.h }, p7/z, [sp, #-1, mul vl] +ldnf1sb { z31.s }, p7/z, [sp, #-1, mul vl] +ldnf1sh { z0.d }, p0/z, [x0] +ldnf1sh { z0.s }, p0/z, [x0] +ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl] +ldnf1sh { z21.s }, p5/z, [x10, #5, mul vl] +ldnf1sh { z31.d }, p7/z, [sp, #-1, mul vl] +ldnf1sh { z31.s }, p7/z, [sp, #-1, mul vl] +ldnf1sw { z0.d }, p0/z, [x0] +ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl] +ldnf1sw { z31.d }, p7/z, [sp, #-1, mul vl] +ldnf1w { z0.d }, p0/z, [x0] +ldnf1w { z0.s }, p0/z, [x0] +ldnf1w { z21.d }, p5/z, [x10, #5, mul vl] +ldnf1w { z21.s }, p5/z, [x10, #5, mul vl] +ldnf1w { z31.d }, p7/z, [sp, #-1, mul vl] +ldnf1w { z31.s }, p7/z, [sp, #-1, mul vl] +ldnt1b { z0.b }, p0/z, [x0, x0] +ldnt1b { z0.b }, p0/z, [x0] +ldnt1b { z0.d }, p0/z, [z1.d] +ldnt1b { z0.s }, p0/z, [z1.s] +ldnt1b { z21.b }, p5/z, [x10, #7, mul vl] 
+ldnt1b { z23.b }, p3/z, [x13, #-8, mul vl] +ldnt1b { z31.d }, p7/z, [z31.d, x0] +ldnt1b { z31.d }, p7/z, [z31.d] +ldnt1b { z31.s }, p7/z, [z31.s, x0] +ldnt1b { z31.s }, p7/z, [z31.s] +ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3] +ldnt1d { z0.d }, p0/z, [x0] +ldnt1d { z0.d }, p0/z, [z1.d] +ldnt1d { z21.d }, p5/z, [x10, #7, mul vl] +ldnt1d { z23.d }, p3/z, [x13, #-8, mul vl] +ldnt1d { z31.d }, p7/z, [z31.d, x0] +ldnt1d { z31.d }, p7/z, [z31.d] +ldnt1h { z0.d }, p0/z, [z1.d] +ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1] +ldnt1h { z0.h }, p0/z, [x0] +ldnt1h { z0.s }, p0/z, [z1.s] +ldnt1h { z21.h }, p5/z, [x10, #7, mul vl] +ldnt1h { z23.h }, p3/z, [x13, #-8, mul vl] +ldnt1h { z31.d }, p7/z, [z31.d, x0] +ldnt1h { z31.d }, p7/z, [z31.d] +ldnt1h { z31.s }, p7/z, [z31.s, x0] +ldnt1h { z31.s }, p7/z, [z31.s] +ldnt1sb { z0.d }, p0/z, [z1.d] +ldnt1sb { z0.s }, p0/z, [z1.s] +ldnt1sb { z31.d }, p7/z, [z31.d, x0] +ldnt1sb { z31.d }, p7/z, [z31.d] +ldnt1sb { z31.s }, p7/z, [z31.s, x0] +ldnt1sb { z31.s }, p7/z, [z31.s] +ldnt1sh { z0.d }, p0/z, [z1.d] +ldnt1sh { z0.s }, p0/z, [z1.s] +ldnt1sh { z31.d }, p7/z, [z31.d, x0] +ldnt1sh { z31.d }, p7/z, [z31.d] +ldnt1sh { z31.s }, p7/z, [z31.s, x0] +ldnt1sh { z31.s }, p7/z, [z31.s] +ldnt1sw { z0.d }, p0/z, [z1.d] +ldnt1sw { z31.d }, p7/z, [z31.d, x0] +ldnt1sw { z31.d }, p7/z, [z31.d] +ldnt1w { z0.d }, p0/z, [z1.d] +ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2] +ldnt1w { z0.s }, p0/z, [x0] +ldnt1w { z0.s }, p0/z, [z1.s] +ldnt1w { z21.s }, p5/z, [x10, #7, mul vl] +ldnt1w { z23.s }, p3/z, [x13, #-8, mul vl] +ldnt1w { z31.d }, p7/z, [z31.d, x0] +ldnt1w { z31.d }, p7/z, [z31.d] +ldnt1w { z31.s }, p7/z, [z31.s, x0] +ldnt1w { z31.s }, p7/z, [z31.s] +ldr p0, [x0] +ldr p5, [x10, #255, mul vl] +ldr p7, [x13, #-256, mul vl] +ldr z0, [x0] +ldr z23, [x13, #255, mul vl] +ldr z31, [sp, #-256, mul vl] +lsl z0.b, p0/m, z0.b, #0 +lsl z0.b, p0/m, z0.b, z0.b +lsl z0.b, p0/m, z0.b, z1.d +lsl z0.b, z0.b, #0 +lsl z0.b, z1.b, z2.d +lsl z0.d, p0/m, z0.d, #0 +lsl z0.d, p0/m, z0.d, 
z0.d +lsl z0.d, z0.d, #0 +lsl z0.h, p0/m, z0.h, #0 +lsl z0.h, p0/m, z0.h, z0.h +lsl z0.h, p0/m, z0.h, z1.d +lsl z0.h, z0.h, #0 +lsl z0.h, z1.h, z2.d +lsl z0.s, p0/m, z0.s, #0 +lsl z0.s, p0/m, z0.s, z0.s +lsl z0.s, p0/m, z0.s, z1.d +lsl z0.s, z0.s, #0 +lsl z0.s, z1.s, z2.d +lsl z31.b, p0/m, z31.b, #7 +lsl z31.b, z31.b, #7 +lsl z31.d, p0/m, z31.d, #63 +lsl z31.d, z31.d, #63 +lsl z31.h, p0/m, z31.h, #15 +lsl z31.h, z31.h, #15 +lsl z31.s, p0/m, z31.s, #31 +lsl z31.s, z31.s, #31 +lslr z0.b, p0/m, z0.b, z0.b +lslr z0.d, p0/m, z0.d, z0.d +lslr z0.h, p0/m, z0.h, z0.h +lslr z0.s, p0/m, z0.s, z0.s +lsr z0.b, p0/m, z0.b, #1 +lsr z0.b, p0/m, z0.b, z0.b +lsr z0.b, p0/m, z0.b, z1.d +lsr z0.b, z0.b, #1 +lsr z0.b, z1.b, z2.d +lsr z0.d, p0/m, z0.d, #1 +lsr z0.d, p0/m, z0.d, z0.d +lsr z0.d, z0.d, #1 +lsr z0.h, p0/m, z0.h, #1 +lsr z0.h, p0/m, z0.h, z0.h +lsr z0.h, p0/m, z0.h, z1.d +lsr z0.h, z0.h, #1 +lsr z0.h, z1.h, z2.d +lsr z0.s, p0/m, z0.s, #1 +lsr z0.s, p0/m, z0.s, z0.s +lsr z0.s, p0/m, z0.s, z1.d +lsr z0.s, z0.s, #1 +lsr z0.s, z1.s, z2.d +lsr z31.b, p0/m, z31.b, #8 +lsr z31.b, z31.b, #8 +lsr z31.d, p0/m, z31.d, #64 +lsr z31.d, z31.d, #64 +lsr z31.h, p0/m, z31.h, #16 +lsr z31.h, z31.h, #16 +lsr z31.s, p0/m, z31.s, #32 +lsr z31.s, z31.s, #32 +lsrr z0.b, p0/m, z0.b, z0.b +lsrr z0.d, p0/m, z0.d, z0.d +lsrr z0.h, p0/m, z0.h, z0.h +lsrr z0.s, p0/m, z0.s, z0.s +mad z0.b, p7/m, z1.b, z31.b +mad z0.d, p7/m, z1.d, z31.d +mad z0.h, p7/m, z1.h, z31.h +mad z0.s, p7/m, z1.s, z31.s +match p0.b, p0/z, z0.b, z0.b +match p0.h, p0/z, z0.h, z0.h +match p15.b, p7/z, z30.b, z31.b +match p15.h, p7/z, z30.h, z31.h +mla z0.b, p7/m, z1.b, z31.b +mla z0.d, p7/m, z1.d, z31.d +mla z0.d, z1.d, z7.d[1] +mla z0.h, p7/m, z1.h, z31.h +mla z0.h, z1.h, z7.h[7] +mla z0.s, p7/m, z1.s, z31.s +mla z0.s, z1.s, z7.s[3] +mls z0.b, p7/m, z1.b, z31.b +mls z0.d, p7/m, z1.d, z31.d +mls z0.d, z1.d, z7.d[1] +mls z0.h, p7/m, z1.h, z31.h +mls z0.h, z1.h, z7.h[7] +mls z0.s, p7/m, z1.s, z31.s +mls z0.s, z1.s, z7.s[3] +mov p0.b, 
p0.b +mov p0.b, p0/m, p0.b +mov p0.b, p0/z, p0.b +mov p15.b, p15.b +mov p15.b, p15/m, p15.b +mov p15.b, p15/z, p15.b +mov z0.b, #127 +mov z0.b, b0 +mov z0.b, p0/m, b0 +mov z0.b, p0/m, w0 +mov z0.b, p0/z, #127 +mov z0.b, w0 +mov z0.d, #0 +mov z0.d, #0xe0000000000003ff +mov z0.d, #0xffffffffffff7fff +mov z0.d, #32768 +mov z0.d, d0 +mov z0.d, p0/m, d0 +mov z0.d, p0/m, x0 +mov z0.d, x0 +mov z0.d, z0.d +mov z0.h, #-256 +mov z0.h, #-32768 +mov z0.h, #0 +mov z0.h, #32512 +mov z0.h, #32767 +mov z0.h, h0 +mov z0.h, p0/m, h0 +mov z0.h, p0/m, w0 +mov z0.h, p0/z, #32512 +mov z0.h, w0 +mov z0.q, q0 +mov z0.s, #0 +mov z0.s, #0xffff7fff +mov z0.s, #32768 +mov z0.s, p0/m, s0 +mov z0.s, p0/m, w0 +mov z0.s, s0 +mov z0.s, w0 +mov z21.d, #-128 +mov z21.d, #-32768 +mov z21.d, #127 +mov z21.d, #32512 +mov z21.d, p0/z, #-128 +mov z21.d, p0/z, #-32768 +mov z21.d, p0/z, #127 +mov z21.d, p0/z, #32512 +mov z21.d, p15/m, #-128 +mov z21.d, p15/m, #-32768 +mov z21.h, #-128 +mov z21.h, #-32768 +mov z21.h, #127 +mov z21.h, #32512 +mov z21.h, p0/z, #-128 +mov z21.h, p0/z, #-32768 +mov z21.h, p0/z, #127 +mov z21.h, p0/z, #32512 +mov z21.h, p15/m, #-128 +mov z21.h, p15/m, #-32768 +mov z21.s, #-128 +mov z21.s, #-32768 +mov z21.s, #127 +mov z21.s, #32512 +mov z21.s, p0/z, #-128 +mov z21.s, p0/z, #-32768 +mov z21.s, p0/z, #127 +mov z21.s, p0/z, #32512 +mov z21.s, p15/m, #-128 +mov z21.s, p15/m, #-32768 +mov z31.b, p15/m, z31.b +mov z31.b, p7/m, b31 +movprfx z31, z6 +mov z31.b, p7/m, wsp +mov z31.b, wsp +mov z31.b, z31.b[63] +mov z31.d, p15/m, z31.d +mov z31.d, p7/m, d31 +movprfx z31.d, p7/z, z6.d +mov z31.d, p7/m, sp +mov z31.d, sp +mov z31.d, z0.d +mov z31.d, z31.d[7] +mov z31.h, p15/m, z31.h +mov z31.h, p7/m, h31 +mov z31.h, p7/m, wsp +mov z31.h, wsp +mov z31.h, z31.h[31] +mov z31.s, p15/m, z31.s +mov z31.s, p7/m, s31 +mov z31.s, p7/m, wsp +mov z31.s, wsp +mov z31.s, z31.s[15] +mov z5.b, #-1 +mov z5.b, #-128 +mov z5.b, #127 +mov z5.b, p0/z, #-1 +mov z5.b, p0/z, #-128 +mov z5.b, p0/z, #127 +mov z5.b, 
p15/m, #-128 +mov z5.d, #-6 +mov z5.h, #-6 +mov z5.q, z17.q[3] +mov z5.s, #-6 +movs p0.b, p0.b +movs p0.b, p0/z, p0.b +movs p15.b, p15.b +movs p15.b, p15/z, p15.b +mrs x3, ID_AA64ZFR0_EL1 +mrs x3, ZCR_EL1 +mrs x3, ZCR_EL12 +mrs x3, ZCR_EL2 +mrs x3, ZCR_EL3 +msb z0.b, p7/m, z1.b, z31.b +msb z0.d, p7/m, z1.d, z31.d +msb z0.h, p7/m, z1.h, z31.h +msb z0.s, p7/m, z1.s, z31.s +msr ZCR_EL1, x3 +msr ZCR_EL12, x3 +msr ZCR_EL2, x3 +msr ZCR_EL3, x3 +mul z0.b, p7/m, z0.b, z31.b +mul z0.b, z1.b, z2.b +mul z0.d, p7/m, z0.d, z31.d +mul z0.d, z1.d, z15.d[1] +mul z0.h, p7/m, z0.h, z31.h +mul z0.h, z1.h, z2.h +mul z0.h, z1.h, z7.h[7] +mul z0.s, p7/m, z0.s, z31.s +mul z0.s, z1.s, z7.s[3] +mul z29.s, z30.s, z31.s +mul z31.b, z31.b, #-128 +mul z31.b, z31.b, #127 +mul z31.d, z31.d, #-128 +mul z31.d, z31.d, #127 +mul z31.d, z31.d, z31.d +mul z31.h, z31.h, #-128 +mul z31.h, z31.h, #127 +mul z31.s, z31.s, #-128 +mul z31.s, z31.s, #127 +nand p0.b, p0/z, p0.b, p0.b +nand p15.b, p15/z, p15.b, p15.b +nands p0.b, p0/z, p0.b, p0.b +nands p15.b, p15/z, p15.b, p15.b +nbsl z0.d, z0.d, z1.d, z2.d +neg z0.b, p0/m, z0.b +neg z0.d, p0/m, z0.d +neg z0.h, p0/m, z0.h +neg z0.s, p0/m, z0.s +neg z31.b, p7/m, z31.b +neg z31.d, p7/m, z31.d +neg z31.h, p7/m, z31.h +neg z31.s, p7/m, z31.s +nmatch p0.b, p0/z, z0.b, z0.b +nmatch p0.h, p0/z, z0.h, z0.h +nmatch p15.b, p7/z, z30.b, z31.b +nmatch p15.h, p7/z, z30.h, z31.h +nor p0.b, p0/z, p0.b, p0.b +nor p15.b, p15/z, p15.b, p15.b +nors p0.b, p0/z, p0.b, p0.b +nors p15.b, p15/z, p15.b, p15.b +not p0.b, p0/z, p0.b +not p15.b, p15/z, p15.b +not z31.b, p7/m, z31.b +not z31.d, p7/m, z31.d +not z31.h, p7/m, z31.h +not z31.s, p7/m, z31.s +nots p0.b, p0/z, p0.b +nots p15.b, p15/z, p15.b +orn p0.b, p0/z, p0.b, p0.b +orn p15.b, p15/z, p15.b, p15.b +orns p0.b, p0/z, p0.b, p0.b +orns p15.b, p15/z, p15.b, p15.b +orr p0.b, p0/z, p0.b, p1.b +orr z0.d, z0.d, #0x6 +orr z0.d, z0.d, #0xfffffffffffffff9 +orr z0.s, z0.s, #0x6 +orr z0.s, z0.s, #0xfffffff9 +orr z23.d, z13.d, z8.d +orr 
z23.h, z23.h, #0x6 +orr z23.h, z23.h, #0xfff9 +orr z31.b, p7/m, z31.b, z31.b +orr z31.d, p7/m, z31.d, z31.d +orr z31.h, p7/m, z31.h, z31.h +orr z31.s, p7/m, z31.s, z31.s +orr z5.b, z5.b, #0x6 +orr z5.b, z5.b, #0xf9 +orrs p0.b, p0/z, p0.b, p1.b +orv b0, p7, z31.b +orv d0, p7, z31.d +orv h0, p7, z31.h +orv s0, p7, z31.s +pfalse p15.b +pfirst p0.b, p15, p0.b +pfirst p15.b, p15, p15.b +pmul z0.b, z1.b, z2.b +pmul z29.b, z30.b, z31.b +pmullb z0.h, z1.b, z2.b +pmullb z29.q, z30.d, z31.d +pmullb z31.d, z31.s, z31.s +pmullt z0.h, z1.b, z2.b +pmullt z29.q, z30.d, z31.d +pmullt z31.d, z31.s, z31.s +pnext p0.b, p15, p0.b +pnext p0.d, p15, p0.d +pnext p0.h, p15, p0.h +pnext p0.s, p15, p0.s +pnext p15.b, p15, p15.b +prfb #14, p0, [x0] +prfb #15, p0, [x0] +prfb #6, p0, [x0] +prfb #7, p0, [x0] +prfb #7, p3, [z13.s, #31] +prfb #7, p3, [z13.s] +prfb pldl1keep, p0, [x0, z0.d, uxtw] +prfb pldl1keep, p0, [x0, z0.d] +prfb pldl1keep, p0, [x0, z0.s, uxtw] +prfb pldl1keep, p0, [x0] +prfb pldl1strm, p0, [x0, #-32, mul vl] +prfb pldl1strm, p0, [x0, #31, mul vl] +prfb pldl1strm, p0, [x0] +prfb pldl2keep, p0, [x0] +prfb pldl2strm, p0, [x0] +prfb pldl3keep, p0, [x0] +prfb pldl3strm, p0, [x0] +prfb pldl3strm, p5, [x10, z21.d, sxtw] +prfb pldl3strm, p5, [x10, z21.s, uxtw] +prfb pldl3strm, p5, [z10.d, #31] +prfb pldl3strm, p5, [z10.d] +prfb pstl1keep, p0, [x0] +prfb pstl1strm, p0, [x0] +prfb pstl2keep, p0, [x0] +prfb pstl2strm, p0, [x0] +prfb pstl3keep, p0, [x0] +prfb pstl3strm, p0, [x0] +prfd #14, p0, [x0] +prfd #15, p0, [x0] +prfd #15, p7, [z31.d, #248] +prfd #15, p7, [z31.d] +prfd #15, p7, [z31.s, #248] +prfd #15, p7, [z31.s] +prfd #6, p0, [x0] +prfd #7, p0, [x0] +prfd pldl1keep, p0, [x0, z0.d, lsl #3] +prfd pldl1keep, p0, [x0, z0.d, sxtw #3] +prfd pldl1keep, p0, [x0, z0.d, uxtw #3] +prfd pldl1keep, p0, [x0, z0.s, sxtw #3] +prfd pldl1keep, p0, [x0, z0.s, uxtw #3] +prfd pldl1keep, p0, [x0] +prfd pldl1strm, p0, [x0, #-32, mul vl] +prfd pldl1strm, p0, [x0, #31, mul vl] +prfd pldl1strm, p0, [x0] 
+prfd pldl2keep, p0, [x0] +prfd pldl2strm, p0, [x0] +prfd pldl3keep, p0, [x0] +prfd pldl3strm, p0, [x0] +prfd pstl1keep, p0, [x0] +prfd pstl1strm, p0, [x0] +prfd pstl2keep, p0, [x0] +prfd pstl2strm, p0, [x0] +prfd pstl3keep, p0, [x0] +prfd pstl3strm, p0, [x0] +prfh #14, p0, [x0] +prfh #15, p0, [x0] +prfh #15, p7, [z31.d, #62] +prfh #15, p7, [z31.d] +prfh #15, p7, [z31.s, #62] +prfh #15, p7, [z31.s] +prfh #6, p0, [x0] +prfh #7, p0, [x0] +prfh pldl1keep, p0, [x0, z0.d, lsl #1] +prfh pldl1keep, p0, [x0] +prfh pldl1strm, p0, [x0, #-32, mul vl] +prfh pldl1strm, p0, [x0, #31, mul vl] +prfh pldl1strm, p0, [x0] +prfh pldl2keep, p0, [x0] +prfh pldl2strm, p0, [x0] +prfh pldl3keep, p0, [x0] +prfh pldl3strm, p0, [x0] +prfh pldl3strm, p5, [x10, z21.d, sxtw #1] +prfh pldl3strm, p5, [x10, z21.d, uxtw #1] +prfh pldl3strm, p5, [x10, z21.s, sxtw #1] +prfh pldl3strm, p5, [x10, z21.s, uxtw #1] +prfh pstl1keep, p0, [x0] +prfh pstl1strm, p0, [x0] +prfh pstl2keep, p0, [x0] +prfh pstl2strm, p0, [x0] +prfh pstl3keep, p0, [x0] +prfh pstl3strm, p0, [x0] +prfw #14, p0, [x0] +prfw #15, p0, [x0] +prfw #15, p7, [z31.d, #124] +prfw #15, p7, [z31.d] +prfw #15, p7, [z31.s, #124] +prfw #15, p7, [z31.s] +prfw #6, p0, [x0] +prfw #7, p0, [x0] +prfw #7, p3, [x13, z8.d, uxtw #2] +prfw pldl1keep, p0, [x0, z0.d, sxtw #2] +prfw pldl1keep, p0, [x0, z0.s, uxtw #2] +prfw pldl1keep, p0, [x0] +prfw pldl1strm, p0, [x0, #-32, mul vl] +prfw pldl1strm, p0, [x0, #31, mul vl] +prfw pldl1strm, p0, [x0] +prfw pldl2keep, p0, [x0] +prfw pldl2strm, p0, [x0] +prfw pldl3keep, p0, [x0] +prfw pldl3strm, p0, [x0] +prfw pldl3strm, p5, [x10, z21.d, lsl #2] +prfw pldl3strm, p5, [x10, z21.s, sxtw #2] +prfw pstl1keep, p0, [x0] +prfw pstl1strm, p0, [x0] +prfw pstl2keep, p0, [x0] +prfw pstl2strm, p0, [x0] +prfw pstl3keep, p0, [x0] +prfw pstl3strm, p0, [x0] +ptest p15, p0.b +ptest p15, p15.b +ptrue p0.b, pow2 +ptrue p0.d, pow2 +ptrue p0.h, pow2 +ptrue p0.s, pow2 +ptrue p15.b +ptrue p15.d +ptrue p15.h +ptrue p15.s +ptrue p7.s +ptrue 
p7.s, #14 +ptrue p7.s, #15 +ptrue p7.s, #16 +ptrue p7.s, #17 +ptrue p7.s, #18 +ptrue p7.s, #19 +ptrue p7.s, #20 +ptrue p7.s, #21 +ptrue p7.s, #22 +ptrue p7.s, #23 +ptrue p7.s, #24 +ptrue p7.s, #25 +ptrue p7.s, #26 +ptrue p7.s, #27 +ptrue p7.s, #28 +ptrue p7.s, mul3 +ptrue p7.s, mul4 +ptrue p7.s, vl1 +ptrue p7.s, vl128 +ptrue p7.s, vl16 +ptrue p7.s, vl2 +ptrue p7.s, vl256 +ptrue p7.s, vl3 +ptrue p7.s, vl32 +ptrue p7.s, vl4 +ptrue p7.s, vl5 +ptrue p7.s, vl6 +ptrue p7.s, vl64 +ptrue p7.s, vl7 +ptrue p7.s, vl8 +ptrues p0.b, pow2 +ptrues p0.d, pow2 +ptrues p0.h, pow2 +ptrues p0.s, pow2 +ptrues p15.b +ptrues p15.d +ptrues p15.h +ptrues p15.s +ptrues p7.s +ptrues p7.s, #14 +ptrues p7.s, #15 +ptrues p7.s, #16 +ptrues p7.s, #17 +ptrues p7.s, #18 +ptrues p7.s, #19 +ptrues p7.s, #20 +ptrues p7.s, #21 +ptrues p7.s, #22 +ptrues p7.s, #23 +ptrues p7.s, #24 +ptrues p7.s, #25 +ptrues p7.s, #26 +ptrues p7.s, #27 +ptrues p7.s, #28 +ptrues p7.s, mul3 +ptrues p7.s, mul4 +ptrues p7.s, vl1 +ptrues p7.s, vl128 +ptrues p7.s, vl16 +ptrues p7.s, vl2 +ptrues p7.s, vl256 +ptrues p7.s, vl3 +ptrues p7.s, vl32 +ptrues p7.s, vl4 +ptrues p7.s, vl5 +ptrues p7.s, vl6 +ptrues p7.s, vl64 +ptrues p7.s, vl7 +ptrues p7.s, vl8 +punpkhi p0.h, p0.b +punpkhi p15.h, p15.b +punpklo p0.h, p0.b +punpklo p15.h, p15.b +raddhnb z0.b, z1.h, z31.h +raddhnb z0.h, z1.s, z31.s +raddhnb z0.s, z1.d, z31.d +raddhnt z0.b, z1.h, z31.h +raddhnt z0.h, z1.s, z31.s +raddhnt z0.s, z1.d, z31.d +rax1 z0.d, z1.d, z31.d +rbit z0.b, p7/m, z31.b +rbit z0.d, p7/m, z31.d +rbit z0.h, p7/m, z31.h +rbit z0.s, p7/m, z31.s +rdffr p0.b +rdffr p0.b, p0/z +rdffr p15.b +rdffr p15.b, p15/z +rdffrs p0.b, p0/z +rdffrs p15.b, p15/z +rdvl x0, #0 +rdvl x21, #-32 +rdvl x23, #31 +rdvl xzr, #-1 +rev z0.b, z31.b +rev z0.d, z31.d +rev z0.h, z31.h +rev z0.s, z31.s +revb z0.d, p7/m, z31.d +revb z0.h, p7/m, z31.h +revb z0.s, p7/m, z31.s +revh z0.d, p7/m, z31.d +revh z0.s, p7/m, z31.s +revw z0.d, p7/m, z31.d +rshrnb z0.b, z0.h, #1 +rshrnb z0.h, z0.s, #1 +rshrnb 
z0.s, z0.d, #1 +rshrnb z31.b, z31.h, #8 +rshrnb z31.h, z31.s, #16 +rshrnb z31.s, z31.d, #32 +rshrnt z0.b, z0.h, #1 +rshrnt z0.h, z0.s, #1 +rshrnt z0.s, z0.d, #1 +rshrnt z31.b, z31.h, #8 +rshrnt z31.h, z31.s, #16 +rshrnt z31.s, z31.d, #32 +rsubhnb z0.b, z1.h, z31.h +rsubhnb z0.h, z1.s, z31.s +rsubhnb z0.s, z1.d, z31.d +rsubhnt z0.b, z1.h, z31.h +rsubhnt z0.h, z1.s, z31.s +rsubhnt z0.s, z1.d, z31.d +saba z0.b, z1.b, z31.b +saba z0.d, z1.d, z31.d +saba z0.h, z1.h, z31.h +saba z0.s, z1.s, z31.s +sabalb z0.d, z1.s, z31.s +sabalb z0.h, z1.b, z31.b +sabalb z0.s, z1.h, z31.h +sabalt z0.d, z1.s, z31.s +sabalt z0.h, z1.b, z31.b +sabalt z0.s, z1.h, z31.h +sabd z31.b, p7/m, z31.b, z31.b +sabd z31.d, p7/m, z31.d, z31.d +sabd z31.h, p7/m, z31.h, z31.h +sabd z31.s, p7/m, z31.s, z31.s +sabdlb z0.h, z1.b, z2.b +sabdlb z29.s, z30.h, z31.h +sabdlb z31.d, z31.s, z31.s +sabdlt z0.h, z1.b, z2.b +sabdlt z29.s, z30.h, z31.h +sabdlt z31.d, z31.s, z31.s +sadalp z0.h, p0/m, z1.b +sadalp z29.s, p0/m, z30.h +sadalp z30.d, p7/m, z31.s +saddlb z0.h, z1.b, z2.b +saddlb z29.s, z30.h, z31.h +saddlb z31.d, z31.s, z31.s +saddlbt z0.d, z1.s, z31.s +saddlbt z0.h, z1.b, z31.b +saddlbt z0.s, z1.h, z31.h +saddlt z0.h, z1.b, z2.b +saddlt z29.s, z30.h, z31.h +saddlt z31.d, z31.s, z31.s +saddv d0, p7, z31.b +saddv d0, p7, z31.h +saddv d0, p7, z31.s +saddwb z0.h, z1.h, z2.b +saddwb z29.s, z30.s, z31.h +saddwb z31.d, z31.d, z31.s +saddwt z0.h, z1.h, z2.b +saddwt z29.s, z30.s, z31.h +saddwt z31.d, z31.d, z31.s +sbclb z0.d, z1.d, z31.d +sbclb z0.s, z1.s, z31.s +sbclt z0.d, z1.d, z31.d +sbclt z0.s, z1.s, z31.s +scvtf z0.d, p0/m, z0.d +scvtf z0.d, p0/m, z0.s +scvtf z0.h, p0/m, z0.d +scvtf z0.h, p0/m, z0.h +scvtf z0.h, p0/m, z0.s +scvtf z0.s, p0/m, z0.d +scvtf z0.s, p0/m, z0.s +sdiv z0.d, p7/m, z0.d, z31.d +sdiv z0.s, p7/m, z0.s, z31.s +sdivr z0.d, p7/m, z0.d, z31.d +sdivr z0.s, p7/m, z0.s, z31.s +sdot z0.d, z1.h, z15.h[1] +sdot z0.d, z1.h, z31.h +sdot z0.s, z1.b, z31.b +sdot z0.s, z1.b, z7.b[3] +sel z23.b, p11, 
z13.b, z8.b +sel z23.d, p11, z13.d, z8.d +sel z23.h, p11, z13.h, z8.h +sel z23.s, p11, z13.s, z8.s +setffr +shadd z0.b, p0/m, z0.b, z1.b +shadd z0.h, p0/m, z0.h, z1.h +shadd z29.s, p7/m, z29.s, z30.s +shadd z31.d, p7/m, z31.d, z30.d +shrnb z0.b, z0.h, #1 +shrnb z0.h, z0.s, #1 +shrnb z0.s, z0.d, #1 +shrnb z31.b, z31.h, #8 +shrnb z31.h, z31.s, #16 +shrnb z31.s, z31.d, #32 +shrnt z0.b, z0.h, #1 +shrnt z0.h, z0.s, #1 +shrnt z0.s, z0.d, #1 +shrnt z31.b, z31.h, #8 +shrnt z31.h, z31.s, #16 +shrnt z31.s, z31.d, #32 +shsub z0.b, p0/m, z0.b, z1.b +shsub z0.h, p0/m, z0.h, z1.h +shsub z29.s, p7/m, z29.s, z30.s +shsub z31.d, p7/m, z31.d, z30.d +shsubr z0.b, p0/m, z0.b, z1.b +shsubr z0.h, p0/m, z0.h, z1.h +shsubr z29.s, p7/m, z29.s, z30.s +shsubr z31.d, p7/m, z31.d, z30.d +sli z0.b, z0.b, #0 +sli z0.d, z0.d, #0 +sli z0.h, z0.h, #0 +sli z0.s, z0.s, #0 +sli z31.b, z31.b, #7 +sli z31.d, z31.d, #63 +sli z31.h, z31.h, #15 +sli z31.s, z31.s, #31 +sm4e z0.s, z0.s, z31.s +sm4ekey z0.s, z1.s, z31.s +smax z0.b, z0.b, #-128 +smax z0.d, z0.d, #-128 +smax z0.h, z0.h, #-128 +smax z0.s, z0.s, #-128 +smax z31.b, p7/m, z31.b, z31.b +smax z31.b, z31.b, #127 +smax z31.d, p7/m, z31.d, z31.d +smax z31.d, z31.d, #127 +smax z31.h, p7/m, z31.h, z31.h +smax z31.h, z31.h, #127 +smax z31.s, p7/m, z31.s, z31.s +smax z31.s, z31.s, #127 +smaxp z0.b, p0/m, z0.b, z1.b +smaxp z0.h, p0/m, z0.h, z1.h +smaxp z29.s, p7/m, z29.s, z30.s +smaxp z31.d, p7/m, z31.d, z30.d +smaxv b0, p7, z31.b +smaxv d0, p7, z31.d +smaxv h0, p7, z31.h +smaxv s0, p7, z31.s +smin z0.b, z0.b, #-128 +smin z0.d, z0.d, #-128 +smin z0.h, z0.h, #-128 +smin z0.s, z0.s, #-128 +smin z31.b, p7/m, z31.b, z31.b +smin z31.b, z31.b, #127 +smin z31.d, p7/m, z31.d, z31.d +smin z31.d, z31.d, #127 +smin z31.h, p7/m, z31.h, z31.h +smin z31.h, z31.h, #127 +smin z31.s, p7/m, z31.s, z31.s +smin z31.s, z31.s, #127 +sminp z0.b, p0/m, z0.b, z1.b +sminp z0.h, p0/m, z0.h, z1.h +sminp z29.s, p7/m, z29.s, z30.s +sminp z31.d, p7/m, z31.d, z30.d +sminv b0, p7, z31.b 
+sminv d0, p7, z31.d +sminv h0, p7, z31.h +sminv s0, p7, z31.s +smlalb z0.d, z1.s, z15.s[1] +smlalb z0.d, z1.s, z31.s +smlalb z0.h, z1.b, z31.b +smlalb z0.s, z1.h, z31.h +smlalb z0.s, z1.h, z7.h[7] +smlalt z0.d, z1.s, z15.s[1] +smlalt z0.d, z1.s, z31.s +smlalt z0.h, z1.b, z31.b +smlalt z0.s, z1.h, z31.h +smlalt z0.s, z1.h, z7.h[7] +smlslb z0.d, z1.s, z15.s[1] +smlslb z0.d, z1.s, z31.s +smlslb z0.h, z1.b, z31.b +smlslb z0.s, z1.h, z31.h +smlslb z0.s, z1.h, z7.h[7] +smlslt z0.d, z1.s, z15.s[1] +smlslt z0.d, z1.s, z31.s +smlslt z0.h, z1.b, z31.b +smlslt z0.s, z1.h, z31.h +smlslt z0.s, z1.h, z7.h[7] +smmla z0.s, z1.b, z2.b +smulh z0.b, p7/m, z0.b, z31.b +smulh z0.b, z1.b, z2.b +smulh z0.d, p7/m, z0.d, z31.d +smulh z0.h, p7/m, z0.h, z31.h +smulh z0.h, z1.h, z2.h +smulh z0.s, p7/m, z0.s, z31.s +smulh z29.s, z30.s, z31.s +smulh z31.d, z31.d, z31.d +smullb z0.d, z1.s, z15.s[1] +smullb z0.h, z1.b, z2.b +smullb z0.s, z1.h, z7.h[7] +smullb z29.s, z30.h, z31.h +smullb z31.d, z31.s, z31.s +smullt z0.d, z1.s, z15.s[1] +smullt z0.h, z1.b, z2.b +smullt z0.s, z1.h, z7.h[7] +smullt z29.s, z30.h, z31.h +smullt z31.d, z31.s, z31.s +splice z29.b, p7, { z30.b, z31.b } +splice z29.d, p7, { z30.d, z31.d } +splice z29.h, p7, { z30.h, z31.h } +splice z29.s, p7, { z30.s, z31.s } +splice z31.b, p7, z31.b, z31.b +splice z31.d, p7, z31.d, z31.d +splice z31.h, p7, z31.h, z31.h +splice z31.s, p7, z31.s, z31.s +sqabs z31.b, p7/m, z31.b +sqabs z31.d, p7/m, z31.d +sqabs z31.h, p7/m, z31.h +sqabs z31.s, p7/m, z31.s +sqadd z0.b, p0/m, z0.b, z1.b +sqadd z0.b, z0.b, #0 +sqadd z0.b, z0.b, z0.b +sqadd z0.d, z0.d, #0 +sqadd z0.d, z0.d, #0, lsl #8 +sqadd z0.d, z0.d, z0.d +sqadd z0.h, p0/m, z0.h, z1.h +sqadd z0.h, z0.h, #0 +sqadd z0.h, z0.h, #0, lsl #8 +sqadd z0.h, z0.h, z0.h +sqadd z0.s, z0.s, #0 +sqadd z0.s, z0.s, #0, lsl #8 +sqadd z0.s, z0.s, z0.s +sqadd z29.s, p7/m, z29.s, z30.s +sqadd z31.b, z31.b, #255 +sqadd z31.d, p7/m, z31.d, z30.d +sqadd z31.d, z31.d, #65280 +sqadd z31.h, z31.h, #65280 +sqadd 
z31.s, z31.s, #65280 +sqcadd z0.b, z0.b, z0.b, #90 +sqcadd z0.d, z0.d, z0.d, #90 +sqcadd z0.h, z0.h, z0.h, #90 +sqcadd z0.s, z0.s, z0.s, #90 +sqcadd z31.b, z31.b, z31.b, #270 +sqcadd z31.d, z31.d, z31.d, #270 +sqcadd z31.h, z31.h, z31.h, #270 +sqcadd z31.s, z31.s, z31.s, #270 +sqdecb x0 +sqdecb x0, #14 +sqdecb x0, all, mul #16 +sqdecb x0, pow2 +sqdecb x0, vl1 +sqdecb x0, w0 +sqdecb x0, w0, all, mul #16 +sqdecb x0, w0, pow2 +sqdecb x0, w0, pow2, mul #16 +sqdecd x0 +sqdecd x0, #14 +sqdecd x0, all, mul #16 +sqdecd x0, pow2 +sqdecd x0, vl1 +sqdecd x0, w0 +sqdecd x0, w0, all, mul #16 +sqdecd x0, w0, pow2 +sqdecd x0, w0, pow2, mul #16 +sqdecd z0.d +sqdecd z0.d, all, mul #16 +sqdecd z0.d, pow2 +sqdecd z0.d, pow2, mul #16 +sqdech x0 +sqdech x0, #14 +sqdech x0, all, mul #16 +sqdech x0, pow2 +sqdech x0, vl1 +sqdech x0, w0 +sqdech x0, w0, all, mul #16 +sqdech x0, w0, pow2 +sqdech x0, w0, pow2, mul #16 +sqdech z0.h +sqdech z0.h, all, mul #16 +sqdech z0.h, pow2 +sqdech z0.h, pow2, mul #16 +sqdecp x0, p0.b +sqdecp x0, p0.d +sqdecp x0, p0.h +sqdecp x0, p0.s +sqdecp xzr, p15.b, wzr +sqdecp xzr, p15.d, wzr +sqdecp xzr, p15.h, wzr +sqdecp xzr, p15.s, wzr +sqdecp z0.d, p0.d +sqdecp z0.h, p0.h +sqdecp z0.s, p0.s +sqdecw x0 +sqdecw x0, #14 +sqdecw x0, all, mul #16 +sqdecw x0, pow2 +sqdecw x0, vl1 +sqdecw x0, w0 +sqdecw x0, w0, all, mul #16 +sqdecw x0, w0, pow2 +sqdecw x0, w0, pow2, mul #16 +sqdecw z0.s +sqdecw z0.s, all, mul #16 +sqdecw z0.s, pow2 +sqdecw z0.s, pow2, mul #16 +sqdmlalb z0.d, z1.s, z15.s[3] +sqdmlalb z0.d, z1.s, z31.s +sqdmlalb z0.h, z1.b, z31.b +sqdmlalb z0.s, z1.h, z31.h +sqdmlalb z0.s, z1.h, z7.h[7] +sqdmlalbt z0.d, z1.s, z31.s +sqdmlalbt z0.h, z1.b, z31.b +sqdmlalbt z0.s, z1.h, z31.h +sqdmlalt z0.d, z1.s, z15.s[3] +sqdmlalt z0.d, z1.s, z31.s +sqdmlalt z0.h, z1.b, z31.b +sqdmlalt z0.s, z1.h, z31.h +sqdmlalt z0.s, z1.h, z7.h[7] +sqdmlslb z0.d, z1.s, z15.s[3] +sqdmlslb z0.d, z1.s, z31.s +sqdmlslb z0.h, z1.b, z31.b +sqdmlslb z0.s, z1.h, z31.h +sqdmlslb z0.s, z1.h, 
z7.h[7] +sqdmlslbt z0.d, z1.s, z31.s +sqdmlslbt z0.h, z1.b, z31.b +sqdmlslbt z0.s, z1.h, z31.h +sqdmlslt z0.d, z1.s, z15.s[3] +sqdmlslt z0.d, z1.s, z31.s +sqdmlslt z0.h, z1.b, z31.b +sqdmlslt z0.s, z1.h, z31.h +sqdmlslt z0.s, z1.h, z7.h[7] +sqdmulh z0.b, z1.b, z2.b +sqdmulh z0.d, z1.d, z15.d[1] +sqdmulh z0.h, z1.h, z2.h +sqdmulh z0.h, z1.h, z7.h[7] +sqdmulh z0.s, z1.s, z7.s[3] +sqdmulh z29.s, z30.s, z31.s +sqdmulh z31.d, z31.d, z31.d +sqdmullb z0.d, z1.s, z15.s[1] +sqdmullb z0.h, z1.b, z2.b +sqdmullb z0.s, z1.h, z7.h[7] +sqdmullb z29.s, z30.h, z31.h +sqdmullb z31.d, z31.s, z31.s +sqdmullt z0.d, z1.s, z15.s[1] +sqdmullt z0.h, z1.b, z2.b +sqdmullt z0.s, z1.h, z7.h[7] +sqdmullt z29.s, z30.h, z31.h +sqdmullt z31.d, z31.s, z31.s +sqincb x0 +sqincb x0, #14 +sqincb x0, all, mul #16 +sqincb x0, pow2 +sqincb x0, vl1 +sqincb x0, w0 +sqincb x0, w0, all, mul #16 +sqincb x0, w0, pow2 +sqincb x0, w0, pow2, mul #16 +sqincd x0 +sqincd x0, #14 +sqincd x0, all, mul #16 +sqincd x0, pow2 +sqincd x0, vl1 +sqincd x0, w0 +sqincd x0, w0, all, mul #16 +sqincd x0, w0, pow2 +sqincd x0, w0, pow2, mul #16 +sqincd z0.d +sqincd z0.d, all, mul #16 +sqincd z0.d, pow2 +sqincd z0.d, pow2, mul #16 +sqinch x0 +sqinch x0, #14 +sqinch x0, all, mul #16 +sqinch x0, pow2 +sqinch x0, vl1 +sqinch x0, w0 +sqinch x0, w0, all, mul #16 +sqinch x0, w0, pow2 +sqinch x0, w0, pow2, mul #16 +sqinch z0.h +sqinch z0.h, all, mul #16 +sqinch z0.h, pow2 +sqinch z0.h, pow2, mul #16 +sqincp x0, p0.b +sqincp x0, p0.d +sqincp x0, p0.h +sqincp x0, p0.s +sqincp xzr, p15.b, wzr +sqincp xzr, p15.d, wzr +sqincp xzr, p15.h, wzr +sqincp xzr, p15.s, wzr +sqincp z0.d, p0.d +sqincp z0.h, p0.h +sqincp z0.s, p0.s +sqincw x0 +sqincw x0, #14 +sqincw x0, all, mul #16 +sqincw x0, pow2 +sqincw x0, vl1 +sqincw x0, w0 +sqincw x0, w0, all, mul #16 +sqincw x0, w0, pow2 +sqincw x0, w0, pow2, mul #16 +sqincw z0.s +sqincw z0.s, all, mul #16 +sqincw z0.s, pow2 +sqincw z0.s, pow2, mul #16 +sqneg z31.b, p7/m, z31.b +sqneg z31.d, p7/m, z31.d +sqneg 
z31.h, p7/m, z31.h +sqneg z31.s, p7/m, z31.s +sqrdcmlah z0.b, z1.b, z2.b, #0 +sqrdcmlah z0.d, z1.d, z2.d, #0 +sqrdcmlah z0.h, z1.h, z2.h, #0 +sqrdcmlah z0.h, z1.h, z2.h[0], #0 +sqrdcmlah z0.s, z1.s, z2.s, #0 +sqrdcmlah z0.s, z1.s, z2.s[0], #0 +sqrdcmlah z15.b, z16.b, z17.b, #270 +sqrdcmlah z15.d, z16.d, z17.d, #270 +sqrdcmlah z15.h, z16.h, z17.h, #270 +sqrdcmlah z15.s, z16.s, z17.s, #270 +sqrdcmlah z29.b, z30.b, z31.b, #90 +sqrdcmlah z29.d, z30.d, z31.d, #90 +sqrdcmlah z29.h, z30.h, z31.h, #90 +sqrdcmlah z29.s, z30.s, z31.s, #90 +sqrdcmlah z31.b, z31.b, z31.b, #180 +sqrdcmlah z31.d, z31.d, z31.d, #180 +sqrdcmlah z31.h, z30.h, z7.h[0], #180 +sqrdcmlah z31.h, z31.h, z31.h, #180 +sqrdcmlah z31.s, z30.s, z7.s[0], #180 +sqrdcmlah z31.s, z31.s, z31.s, #180 +sqrdmlah z0.b, z1.b, z31.b +sqrdmlah z0.d, z1.d, z15.d[1] +sqrdmlah z0.d, z1.d, z31.d +sqrdmlah z0.h, z1.h, z31.h +sqrdmlah z0.h, z1.h, z7.h[7] +sqrdmlah z0.s, z1.s, z31.s +sqrdmlah z0.s, z1.s, z7.s[3] +sqrdmlsh z0.b, z1.b, z31.b +sqrdmlsh z0.d, z1.d, z15.d[1] +sqrdmlsh z0.d, z1.d, z31.d +sqrdmlsh z0.h, z1.h, z31.h +sqrdmlsh z0.h, z1.h, z7.h[7] +sqrdmlsh z0.s, z1.s, z31.s +sqrdmlsh z0.s, z1.s, z7.s[3] +sqrdmulh z0.b, z1.b, z2.b +sqrdmulh z0.d, z1.d, z15.d[1] +sqrdmulh z0.h, z1.h, z2.h +sqrdmulh z0.h, z1.h, z7.h[7] +sqrdmulh z0.s, z1.s, z7.s[3] +sqrdmulh z29.s, z30.s, z31.s +sqrdmulh z31.d, z31.d, z31.d +sqrshl z0.b, p0/m, z0.b, z1.b +sqrshl z0.h, p0/m, z0.h, z1.h +sqrshl z29.s, p7/m, z29.s, z30.s +sqrshl z31.d, p7/m, z31.d, z30.d +sqrshlr z0.b, p0/m, z0.b, z1.b +sqrshlr z0.h, p0/m, z0.h, z1.h +sqrshlr z29.s, p7/m, z29.s, z30.s +sqrshlr z31.d, p7/m, z31.d, z30.d +sqrshrnb z0.b, z0.h, #1 +sqrshrnb z0.h, z0.s, #1 +sqrshrnb z0.s, z0.d, #1 +sqrshrnb z31.b, z31.h, #8 +sqrshrnb z31.h, z31.s, #16 +sqrshrnb z31.s, z31.d, #32 +sqrshrnt z0.b, z0.h, #1 +sqrshrnt z0.h, z0.s, #1 +sqrshrnt z0.s, z0.d, #1 +sqrshrnt z31.b, z31.h, #8 +sqrshrnt z31.h, z31.s, #16 +sqrshrnt z31.s, z31.d, #32 +sqrshrunb z0.b, z0.h, #1 +sqrshrunb z0.h, 
z0.s, #1 +sqrshrunb z0.s, z0.d, #1 +sqrshrunb z31.b, z31.h, #8 +sqrshrunb z31.h, z31.s, #16 +sqrshrunb z31.s, z31.d, #32 +sqrshrunt z0.b, z0.h, #1 +sqrshrunt z0.h, z0.s, #1 +sqrshrunt z0.s, z0.d, #1 +sqrshrunt z31.b, z31.h, #8 +sqrshrunt z31.h, z31.s, #16 +sqrshrunt z31.s, z31.d, #32 +sqshl z0.b, p0/m, z0.b, #0 +sqshl z0.b, p0/m, z0.b, z1.b +sqshl z0.d, p0/m, z0.d, #0 +sqshl z0.h, p0/m, z0.h, #0 +sqshl z0.h, p0/m, z0.h, z1.h +sqshl z0.s, p0/m, z0.s, #0 +sqshl z29.s, p7/m, z29.s, z30.s +sqshl z31.b, p0/m, z31.b, #7 +sqshl z31.d, p0/m, z31.d, #63 +sqshl z31.d, p7/m, z31.d, z30.d +sqshl z31.h, p0/m, z31.h, #15 +sqshl z31.s, p0/m, z31.s, #31 +sqshlr z0.b, p0/m, z0.b, z1.b +sqshlr z0.h, p0/m, z0.h, z1.h +sqshlr z29.s, p7/m, z29.s, z30.s +sqshlr z31.d, p7/m, z31.d, z30.d +sqshlu z0.b, p0/m, z0.b, #0 +sqshlu z0.d, p0/m, z0.d, #0 +sqshlu z0.h, p0/m, z0.h, #0 +sqshlu z0.s, p0/m, z0.s, #0 +sqshlu z31.b, p0/m, z31.b, #7 +sqshlu z31.d, p0/m, z31.d, #63 +sqshlu z31.h, p0/m, z31.h, #15 +sqshlu z31.s, p0/m, z31.s, #31 +sqshrnb z0.b, z0.h, #1 +sqshrnb z0.h, z0.s, #1 +sqshrnb z0.s, z0.d, #1 +sqshrnb z31.b, z31.h, #8 +sqshrnb z31.h, z31.s, #16 +sqshrnb z31.s, z31.d, #32 +sqshrnt z0.b, z0.h, #1 +sqshrnt z0.h, z0.s, #1 +sqshrnt z0.s, z0.d, #1 +sqshrnt z31.b, z31.h, #8 +sqshrnt z31.h, z31.s, #16 +sqshrnt z31.s, z31.d, #32 +sqshrunb z0.b, z0.h, #1 +sqshrunb z0.h, z0.s, #1 +sqshrunb z0.s, z0.d, #1 +sqshrunb z31.b, z31.h, #8 +sqshrunb z31.h, z31.s, #16 +sqshrunb z31.s, z31.d, #32 +sqshrunt z0.b, z0.h, #1 +sqshrunt z0.h, z0.s, #1 +sqshrunt z0.s, z0.d, #1 +sqshrunt z31.b, z31.h, #8 +sqshrunt z31.h, z31.s, #16 +sqshrunt z31.s, z31.d, #32 +sqsub z0.b, p0/m, z0.b, z1.b +sqsub z0.b, z0.b, #0 +sqsub z0.b, z0.b, z0.b +sqsub z0.d, z0.d, #0 +sqsub z0.d, z0.d, #0, lsl #8 +sqsub z0.d, z0.d, z0.d +sqsub z0.h, p0/m, z0.h, z1.h +sqsub z0.h, z0.h, #0 +sqsub z0.h, z0.h, #0, lsl #8 +sqsub z0.h, z0.h, z0.h +sqsub z0.s, z0.s, #0 +sqsub z0.s, z0.s, #0, lsl #8 +sqsub z0.s, z0.s, z0.s +sqsub z29.s, p7/m, z29.s, 
z30.s +sqsub z31.b, z31.b, #255 +sqsub z31.d, p7/m, z31.d, z30.d +sqsub z31.d, z31.d, #65280 +sqsub z31.h, z31.h, #65280 +sqsub z31.s, z31.s, #65280 +sqsubr z0.b, p0/m, z0.b, z1.b +sqsubr z0.h, p0/m, z0.h, z1.h +sqsubr z29.s, p7/m, z29.s, z30.s +sqsubr z31.d, p7/m, z31.d, z30.d +sqxtnb z0.b, z31.h +sqxtnb z0.h, z31.s +sqxtnb z0.s, z31.d +sqxtnt z0.b, z31.h +sqxtnt z0.h, z31.s +sqxtnt z0.s, z31.d +sqxtunb z0.b, z31.h +sqxtunb z0.h, z31.s +sqxtunb z0.s, z31.d +sqxtunt z0.b, z31.h +sqxtunt z0.h, z31.s +sqxtunt z0.s, z31.d +srhadd z0.b, p0/m, z0.b, z1.b +srhadd z0.h, p0/m, z0.h, z1.h +srhadd z29.s, p7/m, z29.s, z30.s +srhadd z31.d, p7/m, z31.d, z30.d +sri z0.b, z0.b, #1 +sri z0.d, z0.d, #1 +sri z0.h, z0.h, #1 +sri z0.s, z0.s, #1 +sri z31.b, z31.b, #8 +sri z31.d, z31.d, #64 +sri z31.h, z31.h, #16 +sri z31.s, z31.s, #32 +srshl z0.b, p0/m, z0.b, z1.b +srshl z0.h, p0/m, z0.h, z1.h +srshl z29.s, p7/m, z29.s, z30.s +srshl z31.d, p7/m, z31.d, z30.d +srshlr z0.b, p0/m, z0.b, z1.b +srshlr z0.h, p0/m, z0.h, z1.h +srshlr z29.s, p7/m, z29.s, z30.s +srshlr z31.d, p7/m, z31.d, z30.d +srshr z0.b, p0/m, z0.b, #1 +srshr z0.d, p0/m, z0.d, #1 +srshr z0.h, p0/m, z0.h, #1 +srshr z0.s, p0/m, z0.s, #1 +srshr z31.b, p0/m, z31.b, #8 +srshr z31.d, p0/m, z31.d, #64 +srshr z31.h, p0/m, z31.h, #16 +srshr z31.s, p0/m, z31.s, #32 +srsra z0.b, z0.b, #1 +srsra z0.d, z0.d, #1 +srsra z0.h, z0.h, #1 +srsra z0.s, z0.s, #1 +srsra z31.b, z31.b, #8 +srsra z31.d, z31.d, #64 +srsra z31.h, z31.h, #16 +srsra z31.s, z31.s, #32 +sshllb z0.d, z0.s, #0 +sshllb z0.h, z0.b, #0 +sshllb z0.s, z0.h, #0 +sshllb z31.d, z31.s, #31 +sshllb z31.h, z31.b, #7 +sshllb z31.s, z31.h, #15 +sshllt z0.d, z0.s, #0 +sshllt z0.h, z0.b, #0 +sshllt z0.s, z0.h, #0 +sshllt z31.d, z31.s, #31 +sshllt z31.h, z31.b, #7 +sshllt z31.s, z31.h, #15 +ssra z0.b, z0.b, #1 +ssra z0.d, z0.d, #1 +ssra z0.h, z0.h, #1 +ssra z0.s, z0.s, #1 +ssra z31.b, z31.b, #8 +ssra z31.d, z31.d, #64 +ssra z31.h, z31.h, #16 +ssra z31.s, z31.s, #32 +ssublb z0.h, z1.b, z2.b 
+ssublb z29.s, z30.h, z31.h +ssublb z31.d, z31.s, z31.s +ssublbt z0.d, z1.s, z31.s +ssublbt z0.h, z1.b, z31.b +ssublbt z0.s, z1.h, z31.h +ssublt z0.h, z1.b, z2.b +ssublt z29.s, z30.h, z31.h +ssublt z31.d, z31.s, z31.s +ssubltb z0.d, z1.s, z31.s +ssubltb z0.h, z1.b, z31.b +ssubltb z0.s, z1.h, z31.h +ssubwb z0.h, z1.h, z2.b +ssubwb z29.s, z30.s, z31.h +ssubwb z31.d, z31.d, z31.s +ssubwt z0.h, z1.h, z2.b +ssubwt z29.s, z30.s, z31.h +ssubwt z31.d, z31.d, z31.s +st1b { z0.b }, p0, [x0, x0] +st1b { z0.b }, p0, [x0] +st1b { z0.d }, p0, [x0, x0] +st1b { z0.d }, p0, [x0, z0.d, sxtw] +st1b { z0.d }, p0, [x0, z0.d, uxtw] +st1b { z0.d }, p0, [x0, z0.d] +st1b { z0.d }, p0, [x0] +st1b { z0.d }, p7, [z0.d] +st1b { z0.h }, p0, [x0, x0] +st1b { z0.h }, p0, [x0] +st1b { z0.s }, p0, [x0, x0] +st1b { z0.s }, p0, [x0, z0.s, sxtw] +st1b { z0.s }, p0, [x0, z0.s, uxtw] +st1b { z0.s }, p0, [x0] +st1b { z0.s }, p7, [z0.s] +st1b { z21.b }, p5, [x10, #5, mul vl] +st1b { z21.d }, p5, [x10, #5, mul vl] +st1b { z21.h }, p5, [x10, #5, mul vl] +st1b { z21.s }, p5, [x10, #5, mul vl] +st1b { z31.b }, p7, [sp, #-1, mul vl] +st1b { z31.d }, p7, [sp, #-1, mul vl] +st1b { z31.d }, p7, [z31.d, #31] +st1b { z31.h }, p7, [sp, #-1, mul vl] +st1b { z31.s }, p7, [sp, #-1, mul vl] +st1b { z31.s }, p7, [z31.s, #31] +st1d { z0.d }, p0, [x0, x0, lsl #3] +st1d { z0.d }, p0, [x0, z0.d, lsl #3] +st1d { z0.d }, p0, [x0, z0.d, sxtw #3] +st1d { z0.d }, p0, [x0, z0.d, sxtw] +st1d { z0.d }, p0, [x0, z0.d, uxtw #3] +st1d { z0.d }, p0, [x0, z0.d, uxtw] +st1d { z0.d }, p0, [x0, z0.d] +st1d { z0.d }, p0, [x0] +st1d { z0.d }, p7, [z0.d] +st1d { z21.d }, p5, [x10, #5, mul vl] +st1d { z31.d }, p7, [sp, #-1, mul vl] +st1d { z31.d }, p7, [z31.d, #248] +st1h { z0.d }, p0, [x0, x0, lsl #1] +st1h { z0.d }, p0, [x0, z0.d, lsl #1] +st1h { z0.d }, p0, [x0, z0.d, sxtw #1] +st1h { z0.d }, p0, [x0, z0.d, sxtw] +st1h { z0.d }, p0, [x0, z0.d, uxtw #1] +st1h { z0.d }, p0, [x0, z0.d, uxtw] +st1h { z0.d }, p0, [x0, z0.d] +st1h { z0.d }, p0, 
[x0] +st1h { z0.d }, p7, [z0.d] +st1h { z0.h }, p0, [x0, x0, lsl #1] +st1h { z0.h }, p0, [x0] +st1h { z0.s }, p0, [x0, x0, lsl #1] +st1h { z0.s }, p0, [x0, z0.s, sxtw #1] +st1h { z0.s }, p0, [x0, z0.s, sxtw] +st1h { z0.s }, p0, [x0, z0.s, uxtw #1] +st1h { z0.s }, p0, [x0, z0.s, uxtw] +st1h { z0.s }, p0, [x0] +st1h { z0.s }, p7, [z0.s] +st1h { z21.d }, p5, [x10, #5, mul vl] +st1h { z21.h }, p5, [x10, #5, mul vl] +st1h { z21.s }, p5, [x10, #5, mul vl] +st1h { z31.d }, p7, [sp, #-1, mul vl] +st1h { z31.d }, p7, [z31.d, #62] +st1h { z31.h }, p7, [sp, #-1, mul vl] +st1h { z31.s }, p7, [sp, #-1, mul vl] +st1h { z31.s }, p7, [z31.s, #62] +st1w { z0.d }, p0, [x0, x0, lsl #2] +st1w { z0.d }, p0, [x0, z0.d, lsl #2] +st1w { z0.d }, p0, [x0, z0.d, sxtw #2] +st1w { z0.d }, p0, [x0, z0.d, sxtw] +st1w { z0.d }, p0, [x0, z0.d, uxtw #2] +st1w { z0.d }, p0, [x0, z0.d, uxtw] +st1w { z0.d }, p0, [x0, z0.d] +st1w { z0.d }, p0, [x0] +st1w { z0.d }, p7, [z0.d] +st1w { z0.s }, p0, [x0, x0, lsl #2] +st1w { z0.s }, p0, [x0, z0.s, sxtw #2] +st1w { z0.s }, p0, [x0, z0.s, sxtw] +st1w { z0.s }, p0, [x0, z0.s, uxtw #2] +st1w { z0.s }, p0, [x0, z0.s, uxtw] +st1w { z0.s }, p0, [x0] +st1w { z0.s }, p7, [z0.s] +st1w { z21.d }, p5, [x10, #5, mul vl] +st1w { z21.s }, p5, [x10, #5, mul vl] +st1w { z31.d }, p7, [sp, #-1, mul vl] +st1w { z31.d }, p7, [z31.d, #124] +st1w { z31.s }, p7, [sp, #-1, mul vl] +st1w { z31.s }, p7, [z31.s, #124] +st2b { z0.b, z1.b }, p0, [x0, x0] +st2b { z0.b, z1.b }, p0, [x0] +st2b { z21.b, z22.b }, p5, [x10, #10, mul vl] +st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl] +st2b { z5.b, z6.b }, p3, [x17, x16] +st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3] +st2d { z0.d, z1.d }, p0, [x0] +st2d { z21.d, z22.d }, p5, [x10, #10, mul vl] +st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl] +st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3] +st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1] +st2h { z0.h, z1.h }, p0, [x0] +st2h { z21.h, z22.h }, p5, [x10, #10, mul vl] +st2h { z23.h, z24.h }, p3, [x13, #-16, mul 
vl] +st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1] +st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2] +st2w { z0.s, z1.s }, p0, [x0] +st2w { z21.s, z22.s }, p5, [x10, #10, mul vl] +st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl] +st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2] +st3b { z0.b, z1.b, z2.b }, p0, [x0, x0] +st3b { z0.b, z1.b, z2.b }, p0, [x0] +st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl] +st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl] +st3b { z5.b, z6.b, z7.b }, p3, [x17, x16] +st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3] +st3d { z0.d, z1.d, z2.d }, p0, [x0] +st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl] +st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl] +st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3] +st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1] +st3h { z0.h, z1.h, z2.h }, p0, [x0] +st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl] +st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl] +st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1] +st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2] +st3w { z0.s, z1.s, z2.s }, p0, [x0] +st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl] +st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl] +st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2] +st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0] +st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0] +st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl] +st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl] +st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16] +st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3] +st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0] +st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl] +st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl] +st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3] +st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1] +st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0] +st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl] +st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl] +st4h { 
z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1] +st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2] +st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0] +st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl] +st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl] +st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2] +stnt1b { z0.b }, p0, [x0, x0] +stnt1b { z0.b }, p0, [x0] +stnt1b { z0.d }, p0, [z1.d] +stnt1b { z0.s }, p0, [z1.s] +stnt1b { z21.b }, p5, [x10, #7, mul vl] +stnt1b { z23.b }, p3, [x13, #-8, mul vl] +stnt1b { z31.d }, p7, [z31.d, x0] +stnt1b { z31.d }, p7, [z31.d] +stnt1b { z31.s }, p7, [z31.s, x0] +stnt1b { z31.s }, p7, [z31.s] +stnt1d { z0.d }, p0, [x0, x0, lsl #3] +stnt1d { z0.d }, p0, [x0] +stnt1d { z0.d }, p0, [z1.d] +stnt1d { z21.d }, p5, [x10, #7, mul vl] +stnt1d { z23.d }, p3, [x13, #-8, mul vl] +stnt1d { z31.d }, p7, [z31.d, x0] +stnt1d { z31.d }, p7, [z31.d] +stnt1h { z0.d }, p0, [z1.d] +stnt1h { z0.h }, p0, [x0, x0, lsl #1] +stnt1h { z0.h }, p0, [x0] +stnt1h { z0.s }, p0, [z1.s] +stnt1h { z21.h }, p5, [x10, #7, mul vl] +stnt1h { z23.h }, p3, [x13, #-8, mul vl] +stnt1h { z31.d }, p7, [z31.d, x0] +stnt1h { z31.d }, p7, [z31.d] +stnt1h { z31.s }, p7, [z31.s, x0] +stnt1h { z31.s }, p7, [z31.s] +stnt1w { z0.d }, p0, [z1.d] +stnt1w { z0.s }, p0, [x0, x0, lsl #2] +stnt1w { z0.s }, p0, [x0] +stnt1w { z0.s }, p0, [z1.s] +stnt1w { z21.s }, p5, [x10, #7, mul vl] +stnt1w { z23.s }, p3, [x13, #-8, mul vl] +stnt1w { z31.d }, p7, [z31.d, x0] +stnt1w { z31.d }, p7, [z31.d] +stnt1w { z31.s }, p7, [z31.s, x0] +stnt1w { z31.s }, p7, [z31.s] +str p0, [x0] +str p15, [sp, #-256, mul vl] +str p5, [x10, #255, mul vl] +str z0, [x0] +str z21, [x10, #-256, mul vl] +str z31, [sp, #255, mul vl] +sub z0.b, p0/m, z0.b, z0.b +sub z0.b, z0.b, #0 +sub z0.b, z0.b, z0.b +sub z0.d, p0/m, z0.d, z0.d +sub z0.d, z0.d, #0 +sub z0.d, z0.d, #0, lsl #8 +sub z0.d, z0.d, z0.d +sub z0.h, p0/m, z0.h, z0.h +sub z0.h, z0.h, #0 +sub z0.h, z0.h, #0, lsl #8 +sub z0.h, z0.h, z0.h +sub 
z0.s, p0/m, z0.s, z0.s +sub z0.s, z0.s, #0 +sub z0.s, z0.s, #0, lsl #8 +sub z0.s, z0.s, z0.s +sub z21.b, p5/m, z21.b, z10.b +sub z21.b, z10.b, z21.b +sub z21.d, p5/m, z21.d, z10.d +sub z21.d, z10.d, z21.d +sub z21.h, p5/m, z21.h, z10.h +sub z21.h, z10.h, z21.h +sub z21.s, p5/m, z21.s, z10.s +sub z21.s, z10.s, z21.s +sub z23.b, p3/m, z23.b, z13.b +sub z23.b, z13.b, z8.b +sub z23.d, p3/m, z23.d, z13.d +sub z23.d, z13.d, z8.d +sub z23.h, p3/m, z23.h, z13.h +sub z23.h, z13.h, z8.h +sub z23.s, p3/m, z23.s, z13.s +sub z23.s, z13.s, z8.s +sub z31.b, p7/m, z31.b, z31.b +sub z31.b, z31.b, #255 +sub z31.b, z31.b, z31.b +sub z31.d, p7/m, z31.d, z31.d +sub z31.d, z31.d, #65280 +sub z31.d, z31.d, z31.d +sub z31.h, p7/m, z31.h, z31.h +sub z31.h, z31.h, #65280 +sub z31.h, z31.h, z31.h +sub z31.s, p7/m, z31.s, z31.s +sub z31.s, z31.s, #65280 +sub z31.s, z31.s, z31.s +subhnb z0.b, z1.h, z31.h +subhnb z0.h, z1.s, z31.s +subhnb z0.s, z1.d, z31.d +subhnt z0.b, z1.h, z31.h +subhnt z0.h, z1.s, z31.s +subhnt z0.s, z1.d, z31.d +subr z0.b, p0/m, z0.b, z0.b +subr z0.b, z0.b, #0 +subr z0.d, p0/m, z0.d, z0.d +subr z0.d, z0.d, #0 +subr z0.d, z0.d, #0, lsl #8 +subr z0.h, p0/m, z0.h, z0.h +subr z0.h, z0.h, #0 +subr z0.h, z0.h, #0, lsl #8 +subr z0.s, p0/m, z0.s, z0.s +subr z0.s, z0.s, #0 +subr z0.s, z0.s, #0, lsl #8 +subr z31.b, z31.b, #255 +subr z31.d, z31.d, #65280 +subr z31.h, z31.h, #65280 +subr z31.s, z31.s, #65280 +sunpkhi z31.d, z31.s +sunpkhi z31.h, z31.b +sunpkhi z31.s, z31.h +sunpklo z31.d, z31.s +sunpklo z31.h, z31.b +sunpklo z31.s, z31.h +suqadd z0.b, p0/m, z0.b, z1.b +suqadd z0.h, p0/m, z0.h, z1.h +suqadd z29.s, p7/m, z29.s, z30.s +suqadd z31.d, p7/m, z31.d, z30.d +sxtb z0.d, p0/m, z0.d +sxtb z0.h, p0/m, z0.h +sxtb z0.s, p0/m, z0.s +sxtb z31.d, p7/m, z31.d +sxtb z31.h, p7/m, z31.h +sxtb z31.s, p7/m, z31.s +sxth z0.d, p0/m, z0.d +sxth z0.s, p0/m, z0.s +sxth z31.d, p7/m, z31.d +sxth z31.s, p7/m, z31.s +sxtw z0.d, p0/m, z0.d +sxtw z31.d, p7/m, z31.d +tbl z28.b, { z29.b, z30.b }, z31.b 
+tbl z28.d, { z29.d, z30.d }, z31.d +tbl z28.h, { z29.h, z30.h }, z31.h +tbl z28.s, { z29.s, z30.s }, z31.s +tbl z31.b, { z31.b }, z31.b +tbl z31.d, { z31.d }, z31.d +tbl z31.h, { z31.h }, z31.h +tbl z31.s, { z31.s }, z31.s +tbx z31.b, z31.b, z31.b +tbx z31.d, z31.d, z31.d +tbx z31.h, z31.h, z31.h +tbx z31.s, z31.s, z31.s +trn1 p15.b, p15.b, p15.b +trn1 p15.d, p15.d, p15.d +trn1 p15.h, p15.h, p15.h +trn1 p15.s, p15.s, p15.s +trn1 z31.b, z31.b, z31.b +trn1 z31.d, z31.d, z31.d +trn1 z31.h, z31.h, z31.h +trn1 z31.s, z31.s, z31.s +trn2 p15.b, p15.b, p15.b +trn2 p15.d, p15.d, p15.d +trn2 p15.h, p15.h, p15.h +trn2 p15.s, p15.s, p15.s +trn2 z31.b, z31.b, z31.b +trn2 z31.d, z31.d, z31.d +trn2 z31.h, z31.h, z31.h +trn2 z31.s, z31.s, z31.s +uaba z0.b, z1.b, z31.b +uaba z0.d, z1.d, z31.d +uaba z0.h, z1.h, z31.h +uaba z0.s, z1.s, z31.s +uabalb z0.d, z1.s, z31.s +uabalb z0.h, z1.b, z31.b +uabalb z0.s, z1.h, z31.h +uabalt z0.d, z1.s, z31.s +uabalt z0.h, z1.b, z31.b +uabalt z0.s, z1.h, z31.h +uabd z31.b, p7/m, z31.b, z31.b +uabd z31.d, p7/m, z31.d, z31.d +uabd z31.h, p7/m, z31.h, z31.h +uabd z31.s, p7/m, z31.s, z31.s +uabdlb z0.h, z1.b, z2.b +uabdlb z29.s, z30.h, z31.h +uabdlb z31.d, z31.s, z31.s +uabdlt z0.h, z1.b, z2.b +uabdlt z29.s, z30.h, z31.h +uabdlt z31.d, z31.s, z31.s +uadalp z0.h, p0/m, z1.b +uadalp z29.s, p0/m, z30.h +uadalp z30.d, p7/m, z31.s +uaddlb z0.h, z1.b, z2.b +uaddlb z29.s, z30.h, z31.h +uaddlb z31.d, z31.s, z31.s +uaddlt z0.h, z1.b, z2.b +uaddlt z29.s, z30.h, z31.h +uaddlt z31.d, z31.s, z31.s +uaddv d0, p7, z31.b +uaddv d0, p7, z31.d +uaddv d0, p7, z31.h +uaddv d0, p7, z31.s +uaddwb z0.h, z1.h, z2.b +uaddwb z29.s, z30.s, z31.h +uaddwb z31.d, z31.d, z31.s +uaddwt z0.h, z1.h, z2.b +uaddwt z29.s, z30.s, z31.h +uaddwt z31.d, z31.d, z31.s +ucvtf z0.d, p0/m, z0.d +ucvtf z0.d, p0/m, z0.s +ucvtf z0.h, p0/m, z0.d +ucvtf z0.h, p0/m, z0.h +ucvtf z0.h, p0/m, z0.s +ucvtf z0.s, p0/m, z0.d +ucvtf z0.s, p0/m, z0.s +udiv z0.d, p7/m, z0.d, z31.d +udiv z0.s, p7/m, z0.s, z31.s 
+udivr z0.d, p7/m, z0.d, z31.d +udivr z0.s, p7/m, z0.s, z31.s +udot z0.d, z1.h, z15.h[1] +udot z0.d, z1.h, z31.h +udot z0.s, z1.b, z31.b +udot z0.s, z1.b, z7.b[3] +uhadd z0.b, p0/m, z0.b, z1.b +uhadd z0.h, p0/m, z0.h, z1.h +uhadd z29.s, p7/m, z29.s, z30.s +uhadd z31.d, p7/m, z31.d, z30.d +uhsub z0.b, p0/m, z0.b, z1.b +uhsub z0.h, p0/m, z0.h, z1.h +uhsub z29.s, p7/m, z29.s, z30.s +uhsub z31.d, p7/m, z31.d, z30.d +uhsubr z0.b, p0/m, z0.b, z1.b +uhsubr z0.h, p0/m, z0.h, z1.h +uhsubr z29.s, p7/m, z29.s, z30.s +uhsubr z31.d, p7/m, z31.d, z30.d +umax z0.b, z0.b, #0 +umax z31.b, p7/m, z31.b, z31.b +umax z31.b, z31.b, #255 +umax z31.d, p7/m, z31.d, z31.d +umax z31.h, p7/m, z31.h, z31.h +umax z31.s, p7/m, z31.s, z31.s +umaxp z0.b, p0/m, z0.b, z1.b +umaxp z0.h, p0/m, z0.h, z1.h +umaxp z29.s, p7/m, z29.s, z30.s +umaxp z31.d, p7/m, z31.d, z30.d +umaxv b0, p7, z31.b +umaxv d0, p7, z31.d +umaxv h0, p7, z31.h +umaxv s0, p7, z31.s +umin z0.b, z0.b, #0 +umin z31.b, p7/m, z31.b, z31.b +umin z31.b, z31.b, #255 +umin z31.d, p7/m, z31.d, z31.d +umin z31.h, p7/m, z31.h, z31.h +umin z31.s, p7/m, z31.s, z31.s +uminp z0.b, p0/m, z0.b, z1.b +uminp z0.h, p0/m, z0.h, z1.h +uminp z29.s, p7/m, z29.s, z30.s +uminp z31.d, p7/m, z31.d, z30.d +uminv b0, p7, z31.b +uminv d0, p7, z31.d +uminv h0, p7, z31.h +uminv s0, p7, z31.s +umlalb z0.d, z1.s, z15.s[1] +umlalb z0.d, z1.s, z31.s +umlalb z0.h, z1.b, z31.b +umlalb z0.s, z1.h, z31.h +umlalb z0.s, z1.h, z7.h[7] +umlalt z0.d, z1.s, z15.s[1] +umlalt z0.d, z1.s, z31.s +umlalt z0.h, z1.b, z31.b +umlalt z0.s, z1.h, z31.h +umlalt z0.s, z1.h, z7.h[7] +umlslb z0.d, z1.s, z15.s[1] +umlslb z0.d, z1.s, z31.s +umlslb z0.h, z1.b, z31.b +umlslb z0.s, z1.h, z31.h +umlslb z0.s, z1.h, z7.h[7] +umlslt z0.d, z1.s, z15.s[1] +umlslt z0.d, z1.s, z31.s +umlslt z0.h, z1.b, z31.b +umlslt z0.s, z1.h, z31.h +umlslt z0.s, z1.h, z7.h[7] +ummla z0.s, z1.b, z2.b +umulh z0.b, p7/m, z0.b, z31.b +umulh z0.b, z1.b, z2.b +umulh z0.d, p7/m, z0.d, z31.d +umulh z0.h, p7/m, z0.h, z31.h 
+umulh z0.h, z1.h, z2.h +umulh z0.s, p7/m, z0.s, z31.s +umulh z29.s, z30.s, z31.s +umulh z31.d, z31.d, z31.d +umullb z0.d, z1.s, z15.s[1] +umullb z0.h, z1.b, z2.b +umullb z0.s, z1.h, z7.h[7] +umullb z29.s, z30.h, z31.h +umullb z31.d, z31.s, z31.s +umullt z0.d, z1.s, z15.s[1] +umullt z0.h, z1.b, z2.b +umullt z0.s, z1.h, z7.h[7] +umullt z29.s, z30.h, z31.h +umullt z31.d, z31.s, z31.s +uqadd z0.b, p0/m, z0.b, z1.b +uqadd z0.b, z0.b, #0 +uqadd z0.b, z0.b, z0.b +uqadd z0.d, z0.d, #0 +uqadd z0.d, z0.d, #0, lsl #8 +uqadd z0.d, z0.d, z0.d +uqadd z0.h, p0/m, z0.h, z1.h +uqadd z0.h, z0.h, #0 +uqadd z0.h, z0.h, #0, lsl #8 +uqadd z0.h, z0.h, z0.h +uqadd z0.s, z0.s, #0 +uqadd z0.s, z0.s, #0, lsl #8 +uqadd z0.s, z0.s, z0.s +uqadd z29.s, p7/m, z29.s, z30.s +uqadd z31.b, z31.b, #255 +uqadd z31.d, p7/m, z31.d, z30.d +uqadd z31.d, z31.d, #65280 +uqadd z31.h, z31.h, #65280 +uqadd z31.s, z31.s, #65280 +uqdecb w0 +uqdecb w0, all, mul #16 +uqdecb w0, pow2 +uqdecb w0, pow2, mul #16 +uqdecb x0 +uqdecb x0, #14 +uqdecb x0, all, mul #16 +uqdecb x0, pow2 +uqdecb x0, vl1 +uqdecd w0 +uqdecd w0, all, mul #16 +uqdecd w0, pow2 +uqdecd w0, pow2, mul #16 +uqdecd x0 +uqdecd x0, #14 +uqdecd x0, all, mul #16 +uqdecd x0, pow2 +uqdecd x0, vl1 +uqdecd z0.d +uqdecd z0.d, all, mul #16 +uqdecd z0.d, pow2 +uqdecd z0.d, pow2, mul #16 +uqdech w0 +uqdech w0, all, mul #16 +uqdech w0, pow2 +uqdech w0, pow2, mul #16 +uqdech x0 +uqdech x0, #14 +uqdech x0, all, mul #16 +uqdech x0, pow2 +uqdech x0, vl1 +uqdech z0.h +uqdech z0.h, all, mul #16 +uqdech z0.h, pow2 +uqdech z0.h, pow2, mul #16 +uqdecp wzr, p15.b +uqdecp wzr, p15.d +uqdecp wzr, p15.h +uqdecp wzr, p15.s +uqdecp x0, p0.b +uqdecp x0, p0.d +uqdecp x0, p0.h +uqdecp x0, p0.s +uqdecp z0.d, p0.d +uqdecp z0.h, p0.h +uqdecp z0.s, p0.s +uqdecw w0 +uqdecw w0, all, mul #16 +uqdecw w0, pow2 +uqdecw w0, pow2, mul #16 +uqdecw x0 +uqdecw x0, #14 +uqdecw x0, all, mul #16 +uqdecw x0, pow2 +uqdecw x0, vl1 +uqdecw z0.s +uqdecw z0.s, all, mul #16 +uqdecw z0.s, pow2 +uqdecw z0.s, 
pow2, mul #16 +uqincb w0 +uqincb w0, all, mul #16 +uqincb w0, pow2 +uqincb w0, pow2, mul #16 +uqincb x0 +uqincb x0, #14 +uqincb x0, all, mul #16 +uqincb x0, pow2 +uqincb x0, vl1 +uqincd w0 +uqincd w0, all, mul #16 +uqincd w0, pow2 +uqincd w0, pow2, mul #16 +uqincd x0 +uqincd x0, #14 +uqincd x0, all, mul #16 +uqincd x0, pow2 +uqincd x0, vl1 +uqincd z0.d +uqincd z0.d, all, mul #16 +uqincd z0.d, pow2 +uqincd z0.d, pow2, mul #16 +uqinch w0 +uqinch w0, all, mul #16 +uqinch w0, pow2 +uqinch w0, pow2, mul #16 +uqinch x0 +uqinch x0, #14 +uqinch x0, all, mul #16 +uqinch x0, pow2 +uqinch x0, vl1 +uqinch z0.h +uqinch z0.h, all, mul #16 +uqinch z0.h, pow2 +uqinch z0.h, pow2, mul #16 +uqincp wzr, p15.b +uqincp wzr, p15.d +uqincp wzr, p15.h +uqincp wzr, p15.s +uqincp x0, p0.b +uqincp x0, p0.d +uqincp x0, p0.h +uqincp x0, p0.s +uqincp z0.d, p0.d +uqincp z0.h, p0.h +uqincp z0.s, p0.s +uqincw w0 +uqincw w0, all, mul #16 +uqincw w0, pow2 +uqincw w0, pow2, mul #16 +uqincw x0 +uqincw x0, #14 +uqincw x0, all, mul #16 +uqincw x0, pow2 +uqincw x0, vl1 +uqincw z0.s +uqincw z0.s, all, mul #16 +uqincw z0.s, pow2 +uqincw z0.s, pow2, mul #16 +uqrshl z0.b, p0/m, z0.b, z1.b +uqrshl z0.h, p0/m, z0.h, z1.h +uqrshl z29.s, p7/m, z29.s, z30.s +uqrshl z31.d, p7/m, z31.d, z30.d +uqrshlr z0.b, p0/m, z0.b, z1.b +uqrshlr z0.h, p0/m, z0.h, z1.h +uqrshlr z29.s, p7/m, z29.s, z30.s +uqrshlr z31.d, p7/m, z31.d, z30.d +uqrshrnb z0.b, z0.h, #1 +uqrshrnb z0.h, z0.s, #1 +uqrshrnb z0.s, z0.d, #1 +uqrshrnb z31.b, z31.h, #8 +uqrshrnb z31.h, z31.s, #16 +uqrshrnb z31.s, z31.d, #32 +uqrshrnt z0.b, z0.h, #1 +uqrshrnt z0.h, z0.s, #1 +uqrshrnt z0.s, z0.d, #1 +uqrshrnt z31.b, z31.h, #8 +uqrshrnt z31.h, z31.s, #16 +uqrshrnt z31.s, z31.d, #32 +uqshl z0.b, p0/m, z0.b, #0 +uqshl z0.b, p0/m, z0.b, z1.b +uqshl z0.d, p0/m, z0.d, #0 +uqshl z0.h, p0/m, z0.h, #0 +uqshl z0.h, p0/m, z0.h, z1.h +uqshl z0.s, p0/m, z0.s, #0 +uqshl z29.s, p7/m, z29.s, z30.s +uqshl z31.b, p0/m, z31.b, #7 +uqshl z31.d, p0/m, z31.d, #63 +uqshl z31.d, p7/m, 
z31.d, z30.d +uqshl z31.h, p0/m, z31.h, #15 +uqshl z31.s, p0/m, z31.s, #31 +uqshlr z0.b, p0/m, z0.b, z1.b +uqshlr z0.h, p0/m, z0.h, z1.h +uqshlr z29.s, p7/m, z29.s, z30.s +uqshlr z31.d, p7/m, z31.d, z30.d +uqshrnb z0.b, z0.h, #1 +uqshrnb z0.h, z0.s, #1 +uqshrnb z0.s, z0.d, #1 +uqshrnb z31.b, z31.h, #8 +uqshrnb z31.h, z31.s, #16 +uqshrnb z31.s, z31.d, #32 +uqshrnt z0.b, z0.h, #1 +uqshrnt z0.h, z0.s, #1 +uqshrnt z0.s, z0.d, #1 +uqshrnt z31.b, z31.h, #8 +uqshrnt z31.h, z31.s, #16 +uqshrnt z31.s, z31.d, #32 +uqsub z0.b, p0/m, z0.b, z1.b +uqsub z0.b, z0.b, #0 +uqsub z0.b, z0.b, z0.b +uqsub z0.d, z0.d, #0 +uqsub z0.d, z0.d, #0, lsl #8 +uqsub z0.d, z0.d, z0.d +uqsub z0.h, p0/m, z0.h, z1.h +uqsub z0.h, z0.h, #0 +uqsub z0.h, z0.h, #0, lsl #8 +uqsub z0.h, z0.h, z0.h +uqsub z0.s, z0.s, #0 +uqsub z0.s, z0.s, #0, lsl #8 +uqsub z0.s, z0.s, z0.s +uqsub z29.s, p7/m, z29.s, z30.s +uqsub z31.b, z31.b, #255 +uqsub z31.d, p7/m, z31.d, z30.d +uqsub z31.d, z31.d, #65280 +uqsub z31.h, z31.h, #65280 +uqsub z31.s, z31.s, #65280 +uqsubr z0.b, p0/m, z0.b, z1.b +uqsubr z0.h, p0/m, z0.h, z1.h +uqsubr z29.s, p7/m, z29.s, z30.s +uqsubr z31.d, p7/m, z31.d, z30.d +uqxtnb z0.b, z31.h +uqxtnb z0.h, z31.s +uqxtnb z0.s, z31.d +uqxtnt z0.b, z31.h +uqxtnt z0.h, z31.s +uqxtnt z0.s, z31.d +urecpe z31.s, p7/m, z31.s +urhadd z0.b, p0/m, z0.b, z1.b +urhadd z0.h, p0/m, z0.h, z1.h +urhadd z29.s, p7/m, z29.s, z30.s +urhadd z31.d, p7/m, z31.d, z30.d +urshl z0.b, p0/m, z0.b, z1.b +urshl z0.h, p0/m, z0.h, z1.h +urshl z29.s, p7/m, z29.s, z30.s +urshl z31.d, p7/m, z31.d, z30.d +urshlr z0.b, p0/m, z0.b, z1.b +urshlr z0.h, p0/m, z0.h, z1.h +urshlr z29.s, p7/m, z29.s, z30.s +urshlr z31.d, p7/m, z31.d, z30.d +urshr z0.b, p0/m, z0.b, #1 +urshr z0.d, p0/m, z0.d, #1 +urshr z0.h, p0/m, z0.h, #1 +urshr z0.s, p0/m, z0.s, #1 +urshr z31.b, p0/m, z31.b, #8 +urshr z31.d, p0/m, z31.d, #64 +urshr z31.h, p0/m, z31.h, #16 +urshr z31.s, p0/m, z31.s, #32 +ursqrte z31.s, p7/m, z31.s +ursra z0.b, z0.b, #1 +ursra z0.d, z0.d, #1 +ursra 
z0.h, z0.h, #1 +ursra z0.s, z0.s, #1 +ursra z31.b, z31.b, #8 +ursra z31.d, z31.d, #64 +ursra z31.h, z31.h, #16 +ursra z31.s, z31.s, #32 +ushllb z0.d, z0.s, #0 +ushllb z0.h, z0.b, #0 +ushllb z0.s, z0.h, #0 +ushllb z31.d, z31.s, #31 +ushllb z31.h, z31.b, #7 +ushllb z31.s, z31.h, #15 +ushllt z0.d, z0.s, #0 +ushllt z0.h, z0.b, #0 +ushllt z0.s, z0.h, #0 +ushllt z31.d, z31.s, #31 +ushllt z31.h, z31.b, #7 +ushllt z31.s, z31.h, #15 +usmmla z0.s, z1.b, z2.b +usqadd z0.b, p0/m, z0.b, z1.b +usqadd z0.h, p0/m, z0.h, z1.h +usqadd z29.s, p7/m, z29.s, z30.s +usqadd z31.d, p7/m, z31.d, z30.d +usra z0.b, z0.b, #1 +usra z0.d, z0.d, #1 +usra z0.h, z0.h, #1 +usra z0.s, z0.s, #1 +usra z31.b, z31.b, #8 +usra z31.d, z31.d, #64 +usra z31.h, z31.h, #16 +usra z31.s, z31.s, #32 +usublb z0.h, z1.b, z2.b +usublb z29.s, z30.h, z31.h +usublb z31.d, z31.s, z31.s +usublt z0.h, z1.b, z2.b +usublt z29.s, z30.h, z31.h +usublt z31.d, z31.s, z31.s +usubwb z0.h, z1.h, z2.b +usubwb z29.s, z30.s, z31.h +usubwb z31.d, z31.d, z31.s +usubwt z0.h, z1.h, z2.b +usubwt z29.s, z30.s, z31.h +usubwt z31.d, z31.d, z31.s +uunpkhi z31.d, z31.s +uunpkhi z31.h, z31.b +uunpkhi z31.s, z31.h +uunpklo z31.d, z31.s +uunpklo z31.h, z31.b +uunpklo z31.s, z31.h +uxtb z0.d, p0/m, z0.d +uxtb z0.h, p0/m, z0.h +uxtb z0.s, p0/m, z0.s +uxtb z31.d, p7/m, z31.d +uxtb z31.h, p7/m, z31.h +uxtb z31.s, p7/m, z31.s +uxth z0.d, p0/m, z0.d +uxth z0.s, p0/m, z0.s +uxth z31.d, p7/m, z31.d +uxth z31.s, p7/m, z31.s +uxtw z0.d, p0/m, z0.d +uxtw z31.d, p7/m, z31.d +uzp1 p15.b, p15.b, p15.b +uzp1 p15.d, p15.d, p15.d +uzp1 p15.h, p15.h, p15.h +uzp1 p15.s, p15.s, p15.s +uzp1 z31.b, z31.b, z31.b +uzp1 z31.d, z31.d, z31.d +uzp1 z31.h, z31.h, z31.h +uzp1 z31.s, z31.s, z31.s +uzp2 p15.b, p15.b, p15.b +uzp2 p15.d, p15.d, p15.d +uzp2 p15.h, p15.h, p15.h +uzp2 p15.s, p15.s, p15.s +uzp2 z31.b, z31.b, z31.b +uzp2 z31.d, z31.d, z31.d +uzp2 z31.h, z31.h, z31.h +uzp2 z31.s, z31.s, z31.s +whilege p15.b, w0, wzr +whilege p15.b, wzr, w0 +whilege p15.b, x0, xzr 
+whilege p15.b, xzr, x0 +whilege p15.d, w0, wzr +whilege p15.d, x0, xzr +whilege p15.h, w0, wzr +whilege p15.h, x0, xzr +whilege p15.s, w0, wzr +whilege p15.s, x0, xzr +whilerw p15.b, x30, x30 +whilerw p15.d, x30, x30 +whilerw p15.h, x30, x30 +whilerw p15.s, x30, x30 +whilewr p15.b, x30, x30 +whilewr p15.d, x30, x30 +whilewr p15.h, x30, x30 +whilewr p15.s, x30, x30 +wrffr p0.b +wrffr p15.b +xar z0.b, z0.b, z1.b, #1 +xar z0.d, z0.d, z1.d, #1 +xar z0.h, z0.h, z1.h, #1 +xar z0.s, z0.s, z1.s, #1 +xar z31.b, z31.b, z30.b, #8 +xar z31.d, z31.d, z30.d, #64 +xar z31.h, z31.h, z30.h, #16 +xar z31.s, z31.s, z30.s, #32 +zip1 p0.b, p0.b, p0.b +zip1 p0.d, p0.d, p0.d +zip1 p0.h, p0.h, p0.h +zip1 p0.s, p0.s, p0.s +zip1 p15.b, p15.b, p15.b +zip1 p15.d, p15.d, p15.d +zip1 p15.h, p15.h, p15.h +zip1 p15.s, p15.s, p15.s +zip1 z0.b, z0.b, z0.b +zip1 z0.d, z0.d, z0.d +zip1 z0.h, z0.h, z0.h +zip1 z0.s, z0.s, z0.s +zip1 z31.b, z31.b, z31.b +zip1 z31.d, z31.d, z31.d +zip1 z31.h, z31.h, z31.h +zip1 z31.s, z31.s, z31.s +zip2 p0.b, p0.b, p0.b +zip2 p0.d, p0.d, p0.d +zip2 p0.h, p0.h, p0.h +zip2 p0.s, p0.s, p0.s +zip2 p15.b, p15.b, p15.b +zip2 p15.d, p15.d, p15.d +zip2 p15.h, p15.h, p15.h +zip2 p15.s, p15.s, p15.s +zip2 z0.b, z0.b, z0.b +zip2 z0.d, z0.d, z0.d +zip2 z0.h, z0.h, z0.h +zip2 z0.s, z0.s, z0.s +zip2 z31.b, z31.b, z31.b +zip2 z31.d, z31.d, z31.d +zip2 z31.h, z31.h, z31.h +zip2 z31.s, z31.s, z31.s + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 abs z0.b, p0/m, z0.b +# CHECK-NEXT: 1 3 1.00 abs z0.d, p0/m, z0.d +# CHECK-NEXT: 1 3 1.00 abs z0.h, p0/m, z0.h +# CHECK-NEXT: 1 3 1.00 abs z0.s, p0/m, z0.s +# CHECK-NEXT: 1 3 1.00 abs z31.b, p7/m, z31.b +# CHECK-NEXT: 1 3 1.00 abs z31.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 abs z31.h, p7/m, z31.h +# CHECK-NEXT: 1 
3 1.00 abs z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 adclb z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 adclb z0.s, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 adclt z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 adclt z0.s, z1.s, z31.s +# CHECK-NEXT: 1 3 1.00 add z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 add z0.b, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 add z0.b, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 add z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 add z0.d, z0.d, #0 +# CHECK-NEXT: 1 3 1.00 add z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: 1 3 1.00 add z0.d, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 add z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 add z0.h, z0.h, #0 +# CHECK-NEXT: 1 3 1.00 add z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: 1 3 1.00 add z0.h, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 add z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: 1 3 1.00 add z0.s, z0.s, #0 +# CHECK-NEXT: 1 3 1.00 add z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: 1 3 1.00 add z0.s, z0.s, z0.s +# CHECK-NEXT: 1 3 1.00 add z0.s, z1.s, z2.s +# CHECK-NEXT: 1 3 1.00 add z21.b, p5/m, z21.b, z10.b +# CHECK-NEXT: 1 3 1.00 add z21.b, z10.b, z21.b +# CHECK-NEXT: 1 3 1.00 add z21.d, p5/m, z21.d, z10.d +# CHECK-NEXT: 1 3 1.00 add z21.d, z10.d, z21.d +# CHECK-NEXT: 1 3 1.00 add z21.h, p5/m, z21.h, z10.h +# CHECK-NEXT: 1 3 1.00 add z21.h, z10.h, z21.h +# CHECK-NEXT: 1 3 1.00 add z21.s, p5/m, z21.s, z10.s +# CHECK-NEXT: 1 3 1.00 add z21.s, z10.s, z21.s +# CHECK-NEXT: 1 3 1.00 add z23.b, p3/m, z23.b, z13.b +# CHECK-NEXT: 1 3 1.00 add z23.b, z13.b, z8.b +# CHECK-NEXT: 1 3 1.00 add z23.d, p3/m, z23.d, z13.d +# CHECK-NEXT: 1 3 1.00 add z23.d, z13.d, z8.d +# CHECK-NEXT: 1 3 1.00 add z23.h, p3/m, z23.h, z13.h +# CHECK-NEXT: 1 3 1.00 add z23.h, z13.h, z8.h +# CHECK-NEXT: 1 3 1.00 add z23.s, p3/m, z23.s, z13.s +# CHECK-NEXT: 1 3 1.00 add z23.s, z13.s, z8.s +# CHECK-NEXT: 1 3 1.00 add z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 add z31.b, z31.b, #255 +# CHECK-NEXT: 1 3 1.00 add z31.b, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 add z31.d, p7/m, z31.d, z31.d +# 
CHECK-NEXT: 1 3 1.00 add z31.d, z31.d, #65280 +# CHECK-NEXT: 1 3 1.00 add z31.d, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 add z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 add z31.h, z31.h, #65280 +# CHECK-NEXT: 1 3 1.00 add z31.h, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 add z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 add z31.s, z31.s, #65280 +# CHECK-NEXT: 1 3 1.00 add z31.s, z31.s, z31.s +# CHECK-NEXT: 1 8 1.00 addhnb z0.b, z1.h, z31.h +# CHECK-NEXT: 1 8 1.00 addhnb z0.h, z1.s, z31.s +# CHECK-NEXT: 1 8 1.00 addhnb z0.s, z1.d, z31.d +# CHECK-NEXT: 1 8 1.00 addhnt z0.b, z1.h, z31.h +# CHECK-NEXT: 1 8 1.00 addhnt z0.h, z1.s, z31.s +# CHECK-NEXT: 1 8 1.00 addhnt z0.s, z1.d, z31.d +# CHECK-NEXT: 1 3 1.00 addp z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 3 1.00 addp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 3 1.00 addp z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 3 1.00 addp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 1 1.00 addpl sp, sp, #31 +# CHECK-NEXT: 1 1 1.00 addpl x0, x0, #-32 +# CHECK-NEXT: 1 1 1.00 addpl x21, x21, #0 +# CHECK-NEXT: 1 1 1.00 addpl x23, x8, #-1 +# CHECK-NEXT: 1 1 1.00 addvl sp, sp, #31 +# CHECK-NEXT: 1 1 1.00 addvl x0, x0, #-32 +# CHECK-NEXT: 1 1 1.00 addvl x21, x21, #0 +# CHECK-NEXT: 1 1 1.00 addvl x23, x8, #-1 +# CHECK-NEXT: 1 3 1.00 adr z0.d, [z0.d, z0.d, lsl #1] +# CHECK-NEXT: 1 3 1.00 adr z0.d, [z0.d, z0.d, lsl #2] +# CHECK-NEXT: 1 3 1.00 adr z0.d, [z0.d, z0.d, lsl #3] +# CHECK-NEXT: 1 3 1.00 adr z0.d, [z0.d, z0.d, sxtw #1] +# CHECK-NEXT: 1 3 1.00 adr z0.d, [z0.d, z0.d, sxtw #2] +# CHECK-NEXT: 1 3 1.00 adr z0.d, [z0.d, z0.d, sxtw #3] +# CHECK-NEXT: 1 3 1.00 adr z0.d, [z0.d, z0.d, sxtw] +# CHECK-NEXT: 1 3 1.00 adr z0.d, [z0.d, z0.d, uxtw #1] +# CHECK-NEXT: 1 3 1.00 adr z0.d, [z0.d, z0.d, uxtw #2] +# CHECK-NEXT: 1 3 1.00 adr z0.d, [z0.d, z0.d, uxtw #3] +# CHECK-NEXT: 1 3 1.00 adr z0.d, [z0.d, z0.d, uxtw] +# CHECK-NEXT: 1 3 1.00 adr z0.d, [z0.d, z0.d] +# CHECK-NEXT: 1 3 1.00 adr z0.s, [z0.s, z0.s, lsl #1] +# CHECK-NEXT: 1 3 1.00 adr z0.s, [z0.s, 
z0.s, lsl #2] +# CHECK-NEXT: 1 3 1.00 adr z0.s, [z0.s, z0.s, lsl #3] +# CHECK-NEXT: 1 3 1.00 adr z0.s, [z0.s, z0.s] +# CHECK-NEXT: 1 3 1.00 aesd z0.b, z0.b, z31.b +# CHECK-NEXT: 1 3 1.00 aese z0.b, z0.b, z31.b +# CHECK-NEXT: 1 3 1.00 aesimc z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 aesimc z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 aesmc z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 aesmc z31.b, z31.b +# CHECK-NEXT: 1 2 1.00 and p0.b, p0/z, p0.b, p1.b +# CHECK-NEXT: 1 3 1.00 and z0.d, z0.d, #0x6 +# CHECK-NEXT: 1 3 1.00 and z0.d, z0.d, #0xfffffffffffffff9 +# CHECK-NEXT: 1 3 1.00 and z0.d, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 and z0.s, z0.s, #0x6 +# CHECK-NEXT: 1 3 1.00 and z0.s, z0.s, #0xfffffff9 +# CHECK-NEXT: 1 3 1.00 and z23.d, z13.d, z8.d +# CHECK-NEXT: 1 3 1.00 and z23.h, z23.h, #0x6 +# CHECK-NEXT: 1 3 1.00 and z23.h, z23.h, #0xfff9 +# CHECK-NEXT: 1 3 1.00 and z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 and z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 and z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 and z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 and z5.b, z5.b, #0x6 +# CHECK-NEXT: 1 3 1.00 and z5.b, z5.b, #0xf9 +# CHECK-NEXT: 1 2 1.00 ands p0.b, p0/z, p0.b, p1.b +# CHECK-NEXT: 1 4 1.00 andv b0, p7, z31.b +# CHECK-NEXT: 1 4 1.00 andv d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 andv h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 andv s0, p7, z31.s +# CHECK-NEXT: 1 3 1.00 asr z0.b, p0/m, z0.b, #1 +# CHECK-NEXT: 1 3 1.00 asr z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 asr z0.b, p0/m, z0.b, z1.d +# CHECK-NEXT: 1 3 1.00 asr z0.b, z0.b, #1 +# CHECK-NEXT: 1 3 1.00 asr z0.b, z1.b, z2.d +# CHECK-NEXT: 1 3 1.00 asr z0.d, p0/m, z0.d, #1 +# CHECK-NEXT: 1 3 1.00 asr z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 asr z0.d, z0.d, #1 +# CHECK-NEXT: 1 3 1.00 asr z0.h, p0/m, z0.h, #1 +# CHECK-NEXT: 1 3 1.00 asr z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 asr z0.h, p0/m, z0.h, z1.d +# CHECK-NEXT: 1 3 1.00 asr z0.h, z0.h, #1 +# CHECK-NEXT: 1 3 1.00 asr z0.h, z1.h, z2.d +# CHECK-NEXT: 1 3 1.00 
asr z0.s, p0/m, z0.s, #1 +# CHECK-NEXT: 1 3 1.00 asr z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: 1 3 1.00 asr z0.s, p0/m, z0.s, z1.d +# CHECK-NEXT: 1 3 1.00 asr z0.s, z0.s, #1 +# CHECK-NEXT: 1 3 1.00 asr z0.s, z1.s, z2.d +# CHECK-NEXT: 1 3 1.00 asr z31.b, p0/m, z31.b, #8 +# CHECK-NEXT: 1 3 1.00 asr z31.b, z31.b, #8 +# CHECK-NEXT: 1 3 1.00 asr z31.d, p0/m, z31.d, #64 +# CHECK-NEXT: 1 3 1.00 asr z31.d, z31.d, #64 +# CHECK-NEXT: 1 3 1.00 asr z31.h, p0/m, z31.h, #16 +# CHECK-NEXT: 1 3 1.00 asr z31.h, z31.h, #16 +# CHECK-NEXT: 1 3 1.00 asr z31.s, p0/m, z31.s, #32 +# CHECK-NEXT: 1 3 1.00 asr z31.s, z31.s, #32 +# CHECK-NEXT: 1 4 1.00 asrd z0.b, p0/m, z0.b, #1 +# CHECK-NEXT: 1 4 1.00 asrd z0.d, p0/m, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 asrd z0.h, p0/m, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 asrd z0.s, p0/m, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 asrd z31.b, p0/m, z31.b, #8 +# CHECK-NEXT: 1 4 1.00 asrd z31.d, p0/m, z31.d, #64 +# CHECK-NEXT: 1 4 1.00 asrd z31.h, p0/m, z31.h, #16 +# CHECK-NEXT: 1 4 1.00 asrd z31.s, p0/m, z31.s, #32 +# CHECK-NEXT: 1 3 1.00 asrr z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 asrr z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 asrr z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 asrr z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: 1 4 1.00 bcax z29.d, z29.d, z30.d, z31.d +# CHECK-NEXT: 1 13 12.00 bdep z0.b, z1.b, z31.b +# CHECK-NEXT: 1 68 67.00 bdep z0.d, z1.d, z31.d +# CHECK-NEXT: 1 21 20.00 bdep z0.h, z1.h, z31.h +# CHECK-NEXT: 1 37 36.00 bdep z0.s, z1.s, z31.s +# CHECK-NEXT: 1 13 12.00 bext z0.b, z1.b, z31.b +# CHECK-NEXT: 1 68 67.00 bext z0.d, z1.d, z31.d +# CHECK-NEXT: 1 21 20.00 bext z0.h, z1.h, z31.h +# CHECK-NEXT: 1 37 36.00 bext z0.s, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 bfcvt z0.h, p0/m, z1.s +# CHECK-NEXT: 1 4 1.00 bfcvtnt z0.h, p0/m, z1.s +# CHECK-NEXT: 2 11 1.00 bfdot z0.s, z1.h, z2.h +# CHECK-NEXT: 2 11 1.00 bfdot z0.s, z1.h, z2.h[0] +# CHECK-NEXT: 2 11 1.00 bfdot z0.s, z1.h, z2.h[3] +# CHECK-NEXT: 1 4 1.00 bfmlalb z0.s, z1.h, z2.h +# CHECK-NEXT: 1 4 1.00 bfmlalb 
z0.s, z1.h, z2.h[0] +# CHECK-NEXT: 1 4 1.00 bfmlalb z0.s, z1.h, z2.h[7] +# CHECK-NEXT: 1 4 1.00 bfmlalb z10.s, z21.h, z14.h +# CHECK-NEXT: 1 4 1.00 bfmlalb z21.s, z14.h, z3.h[2] +# CHECK-NEXT: 1 4 1.00 bfmlalt z0.s, z1.h, z2.h +# CHECK-NEXT: 1 4 1.00 bfmlalt z0.s, z1.h, z2.h[0] +# CHECK-NEXT: 1 4 1.00 bfmlalt z0.s, z1.h, z2.h[7] +# CHECK-NEXT: 1 4 1.00 bfmlalt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 bfmlalt z14.s, z10.h, z21.h +# CHECK-NEXT: 2 16 1.00 bfmmla z0.s, z1.h, z2.h +# CHECK-NEXT: 1 13 12.00 bgrp z0.b, z1.b, z31.b +# CHECK-NEXT: 1 68 67.00 bgrp z0.d, z1.d, z31.d +# CHECK-NEXT: 1 21 20.00 bgrp z0.h, z1.h, z31.h +# CHECK-NEXT: 1 37 36.00 bgrp z0.s, z1.s, z31.s +# CHECK-NEXT: 1 2 1.00 bic p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: 1 2 1.00 bic p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 3 1.00 bic z0.d, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 bic z23.d, z13.d, z8.d +# CHECK-NEXT: 1 3 1.00 bic z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 bic z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 bic z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 bic z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: 1 2 1.00 bics p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: 1 2 1.00 bics p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 brka p0.b, p15/m, p15.b +# CHECK-NEXT: 1 2 1.00 brka p0.b, p15/z, p15.b +# CHECK-NEXT: 1 2 1.00 brkas p0.b, p15/z, p15.b +# CHECK-NEXT: 1 2 1.00 brkb p0.b, p15/m, p15.b +# CHECK-NEXT: 1 2 1.00 brkb p0.b, p15/z, p15.b +# CHECK-NEXT: 1 2 1.00 brkbs p0.b, p15/z, p15.b +# CHECK-NEXT: 1 2 1.00 brkn p0.b, p15/z, p1.b, p0.b +# CHECK-NEXT: 1 2 1.00 brkn p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 brkns p0.b, p15/z, p1.b, p0.b +# CHECK-NEXT: 1 2 1.00 brkns p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 brkpa p0.b, p15/z, p1.b, p2.b +# CHECK-NEXT: 1 2 1.00 brkpa p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 brkpas p0.b, p15/z, p1.b, p2.b +# CHECK-NEXT: 1 2 1.00 brkpas p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 brkpb p0.b, p15/z, p1.b, p2.b +# 
CHECK-NEXT: 1 2 1.00 brkpb p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 brkpbs p0.b, p15/z, p1.b, p2.b +# CHECK-NEXT: 1 2 1.00 brkpbs p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 3 1.00 bsl z0.d, z0.d, z1.d, z2.d +# CHECK-NEXT: 1 3 1.00 bsl1n z0.d, z0.d, z1.d, z2.d +# CHECK-NEXT: 1 3 1.00 bsl2n z0.d, z0.d, z1.d, z2.d +# CHECK-NEXT: 1 3 1.00 cadd z0.b, z0.b, z0.b, #90 +# CHECK-NEXT: 1 3 1.00 cadd z0.d, z0.d, z0.d, #90 +# CHECK-NEXT: 1 3 1.00 cadd z0.h, z0.h, z0.h, #90 +# CHECK-NEXT: 1 3 1.00 cadd z0.s, z0.s, z0.s, #90 +# CHECK-NEXT: 1 3 1.00 cadd z31.b, z31.b, z31.b, #270 +# CHECK-NEXT: 1 3 1.00 cadd z31.d, z31.d, z31.d, #270 +# CHECK-NEXT: 1 3 1.00 cadd z31.h, z31.h, z31.h, #270 +# CHECK-NEXT: 1 3 1.00 cadd z31.s, z31.s, z31.s, #270 +# CHECK-NEXT: 1 4 1.00 cdot z0.d, z1.h, z15.h[1], #0 +# CHECK-NEXT: 1 4 1.00 cdot z0.d, z1.h, z31.h, #0 +# CHECK-NEXT: 1 4 1.00 cdot z0.d, z1.h, z31.h, #180 +# CHECK-NEXT: 1 4 1.00 cdot z0.d, z1.h, z31.h, #270 +# CHECK-NEXT: 1 4 1.00 cdot z0.d, z1.h, z31.h, #90 +# CHECK-NEXT: 1 4 1.00 cdot z0.s, z1.b, z31.b, #0 +# CHECK-NEXT: 1 4 1.00 cdot z0.s, z1.b, z7.b[3], #0 +# CHECK-NEXT: 1 4 1.00 cdot z29.d, z30.h, z0.h[0], #180 +# CHECK-NEXT: 1 4 1.00 cdot z31.d, z30.h, z7.h[1], #270 +# CHECK-NEXT: 1 4 1.00 cdot z5.d, z6.h, z3.h[0], #90 +# CHECK-NEXT: 1 4 1.00 clasta b0, p7, b0, z31.b +# CHECK-NEXT: 1 4 1.00 clasta d0, p7, d0, z31.d +# CHECK-NEXT: 1 4 1.00 clasta h0, p7, h0, z31.h +# CHECK-NEXT: 1 4 1.00 clasta s0, p7, s0, z31.s +# CHECK-NEXT: 1 8 2.00 clasta w0, p7, w0, z31.b +# CHECK-NEXT: 1 8 2.00 clasta w0, p7, w0, z31.h +# CHECK-NEXT: 1 8 2.00 clasta w0, p7, w0, z31.s +# CHECK-NEXT: 1 8 2.00 clasta x0, p7, x0, z31.d +# CHECK-NEXT: 1 4 1.00 clasta z0.b, p7, z0.b, z31.b +# CHECK-NEXT: 1 4 1.00 clasta z0.d, p7, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 clasta z0.h, p7, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 clasta z0.s, p7, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 clastb b0, p7, b0, z31.b +# CHECK-NEXT: 1 4 1.00 clastb d0, p7, d0, z31.d +# CHECK-NEXT: 
1 4 1.00 clastb h0, p7, h0, z31.h +# CHECK-NEXT: 1 4 1.00 clastb s0, p7, s0, z31.s +# CHECK-NEXT: 1 8 2.00 clastb w0, p7, w0, z31.b +# CHECK-NEXT: 1 8 2.00 clastb w0, p7, w0, z31.h +# CHECK-NEXT: 1 8 2.00 clastb w0, p7, w0, z31.s +# CHECK-NEXT: 1 8 2.00 clastb x0, p7, x0, z31.d +# CHECK-NEXT: 1 4 1.00 clastb z0.b, p7, z0.b, z31.b +# CHECK-NEXT: 1 4 1.00 clastb z0.d, p7, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 clastb z0.h, p7, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 clastb z0.s, p7, z0.s, z31.s +# CHECK-NEXT: 1 3 1.00 cls z31.b, p7/m, z31.b +# CHECK-NEXT: 1 3 1.00 cls z31.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 cls z31.h, p7/m, z31.h +# CHECK-NEXT: 1 3 1.00 cls z31.s, p7/m, z31.s +# CHECK-NEXT: 1 3 1.00 clz z31.b, p7/m, z31.b +# CHECK-NEXT: 1 3 1.00 clz z31.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 clz z31.h, p7/m, z31.h +# CHECK-NEXT: 1 3 1.00 clz z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 cmla z0.b, z1.b, z2.b, #0 +# CHECK-NEXT: 1 4 1.00 cmla z0.d, z1.d, z2.d, #0 +# CHECK-NEXT: 1 4 1.00 cmla z0.h, z1.h, z2.h, #0 +# CHECK-NEXT: 1 4 1.00 cmla z0.h, z1.h, z2.h[0], #0 +# CHECK-NEXT: 1 4 1.00 cmla z0.s, z1.s, z2.s, #0 +# CHECK-NEXT: 1 4 1.00 cmla z0.s, z1.s, z2.s[0], #0 +# CHECK-NEXT: 1 4 1.00 cmla z15.b, z16.b, z17.b, #270 +# CHECK-NEXT: 1 4 1.00 cmla z15.d, z16.d, z17.d, #270 +# CHECK-NEXT: 1 4 1.00 cmla z15.h, z16.h, z17.h, #270 +# CHECK-NEXT: 1 4 1.00 cmla z15.s, z16.s, z17.s, #270 +# CHECK-NEXT: 1 4 1.00 cmla z29.b, z30.b, z31.b, #90 +# CHECK-NEXT: 1 4 1.00 cmla z29.d, z30.d, z31.d, #90 +# CHECK-NEXT: 1 4 1.00 cmla z29.h, z30.h, z31.h, #90 +# CHECK-NEXT: 1 4 1.00 cmla z29.s, z30.s, z31.s, #90 +# CHECK-NEXT: 1 4 1.00 cmla z31.b, z31.b, z31.b, #180 +# CHECK-NEXT: 1 4 1.00 cmla z31.d, z31.d, z31.d, #180 +# CHECK-NEXT: 1 4 1.00 cmla z31.h, z30.h, z7.h[0], #180 +# CHECK-NEXT: 1 4 1.00 cmla z31.h, z31.h, z31.h, #180 +# CHECK-NEXT: 1 4 1.00 cmla z31.s, z30.s, z7.s[0], #180 +# CHECK-NEXT: 1 4 1.00 cmla z31.s, z31.s, z31.s, #180 +# CHECK-NEXT: 1 5 1.00 cmpeq p0.b, p0/z, z0.b, #-16 +# 
CHECK-NEXT: 1 5 1.00 cmpeq p0.b, p0/z, z0.b, #15 +# CHECK-NEXT: 1 5 1.00 cmpeq p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: 1 5 1.00 cmpeq p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: 1 5 1.00 cmpeq p0.d, p0/z, z0.d, #-16 +# CHECK-NEXT: 1 5 1.00 cmpeq p0.d, p0/z, z0.d, #15 +# CHECK-NEXT: 1 5 1.00 cmpeq p0.d, p0/z, z0.d, z0.d +# CHECK-NEXT: 1 5 1.00 cmpeq p0.h, p0/z, z0.h, #-16 +# CHECK-NEXT: 1 5 1.00 cmpeq p0.h, p0/z, z0.h, #15 +# CHECK-NEXT: 1 5 1.00 cmpeq p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: 1 5 1.00 cmpeq p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: 1 5 1.00 cmpeq p0.s, p0/z, z0.s, #-16 +# CHECK-NEXT: 1 5 1.00 cmpeq p0.s, p0/z, z0.s, #15 +# CHECK-NEXT: 1 5 1.00 cmpeq p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: 1 5 1.00 cmpeq p0.s, p0/z, z0.s, z0.s +# CHECK-NEXT: 1 5 1.00 cmpge p0.b, p0/z, z0.b, #-16 +# CHECK-NEXT: 1 5 1.00 cmpge p0.b, p0/z, z0.b, #15 +# CHECK-NEXT: 1 5 1.00 cmpge p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: 1 5 1.00 cmpge p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: 1 5 1.00 cmpge p0.b, p0/z, z1.b, z0.b +# CHECK-NEXT: 1 5 1.00 cmpge p0.d, p0/z, z0.d, #-16 +# CHECK-NEXT: 1 5 1.00 cmpge p0.d, p0/z, z0.d, #15 +# CHECK-NEXT: 1 5 1.00 cmpge p0.d, p0/z, z0.d, z0.d +# CHECK-NEXT: 1 5 1.00 cmpge p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: 1 5 1.00 cmpge p0.h, p0/z, z0.h, #-16 +# CHECK-NEXT: 1 5 1.00 cmpge p0.h, p0/z, z0.h, #15 +# CHECK-NEXT: 1 5 1.00 cmpge p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: 1 5 1.00 cmpge p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: 1 5 1.00 cmpge p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: 1 5 1.00 cmpge p0.s, p0/z, z0.s, #-16 +# CHECK-NEXT: 1 5 1.00 cmpge p0.s, p0/z, z0.s, #15 +# CHECK-NEXT: 1 5 1.00 cmpge p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: 1 5 1.00 cmpge p0.s, p0/z, z0.s, z0.s +# CHECK-NEXT: 1 5 1.00 cmpge p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: 1 5 1.00 cmpgt p0.b, p0/z, z0.b, #-16 +# CHECK-NEXT: 1 5 1.00 cmpgt p0.b, p0/z, z0.b, #15 +# CHECK-NEXT: 1 5 1.00 cmpgt p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: 1 5 1.00 cmpgt p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: 1 5 1.00 cmpgt p0.b, p0/z, z1.b, 
z0.b +# CHECK-NEXT: 1 5 1.00 cmpgt p0.d, p0/z, z0.d, #-16 +# CHECK-NEXT: 1 5 1.00 cmpgt p0.d, p0/z, z0.d, #15 +# CHECK-NEXT: 1 5 1.00 cmpgt p0.d, p0/z, z0.d, z0.d +# CHECK-NEXT: 1 5 1.00 cmpgt p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: 1 5 1.00 cmpgt p0.h, p0/z, z0.h, #-16 +# CHECK-NEXT: 1 5 1.00 cmpgt p0.h, p0/z, z0.h, #15 +# CHECK-NEXT: 1 5 1.00 cmpgt p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: 1 5 1.00 cmpgt p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: 1 5 1.00 cmpgt p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: 1 5 1.00 cmpgt p0.s, p0/z, z0.s, #-16 +# CHECK-NEXT: 1 5 1.00 cmpgt p0.s, p0/z, z0.s, #15 +# CHECK-NEXT: 1 5 1.00 cmpgt p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: 1 5 1.00 cmpgt p0.s, p0/z, z0.s, z0.s +# CHECK-NEXT: 1 5 1.00 cmpgt p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: 1 5 1.00 cmphi p0.b, p0/z, z0.b, #0 +# CHECK-NEXT: 1 5 1.00 cmphi p0.b, p0/z, z0.b, #127 +# CHECK-NEXT: 1 5 1.00 cmphi p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: 1 5 1.00 cmphi p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: 1 5 1.00 cmphi p0.b, p0/z, z1.b, z0.b +# CHECK-NEXT: 1 5 1.00 cmphi p0.d, p0/z, z0.d, #0 +# CHECK-NEXT: 1 5 1.00 cmphi p0.d, p0/z, z0.d, #127 +# CHECK-NEXT: 1 5 1.00 cmphi p0.d, p0/z, z0.d, z0.d +# CHECK-NEXT: 1 5 1.00 cmphi p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: 1 5 1.00 cmphi p0.h, p0/z, z0.h, #0 +# CHECK-NEXT: 1 5 1.00 cmphi p0.h, p0/z, z0.h, #127 +# CHECK-NEXT: 1 5 1.00 cmphi p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: 1 5 1.00 cmphi p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: 1 5 1.00 cmphi p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: 1 5 1.00 cmphi p0.s, p0/z, z0.s, #0 +# CHECK-NEXT: 1 5 1.00 cmphi p0.s, p0/z, z0.s, #127 +# CHECK-NEXT: 1 5 1.00 cmphi p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: 1 5 1.00 cmphi p0.s, p0/z, z0.s, z0.s +# CHECK-NEXT: 1 5 1.00 cmphi p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: 1 5 1.00 cmphs p0.b, p0/z, z0.b, #0 +# CHECK-NEXT: 1 5 1.00 cmphs p0.b, p0/z, z0.b, #127 +# CHECK-NEXT: 1 5 1.00 cmphs p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: 1 5 1.00 cmphs p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: 1 5 1.00 cmphs p0.b, p0/z, 
z1.b, z0.b +# CHECK-NEXT: 1 5 1.00 cmphs p0.d, p0/z, z0.d, #0 +# CHECK-NEXT: 1 5 1.00 cmphs p0.d, p0/z, z0.d, #127 +# CHECK-NEXT: 1 5 1.00 cmphs p0.d, p0/z, z0.d, z0.d +# CHECK-NEXT: 1 5 1.00 cmphs p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: 1 5 1.00 cmphs p0.h, p0/z, z0.h, #0 +# CHECK-NEXT: 1 5 1.00 cmphs p0.h, p0/z, z0.h, #127 +# CHECK-NEXT: 1 5 1.00 cmphs p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: 1 5 1.00 cmphs p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: 1 5 1.00 cmphs p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: 1 5 1.00 cmphs p0.s, p0/z, z0.s, #0 +# CHECK-NEXT: 1 5 1.00 cmphs p0.s, p0/z, z0.s, #127 +# CHECK-NEXT: 1 5 1.00 cmphs p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: 1 5 1.00 cmphs p0.s, p0/z, z0.s, z0.s +# CHECK-NEXT: 1 5 1.00 cmphs p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: 1 5 1.00 cmple p0.b, p0/z, z0.b, #-16 +# CHECK-NEXT: 1 5 1.00 cmple p0.b, p0/z, z0.b, #15 +# CHECK-NEXT: 1 5 1.00 cmple p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: 1 5 1.00 cmple p0.d, p0/z, z0.d, #-16 +# CHECK-NEXT: 1 5 1.00 cmple p0.d, p0/z, z0.d, #15 +# CHECK-NEXT: 1 5 1.00 cmple p0.h, p0/z, z0.h, #-16 +# CHECK-NEXT: 1 5 1.00 cmple p0.h, p0/z, z0.h, #15 +# CHECK-NEXT: 1 5 1.00 cmple p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: 1 5 1.00 cmple p0.s, p0/z, z0.s, #-16 +# CHECK-NEXT: 1 5 1.00 cmple p0.s, p0/z, z0.s, #15 +# CHECK-NEXT: 1 5 1.00 cmple p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: 1 5 1.00 cmplo p0.b, p0/z, z0.b, #0 +# CHECK-NEXT: 1 5 1.00 cmplo p0.b, p0/z, z0.b, #127 +# CHECK-NEXT: 1 5 1.00 cmplo p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: 1 5 1.00 cmplo p0.d, p0/z, z0.d, #0 +# CHECK-NEXT: 1 5 1.00 cmplo p0.d, p0/z, z0.d, #127 +# CHECK-NEXT: 1 5 1.00 cmplo p0.h, p0/z, z0.h, #0 +# CHECK-NEXT: 1 5 1.00 cmplo p0.h, p0/z, z0.h, #127 +# CHECK-NEXT: 1 5 1.00 cmplo p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: 1 5 1.00 cmplo p0.s, p0/z, z0.s, #0 +# CHECK-NEXT: 1 5 1.00 cmplo p0.s, p0/z, z0.s, #127 +# CHECK-NEXT: 1 5 1.00 cmplo p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: 1 5 1.00 cmpls p0.b, p0/z, z0.b, #0 +# CHECK-NEXT: 1 5 1.00 cmpls p0.b, p0/z, z0.b, 
#127 +# CHECK-NEXT: 1 5 1.00 cmpls p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: 1 5 1.00 cmpls p0.d, p0/z, z0.d, #0 +# CHECK-NEXT: 1 5 1.00 cmpls p0.d, p0/z, z0.d, #127 +# CHECK-NEXT: 1 5 1.00 cmpls p0.h, p0/z, z0.h, #0 +# CHECK-NEXT: 1 5 1.00 cmpls p0.h, p0/z, z0.h, #127 +# CHECK-NEXT: 1 5 1.00 cmpls p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: 1 5 1.00 cmpls p0.s, p0/z, z0.s, #0 +# CHECK-NEXT: 1 5 1.00 cmpls p0.s, p0/z, z0.s, #127 +# CHECK-NEXT: 1 5 1.00 cmpls p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: 1 5 1.00 cmplt p0.b, p0/z, z0.b, #-16 +# CHECK-NEXT: 1 5 1.00 cmplt p0.b, p0/z, z0.b, #15 +# CHECK-NEXT: 1 5 1.00 cmplt p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: 1 5 1.00 cmplt p0.d, p0/z, z0.d, #-16 +# CHECK-NEXT: 1 5 1.00 cmplt p0.d, p0/z, z0.d, #15 +# CHECK-NEXT: 1 5 1.00 cmplt p0.h, p0/z, z0.h, #-16 +# CHECK-NEXT: 1 5 1.00 cmplt p0.h, p0/z, z0.h, #15 +# CHECK-NEXT: 1 5 1.00 cmplt p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: 1 5 1.00 cmplt p0.s, p0/z, z0.s, #-16 +# CHECK-NEXT: 1 5 1.00 cmplt p0.s, p0/z, z0.s, #15 +# CHECK-NEXT: 1 5 1.00 cmplt p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: 1 5 1.00 cmpne p0.b, p0/z, z0.b, #-16 +# CHECK-NEXT: 1 5 1.00 cmpne p0.b, p0/z, z0.b, #15 +# CHECK-NEXT: 1 5 1.00 cmpne p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: 1 5 1.00 cmpne p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: 1 5 1.00 cmpne p0.d, p0/z, z0.d, #-16 +# CHECK-NEXT: 1 5 1.00 cmpne p0.d, p0/z, z0.d, #15 +# CHECK-NEXT: 1 5 1.00 cmpne p0.d, p0/z, z0.d, z0.d +# CHECK-NEXT: 1 5 1.00 cmpne p0.h, p0/z, z0.h, #-16 +# CHECK-NEXT: 1 5 1.00 cmpne p0.h, p0/z, z0.h, #15 +# CHECK-NEXT: 1 5 1.00 cmpne p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: 1 5 1.00 cmpne p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: 1 5 1.00 cmpne p0.s, p0/z, z0.s, #-16 +# CHECK-NEXT: 1 5 1.00 cmpne p0.s, p0/z, z0.s, #15 +# CHECK-NEXT: 1 5 1.00 cmpne p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: 1 5 1.00 cmpne p0.s, p0/z, z0.s, z0.s +# CHECK-NEXT: 1 3 1.00 cnot z31.b, p7/m, z31.b +# CHECK-NEXT: 1 3 1.00 cnot z31.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 cnot z31.h, p7/m, z31.h +# 
CHECK-NEXT: 1 3 1.00 cnot z31.s, p7/m, z31.s +# CHECK-NEXT: 1 3 1.00 cnt z31.b, p7/m, z31.b +# CHECK-NEXT: 1 12 1.00 cnt z31.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 cnt z31.h, p7/m, z31.h +# CHECK-NEXT: 1 8 1.00 cnt z31.s, p7/m, z31.s +# CHECK-NEXT: 1 3 1.00 cntb x0 +# CHECK-NEXT: 1 3 1.00 cntb x0, #28 +# CHECK-NEXT: 1 3 1.00 cntb x0, all, mul #16 +# CHECK-NEXT: 1 3 1.00 cntb x0, pow2 +# CHECK-NEXT: 1 3 1.00 cntd x0 +# CHECK-NEXT: 1 3 1.00 cntd x0, #28 +# CHECK-NEXT: 1 3 1.00 cntd x0, all, mul #16 +# CHECK-NEXT: 1 3 1.00 cntd x0, pow2 +# CHECK-NEXT: 1 3 1.00 cnth x0 +# CHECK-NEXT: 1 3 1.00 cnth x0, #28 +# CHECK-NEXT: 1 3 1.00 cnth x0, all, mul #16 +# CHECK-NEXT: 1 3 1.00 cnth x0, pow2 +# CHECK-NEXT: 1 1 1.00 cntp x0, p15, p0.b +# CHECK-NEXT: 1 1 1.00 cntp x0, p15, p0.d +# CHECK-NEXT: 1 1 1.00 cntp x0, p15, p0.h +# CHECK-NEXT: 1 1 1.00 cntp x0, p15, p0.s +# CHECK-NEXT: 1 3 1.00 cntw x0 +# CHECK-NEXT: 1 3 1.00 cntw x0, #28 +# CHECK-NEXT: 1 3 1.00 cntw x0, all, mul #16 +# CHECK-NEXT: 1 3 1.00 cntw x0, pow2 +# CHECK-NEXT: 1 4 1.00 compact z31.d, p7, z31.d +# CHECK-NEXT: 1 4 1.00 compact z31.s, p7, z31.s +# CHECK-NEXT: 1 1 1.00 ctermeq w30, wzr +# CHECK-NEXT: 1 1 1.00 ctermeq wzr, w30 +# CHECK-NEXT: 1 1 1.00 ctermeq x30, xzr +# CHECK-NEXT: 1 1 1.00 ctermeq xzr, x30 +# CHECK-NEXT: 1 1 1.00 ctermne w30, wzr +# CHECK-NEXT: 1 1 1.00 ctermne wzr, w30 +# CHECK-NEXT: 1 1 1.00 ctermne x30, xzr +# CHECK-NEXT: 1 1 1.00 ctermne xzr, x30 +# CHECK-NEXT: 1 3 1.00 decb x0 +# CHECK-NEXT: 1 3 1.00 decb x0, #14 +# CHECK-NEXT: 1 3 1.00 decb x0, all, mul #16 +# CHECK-NEXT: 1 3 1.00 decb x0, pow2 +# CHECK-NEXT: 1 3 1.00 decb x0, vl1 +# CHECK-NEXT: 1 3 1.00 decd x0 +# CHECK-NEXT: 1 3 1.00 decd x0, #14 +# CHECK-NEXT: 1 3 1.00 decd x0, all, mul #16 +# CHECK-NEXT: 1 3 1.00 decd x0, pow2 +# CHECK-NEXT: 1 3 1.00 decd x0, vl1 +# CHECK-NEXT: 1 3 1.00 dech x0 +# CHECK-NEXT: 1 3 1.00 dech x0, #14 +# CHECK-NEXT: 1 3 1.00 dech x0, all, mul #16 +# CHECK-NEXT: 1 3 1.00 dech x0, pow2 +# CHECK-NEXT: 1 3 
1.00 dech x0, vl1 +# CHECK-NEXT: 1 1 1.00 decp x0, p0.b +# CHECK-NEXT: 1 1 1.00 decp x0, p0.d +# CHECK-NEXT: 1 1 1.00 decp x0, p0.h +# CHECK-NEXT: 1 1 1.00 decp x0, p0.s +# CHECK-NEXT: 1 1 1.00 decp xzr, p15.b +# CHECK-NEXT: 1 1 1.00 decp xzr, p15.d +# CHECK-NEXT: 1 1 1.00 decp xzr, p15.h +# CHECK-NEXT: 1 1 1.00 decp xzr, p15.s +# CHECK-NEXT: 1 3 1.00 decp z31.d, p15.d +# CHECK-NEXT: 1 3 1.00 decp z31.h, p15.h +# CHECK-NEXT: 1 3 1.00 decp z31.s, p15.s +# CHECK-NEXT: 1 3 1.00 decw x0 +# CHECK-NEXT: 1 3 1.00 decw x0, #14 +# CHECK-NEXT: 1 3 1.00 decw x0, all, mul #16 +# CHECK-NEXT: 1 3 1.00 decw x0, pow2 +# CHECK-NEXT: 1 3 1.00 decw x0, vl1 +# CHECK-NEXT: 1 4 1.00 dupm z0.d, #0xfffffffffffffff9 +# CHECK-NEXT: 1 4 1.00 dupm z0.s, #0xfffffff9 +# CHECK-NEXT: 1 4 1.00 dupm z23.h, #0xfff9 +# CHECK-NEXT: 1 4 1.00 dupm z5.b, #0xf9 +# CHECK-NEXT: 1 2 1.00 eor p0.b, p0/z, p0.b, p1.b +# CHECK-NEXT: 1 3 1.00 eor z0.d, z0.d, #0x6 +# CHECK-NEXT: 1 3 1.00 eor z0.d, z0.d, #0xfffffffffffffff9 +# CHECK-NEXT: 1 3 1.00 eor z0.d, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 eor z0.s, z0.s, #0x6 +# CHECK-NEXT: 1 3 1.00 eor z0.s, z0.s, #0xfffffff9 +# CHECK-NEXT: 1 3 1.00 eor z23.d, z13.d, z8.d +# CHECK-NEXT: 1 3 1.00 eor z23.h, z23.h, #0x6 +# CHECK-NEXT: 1 3 1.00 eor z23.h, z23.h, #0xfff9 +# CHECK-NEXT: 1 3 1.00 eor z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 eor z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 eor z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 eor z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 eor z5.b, z5.b, #0x6 +# CHECK-NEXT: 1 3 1.00 eor z5.b, z5.b, #0xf9 +# CHECK-NEXT: 1 4 1.00 eor3 z29.d, z29.d, z30.d, z31.d +# CHECK-NEXT: 1 4 1.00 eorbt z0.b, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 eorbt z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 eorbt z0.h, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 eorbt z0.s, z1.s, z31.s +# CHECK-NEXT: 1 2 1.00 eors p0.b, p0/z, p0.b, p1.b +# CHECK-NEXT: 1 4 1.00 eortb z0.b, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 eortb z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 
1.00 eortb z0.h, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 eortb z0.s, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 eorv b0, p7, z31.b +# CHECK-NEXT: 1 4 1.00 eorv d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 eorv h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 eorv s0, p7, z31.s +# CHECK-NEXT: 1 3 1.00 ext z0.b, { z1.b, z2.b }, #0 +# CHECK-NEXT: 1 3 1.00 ext z31.b, z31.b, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 ext z31.b, z31.b, z0.b, #255 +# CHECK-NEXT: 1 3 1.00 ext z31.b, { z30.b, z31.b }, #255 +# CHECK-NEXT: 1 4 1.00 fabd z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 fabd z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 fabd z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 fabs z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 fabs z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 fabs z31.s, p7/m, z31.s +# CHECK-NEXT: 1 5 1.00 facge p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: 1 5 1.00 facge p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: 1 5 1.00 facge p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: 1 5 1.00 facge p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: 1 5 1.00 facge p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: 1 5 1.00 facge p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: 1 5 1.00 facgt p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: 1 5 1.00 facgt p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: 1 5 1.00 facgt p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: 1 5 1.00 facgt p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: 1 5 1.00 facgt p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: 1 5 1.00 facgt p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: 1 4 1.00 fadd z0.d, p0/m, z0.d, #0.5 +# CHECK-NEXT: 1 4 1.00 fadd z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 fadd z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 fadd z0.h, p0/m, z0.h, #0.5 +# CHECK-NEXT: 1 4 1.00 fadd z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 fadd z0.h, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 fadd z0.s, p0/m, z0.s, #0.5 +# CHECK-NEXT: 1 4 1.00 fadd z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 fadd z0.s, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 fadd z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: 1 4 1.00 fadd z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: 1 4 1.00 fadd 
z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: 1 8 5.00 fadda d0, p7, d0, z31.d +# CHECK-NEXT: 1 32 29.00 fadda h0, p7, h0, z31.h +# CHECK-NEXT: 1 16 13.00 fadda s0, p7, s0, z31.s +# CHECK-NEXT: 1 4 1.00 faddp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 faddp z29.s, p3/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 faddp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 faddv d0, p7, z31.d +# CHECK-NEXT: 1 12 11.00 faddv h0, p7, z31.h +# CHECK-NEXT: 1 8 5.00 faddv s0, p7, z31.s +# CHECK-NEXT: 1 4 1.00 fcadd z0.d, p0/m, z0.d, z0.d, #90 +# CHECK-NEXT: 1 4 1.00 fcadd z0.h, p0/m, z0.h, z0.h, #90 +# CHECK-NEXT: 1 4 1.00 fcadd z0.s, p0/m, z0.s, z0.s, #90 +# CHECK-NEXT: 1 4 1.00 fcadd z31.d, p7/m, z31.d, z31.d, #270 +# CHECK-NEXT: 1 4 1.00 fcadd z31.h, p7/m, z31.h, z31.h, #270 +# CHECK-NEXT: 1 4 1.00 fcadd z31.s, p7/m, z31.s, z31.s, #270 +# CHECK-NEXT: 1 5 1.00 fcmeq p0.d, p0/z, z0.d, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmeq p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: 1 5 1.00 fcmeq p0.h, p0/z, z0.h, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmeq p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: 1 5 1.00 fcmeq p0.s, p0/z, z0.s, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmeq p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: 1 5 1.00 fcmge p0.d, p0/z, z0.d, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmge p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: 1 5 1.00 fcmge p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: 1 5 1.00 fcmge p0.h, p0/z, z0.h, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmge p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: 1 5 1.00 fcmge p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: 1 5 1.00 fcmge p0.s, p0/z, z0.s, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmge p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: 1 5 1.00 fcmge p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: 1 5 1.00 fcmgt p0.d, p0/z, z0.d, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmgt p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: 1 5 1.00 fcmgt p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: 1 5 1.00 fcmgt p0.h, p0/z, z0.h, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmgt p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: 1 5 1.00 fcmgt p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: 1 5 1.00 fcmgt p0.s, p0/z, z0.s, #0.0 
+# CHECK-NEXT: 1 5 1.00 fcmgt p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: 1 5 1.00 fcmgt p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: 1 4 1.00 fcmla z0.d, p0/m, z0.d, z0.d, #0 +# CHECK-NEXT: 1 4 1.00 fcmla z0.d, p0/m, z1.d, z2.d, #90 +# CHECK-NEXT: 1 4 1.00 fcmla z0.h, p0/m, z0.h, z0.h, #0 +# CHECK-NEXT: 1 4 1.00 fcmla z0.h, p0/m, z1.h, z2.h, #90 +# CHECK-NEXT: 1 4 1.00 fcmla z0.h, z0.h, z0.h[0], #0 +# CHECK-NEXT: 1 4 1.00 fcmla z0.s, p0/m, z0.s, z0.s, #0 +# CHECK-NEXT: 1 4 1.00 fcmla z0.s, p0/m, z1.s, z2.s, #90 +# CHECK-NEXT: 1 4 1.00 fcmla z21.s, z10.s, z5.s[1], #90 +# CHECK-NEXT: 1 4 1.00 fcmla z23.s, z13.s, z8.s[0], #270 +# CHECK-NEXT: 1 4 1.00 fcmla z29.d, p7/m, z30.d, z31.d, #180 +# CHECK-NEXT: 1 4 1.00 fcmla z29.h, p7/m, z30.h, z31.h, #180 +# CHECK-NEXT: 1 4 1.00 fcmla z29.s, p7/m, z30.s, z31.s, #180 +# CHECK-NEXT: 1 4 1.00 fcmla z31.d, p7/m, z31.d, z31.d, #270 +# CHECK-NEXT: 1 4 1.00 fcmla z31.h, p7/m, z31.h, z31.h, #270 +# CHECK-NEXT: 1 4 1.00 fcmla z31.h, z31.h, z7.h[3], #270 +# CHECK-NEXT: 1 4 1.00 fcmla z31.s, p7/m, z31.s, z31.s, #270 +# CHECK-NEXT: 1 5 1.00 fcmle p0.d, p0/z, z0.d, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmle p0.h, p0/z, z0.h, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmle p0.s, p0/z, z0.s, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmlt p0.d, p0/z, z0.d, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmlt p0.h, p0/z, z0.h, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmlt p0.s, p0/z, z0.s, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmne p0.d, p0/z, z0.d, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmne p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: 1 5 1.00 fcmne p0.h, p0/z, z0.h, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmne p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: 1 5 1.00 fcmne p0.s, p0/z, z0.s, #0.0 +# CHECK-NEXT: 1 5 1.00 fcmne p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: 1 5 1.00 fcmuo p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: 1 5 1.00 fcmuo p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: 1 5 1.00 fcmuo p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: 1 4 1.00 fcvt z0.d, p0/m, z0.h +# CHECK-NEXT: 1 4 1.00 fcvt z0.d, p0/m, z0.s +# CHECK-NEXT: 1 4 1.00 fcvt z0.h, p0/m, z0.d +# CHECK-NEXT: 1 
4 1.00 fcvt z0.h, p0/m, z0.s +# CHECK-NEXT: 1 4 1.00 fcvt z0.s, p0/m, z0.d +# CHECK-NEXT: 1 4 1.00 fcvt z0.s, p0/m, z0.h +# CHECK-NEXT: 1 4 1.00 fcvtlt z0.s, p0/m, z1.h +# CHECK-NEXT: 1 4 1.00 fcvtlt z30.d, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 fcvtnt z0.h, p0/m, z1.s +# CHECK-NEXT: 1 4 1.00 fcvtnt z30.s, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 fcvtx z0.s, p0/m, z0.d +# CHECK-NEXT: 1 4 1.00 fcvtx z30.s, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 fcvtxnt z0.s, p0/m, z1.d +# CHECK-NEXT: 1 4 1.00 fcvtxnt z30.s, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 fcvtzs z0.d, p0/m, z0.d +# CHECK-NEXT: 1 4 1.00 fcvtzs z0.d, p0/m, z0.h +# CHECK-NEXT: 1 4 1.00 fcvtzs z0.d, p0/m, z0.s +# CHECK-NEXT: 1 4 1.00 fcvtzs z0.h, p0/m, z0.h +# CHECK-NEXT: 1 4 1.00 fcvtzs z0.s, p0/m, z0.d +# CHECK-NEXT: 1 4 1.00 fcvtzs z0.s, p0/m, z0.h +# CHECK-NEXT: 1 4 1.00 fcvtzs z0.s, p0/m, z0.s +# CHECK-NEXT: 1 4 1.00 fcvtzu z0.d, p0/m, z0.d +# CHECK-NEXT: 1 4 1.00 fcvtzu z0.d, p0/m, z0.h +# CHECK-NEXT: 1 4 1.00 fcvtzu z0.d, p0/m, z0.s +# CHECK-NEXT: 1 4 1.00 fcvtzu z0.h, p0/m, z0.h +# CHECK-NEXT: 1 4 1.00 fcvtzu z0.s, p0/m, z0.d +# CHECK-NEXT: 1 4 1.00 fcvtzu z0.s, p0/m, z0.h +# CHECK-NEXT: 1 4 1.00 fcvtzu z0.s, p0/m, z0.s +# CHECK-NEXT: 1 22 19.00 fdiv z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 8 5.00 fdiv z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 13 10.00 fdiv z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 22 19.00 fdivr z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 8 5.00 fdivr z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 13 10.00 fdivr z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 fexpa z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 fexpa z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 fexpa z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 flogb z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 flogb z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 flogb z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 fmad z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 fmad z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmad z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 fmax z0.d, p0/m, z0.d, #0.0 
+# CHECK-NEXT: 1 4 1.00 fmax z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 fmax z0.h, p0/m, z0.h, #0.0 +# CHECK-NEXT: 1 4 1.00 fmax z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmax z0.s, p0/m, z0.s, #0.0 +# CHECK-NEXT: 1 4 1.00 fmax z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 fmax z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: 1 4 1.00 fmax z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: 1 4 1.00 fmax z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: 1 4 1.00 fmaxnm z0.d, p0/m, z0.d, #0.0 +# CHECK-NEXT: 1 4 1.00 fmaxnm z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 fmaxnm z0.h, p0/m, z0.h, #0.0 +# CHECK-NEXT: 1 4 1.00 fmaxnm z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmaxnm z0.s, p0/m, z0.s, #0.0 +# CHECK-NEXT: 1 4 1.00 fmaxnm z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 fmaxnm z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: 1 4 1.00 fmaxnm z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: 1 4 1.00 fmaxnm z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: 1 4 1.00 fmaxnmp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 fmaxnmp z29.s, p3/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 fmaxnmp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 fmaxnmv d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 fmaxnmv h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 fmaxnmv s0, p7, z31.s +# CHECK-NEXT: 1 4 1.00 fmaxp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 fmaxp z29.s, p3/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 fmaxp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 fmaxv d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 fmaxv h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 fmaxv s0, p7, z31.s +# CHECK-NEXT: 1 4 1.00 fmin z0.d, p0/m, z0.d, #0.0 +# CHECK-NEXT: 1 4 1.00 fmin z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 fmin z0.h, p0/m, z0.h, #0.0 +# CHECK-NEXT: 1 4 1.00 fmin z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmin z0.s, p0/m, z0.s, #0.0 +# CHECK-NEXT: 1 4 1.00 fmin z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 fmin z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: 1 4 1.00 fmin z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: 1 4 1.00 fmin z31.s, p7/m, z31.s, 
#1.0 +# CHECK-NEXT: 1 4 1.00 fminnm z0.d, p0/m, z0.d, #0.0 +# CHECK-NEXT: 1 4 1.00 fminnm z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 fminnm z0.h, p0/m, z0.h, #0.0 +# CHECK-NEXT: 1 4 1.00 fminnm z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 fminnm z0.s, p0/m, z0.s, #0.0 +# CHECK-NEXT: 1 4 1.00 fminnm z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 fminnm z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: 1 4 1.00 fminnm z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: 1 4 1.00 fminnm z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: 1 4 1.00 fminnmp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 fminnmp z29.s, p3/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 fminnmp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 fminnmv d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 fminnmv h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 fminnmv s0, p7, z31.s +# CHECK-NEXT: 1 4 1.00 fminp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 fminp z29.s, p3/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 fminp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 fminv d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 fminv h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 fminv s0, p7, z31.s +# CHECK-NEXT: 1 4 1.00 fmla z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 fmla z0.d, z1.d, z7.d[1] +# CHECK-NEXT: 1 4 1.00 fmla z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmla z0.h, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 fmla z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 fmla z0.s, z1.s, z7.s[3] +# CHECK-NEXT: 1 4 1.00 fmlalb z0.s, z1.h, z7.h[0] +# CHECK-NEXT: 1 4 1.00 fmlalb z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmlalb z30.s, z31.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 fmlalt z0.s, z1.h, z7.h[0] +# CHECK-NEXT: 1 4 1.00 fmlalt z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmlalt z30.s, z31.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 fmls z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 fmls z0.d, z1.d, z7.d[1] +# CHECK-NEXT: 1 4 1.00 fmls z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmls z0.h, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 fmls z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: 1 4 
1.00 fmls z0.s, z1.s, z7.s[3] +# CHECK-NEXT: 1 4 1.00 fmlslb z0.s, z1.h, z7.h[0] +# CHECK-NEXT: 1 4 1.00 fmlslb z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmlslb z30.s, z31.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 fmlslt z0.s, z1.h, z7.h[0] +# CHECK-NEXT: 1 4 1.00 fmlslt z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmlslt z30.s, z31.h, z7.h[7] +# CHECK-NEXT: 1 3 1.00 fmov z0.d, #-10.00000000 +# CHECK-NEXT: 1 3 1.00 fmov z0.d, #0.12500000 +# CHECK-NEXT: 1 3 1.00 fmov z0.d, p0/m, #-10.00000000 +# CHECK-NEXT: 1 3 1.00 fmov z0.d, p0/m, #0.12500000 +# CHECK-NEXT: 1 3 1.00 fmov z0.h, #-0.12500000 +# CHECK-NEXT: 1 3 1.00 fmov z0.h, p0/m, #-0.12500000 +# CHECK-NEXT: 1 3 1.00 fmov z0.s, #-0.12500000 +# CHECK-NEXT: 1 3 1.00 fmov z0.s, p0/m, #-0.12500000 +# CHECK-NEXT: 1 4 1.00 fmsb z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 fmsb z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmsb z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 fmul z0.d, p0/m, z0.d, #0.5 +# CHECK-NEXT: 1 4 1.00 fmul z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 fmul z0.d, z0.d, z0.d[0] +# CHECK-NEXT: 1 4 1.00 fmul z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 fmul z0.h, p0/m, z0.h, #0.5 +# CHECK-NEXT: 1 4 1.00 fmul z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmul z0.h, z0.h, z0.h[0] +# CHECK-NEXT: 1 4 1.00 fmul z0.h, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmul z0.s, p0/m, z0.s, #0.5 +# CHECK-NEXT: 1 4 1.00 fmul z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 fmul z0.s, z0.s, z0.s[0] +# CHECK-NEXT: 1 4 1.00 fmul z0.s, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 fmul z31.d, p7/m, z31.d, #2.0 +# CHECK-NEXT: 1 4 1.00 fmul z31.d, z31.d, z15.d[1] +# CHECK-NEXT: 1 4 1.00 fmul z31.h, p7/m, z31.h, #2.0 +# CHECK-NEXT: 1 4 1.00 fmul z31.h, z31.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 fmul z31.s, p7/m, z31.s, #2.0 +# CHECK-NEXT: 1 4 1.00 fmul z31.s, z31.s, z7.s[3] +# CHECK-NEXT: 1 4 1.00 fmulx z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 fmulx z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 fmulx z0.s, p7/m, z0.s, z31.s +# 
CHECK-NEXT: 1 4 1.00 fneg z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 fneg z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 fneg z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 fnmad z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 fnmad z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 fnmad z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 fnmla z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 fnmla z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 fnmla z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 fnmls z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 fnmls z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 fnmls z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 fnmsb z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 fnmsb z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 fnmsb z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 frecpe z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 frecpe z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 frecpe z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 frecps z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 frecps z0.h, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 frecps z0.s, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 frecpx z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 frecpx z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 frecpx z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 frinta z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 frinta z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 frinta z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 frinti z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 frinti z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 frinti z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 frintm z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 frintm z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 frintm z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 frintn z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 frintn z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 frintn z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 frintp z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 frintp z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 frintp z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 frintx 
z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 frintx z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 frintx z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 frintz z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 frintz z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 frintz z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 frsqrte z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 frsqrte z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 frsqrte z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 frsqrts z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 frsqrts z0.h, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 frsqrts z0.s, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 fscale z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 fscale z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 fscale z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 25 19.00 fsqrt z31.d, p7/m, z31.d +# CHECK-NEXT: 1 11 5.00 fsqrt z31.h, p7/m, z31.h +# CHECK-NEXT: 1 14 9.00 fsqrt z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 fsub z0.d, p0/m, z0.d, #0.5 +# CHECK-NEXT: 1 4 1.00 fsub z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 fsub z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 fsub z0.h, p0/m, z0.h, #0.5 +# CHECK-NEXT: 1 4 1.00 fsub z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 fsub z0.h, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 fsub z0.s, p0/m, z0.s, #0.5 +# CHECK-NEXT: 1 4 1.00 fsub z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 fsub z0.s, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 fsub z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: 1 4 1.00 fsub z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: 1 4 1.00 fsub z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: 1 4 1.00 fsubr z0.d, p0/m, z0.d, #0.5 +# CHECK-NEXT: 1 4 1.00 fsubr z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 fsubr z0.h, p0/m, z0.h, #0.5 +# CHECK-NEXT: 1 4 1.00 fsubr z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 fsubr z0.s, p0/m, z0.s, #0.5 +# CHECK-NEXT: 1 4 1.00 fsubr z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 fsubr z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: 1 4 1.00 fsubr z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: 1 4 1.00 fsubr z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: 1 4 1.00 
ftmad z0.d, z0.d, z31.d, #7 +# CHECK-NEXT: 1 4 1.00 ftmad z0.h, z0.h, z31.h, #7 +# CHECK-NEXT: 1 4 1.00 ftmad z0.s, z0.s, z31.s, #7 +# CHECK-NEXT: 1 4 1.00 ftsmul z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 ftsmul z0.h, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 ftsmul z0.s, z1.s, z31.s +# CHECK-NEXT: 1 3 1.00 ftssel z0.d, z1.d, z31.d +# CHECK-NEXT: 1 3 1.00 ftssel z0.h, z1.h, z31.h +# CHECK-NEXT: 1 3 1.00 ftssel z0.s, z1.s, z31.s +# CHECK-NEXT: 1 8 2.00 histcnt z0.s, p0/z, z1.s, z2.s +# CHECK-NEXT: 1 8 2.00 histcnt z29.d, p7/z, z30.d, z31.d +# CHECK-NEXT: 1 8 2.00 histseg z0.b, z1.b, z31.b +# CHECK-NEXT: 1 3 1.00 incb x0 +# CHECK-NEXT: 1 3 1.00 incb x0, #14 +# CHECK-NEXT: 1 3 1.00 incb x0, all, mul #16 +# CHECK-NEXT: 1 3 1.00 incb x0, pow2 +# CHECK-NEXT: 1 3 1.00 incb x0, vl1 +# CHECK-NEXT: 1 3 1.00 incd x0 +# CHECK-NEXT: 1 3 1.00 incd x0, #14 +# CHECK-NEXT: 1 3 1.00 incd x0, all, mul #16 +# CHECK-NEXT: 1 3 1.00 incd x0, pow2 +# CHECK-NEXT: 1 3 1.00 incd x0, vl1 +# CHECK-NEXT: 1 3 1.00 incd z0.d +# CHECK-NEXT: 1 3 1.00 incd z0.d, all, mul #16 +# CHECK-NEXT: 1 3 1.00 inch x0 +# CHECK-NEXT: 1 3 1.00 inch x0, #14 +# CHECK-NEXT: 1 3 1.00 inch x0, all, mul #16 +# CHECK-NEXT: 1 3 1.00 inch x0, pow2 +# CHECK-NEXT: 1 3 1.00 inch x0, vl1 +# CHECK-NEXT: 1 3 1.00 inch z0.h +# CHECK-NEXT: 1 3 1.00 inch z0.h, all, mul #16 +# CHECK-NEXT: 1 1 1.00 incp x0, p0.b +# CHECK-NEXT: 1 1 1.00 incp x0, p0.d +# CHECK-NEXT: 1 1 1.00 incp x0, p0.h +# CHECK-NEXT: 1 1 1.00 incp x0, p0.s +# CHECK-NEXT: 1 1 1.00 incp xzr, p15.b +# CHECK-NEXT: 1 1 1.00 incp xzr, p15.d +# CHECK-NEXT: 1 1 1.00 incp xzr, p15.h +# CHECK-NEXT: 1 1 1.00 incp xzr, p15.s +# CHECK-NEXT: 1 3 1.00 incp z31.d, p15.d +# CHECK-NEXT: 1 3 1.00 incp z31.h, p15.h +# CHECK-NEXT: 1 3 1.00 incp z31.s, p15.s +# CHECK-NEXT: 1 3 1.00 incw x0 +# CHECK-NEXT: 1 3 1.00 incw x0, #14 +# CHECK-NEXT: 1 3 1.00 incw x0, all, mul #16 +# CHECK-NEXT: 1 3 1.00 incw x0, pow2 +# CHECK-NEXT: 1 3 1.00 incw x0, vl1 +# CHECK-NEXT: 1 3 1.00 incw z0.s +# CHECK-NEXT: 
1 3 1.00 incw z0.s, all, mul #16 +# CHECK-NEXT: 1 4 1.00 index z0.b, #0, #0 +# CHECK-NEXT: 1 4 1.00 index z0.d, #0, #0 +# CHECK-NEXT: 1 4 1.00 index z0.h, #0, #0 +# CHECK-NEXT: 1 4 1.00 index z0.h, w0, w0 +# CHECK-NEXT: 1 4 1.00 index z0.s, #0, #0 +# CHECK-NEXT: 1 4 1.00 index z21.b, w10, w21 +# CHECK-NEXT: 1 4 1.00 index z21.d, x10, x21 +# CHECK-NEXT: 1 4 1.00 index z21.s, w10, w21 +# CHECK-NEXT: 1 4 1.00 index z23.b, #13, w8 +# CHECK-NEXT: 1 4 1.00 index z23.b, w13, #8 +# CHECK-NEXT: 1 4 1.00 index z23.d, #13, x8 +# CHECK-NEXT: 1 4 1.00 index z23.d, x13, #8 +# CHECK-NEXT: 1 4 1.00 index z23.h, #13, w8 +# CHECK-NEXT: 1 4 1.00 index z23.h, w13, #8 +# CHECK-NEXT: 1 4 1.00 index z23.s, #13, w8 +# CHECK-NEXT: 1 4 1.00 index z23.s, w13, #8 +# CHECK-NEXT: 1 4 1.00 index z31.b, #-1, #-1 +# CHECK-NEXT: 1 4 1.00 index z31.b, #-1, wzr +# CHECK-NEXT: 1 4 1.00 index z31.b, wzr, #-1 +# CHECK-NEXT: 1 4 1.00 index z31.b, wzr, wzr +# CHECK-NEXT: 1 4 1.00 index z31.d, #-1, #-1 +# CHECK-NEXT: 1 4 1.00 index z31.d, #-1, xzr +# CHECK-NEXT: 1 4 1.00 index z31.d, xzr, #-1 +# CHECK-NEXT: 1 4 1.00 index z31.d, xzr, xzr +# CHECK-NEXT: 1 4 1.00 index z31.h, #-1, #-1 +# CHECK-NEXT: 1 4 1.00 index z31.h, #-1, wzr +# CHECK-NEXT: 1 4 1.00 index z31.h, wzr, #-1 +# CHECK-NEXT: 1 4 1.00 index z31.h, wzr, wzr +# CHECK-NEXT: 1 4 1.00 index z31.s, #-1, #-1 +# CHECK-NEXT: 1 4 1.00 index z31.s, #-1, wzr +# CHECK-NEXT: 1 4 1.00 index z31.s, wzr, #-1 +# CHECK-NEXT: 1 4 1.00 index z31.s, wzr, wzr +# CHECK-NEXT: 1 8 2.00 insr z0.b, w0 +# CHECK-NEXT: 1 8 2.00 insr z0.d, x0 +# CHECK-NEXT: 1 8 2.00 insr z0.h, w0 +# CHECK-NEXT: 1 8 2.00 insr z0.s, w0 +# CHECK-NEXT: 1 4 1.00 insr z31.b, b31 +# CHECK-NEXT: 1 8 2.00 insr z31.b, wzr +# CHECK-NEXT: 1 4 1.00 insr z31.d, d31 +# CHECK-NEXT: 1 8 2.00 insr z31.d, xzr +# CHECK-NEXT: 1 4 1.00 insr z31.h, h31 +# CHECK-NEXT: 1 8 2.00 insr z31.h, wzr +# CHECK-NEXT: 1 4 1.00 insr z31.s, s31 +# CHECK-NEXT: 1 8 2.00 insr z31.s, wzr +# CHECK-NEXT: 1 4 1.00 lasta b0, p7, z31.b 
+# CHECK-NEXT: 1 4 1.00 lasta d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 lasta h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 lasta s0, p7, z31.s +# CHECK-NEXT: 1 8 2.00 lasta w0, p7, z31.b +# CHECK-NEXT: 1 8 2.00 lasta w0, p7, z31.h +# CHECK-NEXT: 1 8 2.00 lasta w0, p7, z31.s +# CHECK-NEXT: 1 8 2.00 lasta x0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 lastb b0, p7, z31.b +# CHECK-NEXT: 1 4 1.00 lastb d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 lastb h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 lastb s0, p7, z31.s +# CHECK-NEXT: 1 8 2.00 lastb w0, p7, z31.b +# CHECK-NEXT: 1 8 2.00 lastb w0, p7, z31.h +# CHECK-NEXT: 1 8 2.00 lastb w0, p7, z31.s +# CHECK-NEXT: 1 8 2.00 lastb x0, p7, z31.d +# CHECK-NEXT: 1 3 1.00 * ld1b { z0.b }, p0/z, [sp, x0] +# CHECK-NEXT: 1 3 1.00 * ld1b { z0.b }, p0/z, [x0, x0] +# CHECK-NEXT: 1 3 1.00 * ld1b { z0.b }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1b { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 7 7.00 * ld1b { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 3 1.00 * ld1b { z0.h }, p0/z, [x0] +# CHECK-NEXT: 1 7 7.00 * ld1b { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 7 7.00 * ld1b { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 3 1.00 * ld1b { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 9 9.00 * ld1b { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: 1 3 1.00 * ld1b { z21.b }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1b { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1b { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * ld1b { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 3 1.00 * ld1b { z21.h }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1b { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1b { z21.s }, p5/z, [x10, x21] +# CHECK-NEXT: 1 3 1.00 * ld1b { z23.d }, p3/z, [x13, x8] +# CHECK-NEXT: 1 3 1.00 * ld1b { z31.b }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1b { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1b { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 7 7.00 * ld1b { z31.d }, p7/z, [z31.d, #31] +# CHECK-NEXT: 
1 3 1.00 * ld1b { z31.h }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1b { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 9 9.00 * ld1b { z31.s }, p7/z, [z31.s, #31] +# CHECK-NEXT: 1 3 1.00 * ld1b { z5.h }, p3/z, [x17, x16] +# CHECK-NEXT: 1 7 7.00 * ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3] +# CHECK-NEXT: 1 7 7.00 * ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3] +# CHECK-NEXT: 1 3 1.00 * ld1d { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 7 7.00 * ld1d { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 3 1.00 * ld1d { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1d { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * ld1d { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 3 1.00 * ld1d { z23.d }, p3/z, [sp, x8, lsl #3] +# CHECK-NEXT: 1 3 1.00 * ld1d { z23.d }, p3/z, [x13, x8, lsl #3] +# CHECK-NEXT: 1 7 7.00 * ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3] +# CHECK-NEXT: 1 3 1.00 * ld1d { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1d { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 7 7.00 * ld1d { z31.d }, p7/z, [z31.d, #248] +# CHECK-NEXT: 1 7 7.00 * ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1] +# CHECK-NEXT: 1 7 7.00 * ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1] +# CHECK-NEXT: 1 3 1.00 * ld1h { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 7 7.00 * ld1h { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 3 1.00 * ld1h { z0.h }, p0/z, [x0] +# CHECK-NEXT: 1 7 7.00 * ld1h { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 7 7.00 * ld1h { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 3 1.00 * ld1h { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 9 9.00 * ld1h { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: 1 3 1.00 * ld1h { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1h { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * ld1h { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 3 1.00 * ld1h { z21.h }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1h { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1h { z21.s }, p5/z, [x10, x21, lsl #1] +# 
CHECK-NEXT: 1 3 1.00 * ld1h { z23.d }, p3/z, [x13, x8, lsl #1] +# CHECK-NEXT: 1 7 7.00 * ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1] +# CHECK-NEXT: 1 3 1.00 * ld1h { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1h { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 7 7.00 * ld1h { z31.d }, p7/z, [z31.d, #62] +# CHECK-NEXT: 1 3 1.00 * ld1h { z31.h }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1h { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1] +# CHECK-NEXT: 1 7 7.00 * ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1] +# CHECK-NEXT: 1 9 9.00 * ld1h { z31.s }, p7/z, [z31.s, #62] +# CHECK-NEXT: 1 3 1.00 * ld1h { z5.h }, p3/z, [sp, x16, lsl #1] +# CHECK-NEXT: 1 3 1.00 * ld1h { z5.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: 1 3 1.00 * ld1rb { z0.b }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rb { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rb { z0.h }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rb { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rb { z31.b }, p7/z, [sp, #63] +# CHECK-NEXT: 1 3 1.00 * ld1rb { z31.d }, p7/z, [sp, #63] +# CHECK-NEXT: 1 3 1.00 * ld1rb { z31.h }, p7/z, [sp, #63] +# CHECK-NEXT: 1 3 1.00 * ld1rb { z31.s }, p7/z, [sp, #63] +# CHECK-NEXT: 1 3 1.00 * ld1rd { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rd { z31.d }, p7/z, [sp, #504] +# CHECK-NEXT: 1 3 1.00 * ld1rh { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rh { z0.h }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rh { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rh { z31.d }, p7/z, [sp, #126] +# CHECK-NEXT: 1 3 1.00 * ld1rh { z31.h }, p7/z, [sp, #126] +# CHECK-NEXT: 1 3 1.00 * ld1rh { z31.s }, p7/z, [sp, #126] +# CHECK-NEXT: 1 3 1.00 * ld1rqb { z0.b }, p0/z, [x0, x0] +# CHECK-NEXT: 1 3 1.00 * ld1rqb { z0.b }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rqb { z21.b }, p5/z, [x10, #112] +# CHECK-NEXT: 1 3 1.00 * ld1rqb { z23.b }, p3/z, [x13, #-128] +# CHECK-NEXT: 1 3 1.00 * ld1rqb { z31.b }, p7/z, [sp, #-16] +# CHECK-NEXT: 1 3 
1.00 * ld1rqd { z0.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: 1 3 1.00 * ld1rqd { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rqd { z23.d }, p3/z, [x13, #-128] +# CHECK-NEXT: 1 3 1.00 * ld1rqd { z23.d }, p3/z, [x13, #112] +# CHECK-NEXT: 1 3 1.00 * ld1rqd { z31.d }, p7/z, [sp, #-16] +# CHECK-NEXT: 1 3 1.00 * ld1rqh { z0.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 1 3 1.00 * ld1rqh { z0.h }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rqh { z23.h }, p3/z, [x13, #-128] +# CHECK-NEXT: 1 3 1.00 * ld1rqh { z23.h }, p3/z, [x13, #112] +# CHECK-NEXT: 1 3 1.00 * ld1rqh { z31.h }, p7/z, [sp, #-16] +# CHECK-NEXT: 1 3 1.00 * ld1rqw { z0.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: 1 3 1.00 * ld1rqw { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rqw { z23.s }, p3/z, [x13, #-128] +# CHECK-NEXT: 1 3 1.00 * ld1rqw { z23.s }, p3/z, [x13, #112] +# CHECK-NEXT: 1 3 1.00 * ld1rqw { z31.s }, p7/z, [sp, #-16] +# CHECK-NEXT: 1 3 1.00 * ld1rsb { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rsb { z0.h }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rsb { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rsb { z31.d }, p7/z, [sp, #63] +# CHECK-NEXT: 1 3 1.00 * ld1rsb { z31.h }, p7/z, [sp, #63] +# CHECK-NEXT: 1 3 1.00 * ld1rsb { z31.s }, p7/z, [sp, #63] +# CHECK-NEXT: 1 3 1.00 * ld1rsh { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rsh { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rsh { z31.d }, p7/z, [sp, #126] +# CHECK-NEXT: 1 3 1.00 * ld1rsh { z31.s }, p7/z, [sp, #126] +# CHECK-NEXT: 1 3 1.00 * ld1rsw { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rsw { z31.d }, p7/z, [sp, #252] +# CHECK-NEXT: 1 3 1.00 * ld1rw { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rw { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld1rw { z31.d }, p7/z, [sp, #252] +# CHECK-NEXT: 1 3 1.00 * ld1rw { z31.s }, p7/z, [sp, #252] +# CHECK-NEXT: 1 3 1.00 * ld1sb { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 7 7.00 * ld1sb { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 3 1.00 * ld1sb { z0.h }, p0/z, [sp, x0] +# CHECK-NEXT: 1 3 
1.00 * ld1sb { z0.h }, p0/z, [x0, x0] +# CHECK-NEXT: 1 3 1.00 * ld1sb { z0.h }, p0/z, [x0] +# CHECK-NEXT: 1 7 7.00 * ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 3 1.00 * ld1sb { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 9 9.00 * ld1sb { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: 1 3 1.00 * ld1sb { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 3 1.00 * ld1sb { z21.h }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1sb { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1sb { z21.s }, p5/z, [x10, x21] +# CHECK-NEXT: 1 3 1.00 * ld1sb { z23.d }, p3/z, [x13, x8] +# CHECK-NEXT: 1 3 1.00 * ld1sb { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1sb { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 7 7.00 * ld1sb { z31.d }, p7/z, [z31.d, #31] +# CHECK-NEXT: 1 3 1.00 * ld1sb { z31.h }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1sb { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 9 9.00 * ld1sb { z31.s }, p7/z, [z31.s, #31] +# CHECK-NEXT: 1 7 7.00 * ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1] +# CHECK-NEXT: 1 7 7.00 * ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1] +# CHECK-NEXT: 1 3 1.00 * ld1sh { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 7 7.00 * ld1sh { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 7 7.00 * ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 7 7.00 * ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 3 1.00 * ld1sh { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 9 9.00 * ld1sh { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: 1 3 1.00 * ld1sh { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 3 1.00 * ld1sh { z21.s }, p5/z, [sp, x21, lsl #1] +# CHECK-NEXT: 1 3 1.00 * ld1sh { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1sh { z21.s }, p5/z, [x10, x21, lsl #1] +# 
CHECK-NEXT: 1 3 1.00 * ld1sh { z23.d }, p3/z, [x13, x8, lsl #1] +# CHECK-NEXT: 1 7 7.00 * ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1] +# CHECK-NEXT: 1 3 1.00 * ld1sh { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1sh { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 7 7.00 * ld1sh { z31.d }, p7/z, [z31.d, #62] +# CHECK-NEXT: 1 3 1.00 * ld1sh { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1] +# CHECK-NEXT: 1 7 7.00 * ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1] +# CHECK-NEXT: 1 9 9.00 * ld1sh { z31.s }, p7/z, [z31.s, #62] +# CHECK-NEXT: 1 7 7.00 * ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2] +# CHECK-NEXT: 1 7 7.00 * ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2] +# CHECK-NEXT: 1 3 1.00 * ld1sw { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 7 7.00 * ld1sw { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 3 1.00 * ld1sw { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 3 1.00 * ld1sw { z23.d }, p3/z, [sp, x8, lsl #2] +# CHECK-NEXT: 1 3 1.00 * ld1sw { z23.d }, p3/z, [x13, x8, lsl #2] +# CHECK-NEXT: 1 7 7.00 * ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2] +# CHECK-NEXT: 1 3 1.00 * ld1sw { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1sw { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 7 7.00 * ld1sw { z31.d }, p7/z, [z31.d, #124] +# CHECK-NEXT: 1 7 7.00 * ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2] +# CHECK-NEXT: 1 7 7.00 * ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2] +# CHECK-NEXT: 1 3 1.00 * ld1w { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 7 7.00 * ld1w { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 7 7.00 * ld1w { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 7 7.00 * ld1w { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 3 1.00 * ld1w { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 9 9.00 * ld1w { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: 1 3 1.00 * ld1w { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 7 7.00 * 
ld1w { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * ld1w { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 3 1.00 * ld1w { z21.s }, p5/z, [sp, x21, lsl #2] +# CHECK-NEXT: 1 3 1.00 * ld1w { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld1w { z21.s }, p5/z, [x10, x21, lsl #2] +# CHECK-NEXT: 1 3 1.00 * ld1w { z23.d }, p3/z, [x13, x8, lsl #2] +# CHECK-NEXT: 1 7 7.00 * ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2] +# CHECK-NEXT: 1 3 1.00 * ld1w { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1w { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 7 7.00 * ld1w { z31.d }, p7/z, [z31.d, #124] +# CHECK-NEXT: 1 3 1.00 * ld1w { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 7 7.00 * ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2] +# CHECK-NEXT: 1 7 7.00 * ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2] +# CHECK-NEXT: 1 9 9.00 * ld1w { z31.s }, p7/z, [z31.s, #124] +# CHECK-NEXT: 1 3 2.00 * ld2b { z0.b, z1.b }, p0/z, [x0, x0] +# CHECK-NEXT: 1 3 1.00 * ld2b { z0.b, z1.b }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld2b { z23.b, z24.b }, p3/z, [x13, #-16, mul vl] +# CHECK-NEXT: 1 3 2.00 * ld2b { z5.b, z6.b }, p3/z, [x17, x16] +# CHECK-NEXT: 1 3 2.00 * ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: 1 3 1.00 * ld2d { z0.d, z1.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl] +# CHECK-NEXT: 1 3 2.00 * ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: 1 3 2.00 * ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 1 3 1.00 * ld2h { z0.h, z1.h }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl] +# CHECK-NEXT: 1 3 2.00 * ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: 1 3 2.00 * ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2] +# 
CHECK-NEXT: 1 3 1.00 * ld2w { z0.s, z1.s }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl] +# CHECK-NEXT: 1 3 1.00 * ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl] +# CHECK-NEXT: 1 3 2.00 * ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: 1 5 3.00 * ld3b { z0.b - z2.b }, p0/z, [x0, x0] +# CHECK-NEXT: 1 5 3.00 * ld3b { z0.b - z2.b }, p0/z, [x0] +# CHECK-NEXT: 1 5 3.00 * ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld3b { z5.b - z7.b }, p3/z, [x17, x16] +# CHECK-NEXT: 1 5 3.00 * ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: 1 5 3.00 * ld3d { z0.d - z2.d }, p0/z, [x0] +# CHECK-NEXT: 1 5 3.00 * ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: 1 5 3.00 * ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 1 5 3.00 * ld3h { z0.h - z2.h }, p0/z, [x0] +# CHECK-NEXT: 1 5 3.00 * ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: 1 5 3.00 * ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: 1 5 3.00 * ld3w { z0.s - z2.s }, p0/z, [x0] +# CHECK-NEXT: 1 5 3.00 * ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: 1 5 3.00 * ld4b { z0.b - z3.b }, p0/z, [x0, x0] +# CHECK-NEXT: 1 5 3.00 * ld4b { z0.b - z3.b }, p0/z, [x0] +# CHECK-NEXT: 1 5 3.00 * ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld4b { z5.b - z8.b }, 
p3/z, [x17, x16] +# CHECK-NEXT: 1 5 3.00 * ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: 1 5 3.00 * ld4d { z0.d - z3.d }, p0/z, [x0] +# CHECK-NEXT: 1 5 3.00 * ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: 1 5 3.00 * ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 1 5 3.00 * ld4h { z0.h - z3.h }, p0/z, [x0] +# CHECK-NEXT: 1 5 3.00 * ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: 1 5 3.00 * ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: 1 5 3.00 * ld4w { z0.s - z3.s }, p0/z, [x0] +# CHECK-NEXT: 1 5 3.00 * ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: 1 5 3.00 * ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: 1 3 1.00 * U ldff1b { z0.d }, p0/z, [x0, x0] +# CHECK-NEXT: 1 7 7.00 * U ldff1b { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 3 1.00 * U ldff1b { z0.h }, p0/z, [x0, x0] +# CHECK-NEXT: 1 3 1.00 * U ldff1b { z0.s }, p0/z, [x0, x0] +# CHECK-NEXT: 1 7 7.00 * U ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 9 9.00 * U ldff1b { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: 1 7 7.00 * U ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 3 1.00 * U ldff1b { z31.b }, p7/z, [sp] +# CHECK-NEXT: 1 7 7.00 * U ldff1b { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 3 1.00 * U ldff1b { z31.d }, p7/z, [sp] +# CHECK-NEXT: 1 7 7.00 * U ldff1b { z31.d }, p7/z, [z31.d, #31] +# CHECK-NEXT: 1 3 1.00 * U ldff1b { z31.h }, p7/z, [sp] +# CHECK-NEXT: 1 3 1.00 * U ldff1b { z31.s }, 
p7/z, [sp] +# CHECK-NEXT: 1 9 9.00 * U ldff1b { z31.s }, p7/z, [z31.s, #31] +# CHECK-NEXT: 1 3 1.00 * U ldff1d { z0.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: 1 7 7.00 * U ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3] +# CHECK-NEXT: 1 7 7.00 * U ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3] +# CHECK-NEXT: 1 7 7.00 * U ldff1d { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 7 7.00 * U ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3] +# CHECK-NEXT: 1 7 7.00 * U ldff1d { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 3 1.00 * U ldff1d { z31.d }, p7/z, [sp] +# CHECK-NEXT: 1 7 7.00 * U ldff1d { z31.d }, p7/z, [z31.d, #248] +# CHECK-NEXT: 1 3 1.00 * U ldff1h { z0.d }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 1 7 7.00 * U ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1] +# CHECK-NEXT: 1 7 7.00 * U ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1] +# CHECK-NEXT: 1 7 7.00 * U ldff1h { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 3 1.00 * U ldff1h { z0.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 1 3 1.00 * U ldff1h { z0.s }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 1 7 7.00 * U ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 9 9.00 * U ldff1h { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: 1 7 7.00 * U ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1] +# CHECK-NEXT: 1 7 7.00 * U ldff1h { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 3 1.00 * U ldff1h { z31.d }, p7/z, [sp] +# CHECK-NEXT: 1 7 7.00 * U ldff1h { z31.d }, p7/z, [z31.d, #62] +# CHECK-NEXT: 1 3 1.00 * U ldff1h { z31.h }, p7/z, [sp] +# CHECK-NEXT: 1 7 7.00 * U ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1] +# CHECK-NEXT: 1 7 7.00 * U ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1] +# CHECK-NEXT: 1 3 1.00 * U ldff1h { z31.s }, p7/z, 
[sp] +# CHECK-NEXT: 1 9 9.00 * U ldff1h { z31.s }, p7/z, [z31.s, #62] +# CHECK-NEXT: 1 3 1.00 * U ldff1sb { z0.d }, p0/z, [x0, x0] +# CHECK-NEXT: 1 7 7.00 * U ldff1sb { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 3 1.00 * U ldff1sb { z0.h }, p0/z, [x0, x0] +# CHECK-NEXT: 1 3 1.00 * U ldff1sb { z0.s }, p0/z, [x0, x0] +# CHECK-NEXT: 1 7 7.00 * U ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 9 9.00 * U ldff1sb { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: 1 7 7.00 * U ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1sb { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 3 1.00 * U ldff1sb { z31.d }, p7/z, [sp] +# CHECK-NEXT: 1 7 7.00 * U ldff1sb { z31.d }, p7/z, [z31.d, #31] +# CHECK-NEXT: 1 3 1.00 * U ldff1sb { z31.h }, p7/z, [sp] +# CHECK-NEXT: 1 3 1.00 * U ldff1sb { z31.s }, p7/z, [sp] +# CHECK-NEXT: 1 9 9.00 * U ldff1sb { z31.s }, p7/z, [z31.s, #31] +# CHECK-NEXT: 1 3 1.00 * U ldff1sh { z0.d }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 1 7 7.00 * U ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1] +# CHECK-NEXT: 1 7 7.00 * U ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1] +# CHECK-NEXT: 1 7 7.00 * U ldff1sh { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 3 1.00 * U ldff1sh { z0.s }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 1 7 7.00 * U ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 9 9.00 * U ldff1sh { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: 1 7 7.00 * U ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1] +# CHECK-NEXT: 1 7 7.00 * U ldff1sh { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 3 1.00 * U ldff1sh { z31.d }, p7/z, [sp] +# CHECK-NEXT: 1 7 7.00 * U ldff1sh { z31.d }, p7/z, [z31.d, #62] +# CHECK-NEXT: 1 7 7.00 * U 
ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1] +# CHECK-NEXT: 1 7 7.00 * U ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1] +# CHECK-NEXT: 1 3 1.00 * U ldff1sh { z31.s }, p7/z, [sp] +# CHECK-NEXT: 1 9 9.00 * U ldff1sh { z31.s }, p7/z, [z31.s, #62] +# CHECK-NEXT: 1 3 1.00 * U ldff1sw { z0.d }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: 1 7 7.00 * U ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2] +# CHECK-NEXT: 1 7 7.00 * U ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2] +# CHECK-NEXT: 1 7 7.00 * U ldff1sw { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 7 7.00 * U ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2] +# CHECK-NEXT: 1 7 7.00 * U ldff1sw { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 3 1.00 * U ldff1sw { z31.d }, p7/z, [sp] +# CHECK-NEXT: 1 7 7.00 * U ldff1sw { z31.d }, p7/z, [z31.d, #124] +# CHECK-NEXT: 1 3 1.00 * U ldff1w { z0.d }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: 1 7 7.00 * U ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2] +# CHECK-NEXT: 1 7 7.00 * U ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2] +# CHECK-NEXT: 1 7 7.00 * U ldff1w { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: 1 3 1.00 * U ldff1w { z0.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: 1 7 7.00 * U ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 9 9.00 * U ldff1w { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: 1 7 7.00 * U ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: 1 7 7.00 * U ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2] +# CHECK-NEXT: 1 7 7.00 * U ldff1w { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: 1 3 1.00 * U ldff1w { z31.d }, p7/z, [sp] +# CHECK-NEXT: 1 7 7.00 * U ldff1w { z31.d }, p7/z, [z31.d, #124] +# CHECK-NEXT: 1 7 7.00 * U ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2] +# CHECK-NEXT: 1 7 7.00 * U ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2] 
+# CHECK-NEXT: 1 3 1.00 * U ldff1w { z31.s }, p7/z, [sp] +# CHECK-NEXT: 1 9 9.00 * U ldff1w { z31.s }, p7/z, [z31.s, #124] +# CHECK-NEXT: 1 3 1.00 * U ldnf1b { z0.b }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1b { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1b { z0.h }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1b { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1b { z21.b }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1b { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1b { z21.h }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1b { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1b { z31.b }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1b { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1b { z31.h }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1b { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1d { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1d { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1d { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1h { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1h { z0.h }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1h { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1h { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1h { z21.h }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1h { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1h { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1h { z31.h }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1h { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sb { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sb { z0.h }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sb { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sb { z21.h }, p5/z, [x10, #5, mul 
vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sb { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sb { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sb { z31.h }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sb { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sh { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sh { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sh { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sh { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sh { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sw { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1sw { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1w { z0.d }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1w { z0.s }, p0/z, [x0] +# CHECK-NEXT: 1 3 1.00 * U ldnf1w { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1w { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1w { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * U ldnf1w { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: 1 3 1.00 * ldnt1b { z0.b }, p0/z, [x0, x0] +# CHECK-NEXT: 1 3 1.00 * ldnt1b { z0.b }, p0/z, [x0] +# CHECK-NEXT: 1 7 7.00 * ldnt1b { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: 1 9 9.00 * ldnt1b { z0.s }, p0/z, [z1.s] +# CHECK-NEXT: 1 3 1.00 * ldnt1b { z21.b }, p5/z, [x10, #7, mul vl] +# CHECK-NEXT: 1 3 1.00 * ldnt1b { z23.b }, p3/z, [x13, #-8, mul vl] +# CHECK-NEXT: 1 7 7.00 * ldnt1b { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: 1 7 7.00 * ldnt1b { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: 1 9 9.00 * ldnt1b { z31.s }, p7/z, [z31.s, x0] +# CHECK-NEXT: 1 9 9.00 * ldnt1b { z31.s }, p7/z, [z31.s] +# CHECK-NEXT: 1 3 1.00 * ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: 1 3 1.00 * ldnt1d { z0.d }, p0/z, [x0] 
+# CHECK-NEXT: 1 7 7.00 * ldnt1d { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: 1 3 1.00 * ldnt1d { z21.d }, p5/z, [x10, #7, mul vl] +# CHECK-NEXT: 1 3 1.00 * ldnt1d { z23.d }, p3/z, [x13, #-8, mul vl] +# CHECK-NEXT: 1 7 7.00 * ldnt1d { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: 1 7 7.00 * ldnt1d { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: 1 7 7.00 * ldnt1h { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: 1 3 1.00 * ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 1 3 1.00 * ldnt1h { z0.h }, p0/z, [x0] +# CHECK-NEXT: 1 9 9.00 * ldnt1h { z0.s }, p0/z, [z1.s] +# CHECK-NEXT: 1 3 1.00 * ldnt1h { z21.h }, p5/z, [x10, #7, mul vl] +# CHECK-NEXT: 1 3 1.00 * ldnt1h { z23.h }, p3/z, [x13, #-8, mul vl] +# CHECK-NEXT: 1 7 7.00 * ldnt1h { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: 1 7 7.00 * ldnt1h { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: 1 9 9.00 * ldnt1h { z31.s }, p7/z, [z31.s, x0] +# CHECK-NEXT: 1 9 9.00 * ldnt1h { z31.s }, p7/z, [z31.s] +# CHECK-NEXT: 1 7 7.00 * ldnt1sb { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: 1 9 9.00 * ldnt1sb { z0.s }, p0/z, [z1.s] +# CHECK-NEXT: 1 7 7.00 * ldnt1sb { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: 1 7 7.00 * ldnt1sb { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: 1 9 9.00 * ldnt1sb { z31.s }, p7/z, [z31.s, x0] +# CHECK-NEXT: 1 9 9.00 * ldnt1sb { z31.s }, p7/z, [z31.s] +# CHECK-NEXT: 1 7 7.00 * ldnt1sh { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: 1 9 9.00 * ldnt1sh { z0.s }, p0/z, [z1.s] +# CHECK-NEXT: 1 7 7.00 * ldnt1sh { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: 1 7 7.00 * ldnt1sh { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: 1 9 9.00 * ldnt1sh { z31.s }, p7/z, [z31.s, x0] +# CHECK-NEXT: 1 9 9.00 * ldnt1sh { z31.s }, p7/z, [z31.s] +# CHECK-NEXT: 1 7 7.00 * ldnt1sw { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: 1 7 7.00 * ldnt1sw { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: 1 7 7.00 * ldnt1sw { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: 1 7 7.00 * ldnt1w { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: 1 3 1.00 * ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: 1 3 1.00 * ldnt1w { z0.s }, p0/z, [x0] +# 
CHECK-NEXT: 1 9 9.00 * ldnt1w { z0.s }, p0/z, [z1.s] +# CHECK-NEXT: 1 3 1.00 * ldnt1w { z21.s }, p5/z, [x10, #7, mul vl] +# CHECK-NEXT: 1 3 1.00 * ldnt1w { z23.s }, p3/z, [x13, #-8, mul vl] +# CHECK-NEXT: 1 7 7.00 * ldnt1w { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: 1 7 7.00 * ldnt1w { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: 1 9 9.00 * ldnt1w { z31.s }, p7/z, [z31.s, x0] +# CHECK-NEXT: 1 9 9.00 * ldnt1w { z31.s }, p7/z, [z31.s] +# CHECK-NEXT: 1 3 1.00 * ldr p0, [x0] +# CHECK-NEXT: 1 3 1.00 * ldr p5, [x10, #255, mul vl] +# CHECK-NEXT: 1 3 1.00 * ldr p7, [x13, #-256, mul vl] +# CHECK-NEXT: 1 3 1.00 * ldr z0, [x0] +# CHECK-NEXT: 1 3 1.00 * ldr z23, [x13, #255, mul vl] +# CHECK-NEXT: 1 3 1.00 * ldr z31, [sp, #-256, mul vl] +# CHECK-NEXT: 1 3 1.00 lsl z0.b, p0/m, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 lsl z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 lsl z0.b, p0/m, z0.b, z1.d +# CHECK-NEXT: 1 3 1.00 lsl z0.b, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 lsl z0.b, z1.b, z2.d +# CHECK-NEXT: 1 3 1.00 lsl z0.d, p0/m, z0.d, #0 +# CHECK-NEXT: 1 3 1.00 lsl z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 lsl z0.d, z0.d, #0 +# CHECK-NEXT: 1 3 1.00 lsl z0.h, p0/m, z0.h, #0 +# CHECK-NEXT: 1 3 1.00 lsl z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 lsl z0.h, p0/m, z0.h, z1.d +# CHECK-NEXT: 1 3 1.00 lsl z0.h, z0.h, #0 +# CHECK-NEXT: 1 3 1.00 lsl z0.h, z1.h, z2.d +# CHECK-NEXT: 1 3 1.00 lsl z0.s, p0/m, z0.s, #0 +# CHECK-NEXT: 1 3 1.00 lsl z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: 1 3 1.00 lsl z0.s, p0/m, z0.s, z1.d +# CHECK-NEXT: 1 3 1.00 lsl z0.s, z0.s, #0 +# CHECK-NEXT: 1 3 1.00 lsl z0.s, z1.s, z2.d +# CHECK-NEXT: 1 3 1.00 lsl z31.b, p0/m, z31.b, #7 +# CHECK-NEXT: 1 3 1.00 lsl z31.b, z31.b, #7 +# CHECK-NEXT: 1 3 1.00 lsl z31.d, p0/m, z31.d, #63 +# CHECK-NEXT: 1 3 1.00 lsl z31.d, z31.d, #63 +# CHECK-NEXT: 1 3 1.00 lsl z31.h, p0/m, z31.h, #15 +# CHECK-NEXT: 1 3 1.00 lsl z31.h, z31.h, #15 +# CHECK-NEXT: 1 3 1.00 lsl z31.s, p0/m, z31.s, #31 +# CHECK-NEXT: 1 3 1.00 lsl z31.s, z31.s, #31 +# CHECK-NEXT: 1 3 
1.00 lslr z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 lslr z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 lslr z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 lslr z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: 1 3 1.00 lsr z0.b, p0/m, z0.b, #1 +# CHECK-NEXT: 1 3 1.00 lsr z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 lsr z0.b, p0/m, z0.b, z1.d +# CHECK-NEXT: 1 3 1.00 lsr z0.b, z0.b, #1 +# CHECK-NEXT: 1 3 1.00 lsr z0.b, z1.b, z2.d +# CHECK-NEXT: 1 3 1.00 lsr z0.d, p0/m, z0.d, #1 +# CHECK-NEXT: 1 3 1.00 lsr z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 lsr z0.d, z0.d, #1 +# CHECK-NEXT: 1 3 1.00 lsr z0.h, p0/m, z0.h, #1 +# CHECK-NEXT: 1 3 1.00 lsr z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 lsr z0.h, p0/m, z0.h, z1.d +# CHECK-NEXT: 1 3 1.00 lsr z0.h, z0.h, #1 +# CHECK-NEXT: 1 3 1.00 lsr z0.h, z1.h, z2.d +# CHECK-NEXT: 1 3 1.00 lsr z0.s, p0/m, z0.s, #1 +# CHECK-NEXT: 1 3 1.00 lsr z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: 1 3 1.00 lsr z0.s, p0/m, z0.s, z1.d +# CHECK-NEXT: 1 3 1.00 lsr z0.s, z0.s, #1 +# CHECK-NEXT: 1 3 1.00 lsr z0.s, z1.s, z2.d +# CHECK-NEXT: 1 3 1.00 lsr z31.b, p0/m, z31.b, #8 +# CHECK-NEXT: 1 3 1.00 lsr z31.b, z31.b, #8 +# CHECK-NEXT: 1 3 1.00 lsr z31.d, p0/m, z31.d, #64 +# CHECK-NEXT: 1 3 1.00 lsr z31.d, z31.d, #64 +# CHECK-NEXT: 1 3 1.00 lsr z31.h, p0/m, z31.h, #16 +# CHECK-NEXT: 1 3 1.00 lsr z31.h, z31.h, #16 +# CHECK-NEXT: 1 3 1.00 lsr z31.s, p0/m, z31.s, #32 +# CHECK-NEXT: 1 3 1.00 lsr z31.s, z31.s, #32 +# CHECK-NEXT: 1 3 1.00 lsrr z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 lsrr z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 lsrr z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 lsrr z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: 1 4 1.00 mad z0.b, p7/m, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 mad z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 mad z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 mad z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: 1 9 2.00 match p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: 1 9 2.00 match p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: 1 9 2.00 match p15.b, 
p7/z, z30.b, z31.b +# CHECK-NEXT: 1 9 2.00 match p15.h, p7/z, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 mla z0.b, p7/m, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 mla z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 mla z0.d, z1.d, z7.d[1] +# CHECK-NEXT: 1 4 1.00 mla z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 mla z0.h, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 mla z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 mla z0.s, z1.s, z7.s[3] +# CHECK-NEXT: 1 4 1.00 mls z0.b, p7/m, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 mls z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 mls z0.d, z1.d, z7.d[1] +# CHECK-NEXT: 1 4 1.00 mls z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 mls z0.h, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 mls z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 mls z0.s, z1.s, z7.s[3] +# CHECK-NEXT: 1 2 1.00 mov p0.b, p0.b +# CHECK-NEXT: 1 2 1.00 mov p0.b, p0/m, p0.b +# CHECK-NEXT: 1 2 1.00 mov p0.b, p0/z, p0.b +# CHECK-NEXT: 1 2 1.00 mov p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 mov p15.b, p15/m, p15.b +# CHECK-NEXT: 1 2 1.00 mov p15.b, p15/z, p15.b +# CHECK-NEXT: 1 3 1.00 mov z0.b, #127 +# CHECK-NEXT: 1 3 1.00 mov z0.b, b0 +# CHECK-NEXT: 1 3 1.00 mov z0.b, p0/m, b0 +# CHECK-NEXT: 1 3 1.00 mov z0.b, p0/m, w0 +# CHECK-NEXT: 1 3 1.00 mov z0.b, p0/z, #127 +# CHECK-NEXT: 1 3 1.00 mov z0.b, w0 +# CHECK-NEXT: 1 3 1.00 mov z0.d, #0 +# CHECK-NEXT: 1 4 1.00 mov z0.d, #0xe0000000000003ff +# CHECK-NEXT: 1 4 1.00 mov z0.d, #0xffffffffffff7fff +# CHECK-NEXT: 1 4 1.00 mov z0.d, #32768 +# CHECK-NEXT: 1 3 1.00 mov z0.d, d0 +# CHECK-NEXT: 1 3 1.00 mov z0.d, p0/m, d0 +# CHECK-NEXT: 1 3 1.00 mov z0.d, p0/m, x0 +# CHECK-NEXT: 1 3 1.00 mov z0.d, x0 +# CHECK-NEXT: 1 3 1.00 mov z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 mov z0.h, #-256 +# CHECK-NEXT: 1 3 1.00 mov z0.h, #-32768 +# CHECK-NEXT: 1 3 1.00 mov z0.h, #0 +# CHECK-NEXT: 1 3 1.00 mov z0.h, #32512 +# CHECK-NEXT: 1 4 1.00 mov z0.h, #32767 +# CHECK-NEXT: 1 3 1.00 mov z0.h, h0 +# CHECK-NEXT: 1 3 1.00 mov z0.h, p0/m, h0 +# CHECK-NEXT: 1 3 1.00 mov z0.h, p0/m, w0 
+# CHECK-NEXT: 1 3 1.00 mov z0.h, p0/z, #32512 +# CHECK-NEXT: 1 3 1.00 mov z0.h, w0 +# CHECK-NEXT: 1 3 1.00 mov z0.q, q0 +# CHECK-NEXT: 1 3 1.00 mov z0.s, #0 +# CHECK-NEXT: 1 4 1.00 mov z0.s, #0xffff7fff +# CHECK-NEXT: 1 4 1.00 mov z0.s, #32768 +# CHECK-NEXT: 1 3 1.00 mov z0.s, p0/m, s0 +# CHECK-NEXT: 1 3 1.00 mov z0.s, p0/m, w0 +# CHECK-NEXT: 1 3 1.00 mov z0.s, s0 +# CHECK-NEXT: 1 3 1.00 mov z0.s, w0 +# CHECK-NEXT: 1 3 1.00 mov z21.d, #-128 +# CHECK-NEXT: 1 3 1.00 mov z21.d, #-32768 +# CHECK-NEXT: 1 3 1.00 mov z21.d, #127 +# CHECK-NEXT: 1 3 1.00 mov z21.d, #32512 +# CHECK-NEXT: 1 3 1.00 mov z21.d, p0/z, #-128 +# CHECK-NEXT: 1 3 1.00 mov z21.d, p0/z, #-32768 +# CHECK-NEXT: 1 3 1.00 mov z21.d, p0/z, #127 +# CHECK-NEXT: 1 3 1.00 mov z21.d, p0/z, #32512 +# CHECK-NEXT: 1 3 1.00 mov z21.d, p15/m, #-128 +# CHECK-NEXT: 1 3 1.00 mov z21.d, p15/m, #-32768 +# CHECK-NEXT: 1 3 1.00 mov z21.h, #-128 +# CHECK-NEXT: 1 3 1.00 mov z21.h, #-32768 +# CHECK-NEXT: 1 3 1.00 mov z21.h, #127 +# CHECK-NEXT: 1 3 1.00 mov z21.h, #32512 +# CHECK-NEXT: 1 3 1.00 mov z21.h, p0/z, #-128 +# CHECK-NEXT: 1 3 1.00 mov z21.h, p0/z, #-32768 +# CHECK-NEXT: 1 3 1.00 mov z21.h, p0/z, #127 +# CHECK-NEXT: 1 3 1.00 mov z21.h, p0/z, #32512 +# CHECK-NEXT: 1 3 1.00 mov z21.h, p15/m, #-128 +# CHECK-NEXT: 1 3 1.00 mov z21.h, p15/m, #-32768 +# CHECK-NEXT: 1 3 1.00 mov z21.s, #-128 +# CHECK-NEXT: 1 3 1.00 mov z21.s, #-32768 +# CHECK-NEXT: 1 3 1.00 mov z21.s, #127 +# CHECK-NEXT: 1 3 1.00 mov z21.s, #32512 +# CHECK-NEXT: 1 3 1.00 mov z21.s, p0/z, #-128 +# CHECK-NEXT: 1 3 1.00 mov z21.s, p0/z, #-32768 +# CHECK-NEXT: 1 3 1.00 mov z21.s, p0/z, #127 +# CHECK-NEXT: 1 3 1.00 mov z21.s, p0/z, #32512 +# CHECK-NEXT: 1 3 1.00 mov z21.s, p15/m, #-128 +# CHECK-NEXT: 1 3 1.00 mov z21.s, p15/m, #-32768 +# CHECK-NEXT: 1 3 1.00 mov z31.b, p15/m, z31.b +# CHECK-NEXT: 1 3 1.00 mov z31.b, p7/m, b31 +# CHECK-NEXT: 1 3 1.00 movprfx z31, z6 +# CHECK-NEXT: 1 3 1.00 mov z31.b, p7/m, wsp +# CHECK-NEXT: 1 3 1.00 mov z31.b, wsp +# CHECK-NEXT: 
1 3 1.00 mov z31.b, z31.b[63] +# CHECK-NEXT: 1 3 1.00 mov z31.d, p15/m, z31.d +# CHECK-NEXT: 1 3 1.00 mov z31.d, p7/m, d31 +# CHECK-NEXT: 1 3 1.00 movprfx z31.d, p7/z, z6.d +# CHECK-NEXT: 1 3 1.00 mov z31.d, p7/m, sp +# CHECK-NEXT: 1 3 1.00 mov z31.d, sp +# CHECK-NEXT: 1 3 1.00 mov z31.d, z0.d +# CHECK-NEXT: 1 3 1.00 mov z31.d, z31.d[7] +# CHECK-NEXT: 1 3 1.00 mov z31.h, p15/m, z31.h +# CHECK-NEXT: 1 3 1.00 mov z31.h, p7/m, h31 +# CHECK-NEXT: 1 3 1.00 mov z31.h, p7/m, wsp +# CHECK-NEXT: 1 3 1.00 mov z31.h, wsp +# CHECK-NEXT: 1 3 1.00 mov z31.h, z31.h[31] +# CHECK-NEXT: 1 3 1.00 mov z31.s, p15/m, z31.s +# CHECK-NEXT: 1 3 1.00 mov z31.s, p7/m, s31 +# CHECK-NEXT: 1 3 1.00 mov z31.s, p7/m, wsp +# CHECK-NEXT: 1 3 1.00 mov z31.s, wsp +# CHECK-NEXT: 1 3 1.00 mov z31.s, z31.s[15] +# CHECK-NEXT: 1 3 1.00 mov z5.b, #-1 +# CHECK-NEXT: 1 3 1.00 mov z5.b, #-128 +# CHECK-NEXT: 1 3 1.00 mov z5.b, #127 +# CHECK-NEXT: 1 3 1.00 mov z5.b, p0/z, #-1 +# CHECK-NEXT: 1 3 1.00 mov z5.b, p0/z, #-128 +# CHECK-NEXT: 1 3 1.00 mov z5.b, p0/z, #127 +# CHECK-NEXT: 1 3 1.00 mov z5.b, p15/m, #-128 +# CHECK-NEXT: 1 3 1.00 mov z5.d, #-6 +# CHECK-NEXT: 1 3 1.00 mov z5.h, #-6 +# CHECK-NEXT: 1 3 1.00 mov z5.q, z17.q[3] +# CHECK-NEXT: 1 3 1.00 mov z5.s, #-6 +# CHECK-NEXT: 1 2 1.00 movs p0.b, p0.b +# CHECK-NEXT: 1 2 1.00 movs p0.b, p0/z, p0.b +# CHECK-NEXT: 1 2 1.00 movs p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 movs p15.b, p15/z, p15.b +# CHECK-NEXT: 1 1 1.00 U mrs x3, ID_AA64ZFR0_EL1 +# CHECK-NEXT: 1 1 1.00 U mrs x3, ZCR_EL1 +# CHECK-NEXT: 1 1 1.00 U mrs x3, ZCR_EL12 +# CHECK-NEXT: 1 1 1.00 U mrs x3, ZCR_EL2 +# CHECK-NEXT: 1 1 1.00 U mrs x3, ZCR_EL3 +# CHECK-NEXT: 1 4 1.00 msb z0.b, p7/m, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 msb z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 msb z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 msb z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: 1 1 1.00 U msr ZCR_EL1, x3 +# CHECK-NEXT: 1 1 1.00 U msr ZCR_EL12, x3 +# CHECK-NEXT: 1 1 1.00 U msr ZCR_EL2, x3 +# CHECK-NEXT: 1 1 1.00 U msr 
ZCR_EL3, x3 +# CHECK-NEXT: 1 4 1.00 mul z0.b, p7/m, z0.b, z31.b +# CHECK-NEXT: 1 4 1.00 mul z0.b, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 mul z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 mul z0.d, z1.d, z15.d[1] +# CHECK-NEXT: 1 4 1.00 mul z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 mul z0.h, z1.h, z2.h +# CHECK-NEXT: 1 4 1.00 mul z0.h, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 mul z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 mul z0.s, z1.s, z7.s[3] +# CHECK-NEXT: 1 4 1.00 mul z29.s, z30.s, z31.s +# CHECK-NEXT: 1 4 1.00 mul z31.b, z31.b, #-128 +# CHECK-NEXT: 1 4 1.00 mul z31.b, z31.b, #127 +# CHECK-NEXT: 1 4 1.00 mul z31.d, z31.d, #-128 +# CHECK-NEXT: 1 4 1.00 mul z31.d, z31.d, #127 +# CHECK-NEXT: 1 4 1.00 mul z31.d, z31.d, z31.d +# CHECK-NEXT: 1 4 1.00 mul z31.h, z31.h, #-128 +# CHECK-NEXT: 1 4 1.00 mul z31.h, z31.h, #127 +# CHECK-NEXT: 1 4 1.00 mul z31.s, z31.s, #-128 +# CHECK-NEXT: 1 4 1.00 mul z31.s, z31.s, #127 +# CHECK-NEXT: 1 2 1.00 nand p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: 1 2 1.00 nand p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 nands p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: 1 2 1.00 nands p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 3 1.00 nbsl z0.d, z0.d, z1.d, z2.d +# CHECK-NEXT: 1 3 1.00 neg z0.b, p0/m, z0.b +# CHECK-NEXT: 1 3 1.00 neg z0.d, p0/m, z0.d +# CHECK-NEXT: 1 3 1.00 neg z0.h, p0/m, z0.h +# CHECK-NEXT: 1 3 1.00 neg z0.s, p0/m, z0.s +# CHECK-NEXT: 1 3 1.00 neg z31.b, p7/m, z31.b +# CHECK-NEXT: 1 3 1.00 neg z31.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 neg z31.h, p7/m, z31.h +# CHECK-NEXT: 1 3 1.00 neg z31.s, p7/m, z31.s +# CHECK-NEXT: 1 9 2.00 nmatch p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: 1 9 2.00 nmatch p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: 1 9 2.00 nmatch p15.b, p7/z, z30.b, z31.b +# CHECK-NEXT: 1 9 2.00 nmatch p15.h, p7/z, z30.h, z31.h +# CHECK-NEXT: 1 2 1.00 nor p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: 1 2 1.00 nor p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 nors p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: 1 2 1.00 nors p15.b, p15/z, p15.b, 
p15.b +# CHECK-NEXT: 1 2 1.00 not p0.b, p0/z, p0.b +# CHECK-NEXT: 1 2 1.00 not p15.b, p15/z, p15.b +# CHECK-NEXT: 1 3 1.00 not z31.b, p7/m, z31.b +# CHECK-NEXT: 1 3 1.00 not z31.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 not z31.h, p7/m, z31.h +# CHECK-NEXT: 1 3 1.00 not z31.s, p7/m, z31.s +# CHECK-NEXT: 1 2 1.00 nots p0.b, p0/z, p0.b +# CHECK-NEXT: 1 2 1.00 nots p15.b, p15/z, p15.b +# CHECK-NEXT: 1 2 1.00 orn p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: 1 2 1.00 orn p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 orns p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: 1 2 1.00 orns p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 orr p0.b, p0/z, p0.b, p1.b +# CHECK-NEXT: 1 3 1.00 orr z0.d, z0.d, #0x6 +# CHECK-NEXT: 1 3 1.00 orr z0.d, z0.d, #0xfffffffffffffff9 +# CHECK-NEXT: 1 3 1.00 orr z0.s, z0.s, #0x6 +# CHECK-NEXT: 1 3 1.00 orr z0.s, z0.s, #0xfffffff9 +# CHECK-NEXT: 1 3 1.00 orr z23.d, z13.d, z8.d +# CHECK-NEXT: 1 3 1.00 orr z23.h, z23.h, #0x6 +# CHECK-NEXT: 1 3 1.00 orr z23.h, z23.h, #0xfff9 +# CHECK-NEXT: 1 3 1.00 orr z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 orr z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 orr z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 orr z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 orr z5.b, z5.b, #0x6 +# CHECK-NEXT: 1 3 1.00 orr z5.b, z5.b, #0xf9 +# CHECK-NEXT: 1 2 1.00 orrs p0.b, p0/z, p0.b, p1.b +# CHECK-NEXT: 1 4 1.00 orv b0, p7, z31.b +# CHECK-NEXT: 1 4 1.00 orv d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 orv h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 orv s0, p7, z31.s +# CHECK-NEXT: 1 2 1.00 pfalse p15.b +# CHECK-NEXT: 1 2 1.00 pfirst p0.b, p15, p0.b +# CHECK-NEXT: 1 2 1.00 pfirst p15.b, p15, p15.b +# CHECK-NEXT: 1 4 1.00 pmul z0.b, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 pmul z29.b, z30.b, z31.b +# CHECK-NEXT: 1 9 1.00 pmullb z0.h, z1.b, z2.b +# CHECK-NEXT: 1 9 1.00 pmullb z29.q, z30.d, z31.d +# CHECK-NEXT: 1 9 1.00 pmullb z31.d, z31.s, z31.s +# CHECK-NEXT: 1 9 1.00 pmullt z0.h, z1.b, z2.b +# CHECK-NEXT: 1 9 1.00 pmullt z29.q, z30.d, z31.d +# 
CHECK-NEXT: 1 9 1.00 pmullt z31.d, z31.s, z31.s +# CHECK-NEXT: 1 2 1.00 pnext p0.b, p15, p0.b +# CHECK-NEXT: 1 2 1.00 pnext p0.d, p15, p0.d +# CHECK-NEXT: 1 2 1.00 pnext p0.h, p15, p0.h +# CHECK-NEXT: 1 2 1.00 pnext p0.s, p15, p0.s +# CHECK-NEXT: 1 2 1.00 pnext p15.b, p15, p15.b +# CHECK-NEXT: 1 0 1.00 * * U prfb #14, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb #15, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb #6, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb #7, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb #7, p3, [z13.s, #31] +# CHECK-NEXT: 1 0 1.00 * * U prfb #7, p3, [z13.s] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl1keep, p0, [x0, z0.d, uxtw] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl1keep, p0, [x0, z0.d] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl1keep, p0, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl1keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl1strm, p0, [x0, #-32, mul vl] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl1strm, p0, [x0, #31, mul vl] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl1strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl2keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl2strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl3keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl3strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl3strm, p5, [x10, z21.d, sxtw] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl3strm, p5, [x10, z21.s, uxtw] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl3strm, p5, [z10.d, #31] +# CHECK-NEXT: 1 0 1.00 * * U prfb pldl3strm, p5, [z10.d] +# CHECK-NEXT: 1 0 1.00 * * U prfb pstl1keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb pstl1strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb pstl2keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb pstl2strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb pstl3keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfb pstl3strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd #14, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd #15, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd #15, p7, [z31.d, #248] +# 
CHECK-NEXT: 1 0 1.00 * * U prfd #15, p7, [z31.d] +# CHECK-NEXT: 1 0 1.00 * * U prfd #15, p7, [z31.s, #248] +# CHECK-NEXT: 1 0 1.00 * * U prfd #15, p7, [z31.s] +# CHECK-NEXT: 1 0 1.00 * * U prfd #6, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd #7, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl1keep, p0, [x0, z0.d, lsl #3] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl1keep, p0, [x0, z0.d, sxtw #3] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl1keep, p0, [x0, z0.d, uxtw #3] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl1keep, p0, [x0, z0.s, sxtw #3] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl1keep, p0, [x0, z0.s, uxtw #3] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl1keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl1strm, p0, [x0, #-32, mul vl] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl1strm, p0, [x0, #31, mul vl] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl1strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl2keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl2strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl3keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd pldl3strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd pstl1keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd pstl1strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd pstl2keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd pstl2strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd pstl3keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfd pstl3strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh #14, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh #15, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh #15, p7, [z31.d, #62] +# CHECK-NEXT: 1 0 1.00 * * U prfh #15, p7, [z31.d] +# CHECK-NEXT: 1 0 1.00 * * U prfh #15, p7, [z31.s, #62] +# CHECK-NEXT: 1 0 1.00 * * U prfh #15, p7, [z31.s] +# CHECK-NEXT: 1 0 1.00 * * U prfh #6, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh #7, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl1keep, p0, [x0, z0.d, lsl #1] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl1keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl1strm, p0, [x0, #-32, 
mul vl] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl1strm, p0, [x0, #31, mul vl] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl1strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl2keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl2strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl3keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl3strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl3strm, p5, [x10, z21.d, sxtw #1] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl3strm, p5, [x10, z21.d, uxtw #1] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl3strm, p5, [x10, z21.s, sxtw #1] +# CHECK-NEXT: 1 0 1.00 * * U prfh pldl3strm, p5, [x10, z21.s, uxtw #1] +# CHECK-NEXT: 1 0 1.00 * * U prfh pstl1keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh pstl1strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh pstl2keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh pstl2strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh pstl3keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfh pstl3strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw #14, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw #15, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw #15, p7, [z31.d, #124] +# CHECK-NEXT: 1 0 1.00 * * U prfw #15, p7, [z31.d] +# CHECK-NEXT: 1 0 1.00 * * U prfw #15, p7, [z31.s, #124] +# CHECK-NEXT: 1 0 1.00 * * U prfw #15, p7, [z31.s] +# CHECK-NEXT: 1 0 1.00 * * U prfw #6, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw #7, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw #7, p3, [x13, z8.d, uxtw #2] +# CHECK-NEXT: 1 0 1.00 * * U prfw pldl1keep, p0, [x0, z0.d, sxtw #2] +# CHECK-NEXT: 1 0 1.00 * * U prfw pldl1keep, p0, [x0, z0.s, uxtw #2] +# CHECK-NEXT: 1 0 1.00 * * U prfw pldl1keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw pldl1strm, p0, [x0, #-32, mul vl] +# CHECK-NEXT: 1 0 1.00 * * U prfw pldl1strm, p0, [x0, #31, mul vl] +# CHECK-NEXT: 1 0 1.00 * * U prfw pldl1strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw pldl2keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw pldl2strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw pldl3keep, p0, [x0] 
+# CHECK-NEXT: 1 0 1.00 * * U prfw pldl3strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw pldl3strm, p5, [x10, z21.d, lsl #2] +# CHECK-NEXT: 1 0 1.00 * * U prfw pldl3strm, p5, [x10, z21.s, sxtw #2] +# CHECK-NEXT: 1 0 1.00 * * U prfw pstl1keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw pstl1strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw pstl2keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw pstl2strm, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw pstl3keep, p0, [x0] +# CHECK-NEXT: 1 0 1.00 * * U prfw pstl3strm, p0, [x0] +# CHECK-NEXT: 1 2 1.00 ptest p15, p0.b +# CHECK-NEXT: 1 2 1.00 ptest p15, p15.b +# CHECK-NEXT: 1 2 1.00 ptrue p0.b, pow2 +# CHECK-NEXT: 1 2 1.00 ptrue p0.d, pow2 +# CHECK-NEXT: 1 2 1.00 ptrue p0.h, pow2 +# CHECK-NEXT: 1 2 1.00 ptrue p0.s, pow2 +# CHECK-NEXT: 1 2 1.00 ptrue p15.b +# CHECK-NEXT: 1 2 1.00 ptrue p15.d +# CHECK-NEXT: 1 2 1.00 ptrue p15.h +# CHECK-NEXT: 1 2 1.00 ptrue p15.s +# CHECK-NEXT: 1 2 1.00 ptrue p7.s +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #14 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #15 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #16 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #17 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #18 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #19 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #20 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #21 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #22 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #23 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #24 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #25 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #26 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #27 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, #28 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, mul3 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, mul4 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl1 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl128 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl16 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl2 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl256 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl3 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl32 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl4 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl5 
+# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl6 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl64 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl7 +# CHECK-NEXT: 1 2 1.00 ptrue p7.s, vl8 +# CHECK-NEXT: 1 2 1.00 ptrues p0.b, pow2 +# CHECK-NEXT: 1 2 1.00 ptrues p0.d, pow2 +# CHECK-NEXT: 1 2 1.00 ptrues p0.h, pow2 +# CHECK-NEXT: 1 2 1.00 ptrues p0.s, pow2 +# CHECK-NEXT: 1 2 1.00 ptrues p15.b +# CHECK-NEXT: 1 2 1.00 ptrues p15.d +# CHECK-NEXT: 1 2 1.00 ptrues p15.h +# CHECK-NEXT: 1 2 1.00 ptrues p15.s +# CHECK-NEXT: 1 2 1.00 ptrues p7.s +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #14 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #15 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #16 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #17 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #18 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #19 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #20 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #21 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #22 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #23 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #24 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #25 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #26 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #27 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, #28 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, mul3 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, mul4 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl1 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl128 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl16 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl2 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl256 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl3 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl32 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl4 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl5 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl6 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl64 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl7 +# CHECK-NEXT: 1 2 1.00 ptrues p7.s, vl8 +# CHECK-NEXT: 1 2 1.00 punpkhi p0.h, p0.b +# CHECK-NEXT: 1 2 1.00 punpkhi p15.h, p15.b +# CHECK-NEXT: 1 2 1.00 punpklo p0.h, p0.b +# CHECK-NEXT: 1 2 1.00 punpklo p15.h, p15.b +# CHECK-NEXT: 1 8 1.00 raddhnb z0.b, z1.h, z31.h +# CHECK-NEXT: 1 8 
1.00 raddhnb z0.h, z1.s, z31.s +# CHECK-NEXT: 1 8 1.00 raddhnb z0.s, z1.d, z31.d +# CHECK-NEXT: 1 8 1.00 raddhnt z0.b, z1.h, z31.h +# CHECK-NEXT: 1 8 1.00 raddhnt z0.h, z1.s, z31.s +# CHECK-NEXT: 1 8 1.00 raddhnt z0.s, z1.d, z31.d +# CHECK-NEXT: 1 9 1.00 rax1 z0.d, z1.d, z31.d +# CHECK-NEXT: 1 3 1.00 rbit z0.b, p7/m, z31.b +# CHECK-NEXT: 1 3 1.00 rbit z0.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 rbit z0.h, p7/m, z31.h +# CHECK-NEXT: 1 3 1.00 rbit z0.s, p7/m, z31.s +# CHECK-NEXT: 1 1 1.00 * U rdffr p0.b +# CHECK-NEXT: 1 3 1.00 * U rdffr p0.b, p0/z +# CHECK-NEXT: 1 1 1.00 * U rdffr p15.b +# CHECK-NEXT: 1 3 1.00 * U rdffr p15.b, p15/z +# CHECK-NEXT: 1 3 1.00 U rdffrs p0.b, p0/z +# CHECK-NEXT: 1 3 1.00 U rdffrs p15.b, p15/z +# CHECK-NEXT: 1 1 1.00 rdvl x0, #0 +# CHECK-NEXT: 1 1 1.00 rdvl x21, #-32 +# CHECK-NEXT: 1 1 1.00 rdvl x23, #31 +# CHECK-NEXT: 1 1 1.00 rdvl xzr, #-1 +# CHECK-NEXT: 1 3 1.00 rev z0.b, z31.b +# CHECK-NEXT: 1 3 1.00 rev z0.d, z31.d +# CHECK-NEXT: 1 3 1.00 rev z0.h, z31.h +# CHECK-NEXT: 1 3 1.00 rev z0.s, z31.s +# CHECK-NEXT: 1 3 1.00 revb z0.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 revb z0.h, p7/m, z31.h +# CHECK-NEXT: 1 3 1.00 revb z0.s, p7/m, z31.s +# CHECK-NEXT: 1 3 1.00 revh z0.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 revh z0.s, p7/m, z31.s +# CHECK-NEXT: 1 3 1.00 revw z0.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 rshrnb z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 rshrnb z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 rshrnb z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 rshrnb z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 rshrnb z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 rshrnb z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 rshrnt z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 rshrnt z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 rshrnt z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 rshrnt z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 rshrnt z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 rshrnt z31.s, z31.d, #32 +# CHECK-NEXT: 1 8 1.00 rsubhnb z0.b, z1.h, z31.h +# CHECK-NEXT: 1 8 1.00 rsubhnb z0.h, z1.s, z31.s +# 
CHECK-NEXT: 1 8 1.00 rsubhnb z0.s, z1.d, z31.d +# CHECK-NEXT: 1 8 1.00 rsubhnt z0.b, z1.h, z31.h +# CHECK-NEXT: 1 8 1.00 rsubhnt z0.h, z1.s, z31.s +# CHECK-NEXT: 1 8 1.00 rsubhnt z0.s, z1.d, z31.d +# CHECK-NEXT: 1 6 2.00 saba z0.b, z1.b, z31.b +# CHECK-NEXT: 1 6 2.00 saba z0.d, z1.d, z31.d +# CHECK-NEXT: 1 6 2.00 saba z0.h, z1.h, z31.h +# CHECK-NEXT: 1 6 2.00 saba z0.s, z1.s, z31.s +# CHECK-NEXT: 1 6 2.00 sabalb z0.d, z1.s, z31.s +# CHECK-NEXT: 1 6 2.00 sabalb z0.h, z1.b, z31.b +# CHECK-NEXT: 1 6 2.00 sabalb z0.s, z1.h, z31.h +# CHECK-NEXT: 1 6 2.00 sabalt z0.d, z1.s, z31.s +# CHECK-NEXT: 1 6 2.00 sabalt z0.h, z1.b, z31.b +# CHECK-NEXT: 1 6 2.00 sabalt z0.s, z1.h, z31.h +# CHECK-NEXT: 1 3 1.00 sabd z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 sabd z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 sabd z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 sabd z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 sabdlb z0.h, z1.b, z2.b +# CHECK-NEXT: 1 3 1.00 sabdlb z29.s, z30.h, z31.h +# CHECK-NEXT: 1 3 1.00 sabdlb z31.d, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 sabdlt z0.h, z1.b, z2.b +# CHECK-NEXT: 1 3 1.00 sabdlt z29.s, z30.h, z31.h +# CHECK-NEXT: 1 3 1.00 sabdlt z31.d, z31.s, z31.s +# CHECK-NEXT: 1 7 2.00 sadalp z0.h, p0/m, z1.b +# CHECK-NEXT: 1 7 2.00 sadalp z29.s, p0/m, z30.h +# CHECK-NEXT: 1 7 2.00 sadalp z30.d, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 saddlb z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 saddlb z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 saddlb z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 saddlbt z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 saddlbt z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 saddlbt z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 saddlt z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 saddlt z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 saddlt z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 saddv d0, p7, z31.b +# CHECK-NEXT: 1 4 1.00 saddv d0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 saddv d0, p7, z31.s +# CHECK-NEXT: 1 4 1.00 saddwb z0.h, z1.h, z2.b +# CHECK-NEXT: 1 4 
1.00 saddwb z29.s, z30.s, z31.h +# CHECK-NEXT: 1 4 1.00 saddwb z31.d, z31.d, z31.s +# CHECK-NEXT: 1 4 1.00 saddwt z0.h, z1.h, z2.b +# CHECK-NEXT: 1 4 1.00 saddwt z29.s, z30.s, z31.h +# CHECK-NEXT: 1 4 1.00 saddwt z31.d, z31.d, z31.s +# CHECK-NEXT: 1 4 1.00 sbclb z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 sbclb z0.s, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 sbclt z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 sbclt z0.s, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 scvtf z0.d, p0/m, z0.d +# CHECK-NEXT: 1 4 1.00 scvtf z0.d, p0/m, z0.s +# CHECK-NEXT: 1 4 1.00 scvtf z0.h, p0/m, z0.d +# CHECK-NEXT: 1 4 1.00 scvtf z0.h, p0/m, z0.h +# CHECK-NEXT: 1 4 1.00 scvtf z0.h, p0/m, z0.s +# CHECK-NEXT: 1 4 1.00 scvtf z0.s, p0/m, z0.d +# CHECK-NEXT: 1 4 1.00 scvtf z0.s, p0/m, z0.s +# CHECK-NEXT: 1 26 23.00 sdiv z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 15 12.00 sdiv z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 26 23.00 sdivr z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 15 12.00 sdivr z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 sdot z0.d, z1.h, z15.h[1] +# CHECK-NEXT: 1 4 1.00 sdot z0.d, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 sdot z0.s, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 sdot z0.s, z1.b, z7.b[3] +# CHECK-NEXT: 1 3 1.00 sel z23.b, p11, z13.b, z8.b +# CHECK-NEXT: 1 3 1.00 sel z23.d, p11, z13.d, z8.d +# CHECK-NEXT: 1 3 1.00 sel z23.h, p11, z13.h, z8.h +# CHECK-NEXT: 1 3 1.00 sel z23.s, p11, z13.s, z8.s +# CHECK-NEXT: 1 1 1.00 * U setffr +# CHECK-NEXT: 1 3 1.00 shadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 3 1.00 shadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 3 1.00 shadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 3 1.00 shadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 3 1.00 shrnb z0.b, z0.h, #1 +# CHECK-NEXT: 1 3 1.00 shrnb z0.h, z0.s, #1 +# CHECK-NEXT: 1 3 1.00 shrnb z0.s, z0.d, #1 +# CHECK-NEXT: 1 3 1.00 shrnb z31.b, z31.h, #8 +# CHECK-NEXT: 1 3 1.00 shrnb z31.h, z31.s, #16 +# CHECK-NEXT: 1 3 1.00 shrnb z31.s, z31.d, #32 +# CHECK-NEXT: 1 3 1.00 shrnt z0.b, z0.h, #1 +# CHECK-NEXT: 1 3 1.00 shrnt z0.h, z0.s, #1 
+# CHECK-NEXT: 1 3 1.00 shrnt z0.s, z0.d, #1 +# CHECK-NEXT: 1 3 1.00 shrnt z31.b, z31.h, #8 +# CHECK-NEXT: 1 3 1.00 shrnt z31.h, z31.s, #16 +# CHECK-NEXT: 1 3 1.00 shrnt z31.s, z31.d, #32 +# CHECK-NEXT: 1 3 1.00 shsub z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 3 1.00 shsub z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 3 1.00 shsub z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 3 1.00 shsub z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 3 1.00 shsubr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 3 1.00 shsubr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 3 1.00 shsubr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 3 1.00 shsubr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 3 1.00 sli z0.b, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 sli z0.d, z0.d, #0 +# CHECK-NEXT: 1 3 1.00 sli z0.h, z0.h, #0 +# CHECK-NEXT: 1 3 1.00 sli z0.s, z0.s, #0 +# CHECK-NEXT: 1 3 1.00 sli z31.b, z31.b, #7 +# CHECK-NEXT: 1 3 1.00 sli z31.d, z31.d, #63 +# CHECK-NEXT: 1 3 1.00 sli z31.h, z31.h, #15 +# CHECK-NEXT: 1 3 1.00 sli z31.s, z31.s, #31 +# CHECK-NEXT: 1 9 1.00 sm4e z0.s, z0.s, z31.s +# CHECK-NEXT: 1 9 1.00 sm4ekey z0.s, z1.s, z31.s +# CHECK-NEXT: 1 3 1.00 smax z0.b, z0.b, #-128 +# CHECK-NEXT: 1 3 1.00 smax z0.d, z0.d, #-128 +# CHECK-NEXT: 1 3 1.00 smax z0.h, z0.h, #-128 +# CHECK-NEXT: 1 3 1.00 smax z0.s, z0.s, #-128 +# CHECK-NEXT: 1 3 1.00 smax z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 smax z31.b, z31.b, #127 +# CHECK-NEXT: 1 3 1.00 smax z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 smax z31.d, z31.d, #127 +# CHECK-NEXT: 1 3 1.00 smax z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 smax z31.h, z31.h, #127 +# CHECK-NEXT: 1 3 1.00 smax z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 smax z31.s, z31.s, #127 +# CHECK-NEXT: 1 3 1.00 smaxp z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 3 1.00 smaxp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 3 1.00 smaxp z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 3 1.00 smaxp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 smaxv b0, p7, z31.b +# CHECK-NEXT: 1 4 1.00 smaxv d0, p7, z31.d +# 
CHECK-NEXT: 1 4 1.00 smaxv h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 smaxv s0, p7, z31.s +# CHECK-NEXT: 1 3 1.00 smin z0.b, z0.b, #-128 +# CHECK-NEXT: 1 3 1.00 smin z0.d, z0.d, #-128 +# CHECK-NEXT: 1 3 1.00 smin z0.h, z0.h, #-128 +# CHECK-NEXT: 1 3 1.00 smin z0.s, z0.s, #-128 +# CHECK-NEXT: 1 3 1.00 smin z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 smin z31.b, z31.b, #127 +# CHECK-NEXT: 1 3 1.00 smin z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 smin z31.d, z31.d, #127 +# CHECK-NEXT: 1 3 1.00 smin z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 smin z31.h, z31.h, #127 +# CHECK-NEXT: 1 3 1.00 smin z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 smin z31.s, z31.s, #127 +# CHECK-NEXT: 1 3 1.00 sminp z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 3 1.00 sminp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 3 1.00 sminp z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 3 1.00 sminp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 sminv b0, p7, z31.b +# CHECK-NEXT: 1 4 1.00 sminv d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 sminv h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 sminv s0, p7, z31.s +# CHECK-NEXT: 1 4 1.00 smlalb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 smlalb z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 smlalb z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 smlalb z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 smlalb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 smlalt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 smlalt z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 smlalt z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 smlalt z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 smlalt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 smlslb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 smlslb z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 smlslb z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 smlslb z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 smlslb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 smlslt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 smlslt z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 smlslt z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 
1.00 smlslt z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 smlslt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 smmla z0.s, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 smulh z0.b, p7/m, z0.b, z31.b +# CHECK-NEXT: 1 4 1.00 smulh z0.b, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 smulh z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 smulh z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 smulh z0.h, z1.h, z2.h +# CHECK-NEXT: 1 4 1.00 smulh z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 smulh z29.s, z30.s, z31.s +# CHECK-NEXT: 1 4 1.00 smulh z31.d, z31.d, z31.d +# CHECK-NEXT: 1 4 1.00 smullb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 smullb z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 smullb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 smullb z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 smullb z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 smullt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 smullt z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 smullt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 smullt z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 smullt z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 splice z29.b, p7, { z30.b, z31.b } +# CHECK-NEXT: 1 4 1.00 splice z29.d, p7, { z30.d, z31.d } +# CHECK-NEXT: 1 4 1.00 splice z29.h, p7, { z30.h, z31.h } +# CHECK-NEXT: 1 4 1.00 splice z29.s, p7, { z30.s, z31.s } +# CHECK-NEXT: 1 4 1.00 splice z31.b, p7, z31.b, z31.b +# CHECK-NEXT: 1 4 1.00 splice z31.d, p7, z31.d, z31.d +# CHECK-NEXT: 1 4 1.00 splice z31.h, p7, z31.h, z31.h +# CHECK-NEXT: 1 4 1.00 splice z31.s, p7, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 sqabs z31.b, p7/m, z31.b +# CHECK-NEXT: 1 4 1.00 sqabs z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 sqabs z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 sqabs z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 sqadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 sqadd z0.b, z0.b, #0 +# CHECK-NEXT: 1 4 1.00 sqadd z0.b, z0.b, z0.b +# CHECK-NEXT: 1 4 1.00 sqadd z0.d, z0.d, #0 +# CHECK-NEXT: 1 4 1.00 sqadd z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: 1 4 1.00 sqadd z0.d, z0.d, z0.d +# 
CHECK-NEXT: 1 4 1.00 sqadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 sqadd z0.h, z0.h, #0 +# CHECK-NEXT: 1 4 1.00 sqadd z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: 1 4 1.00 sqadd z0.h, z0.h, z0.h +# CHECK-NEXT: 1 4 1.00 sqadd z0.s, z0.s, #0 +# CHECK-NEXT: 1 4 1.00 sqadd z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: 1 4 1.00 sqadd z0.s, z0.s, z0.s +# CHECK-NEXT: 1 4 1.00 sqadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 sqadd z31.b, z31.b, #255 +# CHECK-NEXT: 1 4 1.00 sqadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 sqadd z31.d, z31.d, #65280 +# CHECK-NEXT: 1 4 1.00 sqadd z31.h, z31.h, #65280 +# CHECK-NEXT: 1 4 1.00 sqadd z31.s, z31.s, #65280 +# CHECK-NEXT: 1 4 1.00 sqcadd z0.b, z0.b, z0.b, #90 +# CHECK-NEXT: 1 4 1.00 sqcadd z0.d, z0.d, z0.d, #90 +# CHECK-NEXT: 1 4 1.00 sqcadd z0.h, z0.h, z0.h, #90 +# CHECK-NEXT: 1 4 1.00 sqcadd z0.s, z0.s, z0.s, #90 +# CHECK-NEXT: 1 4 1.00 sqcadd z31.b, z31.b, z31.b, #270 +# CHECK-NEXT: 1 4 1.00 sqcadd z31.d, z31.d, z31.d, #270 +# CHECK-NEXT: 1 4 1.00 sqcadd z31.h, z31.h, z31.h, #270 +# CHECK-NEXT: 1 4 1.00 sqcadd z31.s, z31.s, z31.s, #270 +# CHECK-NEXT: 1 5 1.00 sqdecb x0 +# CHECK-NEXT: 1 5 1.00 sqdecb x0, #14 +# CHECK-NEXT: 1 5 1.00 sqdecb x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqdecb x0, pow2 +# CHECK-NEXT: 1 5 1.00 sqdecb x0, vl1 +# CHECK-NEXT: 1 5 1.00 sqdecb x0, w0 +# CHECK-NEXT: 1 5 1.00 sqdecb x0, w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqdecb x0, w0, pow2 +# CHECK-NEXT: 1 5 1.00 sqdecb x0, w0, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 sqdecd x0 +# CHECK-NEXT: 1 5 1.00 sqdecd x0, #14 +# CHECK-NEXT: 1 5 1.00 sqdecd x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqdecd x0, pow2 +# CHECK-NEXT: 1 5 1.00 sqdecd x0, vl1 +# CHECK-NEXT: 1 5 1.00 sqdecd x0, w0 +# CHECK-NEXT: 1 5 1.00 sqdecd x0, w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqdecd x0, w0, pow2 +# CHECK-NEXT: 1 5 1.00 sqdecd x0, w0, pow2, mul #16 +# CHECK-NEXT: 1 4 1.00 sqdecd z0.d +# CHECK-NEXT: 1 4 1.00 sqdecd z0.d, all, mul #16 +# CHECK-NEXT: 1 4 1.00 sqdecd z0.d, pow2 +# 
CHECK-NEXT: 1 4 1.00 sqdecd z0.d, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 sqdech x0 +# CHECK-NEXT: 1 5 1.00 sqdech x0, #14 +# CHECK-NEXT: 1 5 1.00 sqdech x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqdech x0, pow2 +# CHECK-NEXT: 1 5 1.00 sqdech x0, vl1 +# CHECK-NEXT: 1 5 1.00 sqdech x0, w0 +# CHECK-NEXT: 1 5 1.00 sqdech x0, w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqdech x0, w0, pow2 +# CHECK-NEXT: 1 5 1.00 sqdech x0, w0, pow2, mul #16 +# CHECK-NEXT: 1 4 1.00 sqdech z0.h +# CHECK-NEXT: 1 4 1.00 sqdech z0.h, all, mul #16 +# CHECK-NEXT: 1 4 1.00 sqdech z0.h, pow2 +# CHECK-NEXT: 1 4 1.00 sqdech z0.h, pow2, mul #16 +# CHECK-NEXT: 1 9 1.00 sqdecp x0, p0.b +# CHECK-NEXT: 1 9 1.00 sqdecp x0, p0.d +# CHECK-NEXT: 1 9 1.00 sqdecp x0, p0.h +# CHECK-NEXT: 1 9 1.00 sqdecp x0, p0.s +# CHECK-NEXT: 1 9 1.00 sqdecp xzr, p15.b, wzr +# CHECK-NEXT: 1 9 1.00 sqdecp xzr, p15.d, wzr +# CHECK-NEXT: 1 9 1.00 sqdecp xzr, p15.h, wzr +# CHECK-NEXT: 1 9 1.00 sqdecp xzr, p15.s, wzr +# CHECK-NEXT: 1 3 1.00 sqdecp z0.d, p0.d +# CHECK-NEXT: 1 3 1.00 sqdecp z0.h, p0.h +# CHECK-NEXT: 1 3 1.00 sqdecp z0.s, p0.s +# CHECK-NEXT: 1 5 1.00 sqdecw x0 +# CHECK-NEXT: 1 5 1.00 sqdecw x0, #14 +# CHECK-NEXT: 1 5 1.00 sqdecw x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqdecw x0, pow2 +# CHECK-NEXT: 1 5 1.00 sqdecw x0, vl1 +# CHECK-NEXT: 1 5 1.00 sqdecw x0, w0 +# CHECK-NEXT: 1 5 1.00 sqdecw x0, w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqdecw x0, w0, pow2 +# CHECK-NEXT: 1 5 1.00 sqdecw x0, w0, pow2, mul #16 +# CHECK-NEXT: 1 4 1.00 sqdecw z0.s +# CHECK-NEXT: 1 4 1.00 sqdecw z0.s, all, mul #16 +# CHECK-NEXT: 1 4 1.00 sqdecw z0.s, pow2 +# CHECK-NEXT: 1 4 1.00 sqdecw z0.s, pow2, mul #16 +# CHECK-NEXT: 1 4 1.00 sqdmlalb z0.d, z1.s, z15.s[3] +# CHECK-NEXT: 1 4 1.00 sqdmlalb z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 sqdmlalb z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 sqdmlalb z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 sqdmlalb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmlalbt z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 sqdmlalbt 
z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 sqdmlalbt z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 sqdmlalt z0.d, z1.s, z15.s[3] +# CHECK-NEXT: 1 4 1.00 sqdmlalt z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 sqdmlalt z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 sqdmlalt z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 sqdmlalt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmlslb z0.d, z1.s, z15.s[3] +# CHECK-NEXT: 1 4 1.00 sqdmlslb z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 sqdmlslb z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 sqdmlslb z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 sqdmlslb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmlslbt z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 sqdmlslbt z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 sqdmlslbt z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 sqdmlslt z0.d, z1.s, z15.s[3] +# CHECK-NEXT: 1 4 1.00 sqdmlslt z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 sqdmlslt z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 sqdmlslt z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 sqdmlslt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmulh z0.b, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 sqdmulh z0.d, z1.d, z15.d[1] +# CHECK-NEXT: 1 4 1.00 sqdmulh z0.h, z1.h, z2.h +# CHECK-NEXT: 1 4 1.00 sqdmulh z0.h, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmulh z0.s, z1.s, z7.s[3] +# CHECK-NEXT: 1 4 1.00 sqdmulh z29.s, z30.s, z31.s +# CHECK-NEXT: 1 4 1.00 sqdmulh z31.d, z31.d, z31.d +# CHECK-NEXT: 1 4 1.00 sqdmullb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmullb z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 sqdmullb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmullb z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 sqdmullb z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 sqdmullt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmullt z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 sqdmullt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmullt z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 sqdmullt z31.d, z31.s, z31.s +# CHECK-NEXT: 1 5 1.00 sqincb x0 +# CHECK-NEXT: 1 5 1.00 sqincb x0, #14 +# CHECK-NEXT: 1 5 1.00 sqincb x0, 
all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqincb x0, pow2 +# CHECK-NEXT: 1 5 1.00 sqincb x0, vl1 +# CHECK-NEXT: 1 5 1.00 sqincb x0, w0 +# CHECK-NEXT: 1 5 1.00 sqincb x0, w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqincb x0, w0, pow2 +# CHECK-NEXT: 1 5 1.00 sqincb x0, w0, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 sqincd x0 +# CHECK-NEXT: 1 5 1.00 sqincd x0, #14 +# CHECK-NEXT: 1 5 1.00 sqincd x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqincd x0, pow2 +# CHECK-NEXT: 1 5 1.00 sqincd x0, vl1 +# CHECK-NEXT: 1 5 1.00 sqincd x0, w0 +# CHECK-NEXT: 1 5 1.00 sqincd x0, w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqincd x0, w0, pow2 +# CHECK-NEXT: 1 5 1.00 sqincd x0, w0, pow2, mul #16 +# CHECK-NEXT: 1 4 1.00 sqincd z0.d +# CHECK-NEXT: 1 4 1.00 sqincd z0.d, all, mul #16 +# CHECK-NEXT: 1 4 1.00 sqincd z0.d, pow2 +# CHECK-NEXT: 1 4 1.00 sqincd z0.d, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 sqinch x0 +# CHECK-NEXT: 1 5 1.00 sqinch x0, #14 +# CHECK-NEXT: 1 5 1.00 sqinch x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqinch x0, pow2 +# CHECK-NEXT: 1 5 1.00 sqinch x0, vl1 +# CHECK-NEXT: 1 5 1.00 sqinch x0, w0 +# CHECK-NEXT: 1 5 1.00 sqinch x0, w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqinch x0, w0, pow2 +# CHECK-NEXT: 1 5 1.00 sqinch x0, w0, pow2, mul #16 +# CHECK-NEXT: 1 4 1.00 sqinch z0.h +# CHECK-NEXT: 1 4 1.00 sqinch z0.h, all, mul #16 +# CHECK-NEXT: 1 4 1.00 sqinch z0.h, pow2 +# CHECK-NEXT: 1 4 1.00 sqinch z0.h, pow2, mul #16 +# CHECK-NEXT: 1 9 1.00 sqincp x0, p0.b +# CHECK-NEXT: 1 9 1.00 sqincp x0, p0.d +# CHECK-NEXT: 1 9 1.00 sqincp x0, p0.h +# CHECK-NEXT: 1 9 1.00 sqincp x0, p0.s +# CHECK-NEXT: 1 9 1.00 sqincp xzr, p15.b, wzr +# CHECK-NEXT: 1 9 1.00 sqincp xzr, p15.d, wzr +# CHECK-NEXT: 1 9 1.00 sqincp xzr, p15.h, wzr +# CHECK-NEXT: 1 9 1.00 sqincp xzr, p15.s, wzr +# CHECK-NEXT: 1 3 1.00 sqincp z0.d, p0.d +# CHECK-NEXT: 1 3 1.00 sqincp z0.h, p0.h +# CHECK-NEXT: 1 3 1.00 sqincp z0.s, p0.s +# CHECK-NEXT: 1 5 1.00 sqincw x0 +# CHECK-NEXT: 1 5 1.00 sqincw x0, #14 +# CHECK-NEXT: 1 5 1.00 sqincw x0, all, 
mul #16 +# CHECK-NEXT: 1 5 1.00 sqincw x0, pow2 +# CHECK-NEXT: 1 5 1.00 sqincw x0, vl1 +# CHECK-NEXT: 1 5 1.00 sqincw x0, w0 +# CHECK-NEXT: 1 5 1.00 sqincw x0, w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 sqincw x0, w0, pow2 +# CHECK-NEXT: 1 5 1.00 sqincw x0, w0, pow2, mul #16 +# CHECK-NEXT: 1 4 1.00 sqincw z0.s +# CHECK-NEXT: 1 4 1.00 sqincw z0.s, all, mul #16 +# CHECK-NEXT: 1 4 1.00 sqincw z0.s, pow2 +# CHECK-NEXT: 1 4 1.00 sqincw z0.s, pow2, mul #16 +# CHECK-NEXT: 1 4 1.00 sqneg z31.b, p7/m, z31.b +# CHECK-NEXT: 1 4 1.00 sqneg z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 sqneg z31.h, p7/m, z31.h +# CHECK-NEXT: 1 4 1.00 sqneg z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z0.b, z1.b, z2.b, #0 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z0.d, z1.d, z2.d, #0 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z0.h, z1.h, z2.h, #0 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z0.h, z1.h, z2.h[0], #0 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z0.s, z1.s, z2.s, #0 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z0.s, z1.s, z2.s[0], #0 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z15.b, z16.b, z17.b, #270 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z15.d, z16.d, z17.d, #270 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z15.h, z16.h, z17.h, #270 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z15.s, z16.s, z17.s, #270 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z29.b, z30.b, z31.b, #90 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z29.d, z30.d, z31.d, #90 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z29.h, z30.h, z31.h, #90 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z29.s, z30.s, z31.s, #90 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z31.b, z31.b, z31.b, #180 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z31.d, z31.d, z31.d, #180 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z31.h, z30.h, z7.h[0], #180 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z31.h, z31.h, z31.h, #180 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z31.s, z30.s, z7.s[0], #180 +# CHECK-NEXT: 1 4 1.00 sqrdcmlah z31.s, z31.s, z31.s, #180 +# CHECK-NEXT: 1 4 1.00 sqrdmlah z0.b, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 sqrdmlah z0.d, z1.d, z15.d[1] +# CHECK-NEXT: 1 4 1.00 sqrdmlah z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 
1.00 sqrdmlah z0.h, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 sqrdmlah z0.h, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 sqrdmlah z0.s, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 sqrdmlah z0.s, z1.s, z7.s[3] +# CHECK-NEXT: 1 4 1.00 sqrdmlsh z0.b, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 sqrdmlsh z0.d, z1.d, z15.d[1] +# CHECK-NEXT: 1 4 1.00 sqrdmlsh z0.d, z1.d, z31.d +# CHECK-NEXT: 1 4 1.00 sqrdmlsh z0.h, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 sqrdmlsh z0.h, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 sqrdmlsh z0.s, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 sqrdmlsh z0.s, z1.s, z7.s[3] +# CHECK-NEXT: 1 4 1.00 sqrdmulh z0.b, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 sqrdmulh z0.d, z1.d, z15.d[1] +# CHECK-NEXT: 1 4 1.00 sqrdmulh z0.h, z1.h, z2.h +# CHECK-NEXT: 1 4 1.00 sqrdmulh z0.h, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 sqrdmulh z0.s, z1.s, z7.s[3] +# CHECK-NEXT: 1 4 1.00 sqrdmulh z29.s, z30.s, z31.s +# CHECK-NEXT: 1 4 1.00 sqrdmulh z31.d, z31.d, z31.d +# CHECK-NEXT: 1 4 1.00 sqrshl z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 sqrshl z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 sqrshl z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 sqrshl z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 sqrshlr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 sqrshlr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 sqrshlr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 sqrshlr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 sqrshrnb z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 sqrshrnb z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 sqrshrnb z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 sqrshrnb z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 sqrshrnb z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 sqrshrnb z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 sqrshrnt z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 sqrshrnt z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 sqrshrnt z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 sqrshrnt z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 sqrshrnt z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 sqrshrnt z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 sqrshrunb z0.b, 
z0.h, #1 +# CHECK-NEXT: 1 4 1.00 sqrshrunb z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 sqrshrunb z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 sqrshrunb z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 sqrshrunb z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 sqrshrunb z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 sqrshrunt z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 sqrshrunt z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 sqrshrunt z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 sqrshrunt z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 sqrshrunt z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 sqrshrunt z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 sqshl z0.b, p0/m, z0.b, #0 +# CHECK-NEXT: 1 4 1.00 sqshl z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 sqshl z0.d, p0/m, z0.d, #0 +# CHECK-NEXT: 1 4 1.00 sqshl z0.h, p0/m, z0.h, #0 +# CHECK-NEXT: 1 4 1.00 sqshl z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 sqshl z0.s, p0/m, z0.s, #0 +# CHECK-NEXT: 1 4 1.00 sqshl z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 sqshl z31.b, p0/m, z31.b, #7 +# CHECK-NEXT: 1 4 1.00 sqshl z31.d, p0/m, z31.d, #63 +# CHECK-NEXT: 1 4 1.00 sqshl z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 sqshl z31.h, p0/m, z31.h, #15 +# CHECK-NEXT: 1 4 1.00 sqshl z31.s, p0/m, z31.s, #31 +# CHECK-NEXT: 1 4 1.00 sqshlr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 sqshlr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 sqshlr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 sqshlr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 sqshlu z0.b, p0/m, z0.b, #0 +# CHECK-NEXT: 1 4 1.00 sqshlu z0.d, p0/m, z0.d, #0 +# CHECK-NEXT: 1 4 1.00 sqshlu z0.h, p0/m, z0.h, #0 +# CHECK-NEXT: 1 4 1.00 sqshlu z0.s, p0/m, z0.s, #0 +# CHECK-NEXT: 1 4 1.00 sqshlu z31.b, p0/m, z31.b, #7 +# CHECK-NEXT: 1 4 1.00 sqshlu z31.d, p0/m, z31.d, #63 +# CHECK-NEXT: 1 4 1.00 sqshlu z31.h, p0/m, z31.h, #15 +# CHECK-NEXT: 1 4 1.00 sqshlu z31.s, p0/m, z31.s, #31 +# CHECK-NEXT: 1 4 1.00 sqshrnb z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 sqshrnb z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 sqshrnb z0.s, z0.d, #1 +# 
CHECK-NEXT: 1 4 1.00 sqshrnb z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 sqshrnb z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 sqshrnb z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 sqshrnt z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 sqshrnt z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 sqshrnt z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 sqshrnt z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 sqshrnt z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 sqshrnt z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 sqshrunb z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 sqshrunb z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 sqshrunb z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 sqshrunb z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 sqshrunb z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 sqshrunb z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 sqshrunt z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 sqshrunt z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 sqshrunt z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 sqshrunt z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 sqshrunt z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 sqshrunt z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 sqsub z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 sqsub z0.b, z0.b, #0 +# CHECK-NEXT: 1 4 1.00 sqsub z0.b, z0.b, z0.b +# CHECK-NEXT: 1 4 1.00 sqsub z0.d, z0.d, #0 +# CHECK-NEXT: 1 4 1.00 sqsub z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: 1 4 1.00 sqsub z0.d, z0.d, z0.d +# CHECK-NEXT: 1 4 1.00 sqsub z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 sqsub z0.h, z0.h, #0 +# CHECK-NEXT: 1 4 1.00 sqsub z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: 1 4 1.00 sqsub z0.h, z0.h, z0.h +# CHECK-NEXT: 1 4 1.00 sqsub z0.s, z0.s, #0 +# CHECK-NEXT: 1 4 1.00 sqsub z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: 1 4 1.00 sqsub z0.s, z0.s, z0.s +# CHECK-NEXT: 1 4 1.00 sqsub z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 sqsub z31.b, z31.b, #255 +# CHECK-NEXT: 1 4 1.00 sqsub z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 sqsub z31.d, z31.d, #65280 +# CHECK-NEXT: 1 4 1.00 sqsub z31.h, z31.h, #65280 +# CHECK-NEXT: 1 4 1.00 sqsub z31.s, z31.s, #65280 +# CHECK-NEXT: 1 4 1.00 
sqsubr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 sqsubr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 sqsubr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 sqsubr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 sqxtnb z0.b, z31.h +# CHECK-NEXT: 1 4 1.00 sqxtnb z0.h, z31.s +# CHECK-NEXT: 1 4 1.00 sqxtnb z0.s, z31.d +# CHECK-NEXT: 1 4 1.00 sqxtnt z0.b, z31.h +# CHECK-NEXT: 1 4 1.00 sqxtnt z0.h, z31.s +# CHECK-NEXT: 1 4 1.00 sqxtnt z0.s, z31.d +# CHECK-NEXT: 1 4 1.00 sqxtunb z0.b, z31.h +# CHECK-NEXT: 1 4 1.00 sqxtunb z0.h, z31.s +# CHECK-NEXT: 1 4 1.00 sqxtunb z0.s, z31.d +# CHECK-NEXT: 1 4 1.00 sqxtunt z0.b, z31.h +# CHECK-NEXT: 1 4 1.00 sqxtunt z0.h, z31.s +# CHECK-NEXT: 1 4 1.00 sqxtunt z0.s, z31.d +# CHECK-NEXT: 1 4 1.00 srhadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 srhadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 srhadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 srhadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 3 1.00 sri z0.b, z0.b, #1 +# CHECK-NEXT: 1 3 1.00 sri z0.d, z0.d, #1 +# CHECK-NEXT: 1 3 1.00 sri z0.h, z0.h, #1 +# CHECK-NEXT: 1 3 1.00 sri z0.s, z0.s, #1 +# CHECK-NEXT: 1 3 1.00 sri z31.b, z31.b, #8 +# CHECK-NEXT: 1 3 1.00 sri z31.d, z31.d, #64 +# CHECK-NEXT: 1 3 1.00 sri z31.h, z31.h, #16 +# CHECK-NEXT: 1 3 1.00 sri z31.s, z31.s, #32 +# CHECK-NEXT: 1 4 1.00 srshl z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 srshl z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 srshl z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 srshl z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 srshlr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 srshlr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 srshlr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 srshlr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 srshr z0.b, p0/m, z0.b, #1 +# CHECK-NEXT: 1 4 1.00 srshr z0.d, p0/m, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 srshr z0.h, p0/m, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 srshr z0.s, p0/m, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 srshr z31.b, p0/m, z31.b, #8 +# 
CHECK-NEXT: 1 4 1.00 srshr z31.d, p0/m, z31.d, #64 +# CHECK-NEXT: 1 4 1.00 srshr z31.h, p0/m, z31.h, #16 +# CHECK-NEXT: 1 4 1.00 srshr z31.s, p0/m, z31.s, #32 +# CHECK-NEXT: 1 7 2.00 srsra z0.b, z0.b, #1 +# CHECK-NEXT: 1 7 2.00 srsra z0.d, z0.d, #1 +# CHECK-NEXT: 1 7 2.00 srsra z0.h, z0.h, #1 +# CHECK-NEXT: 1 7 2.00 srsra z0.s, z0.s, #1 +# CHECK-NEXT: 1 7 2.00 srsra z31.b, z31.b, #8 +# CHECK-NEXT: 1 7 2.00 srsra z31.d, z31.d, #64 +# CHECK-NEXT: 1 7 2.00 srsra z31.h, z31.h, #16 +# CHECK-NEXT: 1 7 2.00 srsra z31.s, z31.s, #32 +# CHECK-NEXT: 1 3 1.00 sshllb z0.d, z0.s, #0 +# CHECK-NEXT: 1 3 1.00 sshllb z0.h, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 sshllb z0.s, z0.h, #0 +# CHECK-NEXT: 1 3 1.00 sshllb z31.d, z31.s, #31 +# CHECK-NEXT: 1 3 1.00 sshllb z31.h, z31.b, #7 +# CHECK-NEXT: 1 3 1.00 sshllb z31.s, z31.h, #15 +# CHECK-NEXT: 1 3 1.00 sshllt z0.d, z0.s, #0 +# CHECK-NEXT: 1 3 1.00 sshllt z0.h, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 sshllt z0.s, z0.h, #0 +# CHECK-NEXT: 1 3 1.00 sshllt z31.d, z31.s, #31 +# CHECK-NEXT: 1 3 1.00 sshllt z31.h, z31.b, #7 +# CHECK-NEXT: 1 3 1.00 sshllt z31.s, z31.h, #15 +# CHECK-NEXT: 1 4 1.00 ssra z0.b, z0.b, #1 +# CHECK-NEXT: 1 4 1.00 ssra z0.d, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 ssra z0.h, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 ssra z0.s, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 ssra z31.b, z31.b, #8 +# CHECK-NEXT: 1 4 1.00 ssra z31.d, z31.d, #64 +# CHECK-NEXT: 1 4 1.00 ssra z31.h, z31.h, #16 +# CHECK-NEXT: 1 4 1.00 ssra z31.s, z31.s, #32 +# CHECK-NEXT: 1 4 1.00 ssublb z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 ssublb z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 ssublb z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 ssublbt z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 ssublbt z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 ssublbt z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 ssublt z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 ssublt z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 ssublt z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 ssubltb z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 ssubltb z0.h, 
z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 ssubltb z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 ssubwb z0.h, z1.h, z2.b +# CHECK-NEXT: 1 4 1.00 ssubwb z29.s, z30.s, z31.h +# CHECK-NEXT: 1 4 1.00 ssubwb z31.d, z31.d, z31.s +# CHECK-NEXT: 1 4 1.00 ssubwt z0.h, z1.h, z2.b +# CHECK-NEXT: 1 4 1.00 ssubwt z29.s, z30.s, z31.h +# CHECK-NEXT: 1 4 1.00 ssubwt z31.d, z31.d, z31.s +# CHECK-NEXT: 1 1 1.00 * st1b { z0.b }, p0, [x0, x0] +# CHECK-NEXT: 1 1 1.00 * st1b { z0.b }, p0, [x0] +# CHECK-NEXT: 1 1 1.00 * st1b { z0.d }, p0, [x0, x0] +# CHECK-NEXT: 1 1 8.00 * st1b { z0.d }, p0, [x0, z0.d, sxtw] +# CHECK-NEXT: 1 1 8.00 * st1b { z0.d }, p0, [x0, z0.d, uxtw] +# CHECK-NEXT: 1 1 8.00 * st1b { z0.d }, p0, [x0, z0.d] +# CHECK-NEXT: 1 1 1.00 * st1b { z0.d }, p0, [x0] +# CHECK-NEXT: 1 1 7.00 * st1b { z0.d }, p7, [z0.d] +# CHECK-NEXT: 1 1 1.00 * st1b { z0.h }, p0, [x0, x0] +# CHECK-NEXT: 1 1 1.00 * st1b { z0.h }, p0, [x0] +# CHECK-NEXT: 1 1 1.00 * st1b { z0.s }, p0, [x0, x0] +# CHECK-NEXT: 1 1 8.00 * st1b { z0.s }, p0, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 1 8.00 * st1b { z0.s }, p0, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 1 1.00 * st1b { z0.s }, p0, [x0] +# CHECK-NEXT: 1 1 9.00 * st1b { z0.s }, p7, [z0.s] +# CHECK-NEXT: 1 1 1.00 * st1b { z21.b }, p5, [x10, #5, mul vl] +# CHECK-NEXT: 1 1 1.00 * st1b { z21.d }, p5, [x10, #5, mul vl] +# CHECK-NEXT: 1 1 1.00 * st1b { z21.h }, p5, [x10, #5, mul vl] +# CHECK-NEXT: 1 1 1.00 * st1b { z21.s }, p5, [x10, #5, mul vl] +# CHECK-NEXT: 1 1 1.00 * st1b { z31.b }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: 1 1 1.00 * st1b { z31.d }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: 1 1 7.00 * st1b { z31.d }, p7, [z31.d, #31] +# CHECK-NEXT: 1 1 1.00 * st1b { z31.h }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: 1 1 1.00 * st1b { z31.s }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: 1 1 9.00 * st1b { z31.s }, p7, [z31.s, #31] +# CHECK-NEXT: 1 1 1.00 * st1d { z0.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: 1 1 8.00 * st1d { z0.d }, p0, [x0, z0.d, lsl #3] +# CHECK-NEXT: 1 1 8.00 * st1d { z0.d }, p0, [x0, z0.d, 
sxtw #3] +# CHECK-NEXT: 1 1 8.00 * st1d { z0.d }, p0, [x0, z0.d, sxtw] +# CHECK-NEXT: 1 1 8.00 * st1d { z0.d }, p0, [x0, z0.d, uxtw #3] +# CHECK-NEXT: 1 1 8.00 * st1d { z0.d }, p0, [x0, z0.d, uxtw] +# CHECK-NEXT: 1 1 8.00 * st1d { z0.d }, p0, [x0, z0.d] +# CHECK-NEXT: 1 1 1.00 * st1d { z0.d }, p0, [x0] +# CHECK-NEXT: 1 1 7.00 * st1d { z0.d }, p7, [z0.d] +# CHECK-NEXT: 1 1 1.00 * st1d { z21.d }, p5, [x10, #5, mul vl] +# CHECK-NEXT: 1 1 1.00 * st1d { z31.d }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: 1 1 7.00 * st1d { z31.d }, p7, [z31.d, #248] +# CHECK-NEXT: 1 1 1.00 * st1h { z0.d }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: 1 1 8.00 * st1h { z0.d }, p0, [x0, z0.d, lsl #1] +# CHECK-NEXT: 1 1 8.00 * st1h { z0.d }, p0, [x0, z0.d, sxtw #1] +# CHECK-NEXT: 1 1 8.00 * st1h { z0.d }, p0, [x0, z0.d, sxtw] +# CHECK-NEXT: 1 1 8.00 * st1h { z0.d }, p0, [x0, z0.d, uxtw #1] +# CHECK-NEXT: 1 1 8.00 * st1h { z0.d }, p0, [x0, z0.d, uxtw] +# CHECK-NEXT: 1 1 8.00 * st1h { z0.d }, p0, [x0, z0.d] +# CHECK-NEXT: 1 1 1.00 * st1h { z0.d }, p0, [x0] +# CHECK-NEXT: 1 1 7.00 * st1h { z0.d }, p7, [z0.d] +# CHECK-NEXT: 1 1 1.00 * st1h { z0.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: 1 1 1.00 * st1h { z0.h }, p0, [x0] +# CHECK-NEXT: 1 1 1.00 * st1h { z0.s }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: 1 1 8.00 * st1h { z0.s }, p0, [x0, z0.s, sxtw #1] +# CHECK-NEXT: 1 1 8.00 * st1h { z0.s }, p0, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 1 8.00 * st1h { z0.s }, p0, [x0, z0.s, uxtw #1] +# CHECK-NEXT: 1 1 8.00 * st1h { z0.s }, p0, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 1 1.00 * st1h { z0.s }, p0, [x0] +# CHECK-NEXT: 1 1 9.00 * st1h { z0.s }, p7, [z0.s] +# CHECK-NEXT: 1 1 1.00 * st1h { z21.d }, p5, [x10, #5, mul vl] +# CHECK-NEXT: 1 1 1.00 * st1h { z21.h }, p5, [x10, #5, mul vl] +# CHECK-NEXT: 1 1 1.00 * st1h { z21.s }, p5, [x10, #5, mul vl] +# CHECK-NEXT: 1 1 1.00 * st1h { z31.d }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: 1 1 7.00 * st1h { z31.d }, p7, [z31.d, #62] +# CHECK-NEXT: 1 1 1.00 * st1h { z31.h }, p7, [sp, #-1, mul vl] +# 
CHECK-NEXT: 1 1 1.00 * st1h { z31.s }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: 1 1 9.00 * st1h { z31.s }, p7, [z31.s, #62] +# CHECK-NEXT: 1 1 1.00 * st1w { z0.d }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: 1 1 8.00 * st1w { z0.d }, p0, [x0, z0.d, lsl #2] +# CHECK-NEXT: 1 1 8.00 * st1w { z0.d }, p0, [x0, z0.d, sxtw #2] +# CHECK-NEXT: 1 1 8.00 * st1w { z0.d }, p0, [x0, z0.d, sxtw] +# CHECK-NEXT: 1 1 8.00 * st1w { z0.d }, p0, [x0, z0.d, uxtw #2] +# CHECK-NEXT: 1 1 8.00 * st1w { z0.d }, p0, [x0, z0.d, uxtw] +# CHECK-NEXT: 1 1 8.00 * st1w { z0.d }, p0, [x0, z0.d] +# CHECK-NEXT: 1 1 1.00 * st1w { z0.d }, p0, [x0] +# CHECK-NEXT: 1 1 7.00 * st1w { z0.d }, p7, [z0.d] +# CHECK-NEXT: 1 1 1.00 * st1w { z0.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: 1 1 8.00 * st1w { z0.s }, p0, [x0, z0.s, sxtw #2] +# CHECK-NEXT: 1 1 8.00 * st1w { z0.s }, p0, [x0, z0.s, sxtw] +# CHECK-NEXT: 1 1 8.00 * st1w { z0.s }, p0, [x0, z0.s, uxtw #2] +# CHECK-NEXT: 1 1 8.00 * st1w { z0.s }, p0, [x0, z0.s, uxtw] +# CHECK-NEXT: 1 1 1.00 * st1w { z0.s }, p0, [x0] +# CHECK-NEXT: 1 1 9.00 * st1w { z0.s }, p7, [z0.s] +# CHECK-NEXT: 1 1 1.00 * st1w { z21.d }, p5, [x10, #5, mul vl] +# CHECK-NEXT: 1 1 1.00 * st1w { z21.s }, p5, [x10, #5, mul vl] +# CHECK-NEXT: 1 1 1.00 * st1w { z31.d }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: 1 1 7.00 * st1w { z31.d }, p7, [z31.d, #124] +# CHECK-NEXT: 1 1 1.00 * st1w { z31.s }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: 1 1 9.00 * st1w { z31.s }, p7, [z31.s, #124] +# CHECK-NEXT: 1 1 11.00 * st2b { z0.b, z1.b }, p0, [x0, x0] +# CHECK-NEXT: 1 1 11.00 * st2b { z0.b, z1.b }, p0, [x0] +# CHECK-NEXT: 1 1 11.00 * st2b { z21.b, z22.b }, p5, [x10, #10, mul vl] +# CHECK-NEXT: 1 1 11.00 * st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl] +# CHECK-NEXT: 1 1 11.00 * st2b { z5.b, z6.b }, p3, [x17, x16] +# CHECK-NEXT: 1 1 11.00 * st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: 1 1 11.00 * st2d { z0.d, z1.d }, p0, [x0] +# CHECK-NEXT: 1 1 11.00 * st2d { z21.d, z22.d }, p5, [x10, #10, mul vl] +# CHECK-NEXT: 1 1 
11.00 * st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl] +# CHECK-NEXT: 1 1 11.00 * st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: 1 1 11.00 * st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: 1 1 11.00 * st2h { z0.h, z1.h }, p0, [x0] +# CHECK-NEXT: 1 1 11.00 * st2h { z21.h, z22.h }, p5, [x10, #10, mul vl] +# CHECK-NEXT: 1 1 11.00 * st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl] +# CHECK-NEXT: 1 1 11.00 * st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: 1 1 11.00 * st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: 1 1 11.00 * st2w { z0.s, z1.s }, p0, [x0] +# CHECK-NEXT: 1 1 11.00 * st2w { z21.s, z22.s }, p5, [x10, #10, mul vl] +# CHECK-NEXT: 1 1 11.00 * st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl] +# CHECK-NEXT: 1 1 11.00 * st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: 1 1 25.00 * st3b { z0.b - z2.b }, p0, [x0, x0] +# CHECK-NEXT: 1 1 25.00 * st3b { z0.b - z2.b }, p0, [x0] +# CHECK-NEXT: 1 1 25.00 * st3b { z21.b - z23.b }, p5, [x10, #15, mul vl] +# CHECK-NEXT: 1 1 25.00 * st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: 1 1 25.00 * st3b { z5.b - z7.b }, p3, [x17, x16] +# CHECK-NEXT: 1 1 14.00 * st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: 1 1 14.00 * st3d { z0.d - z2.d }, p0, [x0] +# CHECK-NEXT: 1 1 14.00 * st3d { z21.d - z23.d }, p5, [x10, #15, mul vl] +# CHECK-NEXT: 1 1 14.00 * st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: 1 1 14.00 * st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: 1 1 25.00 * st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: 1 1 25.00 * st3h { z0.h - z2.h }, p0, [x0] +# CHECK-NEXT: 1 1 25.00 * st3h { z21.h - z23.h }, p5, [x10, #15, mul vl] +# CHECK-NEXT: 1 1 25.00 * st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: 1 1 25.00 * st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: 1 1 25.00 * st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: 1 1 25.00 * st3w { z0.s - z2.s }, p0, [x0] +# CHECK-NEXT: 1 1 25.00 * 
st3w { z21.s - z23.s }, p5, [x10, #15, mul vl] +# CHECK-NEXT: 1 1 25.00 * st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: 1 1 25.00 * st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: 1 1 50.00 * st4b { z0.b - z3.b }, p0, [x0, x0] +# CHECK-NEXT: 1 1 50.00 * st4b { z0.b - z3.b }, p0, [x0] +# CHECK-NEXT: 1 1 50.00 * st4b { z21.b - z24.b }, p5, [x10, #20, mul vl] +# CHECK-NEXT: 1 1 50.00 * st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: 1 1 50.00 * st4b { z5.b - z8.b }, p3, [x17, x16] +# CHECK-NEXT: 1 1 25.00 * st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: 1 1 25.00 * st4d { z0.d - z3.d }, p0, [x0] +# CHECK-NEXT: 1 1 25.00 * st4d { z21.d - z24.d }, p5, [x10, #20, mul vl] +# CHECK-NEXT: 1 1 25.00 * st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: 1 1 25.00 * st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: 1 1 50.00 * st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: 1 1 50.00 * st4h { z0.h - z3.h }, p0, [x0] +# CHECK-NEXT: 1 1 50.00 * st4h { z21.h - z24.h }, p5, [x10, #20, mul vl] +# CHECK-NEXT: 1 1 50.00 * st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: 1 1 50.00 * st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: 1 1 50.00 * st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: 1 1 50.00 * st4w { z0.s - z3.s }, p0, [x0] +# CHECK-NEXT: 1 1 50.00 * st4w { z21.s - z24.s }, p5, [x10, #20, mul vl] +# CHECK-NEXT: 1 1 50.00 * st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: 1 1 50.00 * st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: 1 1 1.00 * stnt1b { z0.b }, p0, [x0, x0] +# CHECK-NEXT: 1 1 1.00 * stnt1b { z0.b }, p0, [x0] +# CHECK-NEXT: 1 1 7.00 * stnt1b { z0.d }, p0, [z1.d] +# CHECK-NEXT: 1 1 9.00 * stnt1b { z0.s }, p0, [z1.s] +# CHECK-NEXT: 1 1 1.00 * stnt1b { z21.b }, p5, [x10, #7, mul vl] +# CHECK-NEXT: 1 1 1.00 * stnt1b { z23.b }, p3, [x13, #-8, mul vl] +# CHECK-NEXT: 1 1 7.00 * stnt1b { z31.d }, p7, [z31.d, x0] +# CHECK-NEXT: 1 
1 7.00 * stnt1b { z31.d }, p7, [z31.d] +# CHECK-NEXT: 1 1 9.00 * stnt1b { z31.s }, p7, [z31.s, x0] +# CHECK-NEXT: 1 1 9.00 * stnt1b { z31.s }, p7, [z31.s] +# CHECK-NEXT: 1 1 1.00 * stnt1d { z0.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: 1 1 1.00 * stnt1d { z0.d }, p0, [x0] +# CHECK-NEXT: 1 1 7.00 * stnt1d { z0.d }, p0, [z1.d] +# CHECK-NEXT: 1 1 1.00 * stnt1d { z21.d }, p5, [x10, #7, mul vl] +# CHECK-NEXT: 1 1 1.00 * stnt1d { z23.d }, p3, [x13, #-8, mul vl] +# CHECK-NEXT: 1 1 7.00 * stnt1d { z31.d }, p7, [z31.d, x0] +# CHECK-NEXT: 1 1 7.00 * stnt1d { z31.d }, p7, [z31.d] +# CHECK-NEXT: 1 1 7.00 * stnt1h { z0.d }, p0, [z1.d] +# CHECK-NEXT: 1 1 1.00 * stnt1h { z0.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: 1 1 1.00 * stnt1h { z0.h }, p0, [x0] +# CHECK-NEXT: 1 1 9.00 * stnt1h { z0.s }, p0, [z1.s] +# CHECK-NEXT: 1 1 1.00 * stnt1h { z21.h }, p5, [x10, #7, mul vl] +# CHECK-NEXT: 1 1 1.00 * stnt1h { z23.h }, p3, [x13, #-8, mul vl] +# CHECK-NEXT: 1 1 7.00 * stnt1h { z31.d }, p7, [z31.d, x0] +# CHECK-NEXT: 1 1 7.00 * stnt1h { z31.d }, p7, [z31.d] +# CHECK-NEXT: 1 1 9.00 * stnt1h { z31.s }, p7, [z31.s, x0] +# CHECK-NEXT: 1 1 9.00 * stnt1h { z31.s }, p7, [z31.s] +# CHECK-NEXT: 1 1 7.00 * stnt1w { z0.d }, p0, [z1.d] +# CHECK-NEXT: 1 1 1.00 * stnt1w { z0.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: 1 1 1.00 * stnt1w { z0.s }, p0, [x0] +# CHECK-NEXT: 1 1 9.00 * stnt1w { z0.s }, p0, [z1.s] +# CHECK-NEXT: 1 1 1.00 * stnt1w { z21.s }, p5, [x10, #7, mul vl] +# CHECK-NEXT: 1 1 1.00 * stnt1w { z23.s }, p3, [x13, #-8, mul vl] +# CHECK-NEXT: 1 1 7.00 * stnt1w { z31.d }, p7, [z31.d, x0] +# CHECK-NEXT: 1 1 7.00 * stnt1w { z31.d }, p7, [z31.d] +# CHECK-NEXT: 1 1 9.00 * stnt1w { z31.s }, p7, [z31.s, x0] +# CHECK-NEXT: 1 1 9.00 * stnt1w { z31.s }, p7, [z31.s] +# CHECK-NEXT: 1 1 1.00 * str p0, [x0] +# CHECK-NEXT: 1 1 1.00 * str p15, [sp, #-256, mul vl] +# CHECK-NEXT: 1 1 1.00 * str p5, [x10, #255, mul vl] +# CHECK-NEXT: 1 1 1.00 * str z0, [x0] +# CHECK-NEXT: 1 1 1.00 * str z21, [x10, #-256, mul vl] +# 
CHECK-NEXT: 1 1 1.00 * str z31, [sp, #255, mul vl] +# CHECK-NEXT: 1 3 1.00 sub z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 sub z0.b, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 sub z0.b, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 sub z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 sub z0.d, z0.d, #0 +# CHECK-NEXT: 1 3 1.00 sub z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: 1 3 1.00 sub z0.d, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 sub z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 sub z0.h, z0.h, #0 +# CHECK-NEXT: 1 3 1.00 sub z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: 1 3 1.00 sub z0.h, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 sub z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: 1 3 1.00 sub z0.s, z0.s, #0 +# CHECK-NEXT: 1 3 1.00 sub z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: 1 3 1.00 sub z0.s, z0.s, z0.s +# CHECK-NEXT: 1 3 1.00 sub z21.b, p5/m, z21.b, z10.b +# CHECK-NEXT: 1 3 1.00 sub z21.b, z10.b, z21.b +# CHECK-NEXT: 1 3 1.00 sub z21.d, p5/m, z21.d, z10.d +# CHECK-NEXT: 1 3 1.00 sub z21.d, z10.d, z21.d +# CHECK-NEXT: 1 3 1.00 sub z21.h, p5/m, z21.h, z10.h +# CHECK-NEXT: 1 3 1.00 sub z21.h, z10.h, z21.h +# CHECK-NEXT: 1 3 1.00 sub z21.s, p5/m, z21.s, z10.s +# CHECK-NEXT: 1 3 1.00 sub z21.s, z10.s, z21.s +# CHECK-NEXT: 1 3 1.00 sub z23.b, p3/m, z23.b, z13.b +# CHECK-NEXT: 1 3 1.00 sub z23.b, z13.b, z8.b +# CHECK-NEXT: 1 3 1.00 sub z23.d, p3/m, z23.d, z13.d +# CHECK-NEXT: 1 3 1.00 sub z23.d, z13.d, z8.d +# CHECK-NEXT: 1 3 1.00 sub z23.h, p3/m, z23.h, z13.h +# CHECK-NEXT: 1 3 1.00 sub z23.h, z13.h, z8.h +# CHECK-NEXT: 1 3 1.00 sub z23.s, p3/m, z23.s, z13.s +# CHECK-NEXT: 1 3 1.00 sub z23.s, z13.s, z8.s +# CHECK-NEXT: 1 3 1.00 sub z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 sub z31.b, z31.b, #255 +# CHECK-NEXT: 1 3 1.00 sub z31.b, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 sub z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 sub z31.d, z31.d, #65280 +# CHECK-NEXT: 1 3 1.00 sub z31.d, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 sub z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 sub z31.h, z31.h, #65280 +# CHECK-NEXT: 1 3 1.00 
sub z31.h, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 sub z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 sub z31.s, z31.s, #65280 +# CHECK-NEXT: 1 3 1.00 sub z31.s, z31.s, z31.s +# CHECK-NEXT: 1 8 1.00 subhnb z0.b, z1.h, z31.h +# CHECK-NEXT: 1 8 1.00 subhnb z0.h, z1.s, z31.s +# CHECK-NEXT: 1 8 1.00 subhnb z0.s, z1.d, z31.d +# CHECK-NEXT: 1 8 1.00 subhnt z0.b, z1.h, z31.h +# CHECK-NEXT: 1 8 1.00 subhnt z0.h, z1.s, z31.s +# CHECK-NEXT: 1 8 1.00 subhnt z0.s, z1.d, z31.d +# CHECK-NEXT: 1 3 1.00 subr z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 subr z0.b, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 subr z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 subr z0.d, z0.d, #0 +# CHECK-NEXT: 1 3 1.00 subr z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: 1 3 1.00 subr z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 subr z0.h, z0.h, #0 +# CHECK-NEXT: 1 3 1.00 subr z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: 1 3 1.00 subr z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: 1 3 1.00 subr z0.s, z0.s, #0 +# CHECK-NEXT: 1 3 1.00 subr z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: 1 3 1.00 subr z31.b, z31.b, #255 +# CHECK-NEXT: 1 3 1.00 subr z31.d, z31.d, #65280 +# CHECK-NEXT: 1 3 1.00 subr z31.h, z31.h, #65280 +# CHECK-NEXT: 1 3 1.00 subr z31.s, z31.s, #65280 +# CHECK-NEXT: 1 4 1.00 sunpkhi z31.d, z31.s +# CHECK-NEXT: 1 4 1.00 sunpkhi z31.h, z31.b +# CHECK-NEXT: 1 4 1.00 sunpkhi z31.s, z31.h +# CHECK-NEXT: 1 4 1.00 sunpklo z31.d, z31.s +# CHECK-NEXT: 1 4 1.00 sunpklo z31.h, z31.b +# CHECK-NEXT: 1 4 1.00 sunpklo z31.s, z31.h +# CHECK-NEXT: 1 4 1.00 suqadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 suqadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 suqadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 suqadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 3 1.00 sxtb z0.d, p0/m, z0.d +# CHECK-NEXT: 1 3 1.00 sxtb z0.h, p0/m, z0.h +# CHECK-NEXT: 1 3 1.00 sxtb z0.s, p0/m, z0.s +# CHECK-NEXT: 1 3 1.00 sxtb z31.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 sxtb z31.h, p7/m, z31.h +# CHECK-NEXT: 1 3 1.00 sxtb z31.s, p7/m, z31.s +# CHECK-NEXT: 1 3 
1.00 sxth z0.d, p0/m, z0.d +# CHECK-NEXT: 1 3 1.00 sxth z0.s, p0/m, z0.s +# CHECK-NEXT: 1 3 1.00 sxth z31.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 sxth z31.s, p7/m, z31.s +# CHECK-NEXT: 1 3 1.00 sxtw z0.d, p0/m, z0.d +# CHECK-NEXT: 1 3 1.00 sxtw z31.d, p7/m, z31.d +# CHECK-NEXT: 1 4 1.00 tbl z28.b, { z29.b, z30.b }, z31.b +# CHECK-NEXT: 1 4 1.00 tbl z28.d, { z29.d, z30.d }, z31.d +# CHECK-NEXT: 1 4 1.00 tbl z28.h, { z29.h, z30.h }, z31.h +# CHECK-NEXT: 1 4 1.00 tbl z28.s, { z29.s, z30.s }, z31.s +# CHECK-NEXT: 1 4 1.00 tbl z31.b, { z31.b }, z31.b +# CHECK-NEXT: 1 4 1.00 tbl z31.d, { z31.d }, z31.d +# CHECK-NEXT: 1 4 1.00 tbl z31.h, { z31.h }, z31.h +# CHECK-NEXT: 1 4 1.00 tbl z31.s, { z31.s }, z31.s +# CHECK-NEXT: 1 8 1.00 tbx z31.b, z31.b, z31.b +# CHECK-NEXT: 1 8 1.00 tbx z31.d, z31.d, z31.d +# CHECK-NEXT: 1 8 1.00 tbx z31.h, z31.h, z31.h +# CHECK-NEXT: 1 8 1.00 tbx z31.s, z31.s, z31.s +# CHECK-NEXT: 1 2 1.00 trn1 p15.b, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 trn1 p15.d, p15.d, p15.d +# CHECK-NEXT: 1 2 1.00 trn1 p15.h, p15.h, p15.h +# CHECK-NEXT: 1 2 1.00 trn1 p15.s, p15.s, p15.s +# CHECK-NEXT: 1 3 1.00 trn1 z31.b, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 trn1 z31.d, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 trn1 z31.h, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 trn1 z31.s, z31.s, z31.s +# CHECK-NEXT: 1 2 1.00 trn2 p15.b, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 trn2 p15.d, p15.d, p15.d +# CHECK-NEXT: 1 2 1.00 trn2 p15.h, p15.h, p15.h +# CHECK-NEXT: 1 2 1.00 trn2 p15.s, p15.s, p15.s +# CHECK-NEXT: 1 3 1.00 trn2 z31.b, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 trn2 z31.d, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 trn2 z31.h, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 trn2 z31.s, z31.s, z31.s +# CHECK-NEXT: 1 6 2.00 uaba z0.b, z1.b, z31.b +# CHECK-NEXT: 1 6 2.00 uaba z0.d, z1.d, z31.d +# CHECK-NEXT: 1 6 2.00 uaba z0.h, z1.h, z31.h +# CHECK-NEXT: 1 6 2.00 uaba z0.s, z1.s, z31.s +# CHECK-NEXT: 1 6 2.00 uabalb z0.d, z1.s, z31.s +# CHECK-NEXT: 1 6 2.00 uabalb z0.h, z1.b, z31.b +# CHECK-NEXT: 1 6 2.00 uabalb 
z0.s, z1.h, z31.h +# CHECK-NEXT: 1 6 2.00 uabalt z0.d, z1.s, z31.s +# CHECK-NEXT: 1 6 2.00 uabalt z0.h, z1.b, z31.b +# CHECK-NEXT: 1 6 2.00 uabalt z0.s, z1.h, z31.h +# CHECK-NEXT: 1 3 1.00 uabd z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 uabd z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 uabd z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 uabd z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 uabdlb z0.h, z1.b, z2.b +# CHECK-NEXT: 1 3 1.00 uabdlb z29.s, z30.h, z31.h +# CHECK-NEXT: 1 3 1.00 uabdlb z31.d, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 uabdlt z0.h, z1.b, z2.b +# CHECK-NEXT: 1 3 1.00 uabdlt z29.s, z30.h, z31.h +# CHECK-NEXT: 1 3 1.00 uabdlt z31.d, z31.s, z31.s +# CHECK-NEXT: 1 7 2.00 uadalp z0.h, p0/m, z1.b +# CHECK-NEXT: 1 7 2.00 uadalp z29.s, p0/m, z30.h +# CHECK-NEXT: 1 7 2.00 uadalp z30.d, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 uaddlb z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 uaddlb z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 uaddlb z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 uaddlt z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 uaddlt z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 uaddlt z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 uaddv d0, p7, z31.b +# CHECK-NEXT: 1 4 1.00 uaddv d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 uaddv d0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 uaddv d0, p7, z31.s +# CHECK-NEXT: 1 4 1.00 uaddwb z0.h, z1.h, z2.b +# CHECK-NEXT: 1 4 1.00 uaddwb z29.s, z30.s, z31.h +# CHECK-NEXT: 1 4 1.00 uaddwb z31.d, z31.d, z31.s +# CHECK-NEXT: 1 4 1.00 uaddwt z0.h, z1.h, z2.b +# CHECK-NEXT: 1 4 1.00 uaddwt z29.s, z30.s, z31.h +# CHECK-NEXT: 1 4 1.00 uaddwt z31.d, z31.d, z31.s +# CHECK-NEXT: 1 4 1.00 ucvtf z0.d, p0/m, z0.d +# CHECK-NEXT: 1 4 1.00 ucvtf z0.d, p0/m, z0.s +# CHECK-NEXT: 1 4 1.00 ucvtf z0.h, p0/m, z0.d +# CHECK-NEXT: 1 4 1.00 ucvtf z0.h, p0/m, z0.h +# CHECK-NEXT: 1 4 1.00 ucvtf z0.h, p0/m, z0.s +# CHECK-NEXT: 1 4 1.00 ucvtf z0.s, p0/m, z0.d +# CHECK-NEXT: 1 4 1.00 ucvtf z0.s, p0/m, z0.s +# CHECK-NEXT: 1 26 23.00 udiv z0.d, p7/m, z0.d, z31.d +# 
CHECK-NEXT: 1 15 12.00 udiv z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 26 23.00 udivr z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 15 12.00 udivr z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 udot z0.d, z1.h, z15.h[1] +# CHECK-NEXT: 1 4 1.00 udot z0.d, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 udot z0.s, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 udot z0.s, z1.b, z7.b[3] +# CHECK-NEXT: 1 3 1.00 uhadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 3 1.00 uhadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 3 1.00 uhadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 3 1.00 uhadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 3 1.00 uhsub z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 3 1.00 uhsub z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 3 1.00 uhsub z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 3 1.00 uhsub z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 3 1.00 uhsubr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 3 1.00 uhsubr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 3 1.00 uhsubr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 3 1.00 uhsubr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 3 1.00 umax z0.b, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 umax z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 umax z31.b, z31.b, #255 +# CHECK-NEXT: 1 3 1.00 umax z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 umax z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 umax z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 umaxp z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 3 1.00 umaxp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 3 1.00 umaxp z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 3 1.00 umaxp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 umaxv b0, p7, z31.b +# CHECK-NEXT: 1 4 1.00 umaxv d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 umaxv h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 umaxv s0, p7, z31.s +# CHECK-NEXT: 1 3 1.00 umin z0.b, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 umin z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 umin z31.b, z31.b, #255 +# CHECK-NEXT: 1 3 1.00 umin z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 umin z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: 1 3 
1.00 umin z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: 1 3 1.00 uminp z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 3 1.00 uminp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 3 1.00 uminp z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 3 1.00 uminp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 uminv b0, p7, z31.b +# CHECK-NEXT: 1 4 1.00 uminv d0, p7, z31.d +# CHECK-NEXT: 1 4 1.00 uminv h0, p7, z31.h +# CHECK-NEXT: 1 4 1.00 uminv s0, p7, z31.s +# CHECK-NEXT: 1 4 1.00 umlalb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 umlalb z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 umlalb z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 umlalb z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 umlalb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 umlalt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 umlalt z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 umlalt z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 umlalt z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 umlalt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 umlslb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 umlslb z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 umlslb z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 umlslb z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 umlslb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 umlslt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 umlslt z0.d, z1.s, z31.s +# CHECK-NEXT: 1 4 1.00 umlslt z0.h, z1.b, z31.b +# CHECK-NEXT: 1 4 1.00 umlslt z0.s, z1.h, z31.h +# CHECK-NEXT: 1 4 1.00 umlslt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 ummla z0.s, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 umulh z0.b, p7/m, z0.b, z31.b +# CHECK-NEXT: 1 4 1.00 umulh z0.b, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 umulh z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: 1 4 1.00 umulh z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: 1 4 1.00 umulh z0.h, z1.h, z2.h +# CHECK-NEXT: 1 4 1.00 umulh z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: 1 4 1.00 umulh z29.s, z30.s, z31.s +# CHECK-NEXT: 1 4 1.00 umulh z31.d, z31.d, z31.d +# CHECK-NEXT: 1 4 1.00 umullb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 umullb z0.h, z1.b, z2.b +# 
CHECK-NEXT: 1 4 1.00 umullb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 umullb z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 umullb z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 umullt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: 1 4 1.00 umullt z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 umullt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: 1 4 1.00 umullt z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 umullt z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 uqadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 uqadd z0.b, z0.b, #0 +# CHECK-NEXT: 1 4 1.00 uqadd z0.b, z0.b, z0.b +# CHECK-NEXT: 1 4 1.00 uqadd z0.d, z0.d, #0 +# CHECK-NEXT: 1 4 1.00 uqadd z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: 1 4 1.00 uqadd z0.d, z0.d, z0.d +# CHECK-NEXT: 1 4 1.00 uqadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 uqadd z0.h, z0.h, #0 +# CHECK-NEXT: 1 4 1.00 uqadd z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: 1 4 1.00 uqadd z0.h, z0.h, z0.h +# CHECK-NEXT: 1 4 1.00 uqadd z0.s, z0.s, #0 +# CHECK-NEXT: 1 4 1.00 uqadd z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: 1 4 1.00 uqadd z0.s, z0.s, z0.s +# CHECK-NEXT: 1 4 1.00 uqadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 uqadd z31.b, z31.b, #255 +# CHECK-NEXT: 1 4 1.00 uqadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 uqadd z31.d, z31.d, #65280 +# CHECK-NEXT: 1 4 1.00 uqadd z31.h, z31.h, #65280 +# CHECK-NEXT: 1 4 1.00 uqadd z31.s, z31.s, #65280 +# CHECK-NEXT: 1 5 1.00 uqdecb w0 +# CHECK-NEXT: 1 5 1.00 uqdecb w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdecb w0, pow2 +# CHECK-NEXT: 1 5 1.00 uqdecb w0, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdecb x0 +# CHECK-NEXT: 1 5 1.00 uqdecb x0, #14 +# CHECK-NEXT: 1 5 1.00 uqdecb x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdecb x0, pow2 +# CHECK-NEXT: 1 5 1.00 uqdecb x0, vl1 +# CHECK-NEXT: 1 5 1.00 uqdecd w0 +# CHECK-NEXT: 1 5 1.00 uqdecd w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdecd w0, pow2 +# CHECK-NEXT: 1 5 1.00 uqdecd w0, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdecd x0 +# CHECK-NEXT: 1 5 1.00 uqdecd x0, #14 +# CHECK-NEXT: 1 5 1.00 uqdecd x0, 
all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdecd x0, pow2 +# CHECK-NEXT: 1 5 1.00 uqdecd x0, vl1 +# CHECK-NEXT: 1 4 1.00 uqdecd z0.d +# CHECK-NEXT: 1 4 1.00 uqdecd z0.d, all, mul #16 +# CHECK-NEXT: 1 4 1.00 uqdecd z0.d, pow2 +# CHECK-NEXT: 1 4 1.00 uqdecd z0.d, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdech w0 +# CHECK-NEXT: 1 5 1.00 uqdech w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdech w0, pow2 +# CHECK-NEXT: 1 5 1.00 uqdech w0, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdech x0 +# CHECK-NEXT: 1 5 1.00 uqdech x0, #14 +# CHECK-NEXT: 1 5 1.00 uqdech x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdech x0, pow2 +# CHECK-NEXT: 1 5 1.00 uqdech x0, vl1 +# CHECK-NEXT: 1 4 1.00 uqdech z0.h +# CHECK-NEXT: 1 4 1.00 uqdech z0.h, all, mul #16 +# CHECK-NEXT: 1 4 1.00 uqdech z0.h, pow2 +# CHECK-NEXT: 1 4 1.00 uqdech z0.h, pow2, mul #16 +# CHECK-NEXT: 1 9 1.00 uqdecp wzr, p15.b +# CHECK-NEXT: 1 9 1.00 uqdecp wzr, p15.d +# CHECK-NEXT: 1 9 1.00 uqdecp wzr, p15.h +# CHECK-NEXT: 1 9 1.00 uqdecp wzr, p15.s +# CHECK-NEXT: 1 9 1.00 uqdecp x0, p0.b +# CHECK-NEXT: 1 9 1.00 uqdecp x0, p0.d +# CHECK-NEXT: 1 9 1.00 uqdecp x0, p0.h +# CHECK-NEXT: 1 9 1.00 uqdecp x0, p0.s +# CHECK-NEXT: 1 3 1.00 uqdecp z0.d, p0.d +# CHECK-NEXT: 1 3 1.00 uqdecp z0.h, p0.h +# CHECK-NEXT: 1 3 1.00 uqdecp z0.s, p0.s +# CHECK-NEXT: 1 5 1.00 uqdecw w0 +# CHECK-NEXT: 1 5 1.00 uqdecw w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdecw w0, pow2 +# CHECK-NEXT: 1 5 1.00 uqdecw w0, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdecw x0 +# CHECK-NEXT: 1 5 1.00 uqdecw x0, #14 +# CHECK-NEXT: 1 5 1.00 uqdecw x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqdecw x0, pow2 +# CHECK-NEXT: 1 5 1.00 uqdecw x0, vl1 +# CHECK-NEXT: 1 4 1.00 uqdecw z0.s +# CHECK-NEXT: 1 4 1.00 uqdecw z0.s, all, mul #16 +# CHECK-NEXT: 1 4 1.00 uqdecw z0.s, pow2 +# CHECK-NEXT: 1 4 1.00 uqdecw z0.s, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 uqincb w0 +# CHECK-NEXT: 1 5 1.00 uqincb w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqincb w0, pow2 +# CHECK-NEXT: 1 5 1.00 uqincb w0, pow2, mul #16 +# 
CHECK-NEXT: 1 5 1.00 uqincb x0 +# CHECK-NEXT: 1 5 1.00 uqincb x0, #14 +# CHECK-NEXT: 1 5 1.00 uqincb x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqincb x0, pow2 +# CHECK-NEXT: 1 5 1.00 uqincb x0, vl1 +# CHECK-NEXT: 1 5 1.00 uqincd w0 +# CHECK-NEXT: 1 5 1.00 uqincd w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqincd w0, pow2 +# CHECK-NEXT: 1 5 1.00 uqincd w0, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 uqincd x0 +# CHECK-NEXT: 1 5 1.00 uqincd x0, #14 +# CHECK-NEXT: 1 5 1.00 uqincd x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqincd x0, pow2 +# CHECK-NEXT: 1 5 1.00 uqincd x0, vl1 +# CHECK-NEXT: 1 4 1.00 uqincd z0.d +# CHECK-NEXT: 1 4 1.00 uqincd z0.d, all, mul #16 +# CHECK-NEXT: 1 4 1.00 uqincd z0.d, pow2 +# CHECK-NEXT: 1 4 1.00 uqincd z0.d, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 uqinch w0 +# CHECK-NEXT: 1 5 1.00 uqinch w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqinch w0, pow2 +# CHECK-NEXT: 1 5 1.00 uqinch w0, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 uqinch x0 +# CHECK-NEXT: 1 5 1.00 uqinch x0, #14 +# CHECK-NEXT: 1 5 1.00 uqinch x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqinch x0, pow2 +# CHECK-NEXT: 1 5 1.00 uqinch x0, vl1 +# CHECK-NEXT: 1 4 1.00 uqinch z0.h +# CHECK-NEXT: 1 4 1.00 uqinch z0.h, all, mul #16 +# CHECK-NEXT: 1 4 1.00 uqinch z0.h, pow2 +# CHECK-NEXT: 1 4 1.00 uqinch z0.h, pow2, mul #16 +# CHECK-NEXT: 1 9 1.00 uqincp wzr, p15.b +# CHECK-NEXT: 1 9 1.00 uqincp wzr, p15.d +# CHECK-NEXT: 1 9 1.00 uqincp wzr, p15.h +# CHECK-NEXT: 1 9 1.00 uqincp wzr, p15.s +# CHECK-NEXT: 1 9 1.00 uqincp x0, p0.b +# CHECK-NEXT: 1 9 1.00 uqincp x0, p0.d +# CHECK-NEXT: 1 9 1.00 uqincp x0, p0.h +# CHECK-NEXT: 1 9 1.00 uqincp x0, p0.s +# CHECK-NEXT: 1 3 1.00 uqincp z0.d, p0.d +# CHECK-NEXT: 1 3 1.00 uqincp z0.h, p0.h +# CHECK-NEXT: 1 3 1.00 uqincp z0.s, p0.s +# CHECK-NEXT: 1 5 1.00 uqincw w0 +# CHECK-NEXT: 1 5 1.00 uqincw w0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqincw w0, pow2 +# CHECK-NEXT: 1 5 1.00 uqincw w0, pow2, mul #16 +# CHECK-NEXT: 1 5 1.00 uqincw x0 +# CHECK-NEXT: 1 5 1.00 uqincw x0, #14 +# 
CHECK-NEXT: 1 5 1.00 uqincw x0, all, mul #16 +# CHECK-NEXT: 1 5 1.00 uqincw x0, pow2 +# CHECK-NEXT: 1 5 1.00 uqincw x0, vl1 +# CHECK-NEXT: 1 4 1.00 uqincw z0.s +# CHECK-NEXT: 1 4 1.00 uqincw z0.s, all, mul #16 +# CHECK-NEXT: 1 4 1.00 uqincw z0.s, pow2 +# CHECK-NEXT: 1 4 1.00 uqincw z0.s, pow2, mul #16 +# CHECK-NEXT: 1 4 1.00 uqrshl z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 uqrshl z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 uqrshl z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 uqrshl z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 uqrshlr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 uqrshlr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 uqrshlr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 uqrshlr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 uqrshrnb z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 uqrshrnb z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 uqrshrnb z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 uqrshrnb z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 uqrshrnb z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 uqrshrnb z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 uqrshrnt z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 uqrshrnt z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 uqrshrnt z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 uqrshrnt z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 uqrshrnt z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 uqrshrnt z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 uqshl z0.b, p0/m, z0.b, #0 +# CHECK-NEXT: 1 4 1.00 uqshl z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 uqshl z0.d, p0/m, z0.d, #0 +# CHECK-NEXT: 1 4 1.00 uqshl z0.h, p0/m, z0.h, #0 +# CHECK-NEXT: 1 4 1.00 uqshl z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 uqshl z0.s, p0/m, z0.s, #0 +# CHECK-NEXT: 1 4 1.00 uqshl z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 uqshl z31.b, p0/m, z31.b, #7 +# CHECK-NEXT: 1 4 1.00 uqshl z31.d, p0/m, z31.d, #63 +# CHECK-NEXT: 1 4 1.00 uqshl z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 uqshl z31.h, p0/m, z31.h, #15 +# CHECK-NEXT: 1 4 1.00 uqshl z31.s, p0/m, z31.s, #31 +# CHECK-NEXT: 1 4 
1.00 uqshlr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 uqshlr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 uqshlr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 uqshlr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 uqshrnb z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 uqshrnb z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 uqshrnb z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 uqshrnb z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 uqshrnb z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 uqshrnb z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 uqshrnt z0.b, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 uqshrnt z0.h, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 uqshrnt z0.s, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 uqshrnt z31.b, z31.h, #8 +# CHECK-NEXT: 1 4 1.00 uqshrnt z31.h, z31.s, #16 +# CHECK-NEXT: 1 4 1.00 uqshrnt z31.s, z31.d, #32 +# CHECK-NEXT: 1 4 1.00 uqsub z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 uqsub z0.b, z0.b, #0 +# CHECK-NEXT: 1 4 1.00 uqsub z0.b, z0.b, z0.b +# CHECK-NEXT: 1 4 1.00 uqsub z0.d, z0.d, #0 +# CHECK-NEXT: 1 4 1.00 uqsub z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: 1 4 1.00 uqsub z0.d, z0.d, z0.d +# CHECK-NEXT: 1 4 1.00 uqsub z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 uqsub z0.h, z0.h, #0 +# CHECK-NEXT: 1 4 1.00 uqsub z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: 1 4 1.00 uqsub z0.h, z0.h, z0.h +# CHECK-NEXT: 1 4 1.00 uqsub z0.s, z0.s, #0 +# CHECK-NEXT: 1 4 1.00 uqsub z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: 1 4 1.00 uqsub z0.s, z0.s, z0.s +# CHECK-NEXT: 1 4 1.00 uqsub z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 uqsub z31.b, z31.b, #255 +# CHECK-NEXT: 1 4 1.00 uqsub z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 uqsub z31.d, z31.d, #65280 +# CHECK-NEXT: 1 4 1.00 uqsub z31.h, z31.h, #65280 +# CHECK-NEXT: 1 4 1.00 uqsub z31.s, z31.s, #65280 +# CHECK-NEXT: 1 4 1.00 uqsubr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 uqsubr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 uqsubr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 uqsubr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 uqxtnb z0.b, z31.h +# 
CHECK-NEXT: 1 4 1.00 uqxtnb z0.h, z31.s +# CHECK-NEXT: 1 4 1.00 uqxtnb z0.s, z31.d +# CHECK-NEXT: 1 4 1.00 uqxtnt z0.b, z31.h +# CHECK-NEXT: 1 4 1.00 uqxtnt z0.h, z31.s +# CHECK-NEXT: 1 4 1.00 uqxtnt z0.s, z31.d +# CHECK-NEXT: 1 4 1.00 urecpe z31.s, p7/m, z31.s +# CHECK-NEXT: 1 4 1.00 urhadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 urhadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 urhadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 urhadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 urshl z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 urshl z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 urshl z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 urshl z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 urshlr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 urshlr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 urshlr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 urshlr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 urshr z0.b, p0/m, z0.b, #1 +# CHECK-NEXT: 1 4 1.00 urshr z0.d, p0/m, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 urshr z0.h, p0/m, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 urshr z0.s, p0/m, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 urshr z31.b, p0/m, z31.b, #8 +# CHECK-NEXT: 1 4 1.00 urshr z31.d, p0/m, z31.d, #64 +# CHECK-NEXT: 1 4 1.00 urshr z31.h, p0/m, z31.h, #16 +# CHECK-NEXT: 1 4 1.00 urshr z31.s, p0/m, z31.s, #32 +# CHECK-NEXT: 1 4 1.00 ursqrte z31.s, p7/m, z31.s +# CHECK-NEXT: 1 7 2.00 ursra z0.b, z0.b, #1 +# CHECK-NEXT: 1 7 2.00 ursra z0.d, z0.d, #1 +# CHECK-NEXT: 1 7 2.00 ursra z0.h, z0.h, #1 +# CHECK-NEXT: 1 7 2.00 ursra z0.s, z0.s, #1 +# CHECK-NEXT: 1 7 2.00 ursra z31.b, z31.b, #8 +# CHECK-NEXT: 1 7 2.00 ursra z31.d, z31.d, #64 +# CHECK-NEXT: 1 7 2.00 ursra z31.h, z31.h, #16 +# CHECK-NEXT: 1 7 2.00 ursra z31.s, z31.s, #32 +# CHECK-NEXT: 1 3 1.00 ushllb z0.d, z0.s, #0 +# CHECK-NEXT: 1 3 1.00 ushllb z0.h, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 ushllb z0.s, z0.h, #0 +# CHECK-NEXT: 1 3 1.00 ushllb z31.d, z31.s, #31 +# CHECK-NEXT: 1 3 1.00 ushllb z31.h, z31.b, #7 
+# CHECK-NEXT: 1 3 1.00 ushllb z31.s, z31.h, #15 +# CHECK-NEXT: 1 3 1.00 ushllt z0.d, z0.s, #0 +# CHECK-NEXT: 1 3 1.00 ushllt z0.h, z0.b, #0 +# CHECK-NEXT: 1 3 1.00 ushllt z0.s, z0.h, #0 +# CHECK-NEXT: 1 3 1.00 ushllt z31.d, z31.s, #31 +# CHECK-NEXT: 1 3 1.00 ushllt z31.h, z31.b, #7 +# CHECK-NEXT: 1 3 1.00 ushllt z31.s, z31.h, #15 +# CHECK-NEXT: 1 4 1.00 usmmla z0.s, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 usqadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: 1 4 1.00 usqadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: 1 4 1.00 usqadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: 1 4 1.00 usqadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1 4 1.00 usra z0.b, z0.b, #1 +# CHECK-NEXT: 1 4 1.00 usra z0.d, z0.d, #1 +# CHECK-NEXT: 1 4 1.00 usra z0.h, z0.h, #1 +# CHECK-NEXT: 1 4 1.00 usra z0.s, z0.s, #1 +# CHECK-NEXT: 1 4 1.00 usra z31.b, z31.b, #8 +# CHECK-NEXT: 1 4 1.00 usra z31.d, z31.d, #64 +# CHECK-NEXT: 1 4 1.00 usra z31.h, z31.h, #16 +# CHECK-NEXT: 1 4 1.00 usra z31.s, z31.s, #32 +# CHECK-NEXT: 1 4 1.00 usublb z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 usublb z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 usublb z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 usublt z0.h, z1.b, z2.b +# CHECK-NEXT: 1 4 1.00 usublt z29.s, z30.h, z31.h +# CHECK-NEXT: 1 4 1.00 usublt z31.d, z31.s, z31.s +# CHECK-NEXT: 1 4 1.00 usubwb z0.h, z1.h, z2.b +# CHECK-NEXT: 1 4 1.00 usubwb z29.s, z30.s, z31.h +# CHECK-NEXT: 1 4 1.00 usubwb z31.d, z31.d, z31.s +# CHECK-NEXT: 1 4 1.00 usubwt z0.h, z1.h, z2.b +# CHECK-NEXT: 1 4 1.00 usubwt z29.s, z30.s, z31.h +# CHECK-NEXT: 1 4 1.00 usubwt z31.d, z31.d, z31.s +# CHECK-NEXT: 1 4 1.00 uunpkhi z31.d, z31.s +# CHECK-NEXT: 1 4 1.00 uunpkhi z31.h, z31.b +# CHECK-NEXT: 1 4 1.00 uunpkhi z31.s, z31.h +# CHECK-NEXT: 1 4 1.00 uunpklo z31.d, z31.s +# CHECK-NEXT: 1 4 1.00 uunpklo z31.h, z31.b +# CHECK-NEXT: 1 4 1.00 uunpklo z31.s, z31.h +# CHECK-NEXT: 1 3 1.00 uxtb z0.d, p0/m, z0.d +# CHECK-NEXT: 1 3 1.00 uxtb z0.h, p0/m, z0.h +# CHECK-NEXT: 1 3 1.00 uxtb z0.s, p0/m, z0.s +# CHECK-NEXT: 1 3 1.00 
uxtb z31.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 uxtb z31.h, p7/m, z31.h +# CHECK-NEXT: 1 3 1.00 uxtb z31.s, p7/m, z31.s +# CHECK-NEXT: 1 3 1.00 uxth z0.d, p0/m, z0.d +# CHECK-NEXT: 1 3 1.00 uxth z0.s, p0/m, z0.s +# CHECK-NEXT: 1 3 1.00 uxth z31.d, p7/m, z31.d +# CHECK-NEXT: 1 3 1.00 uxth z31.s, p7/m, z31.s +# CHECK-NEXT: 1 3 1.00 uxtw z0.d, p0/m, z0.d +# CHECK-NEXT: 1 3 1.00 uxtw z31.d, p7/m, z31.d +# CHECK-NEXT: 1 2 1.00 uzp1 p15.b, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 uzp1 p15.d, p15.d, p15.d +# CHECK-NEXT: 1 2 1.00 uzp1 p15.h, p15.h, p15.h +# CHECK-NEXT: 1 2 1.00 uzp1 p15.s, p15.s, p15.s +# CHECK-NEXT: 1 3 1.00 uzp1 z31.b, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 uzp1 z31.d, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 uzp1 z31.h, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 uzp1 z31.s, z31.s, z31.s +# CHECK-NEXT: 1 2 1.00 uzp2 p15.b, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 uzp2 p15.d, p15.d, p15.d +# CHECK-NEXT: 1 2 1.00 uzp2 p15.h, p15.h, p15.h +# CHECK-NEXT: 1 2 1.00 uzp2 p15.s, p15.s, p15.s +# CHECK-NEXT: 1 3 1.00 uzp2 z31.b, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 uzp2 z31.d, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 uzp2 z31.h, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 uzp2 z31.s, z31.s, z31.s +# CHECK-NEXT: 1 2 1.00 whilege p15.b, w0, wzr +# CHECK-NEXT: 1 2 1.00 whilege p15.b, wzr, w0 +# CHECK-NEXT: 1 2 1.00 whilege p15.b, x0, xzr +# CHECK-NEXT: 1 2 1.00 whilege p15.b, xzr, x0 +# CHECK-NEXT: 1 2 1.00 whilege p15.d, w0, wzr +# CHECK-NEXT: 1 2 1.00 whilege p15.d, x0, xzr +# CHECK-NEXT: 1 2 1.00 whilege p15.h, w0, wzr +# CHECK-NEXT: 1 2 1.00 whilege p15.h, x0, xzr +# CHECK-NEXT: 1 2 1.00 whilege p15.s, w0, wzr +# CHECK-NEXT: 1 2 1.00 whilege p15.s, x0, xzr +# CHECK-NEXT: 1 2 1.00 whilerw p15.b, x30, x30 +# CHECK-NEXT: 1 2 1.00 whilerw p15.d, x30, x30 +# CHECK-NEXT: 1 2 1.00 whilerw p15.h, x30, x30 +# CHECK-NEXT: 1 2 1.00 whilerw p15.s, x30, x30 +# CHECK-NEXT: 1 2 1.00 whilewr p15.b, x30, x30 +# CHECK-NEXT: 1 2 1.00 whilewr p15.d, x30, x30 +# CHECK-NEXT: 1 2 1.00 whilewr p15.h, x30, x30 +# 
CHECK-NEXT: 1 2 1.00 whilewr p15.s, x30, x30 +# CHECK-NEXT: 1 1 1.00 * U wrffr p0.b +# CHECK-NEXT: 1 1 1.00 * U wrffr p15.b +# CHECK-NEXT: 1 4 1.00 xar z0.b, z0.b, z1.b, #1 +# CHECK-NEXT: 1 4 1.00 xar z0.d, z0.d, z1.d, #1 +# CHECK-NEXT: 1 4 1.00 xar z0.h, z0.h, z1.h, #1 +# CHECK-NEXT: 1 4 1.00 xar z0.s, z0.s, z1.s, #1 +# CHECK-NEXT: 1 4 1.00 xar z31.b, z31.b, z30.b, #8 +# CHECK-NEXT: 1 4 1.00 xar z31.d, z31.d, z30.d, #64 +# CHECK-NEXT: 1 4 1.00 xar z31.h, z31.h, z30.h, #16 +# CHECK-NEXT: 1 4 1.00 xar z31.s, z31.s, z30.s, #32 +# CHECK-NEXT: 1 2 1.00 zip1 p0.b, p0.b, p0.b +# CHECK-NEXT: 1 2 1.00 zip1 p0.d, p0.d, p0.d +# CHECK-NEXT: 1 2 1.00 zip1 p0.h, p0.h, p0.h +# CHECK-NEXT: 1 2 1.00 zip1 p0.s, p0.s, p0.s +# CHECK-NEXT: 1 2 1.00 zip1 p15.b, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 zip1 p15.d, p15.d, p15.d +# CHECK-NEXT: 1 2 1.00 zip1 p15.h, p15.h, p15.h +# CHECK-NEXT: 1 2 1.00 zip1 p15.s, p15.s, p15.s +# CHECK-NEXT: 1 3 1.00 zip1 z0.b, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 zip1 z0.d, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 zip1 z0.h, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 zip1 z0.s, z0.s, z0.s +# CHECK-NEXT: 1 3 1.00 zip1 z31.b, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 zip1 z31.d, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 zip1 z31.h, z31.h, z31.h +# CHECK-NEXT: 1 3 1.00 zip1 z31.s, z31.s, z31.s +# CHECK-NEXT: 1 2 1.00 zip2 p0.b, p0.b, p0.b +# CHECK-NEXT: 1 2 1.00 zip2 p0.d, p0.d, p0.d +# CHECK-NEXT: 1 2 1.00 zip2 p0.h, p0.h, p0.h +# CHECK-NEXT: 1 2 1.00 zip2 p0.s, p0.s, p0.s +# CHECK-NEXT: 1 2 1.00 zip2 p15.b, p15.b, p15.b +# CHECK-NEXT: 1 2 1.00 zip2 p15.d, p15.d, p15.d +# CHECK-NEXT: 1 2 1.00 zip2 p15.h, p15.h, p15.h +# CHECK-NEXT: 1 2 1.00 zip2 p15.s, p15.s, p15.s +# CHECK-NEXT: 1 3 1.00 zip2 z0.b, z0.b, z0.b +# CHECK-NEXT: 1 3 1.00 zip2 z0.d, z0.d, z0.d +# CHECK-NEXT: 1 3 1.00 zip2 z0.h, z0.h, z0.h +# CHECK-NEXT: 1 3 1.00 zip2 z0.s, z0.s, z0.s +# CHECK-NEXT: 1 3 1.00 zip2 z31.b, z31.b, z31.b +# CHECK-NEXT: 1 3 1.00 zip2 z31.d, z31.d, z31.d +# CHECK-NEXT: 1 3 1.00 zip2 z31.h, z31.h, 
z31.h +# CHECK-NEXT: 1 3 1.00 zip2 z31.s, z31.s, z31.s + +# CHECK: Resources: +# CHECK-NEXT: [0] - CortexA320UnitALU +# CHECK-NEXT: [1] - CortexA320UnitB +# CHECK-NEXT: [2] - CortexA320UnitDiv +# CHECK-NEXT: [3] - CortexA320UnitLdSt +# CHECK-NEXT: [4] - CortexA320UnitMAC +# CHECK-NEXT: [5] - CortexA320UnitPAC +# CHECK-NEXT: [6] - CortexA320UnitVALU +# CHECK-NEXT: [7] - CortexA320UnitVMAC +# CHECK-NEXT: [8] - CortexA320UnitVMC + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] +# CHECK-NEXT: 229.00 9.00 - 3876.00 - - 2214.00 399.00 655.00 + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] Instructions: +# CHECK-NEXT: - - - - - - 1.00 - - abs z0.b, p0/m, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - abs z0.d, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - abs z0.h, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - abs z0.s, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - abs z31.b, p7/m, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - abs z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - abs z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - abs z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - adclb z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - adclb z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - adclt z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - adclt z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - add z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - add z0.b, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - add z0.b, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - add z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - add z0.d, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - add z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - add z0.d, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - add z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - add z0.h, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - add z0.h, z0.h, #0, lsl #8 
+# CHECK-NEXT: - - - - - - 1.00 - - add z0.h, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - add z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - add z0.s, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - add z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - add z0.s, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - add z0.s, z1.s, z2.s +# CHECK-NEXT: - - - - - - 1.00 - - add z21.b, p5/m, z21.b, z10.b +# CHECK-NEXT: - - - - - - 1.00 - - add z21.b, z10.b, z21.b +# CHECK-NEXT: - - - - - - 1.00 - - add z21.d, p5/m, z21.d, z10.d +# CHECK-NEXT: - - - - - - 1.00 - - add z21.d, z10.d, z21.d +# CHECK-NEXT: - - - - - - 1.00 - - add z21.h, p5/m, z21.h, z10.h +# CHECK-NEXT: - - - - - - 1.00 - - add z21.h, z10.h, z21.h +# CHECK-NEXT: - - - - - - 1.00 - - add z21.s, p5/m, z21.s, z10.s +# CHECK-NEXT: - - - - - - 1.00 - - add z21.s, z10.s, z21.s +# CHECK-NEXT: - - - - - - 1.00 - - add z23.b, p3/m, z23.b, z13.b +# CHECK-NEXT: - - - - - - 1.00 - - add z23.b, z13.b, z8.b +# CHECK-NEXT: - - - - - - 1.00 - - add z23.d, p3/m, z23.d, z13.d +# CHECK-NEXT: - - - - - - 1.00 - - add z23.d, z13.d, z8.d +# CHECK-NEXT: - - - - - - 1.00 - - add z23.h, p3/m, z23.h, z13.h +# CHECK-NEXT: - - - - - - 1.00 - - add z23.h, z13.h, z8.h +# CHECK-NEXT: - - - - - - 1.00 - - add z23.s, p3/m, z23.s, z13.s +# CHECK-NEXT: - - - - - - 1.00 - - add z23.s, z13.s, z8.s +# CHECK-NEXT: - - - - - - 1.00 - - add z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - add z31.b, z31.b, #255 +# CHECK-NEXT: - - - - - - 1.00 - - add z31.b, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - add z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - add z31.d, z31.d, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - add z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - add z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - add z31.h, z31.h, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - add z31.h, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - add z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: - - 
- - - - 1.00 - - add z31.s, z31.s, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - add z31.s, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - addhnb z0.b, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - addhnb z0.h, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - addhnb z0.s, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - addhnt z0.b, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - addhnt z0.h, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - addhnt z0.s, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - addp z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - addp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - addp z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - addp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 1.00 - - - - - - - - addpl sp, sp, #31 +# CHECK-NEXT: 1.00 - - - - - - - - addpl x0, x0, #-32 +# CHECK-NEXT: 1.00 - - - - - - - - addpl x21, x21, #0 +# CHECK-NEXT: 1.00 - - - - - - - - addpl x23, x8, #-1 +# CHECK-NEXT: 1.00 - - - - - - - - addvl sp, sp, #31 +# CHECK-NEXT: 1.00 - - - - - - - - addvl x0, x0, #-32 +# CHECK-NEXT: 1.00 - - - - - - - - addvl x21, x21, #0 +# CHECK-NEXT: 1.00 - - - - - - - - addvl x23, x8, #-1 +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.d, [z0.d, z0.d, lsl #1] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.d, [z0.d, z0.d, lsl #2] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.d, [z0.d, z0.d, lsl #3] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.d, [z0.d, z0.d, sxtw #1] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.d, [z0.d, z0.d, sxtw #2] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.d, [z0.d, z0.d, sxtw #3] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.d, [z0.d, z0.d, sxtw] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.d, [z0.d, z0.d, uxtw #1] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.d, [z0.d, z0.d, uxtw #2] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.d, [z0.d, z0.d, uxtw #3] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.d, [z0.d, z0.d, uxtw] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.d, [z0.d, z0.d] +# CHECK-NEXT: - - - - - - 1.00 - 
- adr z0.s, [z0.s, z0.s, lsl #1] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.s, [z0.s, z0.s, lsl #2] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.s, [z0.s, z0.s, lsl #3] +# CHECK-NEXT: - - - - - - 1.00 - - adr z0.s, [z0.s, z0.s] +# CHECK-NEXT: - - - - - - 1.00 - - aesd z0.b, z0.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - aese z0.b, z0.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - aesimc z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - aesimc z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - aesmc z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - aesmc z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - and p0.b, p0/z, p0.b, p1.b +# CHECK-NEXT: - - - - - - 1.00 - - and z0.d, z0.d, #0x6 +# CHECK-NEXT: - - - - - - 1.00 - - and z0.d, z0.d, #0xfffffffffffffff9 +# CHECK-NEXT: - - - - - - 1.00 - - and z0.d, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - and z0.s, z0.s, #0x6 +# CHECK-NEXT: - - - - - - 1.00 - - and z0.s, z0.s, #0xfffffff9 +# CHECK-NEXT: - - - - - - 1.00 - - and z23.d, z13.d, z8.d +# CHECK-NEXT: - - - - - - 1.00 - - and z23.h, z23.h, #0x6 +# CHECK-NEXT: - - - - - - 1.00 - - and z23.h, z23.h, #0xfff9 +# CHECK-NEXT: - - - - - - 1.00 - - and z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - and z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - and z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - and z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - and z5.b, z5.b, #0x6 +# CHECK-NEXT: - - - - - - 1.00 - - and z5.b, z5.b, #0xf9 +# CHECK-NEXT: - - - - - - 1.00 - - ands p0.b, p0/z, p0.b, p1.b +# CHECK-NEXT: - - - - - - 1.00 - - andv b0, p7, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - andv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - andv h0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - andv s0, p7, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.b, p0/m, z0.b, #1 +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.b, p0/m, z0.b, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - 
asr z0.b, z0.b, #1 +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.b, z1.b, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.d, p0/m, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.d, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.h, p0/m, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.h, p0/m, z0.h, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.h, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.h, z1.h, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.s, p0/m, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.s, p0/m, z0.s, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.s, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - asr z0.s, z1.s, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - asr z31.b, p0/m, z31.b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - asr z31.b, z31.b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - asr z31.d, p0/m, z31.d, #64 +# CHECK-NEXT: - - - - - - 1.00 - - asr z31.d, z31.d, #64 +# CHECK-NEXT: - - - - - - 1.00 - - asr z31.h, p0/m, z31.h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - asr z31.h, z31.h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - asr z31.s, p0/m, z31.s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - asr z31.s, z31.s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - asrd z0.b, p0/m, z0.b, #1 +# CHECK-NEXT: - - - - - - 1.00 - - asrd z0.d, p0/m, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - asrd z0.h, p0/m, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - asrd z0.s, p0/m, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - asrd z31.b, p0/m, z31.b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - asrd z31.d, p0/m, z31.d, #64 +# CHECK-NEXT: - - - - - - 1.00 - - asrd z31.h, p0/m, z31.h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - asrd z31.s, p0/m, z31.s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - asrr z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - asrr z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: - - - - - 
- 1.00 - - asrr z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - asrr z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - bcax z29.d, z29.d, z30.d, z31.d +# CHECK-NEXT: - - - - - - - - 12.00 bdep z0.b, z1.b, z31.b +# CHECK-NEXT: - - - - - - - - 67.00 bdep z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - - - 20.00 bdep z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - - - 36.00 bdep z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - - - 12.00 bext z0.b, z1.b, z31.b +# CHECK-NEXT: - - - - - - - - 67.00 bext z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - - - 20.00 bext z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - - - 36.00 bext z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - bfcvt z0.h, p0/m, z1.s +# CHECK-NEXT: - - - - - - 1.00 - - bfcvtnt z0.h, p0/m, z1.s +# CHECK-NEXT: - - - - - - 1.00 1.00 - bfdot z0.s, z1.h, z2.h +# CHECK-NEXT: - - - - - - 1.00 1.00 - bfdot z0.s, z1.h, z2.h[0] +# CHECK-NEXT: - - - - - - 1.00 1.00 - bfdot z0.s, z1.h, z2.h[3] +# CHECK-NEXT: - - - - - - - 1.00 - bfmlalb z0.s, z1.h, z2.h +# CHECK-NEXT: - - - - - - - 1.00 - bfmlalb z0.s, z1.h, z2.h[0] +# CHECK-NEXT: - - - - - - - 1.00 - bfmlalb z0.s, z1.h, z2.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - bfmlalb z10.s, z21.h, z14.h +# CHECK-NEXT: - - - - - - - 1.00 - bfmlalb z21.s, z14.h, z3.h[2] +# CHECK-NEXT: - - - - - - - 1.00 - bfmlalt z0.s, z1.h, z2.h +# CHECK-NEXT: - - - - - - - 1.00 - bfmlalt z0.s, z1.h, z2.h[0] +# CHECK-NEXT: - - - - - - - 1.00 - bfmlalt z0.s, z1.h, z2.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - bfmlalt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - bfmlalt z14.s, z10.h, z21.h +# CHECK-NEXT: - - - - - - 1.00 1.00 - bfmmla z0.s, z1.h, z2.h +# CHECK-NEXT: - - - - - - - - 12.00 bgrp z0.b, z1.b, z31.b +# CHECK-NEXT: - - - - - - - - 67.00 bgrp z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - - - 20.00 bgrp z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - - - 36.00 bgrp z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - bic p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - 
- bic p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - bic z0.d, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - bic z23.d, z13.d, z8.d +# CHECK-NEXT: - - - - - - 1.00 - - bic z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - bic z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - bic z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - bic z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - bics p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - bics p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - brka p0.b, p15/m, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - brka p0.b, p15/z, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - brkas p0.b, p15/z, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - brkb p0.b, p15/m, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - brkb p0.b, p15/z, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - brkbs p0.b, p15/z, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - brkn p0.b, p15/z, p1.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - brkn p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - brkns p0.b, p15/z, p1.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - brkns p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - brkpa p0.b, p15/z, p1.b, p2.b +# CHECK-NEXT: - - - - - - 1.00 - - brkpa p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - brkpas p0.b, p15/z, p1.b, p2.b +# CHECK-NEXT: - - - - - - 1.00 - - brkpas p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - brkpb p0.b, p15/z, p1.b, p2.b +# CHECK-NEXT: - - - - - - 1.00 - - brkpb p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - brkpbs p0.b, p15/z, p1.b, p2.b +# CHECK-NEXT: - - - - - - 1.00 - - brkpbs p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - bsl z0.d, z0.d, z1.d, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - bsl1n z0.d, z0.d, z1.d, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - bsl2n z0.d, z0.d, z1.d, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - cadd z0.b, z0.b, z0.b, #90 +# CHECK-NEXT: - - - - - 
- 1.00 - - cadd z0.d, z0.d, z0.d, #90 +# CHECK-NEXT: - - - - - - 1.00 - - cadd z0.h, z0.h, z0.h, #90 +# CHECK-NEXT: - - - - - - 1.00 - - cadd z0.s, z0.s, z0.s, #90 +# CHECK-NEXT: - - - - - - 1.00 - - cadd z31.b, z31.b, z31.b, #270 +# CHECK-NEXT: - - - - - - 1.00 - - cadd z31.d, z31.d, z31.d, #270 +# CHECK-NEXT: - - - - - - 1.00 - - cadd z31.h, z31.h, z31.h, #270 +# CHECK-NEXT: - - - - - - 1.00 - - cadd z31.s, z31.s, z31.s, #270 +# CHECK-NEXT: - - - - - - - 1.00 - cdot z0.d, z1.h, z15.h[1], #0 +# CHECK-NEXT: - - - - - - - 1.00 - cdot z0.d, z1.h, z31.h, #0 +# CHECK-NEXT: - - - - - - - 1.00 - cdot z0.d, z1.h, z31.h, #180 +# CHECK-NEXT: - - - - - - - 1.00 - cdot z0.d, z1.h, z31.h, #270 +# CHECK-NEXT: - - - - - - - 1.00 - cdot z0.d, z1.h, z31.h, #90 +# CHECK-NEXT: - - - - - - - 1.00 - cdot z0.s, z1.b, z31.b, #0 +# CHECK-NEXT: - - - - - - - 1.00 - cdot z0.s, z1.b, z7.b[3], #0 +# CHECK-NEXT: - - - - - - - 1.00 - cdot z29.d, z30.h, z0.h[0], #180 +# CHECK-NEXT: - - - - - - - 1.00 - cdot z31.d, z30.h, z7.h[1], #270 +# CHECK-NEXT: - - - - - - - 1.00 - cdot z5.d, z6.h, z3.h[0], #90 +# CHECK-NEXT: - - - - - - 1.00 - - clasta b0, p7, b0, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - clasta d0, p7, d0, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - clasta h0, p7, h0, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - clasta s0, p7, s0, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - clasta w0, p7, w0, z31.b +# CHECK-NEXT: - - - - - - 2.00 - - clasta w0, p7, w0, z31.h +# CHECK-NEXT: - - - - - - 2.00 - - clasta w0, p7, w0, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - clasta x0, p7, x0, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - clasta z0.b, p7, z0.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - clasta z0.d, p7, z0.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - clasta z0.h, p7, z0.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - clasta z0.s, p7, z0.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - clastb b0, p7, b0, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - clastb d0, p7, d0, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - clastb 
h0, p7, h0, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - clastb s0, p7, s0, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - clastb w0, p7, w0, z31.b +# CHECK-NEXT: - - - - - - 2.00 - - clastb w0, p7, w0, z31.h +# CHECK-NEXT: - - - - - - 2.00 - - clastb w0, p7, w0, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - clastb x0, p7, x0, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - clastb z0.b, p7, z0.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - clastb z0.d, p7, z0.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - clastb z0.h, p7, z0.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - clastb z0.s, p7, z0.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - cls z31.b, p7/m, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - cls z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - cls z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - cls z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - clz z31.b, p7/m, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - clz z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - clz z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - clz z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - cmla z0.b, z1.b, z2.b, #0 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z0.d, z1.d, z2.d, #0 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z0.h, z1.h, z2.h, #0 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z0.h, z1.h, z2.h[0], #0 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z0.s, z1.s, z2.s, #0 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z0.s, z1.s, z2.s[0], #0 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z15.b, z16.b, z17.b, #270 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z15.d, z16.d, z17.d, #270 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z15.h, z16.h, z17.h, #270 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z15.s, z16.s, z17.s, #270 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z29.b, z30.b, z31.b, #90 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z29.d, z30.d, z31.d, #90 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z29.h, z30.h, z31.h, #90 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z29.s, z30.s, z31.s, #90 +# CHECK-NEXT: - - - - - - - 1.00 
- cmla z31.b, z31.b, z31.b, #180 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z31.d, z31.d, z31.d, #180 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z31.h, z30.h, z7.h[0], #180 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z31.h, z31.h, z31.h, #180 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z31.s, z30.s, z7.s[0], #180 +# CHECK-NEXT: - - - - - - - 1.00 - cmla z31.s, z31.s, z31.s, #180 +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.b, p0/z, z0.b, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.b, p0/z, z0.b, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.d, p0/z, z0.d, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.d, p0/z, z0.d, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.d, p0/z, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.h, p0/z, z0.h, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.h, p0/z, z0.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.s, p0/z, z0.s, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.s, p0/z, z0.s, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpeq p0.s, p0/z, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.b, p0/z, z0.b, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.b, p0/z, z0.b, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.b, p0/z, z1.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.d, p0/z, z0.d, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.d, p0/z, z0.d, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.d, p0/z, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.h, p0/z, z0.h, #-16 +# 
CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.h, p0/z, z0.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.s, p0/z, z0.s, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.s, p0/z, z0.s, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.s, p0/z, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - cmpge p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.b, p0/z, z0.b, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.b, p0/z, z0.b, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.b, p0/z, z1.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.d, p0/z, z0.d, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.d, p0/z, z0.d, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.d, p0/z, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.h, p0/z, z0.h, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.h, p0/z, z0.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.s, p0/z, z0.s, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.s, p0/z, z0.s, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.s, p0/z, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - cmpgt p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.b, p0/z, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.b, p0/z, z0.b, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.b, p0/z, z0.b, 
z0.b +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.b, p0/z, z1.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.d, p0/z, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.d, p0/z, z0.d, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.d, p0/z, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.h, p0/z, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.h, p0/z, z0.h, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.s, p0/z, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.s, p0/z, z0.s, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.s, p0/z, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - cmphi p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.b, p0/z, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.b, p0/z, z0.b, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.b, p0/z, z1.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.d, p0/z, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.d, p0/z, z0.d, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.d, p0/z, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.h, p0/z, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.h, p0/z, z0.h, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.s, p0/z, 
z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.s, p0/z, z0.s, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.s, p0/z, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - cmphs p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - cmple p0.b, p0/z, z0.b, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmple p0.b, p0/z, z0.b, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmple p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmple p0.d, p0/z, z0.d, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmple p0.d, p0/z, z0.d, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmple p0.h, p0/z, z0.h, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmple p0.h, p0/z, z0.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmple p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmple p0.s, p0/z, z0.s, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmple p0.s, p0/z, z0.s, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmple p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmplo p0.b, p0/z, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmplo p0.b, p0/z, z0.b, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmplo p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmplo p0.d, p0/z, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmplo p0.d, p0/z, z0.d, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmplo p0.h, p0/z, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmplo p0.h, p0/z, z0.h, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmplo p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmplo p0.s, p0/z, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmplo p0.s, p0/z, z0.s, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmplo p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpls p0.b, p0/z, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmpls p0.b, p0/z, z0.b, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmpls p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpls p0.d, p0/z, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmpls p0.d, p0/z, 
z0.d, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmpls p0.h, p0/z, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmpls p0.h, p0/z, z0.h, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmpls p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpls p0.s, p0/z, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - cmpls p0.s, p0/z, z0.s, #127 +# CHECK-NEXT: - - - - - - 1.00 - - cmpls p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmplt p0.b, p0/z, z0.b, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmplt p0.b, p0/z, z0.b, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmplt p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmplt p0.d, p0/z, z0.d, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmplt p0.d, p0/z, z0.d, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmplt p0.h, p0/z, z0.h, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmplt p0.h, p0/z, z0.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmplt p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmplt p0.s, p0/z, z0.s, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmplt p0.s, p0/z, z0.s, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmplt p0.s, p0/z, z0.s, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.b, p0/z, z0.b, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.b, p0/z, z0.b, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.b, p0/z, z0.b, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.d, p0/z, z0.d, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.d, p0/z, z0.d, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.d, p0/z, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.h, p0/z, z0.h, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.h, p0/z, z0.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.h, p0/z, z0.h, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.s, p0/z, z0.s, #-16 +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.s, p0/z, z0.s, #15 +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.s, 
p0/z, z0.s, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - cmpne p0.s, p0/z, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - cnot z31.b, p7/m, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - cnot z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - cnot z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - cnot z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - cnt z31.b, p7/m, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - cnt z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - cnt z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - cnt z31.s, p7/m, z31.s +# CHECK-NEXT: 1.00 - - - - - - - - cntb x0 +# CHECK-NEXT: 1.00 - - - - - - - - cntb x0, #28 +# CHECK-NEXT: 1.00 - - - - - - - - cntb x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - cntb x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - cntd x0 +# CHECK-NEXT: 1.00 - - - - - - - - cntd x0, #28 +# CHECK-NEXT: 1.00 - - - - - - - - cntd x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - cntd x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - cnth x0 +# CHECK-NEXT: 1.00 - - - - - - - - cnth x0, #28 +# CHECK-NEXT: 1.00 - - - - - - - - cnth x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - cnth x0, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - cntp x0, p15, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - cntp x0, p15, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - cntp x0, p15, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - cntp x0, p15, p0.s +# CHECK-NEXT: 1.00 - - - - - - - - cntw x0 +# CHECK-NEXT: 1.00 - - - - - - - - cntw x0, #28 +# CHECK-NEXT: 1.00 - - - - - - - - cntw x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - cntw x0, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - compact z31.d, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - compact z31.s, p7, z31.s +# CHECK-NEXT: 1.00 - - - - - - - - ctermeq w30, wzr +# CHECK-NEXT: 1.00 - - - - - - - - ctermeq wzr, w30 +# CHECK-NEXT: 1.00 - - - - - - - - ctermeq x30, xzr +# CHECK-NEXT: 1.00 - - - - - - - - ctermeq xzr, x30 +# CHECK-NEXT: 1.00 - - - - - - - - ctermne w30, wzr +# CHECK-NEXT: 1.00 - - - - - 
- - - ctermne wzr, w30 +# CHECK-NEXT: 1.00 - - - - - - - - ctermne x30, xzr +# CHECK-NEXT: 1.00 - - - - - - - - ctermne xzr, x30 +# CHECK-NEXT: 1.00 - - - - - - - - decb x0 +# CHECK-NEXT: 1.00 - - - - - - - - decb x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - decb x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - decb x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - decb x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - decd x0 +# CHECK-NEXT: 1.00 - - - - - - - - decd x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - decd x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - decd x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - decd x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - dech x0 +# CHECK-NEXT: 1.00 - - - - - - - - dech x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - dech x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - dech x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - dech x0, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - decp x0, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - decp x0, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - decp x0, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - decp x0, p0.s +# CHECK-NEXT: - - - - - - 1.00 - - decp xzr, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - decp xzr, p15.d +# CHECK-NEXT: - - - - - - 1.00 - - decp xzr, p15.h +# CHECK-NEXT: - - - - - - 1.00 - - decp xzr, p15.s +# CHECK-NEXT: - - - - - - 1.00 - - decp z31.d, p15.d +# CHECK-NEXT: - - - - - - 1.00 - - decp z31.h, p15.h +# CHECK-NEXT: - - - - - - 1.00 - - decp z31.s, p15.s +# CHECK-NEXT: 1.00 - - - - - - - - decw x0 +# CHECK-NEXT: 1.00 - - - - - - - - decw x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - decw x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - decw x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - decw x0, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - dupm z0.d, #0xfffffffffffffff9 +# CHECK-NEXT: - - - - - - 1.00 - - dupm z0.s, #0xfffffff9 +# CHECK-NEXT: - - - - - - 1.00 - - dupm z23.h, #0xfff9 +# CHECK-NEXT: - - - - - - 1.00 - - dupm z5.b, #0xf9 +# CHECK-NEXT: - - - - - - 1.00 - - eor p0.b, p0/z, p0.b, p1.b +# 
CHECK-NEXT: - - - - - - 1.00 - - eor z0.d, z0.d, #0x6 +# CHECK-NEXT: - - - - - - 1.00 - - eor z0.d, z0.d, #0xfffffffffffffff9 +# CHECK-NEXT: - - - - - - 1.00 - - eor z0.d, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - eor z0.s, z0.s, #0x6 +# CHECK-NEXT: - - - - - - 1.00 - - eor z0.s, z0.s, #0xfffffff9 +# CHECK-NEXT: - - - - - - 1.00 - - eor z23.d, z13.d, z8.d +# CHECK-NEXT: - - - - - - 1.00 - - eor z23.h, z23.h, #0x6 +# CHECK-NEXT: - - - - - - 1.00 - - eor z23.h, z23.h, #0xfff9 +# CHECK-NEXT: - - - - - - 1.00 - - eor z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - eor z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - eor z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - eor z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - eor z5.b, z5.b, #0x6 +# CHECK-NEXT: - - - - - - 1.00 - - eor z5.b, z5.b, #0xf9 +# CHECK-NEXT: - - - - - - 1.00 - - eor3 z29.d, z29.d, z30.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - eorbt z0.b, z1.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - eorbt z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - eorbt z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - eorbt z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - eors p0.b, p0/z, p0.b, p1.b +# CHECK-NEXT: - - - - - - 1.00 - - eortb z0.b, z1.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - eortb z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - eortb z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - eortb z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - eorv b0, p7, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - eorv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - eorv h0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - eorv s0, p7, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - ext z0.b, { z1.b, z2.b }, #0 +# CHECK-NEXT: - - - - - - 1.00 - - ext z31.b, z31.b, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - ext z31.b, z31.b, z0.b, #255 +# CHECK-NEXT: - - - - - - 1.00 - - ext z31.b, { z30.b, z31.b }, #255 +# CHECK-NEXT: - - - - - - 1.00 - - fabd 
z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fabd z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fabd z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fabs z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fabs z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fabs z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - facge p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - facge p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - facge p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - facge p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - facge p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: - - - - - - 1.00 - - facge p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - facgt p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - facgt p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - facgt p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - facgt p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - facgt p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: - - - - - - 1.00 - - facgt p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - fadd z0.d, p0/m, z0.d, #0.5 +# CHECK-NEXT: - - - - - - 1.00 - - fadd z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fadd z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fadd z0.h, p0/m, z0.h, #0.5 +# CHECK-NEXT: - - - - - - 1.00 - - fadd z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fadd z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fadd z0.s, p0/m, z0.s, #0.5 +# CHECK-NEXT: - - - - - - 1.00 - - fadd z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fadd z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fadd z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fadd z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fadd z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: - - - - - - 5.00 - - fadda d0, p7, d0, z31.d +# CHECK-NEXT: - - - - - - 29.00 - - fadda h0, p7, h0, z31.h +# 
CHECK-NEXT: - - - - - - 13.00 - - fadda s0, p7, s0, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - faddp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - faddp z29.s, p3/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - faddp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - faddv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 11.00 - - faddv h0, p7, z31.h +# CHECK-NEXT: - - - - - - 5.00 - - faddv s0, p7, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fcadd z0.d, p0/m, z0.d, z0.d, #90 +# CHECK-NEXT: - - - - - - 1.00 - - fcadd z0.h, p0/m, z0.h, z0.h, #90 +# CHECK-NEXT: - - - - - - 1.00 - - fcadd z0.s, p0/m, z0.s, z0.s, #90 +# CHECK-NEXT: - - - - - - 1.00 - - fcadd z31.d, p7/m, z31.d, z31.d, #270 +# CHECK-NEXT: - - - - - - 1.00 - - fcadd z31.h, p7/m, z31.h, z31.h, #270 +# CHECK-NEXT: - - - - - - 1.00 - - fcadd z31.s, p7/m, z31.s, z31.s, #270 +# CHECK-NEXT: - - - - - - 1.00 - - fcmeq p0.d, p0/z, z0.d, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmeq p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - fcmeq p0.h, p0/z, z0.h, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmeq p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - fcmeq p0.s, p0/z, z0.s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmeq p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: - - - - - - 1.00 - - fcmge p0.d, p0/z, z0.d, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmge p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - fcmge p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - fcmge p0.h, p0/z, z0.h, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmge p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - fcmge p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - fcmge p0.s, p0/z, z0.s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmge p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: - - - - - - 1.00 - - fcmge p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt p0.d, p0/z, z0.d, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: - - - - - - 1.00 - 
- fcmgt p0.d, p0/z, z1.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt p0.h, p0/z, z0.h, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt p0.h, p0/z, z1.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt p0.s, p0/z, z0.s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: - - - - - - 1.00 - - fcmgt p0.s, p0/z, z1.s, z0.s +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z0.d, p0/m, z0.d, z0.d, #0 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z0.d, p0/m, z1.d, z2.d, #90 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z0.h, p0/m, z0.h, z0.h, #0 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z0.h, p0/m, z1.h, z2.h, #90 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z0.h, z0.h, z0.h[0], #0 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z0.s, p0/m, z0.s, z0.s, #0 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z0.s, p0/m, z1.s, z2.s, #90 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z21.s, z10.s, z5.s[1], #90 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z23.s, z13.s, z8.s[0], #270 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z29.d, p7/m, z30.d, z31.d, #180 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z29.h, p7/m, z30.h, z31.h, #180 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z29.s, p7/m, z30.s, z31.s, #180 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z31.d, p7/m, z31.d, z31.d, #270 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z31.h, p7/m, z31.h, z31.h, #270 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z31.h, z31.h, z7.h[3], #270 +# CHECK-NEXT: - - - - - - - 1.00 - fcmla z31.s, p7/m, z31.s, z31.s, #270 +# CHECK-NEXT: - - - - - - 1.00 - - fcmle p0.d, p0/z, z0.d, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmle p0.h, p0/z, z0.h, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmle p0.s, p0/z, z0.s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmlt p0.d, p0/z, z0.d, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmlt p0.h, p0/z, z0.h, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmlt p0.s, p0/z, z0.s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmne p0.d, 
p0/z, z0.d, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmne p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - fcmne p0.h, p0/z, z0.h, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmne p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - fcmne p0.s, p0/z, z0.s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fcmne p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: - - - - - - 1.00 - - fcmuo p0.d, p0/z, z0.d, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - fcmuo p0.h, p0/z, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - fcmuo p0.s, p0/z, z0.s, z1.s +# CHECK-NEXT: - - - - - - 1.00 - - fcvt z0.d, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - fcvt z0.d, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - fcvt z0.h, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - fcvt z0.h, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - fcvt z0.s, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - fcvt z0.s, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtlt z0.s, p0/m, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtlt z30.d, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnt z0.h, p0/m, z1.s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtnt z30.s, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtx z0.s, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtx z30.s, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtxnt z0.s, p0/m, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtxnt z30.s, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs z0.d, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs z0.d, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs z0.d, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs z0.h, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs z0.s, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs z0.s, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzs z0.s, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu z0.d, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu z0.d, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu z0.d, p0/m, z0.s +# CHECK-NEXT: - - - - - - 
1.00 - - fcvtzu z0.h, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu z0.s, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu z0.s, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - fcvtzu z0.s, p0/m, z0.s +# CHECK-NEXT: - - - - - - - - 19.00 fdiv z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - - - 5.00 fdiv z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - - - 10.00 fdiv z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - - - 19.00 fdivr z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - - - 5.00 fdivr z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - - - 10.00 fdivr z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - fexpa z0.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fexpa z0.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fexpa z0.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - flogb z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - flogb z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - flogb z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - fmad z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fmad z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fmad z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fmax z0.d, p0/m, z0.d, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmax z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fmax z0.h, p0/m, z0.h, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmax z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fmax z0.s, p0/m, z0.s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmax z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fmax z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmax z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmax z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm z0.d, p0/m, z0.d, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm z0.h, p0/m, z0.h, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm z0.h, p7/m, z0.h, 
z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm z0.s, p0/m, z0.s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnm z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnmp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnmp z29.s, p3/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnmp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnmv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnmv h0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fmaxnmv s0, p7, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fmaxp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - fmaxp z29.s, p3/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - fmaxp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - fmaxv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fmaxv h0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fmaxv s0, p7, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fmin z0.d, p0/m, z0.d, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmin z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fmin z0.h, p0/m, z0.h, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmin z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fmin z0.s, p0/m, z0.s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmin z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fmin z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmin z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fmin z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fminnm z0.d, p0/m, z0.d, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fminnm z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fminnm z0.h, p0/m, z0.h, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fminnm z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fminnm z0.s, p0/m, 
z0.s, #0.0 +# CHECK-NEXT: - - - - - - 1.00 - - fminnm z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fminnm z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fminnm z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fminnm z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fminnmp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - fminnmp z29.s, p3/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - fminnmp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - fminnmv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fminnmv h0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fminnmv s0, p7, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fminp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - fminp z29.s, p3/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - fminp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - fminv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fminv h0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fminv s0, p7, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - fmla z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fmla z0.d, z1.d, z7.d[1] +# CHECK-NEXT: - - - - - - - 1.00 - fmla z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fmla z0.h, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - fmla z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - fmla z0.s, z1.s, z7.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - fmlalb z0.s, z1.h, z7.h[0] +# CHECK-NEXT: - - - - - - - 1.00 - fmlalb z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fmlalb z30.s, z31.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - fmlalt z0.s, z1.h, z7.h[0] +# CHECK-NEXT: - - - - - - - 1.00 - fmlalt z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fmlalt z30.s, z31.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - fmls z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fmls z0.d, z1.d, z7.d[1] +# CHECK-NEXT: - - - - - - - 1.00 - fmls z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: - 
- - - - - - 1.00 - fmls z0.h, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - fmls z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - fmls z0.s, z1.s, z7.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - fmlslb z0.s, z1.h, z7.h[0] +# CHECK-NEXT: - - - - - - - 1.00 - fmlslb z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fmlslb z30.s, z31.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - fmlslt z0.s, z1.h, z7.h[0] +# CHECK-NEXT: - - - - - - - 1.00 - fmlslt z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fmlslt z30.s, z31.h, z7.h[7] +# CHECK-NEXT: - - - - - - 1.00 - - fmov z0.d, #-10.00000000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov z0.d, #0.12500000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov z0.d, p0/m, #-10.00000000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov z0.d, p0/m, #0.12500000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov z0.h, #-0.12500000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov z0.h, p0/m, #-0.12500000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov z0.s, #-0.12500000 +# CHECK-NEXT: - - - - - - 1.00 - - fmov z0.s, p0/m, #-0.12500000 +# CHECK-NEXT: - - - - - - - 1.00 - fmsb z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fmsb z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fmsb z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - fmul z0.d, p0/m, z0.d, #0.5 +# CHECK-NEXT: - - - - - - - 1.00 - fmul z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fmul z0.d, z0.d, z0.d[0] +# CHECK-NEXT: - - - - - - - 1.00 - fmul z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fmul z0.h, p0/m, z0.h, #0.5 +# CHECK-NEXT: - - - - - - - 1.00 - fmul z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fmul z0.h, z0.h, z0.h[0] +# CHECK-NEXT: - - - - - - - 1.00 - fmul z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fmul z0.s, p0/m, z0.s, #0.5 +# CHECK-NEXT: - - - - - - - 1.00 - fmul z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - fmul z0.s, z0.s, z0.s[0] +# CHECK-NEXT: - - - - - - - 1.00 - fmul z0.s, z1.s, z31.s 
+# CHECK-NEXT: - - - - - - - 1.00 - fmul z31.d, p7/m, z31.d, #2.0 +# CHECK-NEXT: - - - - - - - 1.00 - fmul z31.d, z31.d, z15.d[1] +# CHECK-NEXT: - - - - - - - 1.00 - fmul z31.h, p7/m, z31.h, #2.0 +# CHECK-NEXT: - - - - - - - 1.00 - fmul z31.h, z31.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - fmul z31.s, p7/m, z31.s, #2.0 +# CHECK-NEXT: - - - - - - - 1.00 - fmul z31.s, z31.s, z7.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - fmulx z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fmulx z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fmulx z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fneg z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fneg z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fneg z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - fnmad z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fnmad z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fnmad z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - fnmla z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fnmla z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fnmla z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - fnmls z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fnmls z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fnmls z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - fnmsb z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fnmsb z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fnmsb z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - frecpe z0.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - frecpe z0.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - frecpe z0.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - frecps z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - frecps z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - frecps z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - frecpx z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - - 
1.00 - frecpx z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - frecpx z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - frinta z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - frinta z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - frinta z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - frinti z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - frinti z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - frinti z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - frintm z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - frintm z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - frintm z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - frintn z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - frintn z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - frintn z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - frintp z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - frintp z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - frintp z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - frintx z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - frintx z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - frintx z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - frintz z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - frintz z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - frintz z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - frsqrte z0.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - frsqrte z0.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - frsqrte z0.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - frsqrts z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - frsqrts z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - frsqrts z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - fscale z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - fscale z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - fscale z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - - - 19.00 fsqrt 
z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - - - 5.00 fsqrt z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - - - 9.00 fsqrt z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fsub z0.d, p0/m, z0.d, #0.5 +# CHECK-NEXT: - - - - - - 1.00 - - fsub z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fsub z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fsub z0.h, p0/m, z0.h, #0.5 +# CHECK-NEXT: - - - - - - 1.00 - - fsub z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fsub z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fsub z0.s, p0/m, z0.s, #0.5 +# CHECK-NEXT: - - - - - - 1.00 - - fsub z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fsub z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fsub z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fsub z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fsub z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fsubr z0.d, p0/m, z0.d, #0.5 +# CHECK-NEXT: - - - - - - 1.00 - - fsubr z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - fsubr z0.h, p0/m, z0.h, #0.5 +# CHECK-NEXT: - - - - - - 1.00 - - fsubr z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - fsubr z0.s, p0/m, z0.s, #0.5 +# CHECK-NEXT: - - - - - - 1.00 - - fsubr z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - fsubr z31.d, p7/m, z31.d, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fsubr z31.h, p7/m, z31.h, #1.0 +# CHECK-NEXT: - - - - - - 1.00 - - fsubr z31.s, p7/m, z31.s, #1.0 +# CHECK-NEXT: - - - - - - - 1.00 - ftmad z0.d, z0.d, z31.d, #7 +# CHECK-NEXT: - - - - - - - 1.00 - ftmad z0.h, z0.h, z31.h, #7 +# CHECK-NEXT: - - - - - - - 1.00 - ftmad z0.s, z0.s, z31.s, #7 +# CHECK-NEXT: - - - - - - - 1.00 - ftsmul z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - ftsmul z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - ftsmul z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - ftssel z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - ftssel z0.h, z1.h, z31.h +# 
CHECK-NEXT: - - - - - - 1.00 - - ftssel z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - histcnt z0.s, p0/z, z1.s, z2.s +# CHECK-NEXT: - - - - - - 2.00 - - histcnt z29.d, p7/z, z30.d, z31.d +# CHECK-NEXT: - - - - - - 2.00 - - histseg z0.b, z1.b, z31.b +# CHECK-NEXT: 1.00 - - - - - - - - incb x0 +# CHECK-NEXT: 1.00 - - - - - - - - incb x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - incb x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - incb x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - incb x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - incd x0 +# CHECK-NEXT: 1.00 - - - - - - - - incd x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - incd x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - incd x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - incd x0, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - incd z0.d +# CHECK-NEXT: - - - - - - 1.00 - - incd z0.d, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - inch x0 +# CHECK-NEXT: 1.00 - - - - - - - - inch x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - inch x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - inch x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - inch x0, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - inch z0.h +# CHECK-NEXT: - - - - - - 1.00 - - inch z0.h, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - incp x0, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - incp x0, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - incp x0, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - incp x0, p0.s +# CHECK-NEXT: - - - - - - 1.00 - - incp xzr, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - incp xzr, p15.d +# CHECK-NEXT: - - - - - - 1.00 - - incp xzr, p15.h +# CHECK-NEXT: - - - - - - 1.00 - - incp xzr, p15.s +# CHECK-NEXT: - - - - - - 1.00 - - incp z31.d, p15.d +# CHECK-NEXT: - - - - - - 1.00 - - incp z31.h, p15.h +# CHECK-NEXT: - - - - - - 1.00 - - incp z31.s, p15.s +# CHECK-NEXT: 1.00 - - - - - - - - incw x0 +# CHECK-NEXT: 1.00 - - - - - - - - incw x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - incw x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - incw x0, pow2 +# 
CHECK-NEXT: 1.00 - - - - - - - - incw x0, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - incw z0.s +# CHECK-NEXT: - - - - - - 1.00 - - incw z0.s, all, mul #16 +# CHECK-NEXT: - - - - - - - 1.00 - index z0.b, #0, #0 +# CHECK-NEXT: - - - - - - - 1.00 - index z0.d, #0, #0 +# CHECK-NEXT: - - - - - - - 1.00 - index z0.h, #0, #0 +# CHECK-NEXT: - - - - - - - 1.00 - index z0.h, w0, w0 +# CHECK-NEXT: - - - - - - - 1.00 - index z0.s, #0, #0 +# CHECK-NEXT: - - - - - - - 1.00 - index z21.b, w10, w21 +# CHECK-NEXT: - - - - - - - 1.00 - index z21.d, x10, x21 +# CHECK-NEXT: - - - - - - - 1.00 - index z21.s, w10, w21 +# CHECK-NEXT: - - - - - - - 1.00 - index z23.b, #13, w8 +# CHECK-NEXT: - - - - - - - 1.00 - index z23.b, w13, #8 +# CHECK-NEXT: - - - - - - - 1.00 - index z23.d, #13, x8 +# CHECK-NEXT: - - - - - - - 1.00 - index z23.d, x13, #8 +# CHECK-NEXT: - - - - - - - 1.00 - index z23.h, #13, w8 +# CHECK-NEXT: - - - - - - - 1.00 - index z23.h, w13, #8 +# CHECK-NEXT: - - - - - - - 1.00 - index z23.s, #13, w8 +# CHECK-NEXT: - - - - - - - 1.00 - index z23.s, w13, #8 +# CHECK-NEXT: - - - - - - - 1.00 - index z31.b, #-1, #-1 +# CHECK-NEXT: - - - - - - - 1.00 - index z31.b, #-1, wzr +# CHECK-NEXT: - - - - - - - 1.00 - index z31.b, wzr, #-1 +# CHECK-NEXT: - - - - - - - 1.00 - index z31.b, wzr, wzr +# CHECK-NEXT: - - - - - - - 1.00 - index z31.d, #-1, #-1 +# CHECK-NEXT: - - - - - - - 1.00 - index z31.d, #-1, xzr +# CHECK-NEXT: - - - - - - - 1.00 - index z31.d, xzr, #-1 +# CHECK-NEXT: - - - - - - - 1.00 - index z31.d, xzr, xzr +# CHECK-NEXT: - - - - - - - 1.00 - index z31.h, #-1, #-1 +# CHECK-NEXT: - - - - - - - 1.00 - index z31.h, #-1, wzr +# CHECK-NEXT: - - - - - - - 1.00 - index z31.h, wzr, #-1 +# CHECK-NEXT: - - - - - - - 1.00 - index z31.h, wzr, wzr +# CHECK-NEXT: - - - - - - - 1.00 - index z31.s, #-1, #-1 +# CHECK-NEXT: - - - - - - - 1.00 - index z31.s, #-1, wzr +# CHECK-NEXT: - - - - - - - 1.00 - index z31.s, wzr, #-1 +# CHECK-NEXT: - - - - - - - 1.00 - index z31.s, wzr, wzr +# CHECK-NEXT: 
- - - - - - 2.00 - - insr z0.b, w0 +# CHECK-NEXT: - - - - - - 2.00 - - insr z0.d, x0 +# CHECK-NEXT: - - - - - - 2.00 - - insr z0.h, w0 +# CHECK-NEXT: - - - - - - 2.00 - - insr z0.s, w0 +# CHECK-NEXT: - - - - - - 1.00 - - insr z31.b, b31 +# CHECK-NEXT: - - - - - - 2.00 - - insr z31.b, wzr +# CHECK-NEXT: - - - - - - 1.00 - - insr z31.d, d31 +# CHECK-NEXT: - - - - - - 2.00 - - insr z31.d, xzr +# CHECK-NEXT: - - - - - - 1.00 - - insr z31.h, h31 +# CHECK-NEXT: - - - - - - 2.00 - - insr z31.h, wzr +# CHECK-NEXT: - - - - - - 1.00 - - insr z31.s, s31 +# CHECK-NEXT: - - - - - - 2.00 - - insr z31.s, wzr +# CHECK-NEXT: - - - - - - 1.00 - - lasta b0, p7, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - lasta d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - lasta h0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - lasta s0, p7, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - lasta w0, p7, z31.b +# CHECK-NEXT: - - - - - - 2.00 - - lasta w0, p7, z31.h +# CHECK-NEXT: - - - - - - 2.00 - - lasta w0, p7, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - lasta x0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - lastb b0, p7, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - lastb d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - lastb h0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - lastb s0, p7, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - lastb w0, p7, z31.b +# CHECK-NEXT: - - - - - - 2.00 - - lastb w0, p7, z31.h +# CHECK-NEXT: - - - - - - 2.00 - - lastb w0, p7, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - lastb x0, p7, z31.d +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z0.b }, p0/z, [sp, x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z0.b }, p0/z, [x0, x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z0.b }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ld1b { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z0.h }, p0/z, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ld1b { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ld1b { 
z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 9.00 - - - - - ld1b { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z21.b }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1b { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ld1b { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z21.h }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z21.s }, p5/z, [x10, x21] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z23.d }, p3/z, [x13, x8] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z31.b }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1b { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 7.00 - - - - - ld1b { z31.d }, p7/z, [z31.d, #31] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z31.h }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 9.00 - - - - - ld1b { z31.s }, p7/z, [z31.s, #31] +# CHECK-NEXT: - - - 1.00 - - - - - ld1b { z5.h }, p3/z, [x17, x16] +# CHECK-NEXT: - - - 7.00 - - - - - ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3] +# CHECK-NEXT: - - - 7.00 - - - - - ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3] +# CHECK-NEXT: - - - 1.00 - - - - - ld1d { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ld1d { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - ld1d { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1d { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ld1d { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ld1d { z23.d }, p3/z, [sp, x8, lsl #3] +# CHECK-NEXT: - - - 1.00 - - - - - ld1d { z23.d }, p3/z, [x13, x8, lsl #3] +# 
CHECK-NEXT: - - - 7.00 - - - - - ld1d { z23.d }, p3/z, [x13, z8.d, lsl #3] +# CHECK-NEXT: - - - 1.00 - - - - - ld1d { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1d { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 7.00 - - - - - ld1d { z31.d }, p7/z, [z31.d, #248] +# CHECK-NEXT: - - - 7.00 - - - - - ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1] +# CHECK-NEXT: - - - 7.00 - - - - - ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ld1h { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z0.h }, p0/z, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ld1h { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ld1h { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 9.00 - - - - - ld1h { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1h { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ld1h { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z21.h }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z21.s }, p5/z, [x10, x21, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z23.d }, p3/z, [x13, x8, lsl #1] +# CHECK-NEXT: - - - 7.00 - - - - - ld1h { z23.d }, p3/z, [x13, z8.d, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1h { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 7.00 - - - - - ld1h { z31.d }, p7/z, [z31.d, #62] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z31.h }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1h { z31.s }, p7/z, [sp, z31.s, sxtw #1] +# CHECK-NEXT: - - - 7.00 - - 
- - - ld1h { z31.s }, p7/z, [sp, z31.s, uxtw #1] +# CHECK-NEXT: - - - 9.00 - - - - - ld1h { z31.s }, p7/z, [z31.s, #62] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z5.h }, p3/z, [sp, x16, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - ld1h { z5.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rb { z0.b }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rb { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rb { z0.h }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rb { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rb { z31.b }, p7/z, [sp, #63] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rb { z31.d }, p7/z, [sp, #63] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rb { z31.h }, p7/z, [sp, #63] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rb { z31.s }, p7/z, [sp, #63] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rd { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rd { z31.d }, p7/z, [sp, #504] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rh { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rh { z0.h }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rh { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rh { z31.d }, p7/z, [sp, #126] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rh { z31.h }, p7/z, [sp, #126] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rh { z31.s }, p7/z, [sp, #126] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqb { z0.b }, p0/z, [x0, x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqb { z0.b }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqb { z21.b }, p5/z, [x10, #112] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqb { z23.b }, p3/z, [x13, #-128] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqb { z31.b }, p7/z, [sp, #-16] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqd { z0.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqd { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqd { z23.d }, p3/z, [x13, #-128] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqd { z23.d }, p3/z, [x13, #112] +# CHECK-NEXT: - - - 1.00 - - - - - 
ld1rqd { z31.d }, p7/z, [sp, #-16] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqh { z0.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqh { z0.h }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqh { z23.h }, p3/z, [x13, #-128] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqh { z23.h }, p3/z, [x13, #112] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqh { z31.h }, p7/z, [sp, #-16] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqw { z0.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqw { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqw { z23.s }, p3/z, [x13, #-128] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqw { z23.s }, p3/z, [x13, #112] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rqw { z31.s }, p7/z, [sp, #-16] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rsb { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rsb { z0.h }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rsb { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rsb { z31.d }, p7/z, [sp, #63] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rsb { z31.h }, p7/z, [sp, #63] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rsb { z31.s }, p7/z, [sp, #63] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rsh { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rsh { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rsh { z31.d }, p7/z, [sp, #126] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rsh { z31.s }, p7/z, [sp, #126] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rsw { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rsw { z31.d }, p7/z, [sp, #252] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rw { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rw { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rw { z31.d }, p7/z, [sp, #252] +# CHECK-NEXT: - - - 1.00 - - - - - ld1rw { z31.s }, p7/z, [sp, #252] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sb { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z0.h }, 
p0/z, [sp, x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z0.h }, p0/z, [x0, x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z0.h }, p0/z, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 9.00 - - - - - ld1sb { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sb { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sb { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z21.h }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z21.s }, p5/z, [x10, x21] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z23.d }, p3/z, [x13, x8] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sb { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sb { z31.d }, p7/z, [z31.d, #31] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z31.h }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sb { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 9.00 - - - - - ld1sb { z31.s }, p7/z, [z31.s, #31] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sh { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sh { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sh { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 9.00 - - - - - ld1sh { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sh { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - 
ld1sh { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sh { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sh { z21.s }, p5/z, [sp, x21, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sh { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sh { z21.s }, p5/z, [x10, x21, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sh { z23.d }, p3/z, [x13, x8, lsl #1] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sh { z23.d }, p3/z, [x13, z8.d, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sh { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sh { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sh { z31.d }, p7/z, [z31.d, #62] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sh { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1] +# CHECK-NEXT: - - - 9.00 - - - - - ld1sh { z31.s }, p7/z, [z31.s, #62] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sw { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sw { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sw { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sw { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sw { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sw { z23.d }, p3/z, [sp, x8, lsl #2] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sw { z23.d }, p3/z, [x13, x8, lsl #2] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sw { z23.d }, p3/z, [x13, z8.d, lsl #2] +# CHECK-NEXT: - - - 1.00 - - - - - ld1sw { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sw { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 7.00 - - - - - ld1sw { z31.d }, p7/z, 
[z31.d, #124] +# CHECK-NEXT: - - - 7.00 - - - - - ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2] +# CHECK-NEXT: - - - 7.00 - - - - - ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2] +# CHECK-NEXT: - - - 1.00 - - - - - ld1w { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ld1w { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 7.00 - - - - - ld1w { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ld1w { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ld1w { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 9.00 - - - - - ld1w { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: - - - 1.00 - - - - - ld1w { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1w { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ld1w { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ld1w { z21.s }, p5/z, [sp, x21, lsl #2] +# CHECK-NEXT: - - - 1.00 - - - - - ld1w { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld1w { z21.s }, p5/z, [x10, x21, lsl #2] +# CHECK-NEXT: - - - 1.00 - - - - - ld1w { z23.d }, p3/z, [x13, x8, lsl #2] +# CHECK-NEXT: - - - 7.00 - - - - - ld1w { z23.d }, p3/z, [x13, z8.d, lsl #2] +# CHECK-NEXT: - - - 1.00 - - - - - ld1w { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1w { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 7.00 - - - - - ld1w { z31.d }, p7/z, [z31.d, #124] +# CHECK-NEXT: - - - 1.00 - - - - - ld1w { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ld1w { z31.s }, p7/z, [sp, z31.s, sxtw #2] +# CHECK-NEXT: - - - 7.00 - - - - - ld1w { z31.s }, p7/z, [sp, z31.s, uxtw #2] +# CHECK-NEXT: - - - 9.00 - - - - - ld1w { z31.s }, p7/z, [z31.s, #124] +# CHECK-NEXT: - - - 2.00 - - - - - ld2b { z0.b, z1.b }, p0/z, [x0, x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld2b { z0.b, z1.b }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld2b { z21.b, z22.b }, p5/z, [x10, #10, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld2b { z23.b, z24.b }, p3/z, [x13, 
#-16, mul vl] +# CHECK-NEXT: - - - 2.00 - - - - - ld2b { z5.b, z6.b }, p3/z, [x17, x16] +# CHECK-NEXT: - - - 2.00 - - - - - ld2d { z0.d, z1.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: - - - 1.00 - - - - - ld2d { z0.d, z1.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld2d { z21.d, z22.d }, p5/z, [x10, #10, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld2d { z23.d, z24.d }, p3/z, [x13, #-16, mul vl] +# CHECK-NEXT: - - - 2.00 - - - - - ld2d { z5.d, z6.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: - - - 2.00 - - - - - ld2h { z0.h, z1.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - ld2h { z0.h, z1.h }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld2h { z21.h, z22.h }, p5/z, [x10, #10, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld2h { z23.h, z24.h }, p3/z, [x13, #-16, mul vl] +# CHECK-NEXT: - - - 2.00 - - - - - ld2h { z5.h, z6.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: - - - 2.00 - - - - - ld2w { z0.s, z1.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 1.00 - - - - - ld2w { z0.s, z1.s }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl] +# CHECK-NEXT: - - - 2.00 - - - - - ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: - - - 3.00 - - - - - ld3b { z0.b - z2.b }, p0/z, [x0, x0] +# CHECK-NEXT: - - - 3.00 - - - - - ld3b { z0.b - z2.b }, p0/z, [x0] +# CHECK-NEXT: - - - 3.00 - - - - - ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld3b { z5.b - z7.b }, p3/z, [x17, x16] +# CHECK-NEXT: - - - 3.00 - - - - - ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: - - - 3.00 - - - - - ld3d { z0.d - z2.d }, p0/z, [x0] +# CHECK-NEXT: - - - 3.00 - - - - - ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl] +# 
CHECK-NEXT: - - - 3.00 - - - - - ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: - - - 3.00 - - - - - ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 3.00 - - - - - ld3h { z0.h - z2.h }, p0/z, [x0] +# CHECK-NEXT: - - - 3.00 - - - - - ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: - - - 3.00 - - - - - ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 3.00 - - - - - ld3w { z0.s - z2.s }, p0/z, [x0] +# CHECK-NEXT: - - - 3.00 - - - - - ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: - - - 3.00 - - - - - ld4b { z0.b - z3.b }, p0/z, [x0, x0] +# CHECK-NEXT: - - - 3.00 - - - - - ld4b { z0.b - z3.b }, p0/z, [x0] +# CHECK-NEXT: - - - 3.00 - - - - - ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld4b { z5.b - z8.b }, p3/z, [x17, x16] +# CHECK-NEXT: - - - 3.00 - - - - - ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: - - - 3.00 - - - - - ld4d { z0.d - z3.d }, p0/z, [x0] +# CHECK-NEXT: - - - 3.00 - - - - - ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: - - - 3.00 - - - - - ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 3.00 - - - - - ld4h { z0.h - z3.h }, p0/z, [x0] +# CHECK-NEXT: - - - 3.00 - - - - - ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul 
vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: - - - 3.00 - - - - - ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 3.00 - - - - - ld4w { z0.s - z3.s }, p0/z, [x0] +# CHECK-NEXT: - - - 3.00 - - - - - ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: - - - 3.00 - - - - - ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1b { z0.d }, p0/z, [x0, x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1b { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1b { z0.h }, p0/z, [x0, x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1b { z0.s }, p0/z, [x0, x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1b { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: - - - 9.00 - - - - - ldff1b { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1b { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1b { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1b { z31.b }, p7/z, [sp] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1b { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1b { z31.d }, p7/z, [sp] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1b { z31.d }, p7/z, [z31.d, #31] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1b { z31.h }, p7/z, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1b { z31.s }, p7/z, [sp] +# CHECK-NEXT: - - - 9.00 - - - - - ldff1b { z31.s }, p7/z, [z31.s, #31] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1d { z0.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1d { z0.d }, p0/z, [x0, z0.d, sxtw #3] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1d { z0.d }, p0/z, [x0, z0.d, uxtw #3] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1d { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1d { z21.d }, p5/z, [x10, z21.d, sxtw] +# 
CHECK-NEXT: - - - 7.00 - - - - - ldff1d { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1d { z23.d }, p3/z, [x13, z8.d, lsl #3] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1d { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1d { z31.d }, p7/z, [sp] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1d { z31.d }, p7/z, [z31.d, #248] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1h { z0.d }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1h { z0.d }, p0/z, [x0, z0.d, sxtw #1] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1h { z0.d }, p0/z, [x0, z0.d, uxtw #1] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1h { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1h { z0.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1h { z0.s }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1h { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: - - - 9.00 - - - - - ldff1h { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1h { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1h { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1h { z23.d }, p3/z, [x13, z8.d, lsl #1] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1h { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1h { z31.d }, p7/z, [sp] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1h { z31.d }, p7/z, [z31.d, #62] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1h { z31.h }, p7/z, [sp] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1h { z31.s }, p7/z, [sp, z31.s, sxtw #1] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1h { z31.s }, p7/z, [sp, z31.s, uxtw #1] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1h { z31.s }, p7/z, [sp] +# CHECK-NEXT: - - - 9.00 - - - - - ldff1h { z31.s }, p7/z, [z31.s, #62] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1sb { z0.d }, p0/z, [x0, x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sb { z0.d }, p0/z, [z0.d] +# 
CHECK-NEXT: - - - 1.00 - - - - - ldff1sb { z0.h }, p0/z, [x0, x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1sb { z0.s }, p0/z, [x0, x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sb { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: - - - 9.00 - - - - - ldff1sb { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sb { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sb { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sb { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1sb { z31.d }, p7/z, [sp] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sb { z31.d }, p7/z, [z31.d, #31] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1sb { z31.h }, p7/z, [sp] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1sb { z31.s }, p7/z, [sp] +# CHECK-NEXT: - - - 9.00 - - - - - ldff1sb { z31.s }, p7/z, [z31.s, #31] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1sh { z0.d }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sh { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1sh { z0.s }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sh { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: - - - 9.00 - - - - - ldff1sh { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sh { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sh { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sh { z23.d }, p3/z, [x13, z8.d, lsl #1] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sh { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1sh { z31.d }, p7/z, [sp] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sh { z31.d }, p7/z, [z31.d, #62] +# 
CHECK-NEXT: - - - 7.00 - - - - - ldff1sh { z31.s }, p7/z, [sp, z31.s, sxtw #1] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sh { z31.s }, p7/z, [sp, z31.s, uxtw #1] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1sh { z31.s }, p7/z, [sp] +# CHECK-NEXT: - - - 9.00 - - - - - ldff1sh { z31.s }, p7/z, [z31.s, #62] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1sw { z0.d }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sw { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sw { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sw { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sw { z23.d }, p3/z, [x13, z8.d, lsl #2] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sw { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1sw { z31.d }, p7/z, [sp] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1sw { z31.d }, p7/z, [z31.d, #124] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1w { z0.d }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1w { z0.d }, p0/z, [x0, z0.d, sxtw #2] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1w { z0.d }, p0/z, [x0, z0.d, uxtw #2] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1w { z0.d }, p0/z, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1w { z0.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1w { z0.s }, p0/z, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw] +# CHECK-NEXT: - - - 9.00 - - - - - ldff1w { z0.s }, p0/z, [z0.s] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1w { z21.d }, p5/z, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1w { z21.d }, p5/z, [x10, z21.d, uxtw] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1w { z23.d }, p3/z, [x13, z8.d, lsl #2] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1w { z31.d }, p7/z, [sp, z31.d] +# CHECK-NEXT: - - - 1.00 - - - - - 
ldff1w { z31.d }, p7/z, [sp] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1w { z31.d }, p7/z, [z31.d, #124] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1w { z31.s }, p7/z, [sp, z31.s, sxtw #2] +# CHECK-NEXT: - - - 7.00 - - - - - ldff1w { z31.s }, p7/z, [sp, z31.s, uxtw #2] +# CHECK-NEXT: - - - 1.00 - - - - - ldff1w { z31.s }, p7/z, [sp] +# CHECK-NEXT: - - - 9.00 - - - - - ldff1w { z31.s }, p7/z, [z31.s, #124] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1b { z0.b }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1b { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1b { z0.h }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1b { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1b { z21.b }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1b { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1b { z21.h }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1b { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1b { z31.b }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1b { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1b { z31.h }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1b { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1d { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1d { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1d { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1h { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1h { z0.h }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1h { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1h { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1h { z21.h }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1h { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1h { z31.d }, p7/z, [sp, #-1, 
mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1h { z31.h }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1h { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sb { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sb { z0.h }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sb { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sb { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sb { z21.h }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sb { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sb { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sb { z31.h }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sb { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sh { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sh { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sh { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sh { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sh { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sh { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sw { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sw { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1sw { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1w { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1w { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1w { z21.d }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1w { z21.s }, p5/z, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1w { z31.d }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnf1w { z31.s }, p7/z, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1b { z0.b }, 
p0/z, [x0, x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1b { z0.b }, p0/z, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1b { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1b { z0.s }, p0/z, [z1.s] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1b { z21.b }, p5/z, [x10, #7, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1b { z23.b }, p3/z, [x13, #-8, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1b { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1b { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1b { z31.s }, p7/z, [z31.s, x0] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1b { z31.s }, p7/z, [z31.s] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1d { z0.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1d { z0.d }, p0/z, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1d { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1d { z21.d }, p5/z, [x10, #7, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1d { z23.d }, p3/z, [x13, #-8, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1d { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1d { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1h { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1h { z0.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1h { z0.h }, p0/z, [x0] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1h { z0.s }, p0/z, [z1.s] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1h { z21.h }, p5/z, [x10, #7, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1h { z23.h }, p3/z, [x13, #-8, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1h { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1h { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1h { z31.s }, p7/z, [z31.s, x0] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1h { z31.s }, p7/z, [z31.s] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1sb { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1sb { z0.s }, p0/z, [z1.s] +# 
CHECK-NEXT: - - - 7.00 - - - - - ldnt1sb { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1sb { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1sb { z31.s }, p7/z, [z31.s, x0] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1sb { z31.s }, p7/z, [z31.s] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1sh { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1sh { z0.s }, p0/z, [z1.s] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1sh { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1sh { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1sh { z31.s }, p7/z, [z31.s, x0] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1sh { z31.s }, p7/z, [z31.s] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1sw { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1sw { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1sw { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1w { z0.d }, p0/z, [z1.d] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1w { z0.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1w { z0.s }, p0/z, [x0] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1w { z0.s }, p0/z, [z1.s] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1w { z21.s }, p5/z, [x10, #7, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldnt1w { z23.s }, p3/z, [x13, #-8, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1w { z31.d }, p7/z, [z31.d, x0] +# CHECK-NEXT: - - - 7.00 - - - - - ldnt1w { z31.d }, p7/z, [z31.d] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1w { z31.s }, p7/z, [z31.s, x0] +# CHECK-NEXT: - - - 9.00 - - - - - ldnt1w { z31.s }, p7/z, [z31.s] +# CHECK-NEXT: - - - 1.00 - - - - - ldr p0, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldr p5, [x10, #255, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldr p7, [x13, #-256, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldr z0, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - ldr z23, [x13, #255, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - ldr z31, [sp, #-256, mul vl] +# CHECK-NEXT: - - - - - 
- 1.00 - - lsl z0.b, p0/m, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.b, p0/m, z0.b, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.b, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.b, z1.b, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.d, p0/m, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.d, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.h, p0/m, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.h, p0/m, z0.h, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.h, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.h, z1.h, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.s, p0/m, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.s, p0/m, z0.s, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.s, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z0.s, z1.s, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - lsl z31.b, p0/m, z31.b, #7 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z31.b, z31.b, #7 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z31.d, p0/m, z31.d, #63 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z31.d, z31.d, #63 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z31.h, p0/m, z31.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z31.h, z31.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z31.s, p0/m, z31.s, #31 +# CHECK-NEXT: - - - - - - 1.00 - - lsl z31.s, z31.s, #31 +# CHECK-NEXT: - - - - - - 1.00 - - lslr z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - lslr z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - lslr z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - lslr z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.b, p0/m, z0.b, #1 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.b, p0/m, z0.b, z1.d +# CHECK-NEXT: - - 
- - - - 1.00 - - lsr z0.b, z0.b, #1 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.b, z1.b, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.d, p0/m, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.d, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.h, p0/m, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.h, p0/m, z0.h, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.h, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.h, z1.h, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.s, p0/m, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.s, p0/m, z0.s, z1.d +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.s, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z0.s, z1.s, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - lsr z31.b, p0/m, z31.b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z31.b, z31.b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z31.d, p0/m, z31.d, #64 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z31.d, z31.d, #64 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z31.h, p0/m, z31.h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z31.h, z31.h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z31.s, p0/m, z31.s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - lsr z31.s, z31.s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - lsrr z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - lsrr z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - lsrr z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - lsrr z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: - - - - - - - 1.00 - mad z0.b, p7/m, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - mad z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - mad z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - mad z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - match p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: - - - - - - 2.00 - - match p0.h, p0/z, z0.h, z0.h 
+# CHECK-NEXT: - - - - - - 2.00 - - match p15.b, p7/z, z30.b, z31.b +# CHECK-NEXT: - - - - - - 2.00 - - match p15.h, p7/z, z30.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - mla z0.b, p7/m, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - mla z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - mla z0.d, z1.d, z7.d[1] +# CHECK-NEXT: - - - - - - - 1.00 - mla z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - mla z0.h, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - mla z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - mla z0.s, z1.s, z7.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - mls z0.b, p7/m, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - mls z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - mls z0.d, z1.d, z7.d[1] +# CHECK-NEXT: - - - - - - - 1.00 - mls z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - mls z0.h, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - mls z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - mls z0.s, z1.s, z7.s[3] +# CHECK-NEXT: - - - - - - 1.00 - - mov p0.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - mov p0.b, p0/m, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - mov p0.b, p0/z, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - mov p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - mov p15.b, p15/m, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - mov p15.b, p15/z, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.b, #127 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.b, b0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.b, p0/m, b0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.b, p0/m, w0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.b, p0/z, #127 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.b, w0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.d, #0xe0000000000003ff +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.d, #0xffffffffffff7fff +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.d, #32768 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.d, d0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.d, p0/m, d0 
+# CHECK-NEXT: - - - - - - 1.00 - - mov z0.d, p0/m, x0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.d, x0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.h, #-256 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.h, #-32768 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.h, #32512 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.h, #32767 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.h, h0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.h, p0/m, h0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.h, p0/m, w0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.h, p0/z, #32512 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.h, w0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.q, q0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.s, #0xffff7fff +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.s, #32768 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.s, p0/m, s0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.s, p0/m, w0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.s, s0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z0.s, w0 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.d, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.d, #-32768 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.d, #127 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.d, #32512 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.d, p0/z, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.d, p0/z, #-32768 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.d, p0/z, #127 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.d, p0/z, #32512 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.d, p15/m, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.d, p15/m, #-32768 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.h, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.h, #-32768 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.h, #127 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.h, #32512 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.h, p0/z, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.h, p0/z, 
#-32768 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.h, p0/z, #127 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.h, p0/z, #32512 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.h, p15/m, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.h, p15/m, #-32768 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.s, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.s, #-32768 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.s, #127 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.s, #32512 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.s, p0/z, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.s, p0/z, #-32768 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.s, p0/z, #127 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.s, p0/z, #32512 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.s, p15/m, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - mov z21.s, p15/m, #-32768 +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.b, p15/m, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.b, p7/m, b31 +# CHECK-NEXT: - - - - - - 1.00 - - movprfx z31, z6 +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.b, p7/m, wsp +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.b, wsp +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.b, z31.b[63] +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.d, p15/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.d, p7/m, d31 +# CHECK-NEXT: - - - - - - 1.00 - - movprfx z31.d, p7/z, z6.d +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.d, p7/m, sp +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.d, sp +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.d, z31.d[7] +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.h, p15/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.h, p7/m, h31 +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.h, p7/m, wsp +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.h, wsp +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.h, z31.h[31] +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.s, p15/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.s, p7/m, s31 +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.s, 
p7/m, wsp +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.s, wsp +# CHECK-NEXT: - - - - - - 1.00 - - mov z31.s, z31.s[15] +# CHECK-NEXT: - - - - - - 1.00 - - mov z5.b, #-1 +# CHECK-NEXT: - - - - - - 1.00 - - mov z5.b, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - mov z5.b, #127 +# CHECK-NEXT: - - - - - - 1.00 - - mov z5.b, p0/z, #-1 +# CHECK-NEXT: - - - - - - 1.00 - - mov z5.b, p0/z, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - mov z5.b, p0/z, #127 +# CHECK-NEXT: - - - - - - 1.00 - - mov z5.b, p15/m, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - mov z5.d, #-6 +# CHECK-NEXT: - - - - - - 1.00 - - mov z5.h, #-6 +# CHECK-NEXT: - - - - - - 1.00 - - mov z5.q, z17.q[3] +# CHECK-NEXT: - - - - - - 1.00 - - mov z5.s, #-6 +# CHECK-NEXT: - - - - - - 1.00 - - movs p0.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - movs p0.b, p0/z, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - movs p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - movs p15.b, p15/z, p15.b +# CHECK-NEXT: - 1.00 - - - - - - - mrs x3, ID_AA64ZFR0_EL1 +# CHECK-NEXT: - 1.00 - - - - - - - mrs x3, ZCR_EL1 +# CHECK-NEXT: - 1.00 - - - - - - - mrs x3, ZCR_EL12 +# CHECK-NEXT: - 1.00 - - - - - - - mrs x3, ZCR_EL2 +# CHECK-NEXT: - 1.00 - - - - - - - mrs x3, ZCR_EL3 +# CHECK-NEXT: - - - - - - - 1.00 - msb z0.b, p7/m, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - msb z0.d, p7/m, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - msb z0.h, p7/m, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - msb z0.s, p7/m, z1.s, z31.s +# CHECK-NEXT: - 1.00 - - - - - - - msr ZCR_EL1, x3 +# CHECK-NEXT: - 1.00 - - - - - - - msr ZCR_EL12, x3 +# CHECK-NEXT: - 1.00 - - - - - - - msr ZCR_EL2, x3 +# CHECK-NEXT: - 1.00 - - - - - - - msr ZCR_EL3, x3 +# CHECK-NEXT: - - - - - - - 1.00 - mul z0.b, p7/m, z0.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - mul z0.b, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - mul z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - mul z0.d, z1.d, z15.d[1] +# CHECK-NEXT: - - - - - - - 1.00 - mul z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - 
- - 1.00 - mul z0.h, z1.h, z2.h +# CHECK-NEXT: - - - - - - - 1.00 - mul z0.h, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - mul z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - mul z0.s, z1.s, z7.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - mul z29.s, z30.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - mul z31.b, z31.b, #-128 +# CHECK-NEXT: - - - - - - - 1.00 - mul z31.b, z31.b, #127 +# CHECK-NEXT: - - - - - - - 1.00 - mul z31.d, z31.d, #-128 +# CHECK-NEXT: - - - - - - - 1.00 - mul z31.d, z31.d, #127 +# CHECK-NEXT: - - - - - - - 1.00 - mul z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - mul z31.h, z31.h, #-128 +# CHECK-NEXT: - - - - - - - 1.00 - mul z31.h, z31.h, #127 +# CHECK-NEXT: - - - - - - - 1.00 - mul z31.s, z31.s, #-128 +# CHECK-NEXT: - - - - - - - 1.00 - mul z31.s, z31.s, #127 +# CHECK-NEXT: - - - - - - 1.00 - - nand p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - nand p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - nands p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - nands p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - nbsl z0.d, z0.d, z1.d, z2.d +# CHECK-NEXT: - - - - - - 1.00 - - neg z0.b, p0/m, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - neg z0.d, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - neg z0.h, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - neg z0.s, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - neg z31.b, p7/m, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - neg z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - neg z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - neg z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - nmatch p0.b, p0/z, z0.b, z0.b +# CHECK-NEXT: - - - - - - 2.00 - - nmatch p0.h, p0/z, z0.h, z0.h +# CHECK-NEXT: - - - - - - 2.00 - - nmatch p15.b, p7/z, z30.b, z31.b +# CHECK-NEXT: - - - - - - 2.00 - - nmatch p15.h, p7/z, z30.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - nor p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - nor p15.b, p15/z, p15.b, 
p15.b +# CHECK-NEXT: - - - - - - 1.00 - - nors p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - nors p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - not p0.b, p0/z, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - not p15.b, p15/z, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - not z31.b, p7/m, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - not z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - not z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - not z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - nots p0.b, p0/z, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - nots p15.b, p15/z, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - orn p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - orn p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - orns p0.b, p0/z, p0.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - orns p15.b, p15/z, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - orr p0.b, p0/z, p0.b, p1.b +# CHECK-NEXT: - - - - - - 1.00 - - orr z0.d, z0.d, #0x6 +# CHECK-NEXT: - - - - - - 1.00 - - orr z0.d, z0.d, #0xfffffffffffffff9 +# CHECK-NEXT: - - - - - - 1.00 - - orr z0.s, z0.s, #0x6 +# CHECK-NEXT: - - - - - - 1.00 - - orr z0.s, z0.s, #0xfffffff9 +# CHECK-NEXT: - - - - - - 1.00 - - orr z23.d, z13.d, z8.d +# CHECK-NEXT: - - - - - - 1.00 - - orr z23.h, z23.h, #0x6 +# CHECK-NEXT: - - - - - - 1.00 - - orr z23.h, z23.h, #0xfff9 +# CHECK-NEXT: - - - - - - 1.00 - - orr z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - orr z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - orr z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - orr z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - orr z5.b, z5.b, #0x6 +# CHECK-NEXT: - - - - - - 1.00 - - orr z5.b, z5.b, #0xf9 +# CHECK-NEXT: - - - - - - 1.00 - - orrs p0.b, p0/z, p0.b, p1.b +# CHECK-NEXT: - - - - - - 1.00 - - orv b0, p7, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - orv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - orv h0, p7, z31.h +# CHECK-NEXT: - - - - - - 
1.00 - - orv s0, p7, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - pfalse p15.b +# CHECK-NEXT: - - - - - - 1.00 - - pfirst p0.b, p15, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - pfirst p15.b, p15, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - pmul z0.b, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - pmul z29.b, z30.b, z31.b +# CHECK-NEXT: - - - - - - - - 1.00 pmullb z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - - - 1.00 pmullb z29.q, z30.d, z31.d +# CHECK-NEXT: - - - - - - - - 1.00 pmullb z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - - - 1.00 pmullt z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - - - 1.00 pmullt z29.q, z30.d, z31.d +# CHECK-NEXT: - - - - - - - - 1.00 pmullt z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - pnext p0.b, p15, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - pnext p0.d, p15, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - pnext p0.h, p15, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - pnext p0.s, p15, p0.s +# CHECK-NEXT: - - - - - - 1.00 - - pnext p15.b, p15, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - prfb #14, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb #15, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb #6, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb #7, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb #7, p3, [z13.s, #31] +# CHECK-NEXT: - - - - - - 1.00 - - prfb #7, p3, [z13.s] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl1keep, p0, [x0, z0.d, uxtw] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl1keep, p0, [x0, z0.d] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl1keep, p0, [x0, z0.s, uxtw] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl1keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl1strm, p0, [x0, #-32, mul vl] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl1strm, p0, [x0, #31, mul vl] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl1strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl2keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl2strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl3keep, p0, [x0] +# CHECK-NEXT: - 
- - - - - 1.00 - - prfb pldl3strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl3strm, p5, [x10, z21.d, sxtw] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl3strm, p5, [x10, z21.s, uxtw] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl3strm, p5, [z10.d, #31] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pldl3strm, p5, [z10.d] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pstl1keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pstl1strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pstl2keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pstl2strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pstl3keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfb pstl3strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd #14, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd #15, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd #15, p7, [z31.d, #248] +# CHECK-NEXT: - - - - - - 1.00 - - prfd #15, p7, [z31.d] +# CHECK-NEXT: - - - - - - 1.00 - - prfd #15, p7, [z31.s, #248] +# CHECK-NEXT: - - - - - - 1.00 - - prfd #15, p7, [z31.s] +# CHECK-NEXT: - - - - - - 1.00 - - prfd #6, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd #7, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pldl1keep, p0, [x0, z0.d, lsl #3] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pldl1keep, p0, [x0, z0.d, sxtw #3] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pldl1keep, p0, [x0, z0.d, uxtw #3] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pldl1keep, p0, [x0, z0.s, sxtw #3] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pldl1keep, p0, [x0, z0.s, uxtw #3] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pldl1keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pldl1strm, p0, [x0, #-32, mul vl] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pldl1strm, p0, [x0, #31, mul vl] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pldl1strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pldl2keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pldl2strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pldl3keep, p0, [x0] +# 
CHECK-NEXT: - - - - - - 1.00 - - prfd pldl3strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pstl1keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pstl1strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pstl2keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pstl2strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pstl3keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfd pstl3strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh #14, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh #15, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh #15, p7, [z31.d, #62] +# CHECK-NEXT: - - - - - - 1.00 - - prfh #15, p7, [z31.d] +# CHECK-NEXT: - - - - - - 1.00 - - prfh #15, p7, [z31.s, #62] +# CHECK-NEXT: - - - - - - 1.00 - - prfh #15, p7, [z31.s] +# CHECK-NEXT: - - - - - - 1.00 - - prfh #6, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh #7, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl1keep, p0, [x0, z0.d, lsl #1] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl1keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl1strm, p0, [x0, #-32, mul vl] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl1strm, p0, [x0, #31, mul vl] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl1strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl2keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl2strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl3keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl3strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl3strm, p5, [x10, z21.d, sxtw #1] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl3strm, p5, [x10, z21.d, uxtw #1] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl3strm, p5, [x10, z21.s, sxtw #1] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pldl3strm, p5, [x10, z21.s, uxtw #1] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pstl1keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pstl1strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pstl2keep, p0, [x0] +# CHECK-NEXT: - - - - - - 
1.00 - - prfh pstl2strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pstl3keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfh pstl3strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw #14, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw #15, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw #15, p7, [z31.d, #124] +# CHECK-NEXT: - - - - - - 1.00 - - prfw #15, p7, [z31.d] +# CHECK-NEXT: - - - - - - 1.00 - - prfw #15, p7, [z31.s, #124] +# CHECK-NEXT: - - - - - - 1.00 - - prfw #15, p7, [z31.s] +# CHECK-NEXT: - - - - - - 1.00 - - prfw #6, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw #7, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw #7, p3, [x13, z8.d, uxtw #2] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pldl1keep, p0, [x0, z0.d, sxtw #2] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pldl1keep, p0, [x0, z0.s, uxtw #2] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pldl1keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pldl1strm, p0, [x0, #-32, mul vl] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pldl1strm, p0, [x0, #31, mul vl] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pldl1strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pldl2keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pldl2strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pldl3keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pldl3strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pldl3strm, p5, [x10, z21.d, lsl #2] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pldl3strm, p5, [x10, z21.s, sxtw #2] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pstl1keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pstl1strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pstl2keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pstl2strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pstl3keep, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - prfw pstl3strm, p0, [x0] +# CHECK-NEXT: - - - - - - 1.00 - - ptest p15, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - ptest p15, p15.b +# CHECK-NEXT: - - 
- - - - 1.00 - - ptrue p0.b, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p0.d, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p0.h, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p0.s, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p15.b +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p15.d +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p15.h +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p15.s +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #14 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #15 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #17 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #18 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #19 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #20 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #21 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #22 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #23 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #24 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #25 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #26 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #27 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, #28 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, mul3 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, mul4 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, vl128 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, vl16 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, vl2 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, vl256 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, vl3 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, vl32 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, vl4 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, vl5 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, vl6 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, vl64 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, vl7 +# CHECK-NEXT: - - - - - - 1.00 - - ptrue p7.s, 
vl8 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p0.b, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p0.d, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p0.h, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p0.s, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p15.b +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p15.d +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p15.h +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p15.s +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #14 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #15 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #17 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #18 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #19 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #20 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #21 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #22 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #23 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #24 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #25 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #26 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #27 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, #28 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, mul3 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, mul4 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, vl128 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, vl16 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, vl2 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, vl256 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, vl3 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, vl32 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, vl4 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, vl5 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, vl6 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, vl64 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues 
p7.s, vl7 +# CHECK-NEXT: - - - - - - 1.00 - - ptrues p7.s, vl8 +# CHECK-NEXT: - - - - - - 1.00 - - punpkhi p0.h, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - punpkhi p15.h, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - punpklo p0.h, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - punpklo p15.h, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - raddhnb z0.b, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - raddhnb z0.h, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - raddhnb z0.s, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - raddhnt z0.b, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - raddhnt z0.h, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - raddhnt z0.s, z1.d, z31.d +# CHECK-NEXT: - - - - - - - - 1.00 rax1 z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - rbit z0.b, p7/m, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - rbit z0.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - rbit z0.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - rbit z0.s, p7/m, z31.s +# CHECK-NEXT: 1.00 - - - - - - - - rdffr p0.b +# CHECK-NEXT: 1.00 - - - - - - - - rdffr p0.b, p0/z +# CHECK-NEXT: 1.00 - - - - - - - - rdffr p15.b +# CHECK-NEXT: 1.00 - - - - - - - - rdffr p15.b, p15/z +# CHECK-NEXT: 1.00 - - - - - - - - rdffrs p0.b, p0/z +# CHECK-NEXT: 1.00 - - - - - - - - rdffrs p15.b, p15/z +# CHECK-NEXT: 1.00 - - - - - - - - rdvl x0, #0 +# CHECK-NEXT: 1.00 - - - - - - - - rdvl x21, #-32 +# CHECK-NEXT: 1.00 - - - - - - - - rdvl x23, #31 +# CHECK-NEXT: 1.00 - - - - - - - - rdvl xzr, #-1 +# CHECK-NEXT: - - - - - - 1.00 - - rev z0.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - rev z0.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - rev z0.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - rev z0.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - revb z0.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - revb z0.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - revb z0.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - revh z0.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - revh z0.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 
1.00 - - revw z0.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - rshrnb z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - rshrnb z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - rshrnb z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - rshrnb z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - rshrnb z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - rshrnb z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - rshrnt z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - rshrnt z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - rshrnt z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - rshrnt z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - rshrnt z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - rshrnt z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - rsubhnb z0.b, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - rsubhnb z0.h, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - rsubhnb z0.s, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - rsubhnt z0.b, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - rsubhnt z0.h, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - rsubhnt z0.s, z1.d, z31.d +# CHECK-NEXT: - - - - - - 2.00 - - saba z0.b, z1.b, z31.b +# CHECK-NEXT: - - - - - - 2.00 - - saba z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - 2.00 - - saba z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - 2.00 - - saba z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - sabalb z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - sabalb z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - 2.00 - - sabalb z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - 2.00 - - sabalt z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - sabalt z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - 2.00 - - sabalt z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sabd z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - sabd z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sabd z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sabd z31.s, p7/m, z31.s, z31.s +# 
CHECK-NEXT: - - - - - - 1.00 - - sabdlb z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - sabdlb z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sabdlb z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sabdlt z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - sabdlt z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sabdlt z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - sadalp z0.h, p0/m, z1.b +# CHECK-NEXT: - - - - - - 2.00 - - sadalp z29.s, p0/m, z30.h +# CHECK-NEXT: - - - - - - 2.00 - - sadalp z30.d, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - saddlb z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - saddlb z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - saddlb z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - saddlbt z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - saddlbt z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - saddlbt z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - saddlt z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - saddlt z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - saddlt z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - saddv d0, p7, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - saddv d0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - saddv d0, p7, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - saddwb z0.h, z1.h, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - saddwb z29.s, z30.s, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - saddwb z31.d, z31.d, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - saddwt z0.h, z1.h, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - saddwt z29.s, z30.s, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - saddwt z31.d, z31.d, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sbclb z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sbclb z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sbclt z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sbclt z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - scvtf z0.d, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - scvtf z0.d, 
p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - scvtf z0.h, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - scvtf z0.h, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - scvtf z0.h, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - scvtf z0.s, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - scvtf z0.s, p0/m, z0.s +# CHECK-NEXT: - - - - - - - - 23.00 sdiv z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - - - 12.00 sdiv z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - - - 23.00 sdivr z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - - - 12.00 sdivr z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - sdot z0.d, z1.h, z15.h[1] +# CHECK-NEXT: - - - - - - - 1.00 - sdot z0.d, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - sdot z0.s, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - sdot z0.s, z1.b, z7.b[3] +# CHECK-NEXT: - - - - - - 1.00 - - sel z23.b, p11, z13.b, z8.b +# CHECK-NEXT: - - - - - - 1.00 - - sel z23.d, p11, z13.d, z8.d +# CHECK-NEXT: - - - - - - 1.00 - - sel z23.h, p11, z13.h, z8.h +# CHECK-NEXT: - - - - - - 1.00 - - sel z23.s, p11, z13.s, z8.s +# CHECK-NEXT: 1.00 - - - - - - - - setffr +# CHECK-NEXT: - - - - - - 1.00 - - shadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - shadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - shadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - shadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - shrnb z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - shrnb z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - shrnb z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - shrnb z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - shrnb z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - shrnb z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - shrnt z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - shrnt z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - shrnt z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - shrnt z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - shrnt 
z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - shrnt z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - shsub z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - shsub z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - shsub z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - shsub z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - shsubr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - shsubr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - shsubr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - shsubr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - sli z0.b, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sli z0.d, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sli z0.h, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sli z0.s, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sli z31.b, z31.b, #7 +# CHECK-NEXT: - - - - - - 1.00 - - sli z31.d, z31.d, #63 +# CHECK-NEXT: - - - - - - 1.00 - - sli z31.h, z31.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - sli z31.s, z31.s, #31 +# CHECK-NEXT: - - - - - - - - 1.00 sm4e z0.s, z0.s, z31.s +# CHECK-NEXT: - - - - - - - - 1.00 sm4ekey z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - smax z0.b, z0.b, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - smax z0.d, z0.d, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - smax z0.h, z0.h, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - smax z0.s, z0.s, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - smax z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - smax z31.b, z31.b, #127 +# CHECK-NEXT: - - - - - - 1.00 - - smax z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - smax z31.d, z31.d, #127 +# CHECK-NEXT: - - - - - - 1.00 - - smax z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - smax z31.h, z31.h, #127 +# CHECK-NEXT: - - - - - - 1.00 - - smax z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - smax z31.s, z31.s, #127 +# CHECK-NEXT: - - - - - - 1.00 - - smaxp z0.b, p0/m, z0.b, z1.b +# 
CHECK-NEXT: - - - - - - 1.00 - - smaxp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - smaxp z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - smaxp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - smaxv b0, p7, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - smaxv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - smaxv h0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - smaxv s0, p7, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - smin z0.b, z0.b, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - smin z0.d, z0.d, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - smin z0.h, z0.h, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - smin z0.s, z0.s, #-128 +# CHECK-NEXT: - - - - - - 1.00 - - smin z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - smin z31.b, z31.b, #127 +# CHECK-NEXT: - - - - - - 1.00 - - smin z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - smin z31.d, z31.d, #127 +# CHECK-NEXT: - - - - - - 1.00 - - smin z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - smin z31.h, z31.h, #127 +# CHECK-NEXT: - - - - - - 1.00 - - smin z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - smin z31.s, z31.s, #127 +# CHECK-NEXT: - - - - - - 1.00 - - sminp z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - sminp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - sminp z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - sminp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - sminv b0, p7, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - sminv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sminv h0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sminv s0, p7, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - smlalb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - smlalb z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - smlalb z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - smlalb z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - smlalb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 
- smlalt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - smlalt z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - smlalt z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - smlalt z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - smlalt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - smlslb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - smlslb z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - smlslb z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - smlslb z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - smlslb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - smlslt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - smlslt z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - smlslt z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - smlslt z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - smlslt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - smmla z0.s, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - smulh z0.b, p7/m, z0.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - smulh z0.b, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - smulh z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - smulh z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - smulh z0.h, z1.h, z2.h +# CHECK-NEXT: - - - - - - - 1.00 - smulh z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - smulh z29.s, z30.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - smulh z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - smullb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - smullb z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - smullb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - smullb z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - smullb z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - smullt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - smullt z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - smullt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - 
smullt z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - smullt z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - splice z29.b, p7, { z30.b, z31.b } +# CHECK-NEXT: - - - - - - 1.00 - - splice z29.d, p7, { z30.d, z31.d } +# CHECK-NEXT: - - - - - - 1.00 - - splice z29.h, p7, { z30.h, z31.h } +# CHECK-NEXT: - - - - - - 1.00 - - splice z29.s, p7, { z30.s, z31.s } +# CHECK-NEXT: - - - - - - 1.00 - - splice z31.b, p7, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - splice z31.d, p7, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - splice z31.h, p7, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - splice z31.s, p7, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sqabs z31.b, p7/m, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - sqabs z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sqabs z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sqabs z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.b, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.b, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.d, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.d, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.h, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.h, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.s, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z0.s, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z31.b, z31.b, #255 +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z31.d, z31.d, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - sqadd z31.h, z31.h, #65280 
+# CHECK-NEXT: - - - - - - 1.00 - - sqadd z31.s, z31.s, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - sqcadd z0.b, z0.b, z0.b, #90 +# CHECK-NEXT: - - - - - - 1.00 - - sqcadd z0.d, z0.d, z0.d, #90 +# CHECK-NEXT: - - - - - - 1.00 - - sqcadd z0.h, z0.h, z0.h, #90 +# CHECK-NEXT: - - - - - - 1.00 - - sqcadd z0.s, z0.s, z0.s, #90 +# CHECK-NEXT: - - - - - - 1.00 - - sqcadd z31.b, z31.b, z31.b, #270 +# CHECK-NEXT: - - - - - - 1.00 - - sqcadd z31.d, z31.d, z31.d, #270 +# CHECK-NEXT: - - - - - - 1.00 - - sqcadd z31.h, z31.h, z31.h, #270 +# CHECK-NEXT: - - - - - - 1.00 - - sqcadd z31.s, z31.s, z31.s, #270 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecb x0 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecb x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecb x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecb x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecb x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecb x0, w0 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecb x0, w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecb x0, w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecb x0, w0, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecd x0 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecd x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecd x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecd x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecd x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecd x0, w0 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecd x0, w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecd x0, w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecd x0, w0, pow2, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqdecd z0.d +# CHECK-NEXT: - - - - - - 1.00 - - sqdecd z0.d, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqdecd z0.d, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - sqdecd z0.d, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqdech x0 +# CHECK-NEXT: 1.00 - - - - - - - - sqdech x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - sqdech x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - 
- - - - - sqdech x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqdech x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - sqdech x0, w0 +# CHECK-NEXT: 1.00 - - - - - - - - sqdech x0, w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqdech x0, w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqdech x0, w0, pow2, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqdech z0.h +# CHECK-NEXT: - - - - - - 1.00 - - sqdech z0.h, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqdech z0.h, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - sqdech z0.h, pow2, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqdecp x0, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - sqdecp x0, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - sqdecp x0, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - sqdecp x0, p0.s +# CHECK-NEXT: - - - - - - 1.00 - - sqdecp xzr, p15.b, wzr +# CHECK-NEXT: - - - - - - 1.00 - - sqdecp xzr, p15.d, wzr +# CHECK-NEXT: - - - - - - 1.00 - - sqdecp xzr, p15.h, wzr +# CHECK-NEXT: - - - - - - 1.00 - - sqdecp xzr, p15.s, wzr +# CHECK-NEXT: - - - - - - 1.00 - - sqdecp z0.d, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - sqdecp z0.h, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - sqdecp z0.s, p0.s +# CHECK-NEXT: 1.00 - - - - - - - - sqdecw x0 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecw x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecw x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecw x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecw x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecw x0, w0 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecw x0, w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecw x0, w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqdecw x0, w0, pow2, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqdecw z0.s +# CHECK-NEXT: - - - - - - 1.00 - - sqdecw z0.s, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqdecw z0.s, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - sqdecw z0.s, pow2, mul #16 +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlalb z0.d, z1.s, z15.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlalb z0.d, z1.s, z31.s +# 
CHECK-NEXT: - - - - - - - 1.00 - sqdmlalb z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlalb z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlalb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlalbt z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlalbt z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlalbt z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlalt z0.d, z1.s, z15.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlalt z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlalt z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlalt z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlalt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslb z0.d, z1.s, z15.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslb z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslb z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslb z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslbt z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslbt z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslbt z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslt z0.d, z1.s, z15.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslt z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslt z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslt z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - sqdmlslt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmulh z0.b, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - sqdmulh z0.d, z1.d, z15.d[1] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmulh z0.h, z1.h, z2.h +# CHECK-NEXT: - - - - - - - 1.00 - sqdmulh z0.h, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmulh z0.s, z1.s, z7.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmulh z29.s, z30.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - sqdmulh z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - 
- - 1.00 - sqdmullb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmullb z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - sqdmullb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmullb z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - sqdmullb z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - sqdmullt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmullt z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - sqdmullt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - sqdmullt z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - sqdmullt z31.d, z31.s, z31.s +# CHECK-NEXT: 1.00 - - - - - - - - sqincb x0 +# CHECK-NEXT: 1.00 - - - - - - - - sqincb x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - sqincb x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqincb x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqincb x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - sqincb x0, w0 +# CHECK-NEXT: 1.00 - - - - - - - - sqincb x0, w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqincb x0, w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqincb x0, w0, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqincd x0 +# CHECK-NEXT: 1.00 - - - - - - - - sqincd x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - sqincd x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqincd x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqincd x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - sqincd x0, w0 +# CHECK-NEXT: 1.00 - - - - - - - - sqincd x0, w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqincd x0, w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqincd x0, w0, pow2, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqincd z0.d +# CHECK-NEXT: - - - - - - 1.00 - - sqincd z0.d, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqincd z0.d, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - sqincd z0.d, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqinch x0 +# CHECK-NEXT: 1.00 - - - - - - - - sqinch x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - sqinch x0, all, mul #16 +# 
CHECK-NEXT: 1.00 - - - - - - - - sqinch x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqinch x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - sqinch x0, w0 +# CHECK-NEXT: 1.00 - - - - - - - - sqinch x0, w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqinch x0, w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqinch x0, w0, pow2, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqinch z0.h +# CHECK-NEXT: - - - - - - 1.00 - - sqinch z0.h, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqinch z0.h, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - sqinch z0.h, pow2, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqincp x0, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - sqincp x0, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - sqincp x0, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - sqincp x0, p0.s +# CHECK-NEXT: - - - - - - 1.00 - - sqincp xzr, p15.b, wzr +# CHECK-NEXT: - - - - - - 1.00 - - sqincp xzr, p15.d, wzr +# CHECK-NEXT: - - - - - - 1.00 - - sqincp xzr, p15.h, wzr +# CHECK-NEXT: - - - - - - 1.00 - - sqincp xzr, p15.s, wzr +# CHECK-NEXT: - - - - - - 1.00 - - sqincp z0.d, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - sqincp z0.h, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - sqincp z0.s, p0.s +# CHECK-NEXT: 1.00 - - - - - - - - sqincw x0 +# CHECK-NEXT: 1.00 - - - - - - - - sqincw x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - sqincw x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqincw x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqincw x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - sqincw x0, w0 +# CHECK-NEXT: 1.00 - - - - - - - - sqincw x0, w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - sqincw x0, w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - sqincw x0, w0, pow2, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqincw z0.s +# CHECK-NEXT: - - - - - - 1.00 - - sqincw z0.s, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqincw z0.s, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - sqincw z0.s, pow2, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqneg z31.b, p7/m, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - sqneg z31.d, 
p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sqneg z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sqneg z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z0.b, z1.b, z2.b, #0 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z0.d, z1.d, z2.d, #0 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z0.h, z1.h, z2.h, #0 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z0.h, z1.h, z2.h[0], #0 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z0.s, z1.s, z2.s, #0 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z0.s, z1.s, z2.s[0], #0 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z15.b, z16.b, z17.b, #270 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z15.d, z16.d, z17.d, #270 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z15.h, z16.h, z17.h, #270 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z15.s, z16.s, z17.s, #270 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z29.b, z30.b, z31.b, #90 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z29.d, z30.d, z31.d, #90 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z29.h, z30.h, z31.h, #90 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z29.s, z30.s, z31.s, #90 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z31.b, z31.b, z31.b, #180 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z31.d, z31.d, z31.d, #180 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z31.h, z30.h, z7.h[0], #180 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z31.h, z31.h, z31.h, #180 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z31.s, z30.s, z7.s[0], #180 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdcmlah z31.s, z31.s, z31.s, #180 +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlah z0.b, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlah z0.d, z1.d, z15.d[1] +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlah z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlah z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlah z0.h, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlah z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlah z0.s, z1.s, z7.s[3] 
+# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlsh z0.b, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlsh z0.d, z1.d, z15.d[1] +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlsh z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlsh z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlsh z0.h, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlsh z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmlsh z0.s, z1.s, z7.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmulh z0.b, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmulh z0.d, z1.d, z15.d[1] +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmulh z0.h, z1.h, z2.h +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmulh z0.h, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmulh z0.s, z1.s, z7.s[3] +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmulh z29.s, z30.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - sqrdmulh z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sqrshl z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - sqrshl z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - sqrshl z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - sqrshl z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - sqrshlr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - sqrshlr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - sqrshlr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - sqrshlr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrnb z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrnb z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrnb z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrnb z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrnb z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrnb z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrnt z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrnt z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrnt z0.s, z0.d, #1 +# 
CHECK-NEXT: - - - - - - 1.00 - - sqrshrnt z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrnt z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrnt z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrunb z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrunb z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrunb z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrunb z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrunb z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrunb z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrunt z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrunt z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrunt z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrunt z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrunt z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqrshrunt z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl z0.b, p0/m, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - sqshl z0.d, p0/m, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl z0.h, p0/m, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - sqshl z0.s, p0/m, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - sqshl z31.b, p0/m, z31.b, #7 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl z31.d, p0/m, z31.d, #63 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - sqshl z31.h, p0/m, z31.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - sqshl z31.s, p0/m, z31.s, #31 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - sqshlr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - sqshlr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - sqshlr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: 
- - - - - - 1.00 - - sqshlu z0.b, p0/m, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu z0.d, p0/m, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu z0.h, p0/m, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu z0.s, p0/m, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu z31.b, p0/m, z31.b, #7 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu z31.d, p0/m, z31.d, #63 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu z31.h, p0/m, z31.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - sqshlu z31.s, p0/m, z31.s, #31 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrnb z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrnb z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrnb z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrnb z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrnb z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrnb z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrnt z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrnt z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrnt z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrnt z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrnt z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrnt z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrunb z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrunb z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrunb z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrunb z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrunb z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrunb z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrunt z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrunt z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrunt z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrunt z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrunt z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - sqshrunt z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 
- - sqsub z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z0.b, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z0.b, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z0.d, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z0.d, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z0.h, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z0.h, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z0.s, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z0.s, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z31.b, z31.b, #255 +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z31.d, z31.d, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z31.h, z31.h, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - sqsub z31.s, z31.s, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - sqsubr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - sqsubr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - sqsubr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - sqsubr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - sqxtnb z0.b, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sqxtnb z0.h, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sqxtnb z0.s, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sqxtnt z0.b, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sqxtnt z0.h, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sqxtnt z0.s, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sqxtunb z0.b, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sqxtunb z0.h, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sqxtunb z0.s, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sqxtunt z0.b, z31.h +# CHECK-NEXT: - - - - - - 1.00 
- - sqxtunt z0.h, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sqxtunt z0.s, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - srhadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - srhadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - srhadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - srhadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - sri z0.b, z0.b, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sri z0.d, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sri z0.h, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sri z0.s, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - sri z31.b, z31.b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - sri z31.d, z31.d, #64 +# CHECK-NEXT: - - - - - - 1.00 - - sri z31.h, z31.h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - sri z31.s, z31.s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - srshl z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - srshl z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - srshl z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - srshl z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - srshlr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - srshlr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - srshlr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - srshlr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - srshr z0.b, p0/m, z0.b, #1 +# CHECK-NEXT: - - - - - - 1.00 - - srshr z0.d, p0/m, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - srshr z0.h, p0/m, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - srshr z0.s, p0/m, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - srshr z31.b, p0/m, z31.b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - srshr z31.d, p0/m, z31.d, #64 +# CHECK-NEXT: - - - - - - 1.00 - - srshr z31.h, p0/m, z31.h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - srshr z31.s, p0/m, z31.s, #32 +# CHECK-NEXT: - - - - - - 2.00 - - srsra z0.b, z0.b, #1 +# CHECK-NEXT: - - - - - - 2.00 - - srsra z0.d, z0.d, #1 +# CHECK-NEXT: - - - - - - 2.00 - 
- srsra z0.h, z0.h, #1 +# CHECK-NEXT: - - - - - - 2.00 - - srsra z0.s, z0.s, #1 +# CHECK-NEXT: - - - - - - 2.00 - - srsra z31.b, z31.b, #8 +# CHECK-NEXT: - - - - - - 2.00 - - srsra z31.d, z31.d, #64 +# CHECK-NEXT: - - - - - - 2.00 - - srsra z31.h, z31.h, #16 +# CHECK-NEXT: - - - - - - 2.00 - - srsra z31.s, z31.s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - sshllb z0.d, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sshllb z0.h, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sshllb z0.s, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sshllb z31.d, z31.s, #31 +# CHECK-NEXT: - - - - - - 1.00 - - sshllb z31.h, z31.b, #7 +# CHECK-NEXT: - - - - - - 1.00 - - sshllb z31.s, z31.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - sshllt z0.d, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sshllt z0.h, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sshllt z0.s, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sshllt z31.d, z31.s, #31 +# CHECK-NEXT: - - - - - - 1.00 - - sshllt z31.h, z31.b, #7 +# CHECK-NEXT: - - - - - - 1.00 - - sshllt z31.s, z31.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - ssra z0.b, z0.b, #1 +# CHECK-NEXT: - - - - - - 1.00 - - ssra z0.d, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - ssra z0.h, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - ssra z0.s, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - ssra z31.b, z31.b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - ssra z31.d, z31.d, #64 +# CHECK-NEXT: - - - - - - 1.00 - - ssra z31.h, z31.h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - ssra z31.s, z31.s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - ssublb z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - ssublb z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - ssublb z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - ssublbt z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - ssublbt z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - ssublbt z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - ssublt z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - ssublt z29.s, z30.h, z31.h +# CHECK-NEXT: 
- - - - - - 1.00 - - ssublt z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - ssubltb z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - ssubltb z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - ssubltb z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - ssubwb z0.h, z1.h, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - ssubwb z29.s, z30.s, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - ssubwb z31.d, z31.d, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - ssubwt z0.h, z1.h, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - ssubwt z29.s, z30.s, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - ssubwt z31.d, z31.d, z31.s +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z0.b }, p0, [x0, x0] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z0.b }, p0, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z0.d }, p0, [x0, x0] +# CHECK-NEXT: - - - 8.00 - - - - - st1b { z0.d }, p0, [x0, z0.d, sxtw] +# CHECK-NEXT: - - - 8.00 - - - - - st1b { z0.d }, p0, [x0, z0.d, uxtw] +# CHECK-NEXT: - - - 8.00 - - - - - st1b { z0.d }, p0, [x0, z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z0.d }, p0, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - st1b { z0.d }, p7, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z0.h }, p0, [x0, x0] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z0.h }, p0, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z0.s }, p0, [x0, x0] +# CHECK-NEXT: - - - 8.00 - - - - - st1b { z0.s }, p0, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 8.00 - - - - - st1b { z0.s }, p0, [x0, z0.s, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z0.s }, p0, [x0] +# CHECK-NEXT: - - - 9.00 - - - - - st1b { z0.s }, p7, [z0.s] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z21.b }, p5, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z21.d }, p5, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z21.h }, p5, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z21.s }, p5, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z31.b }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z31.d 
}, p7, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - st1b { z31.d }, p7, [z31.d, #31] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z31.h }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1b { z31.s }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 9.00 - - - - - st1b { z31.s }, p7, [z31.s, #31] +# CHECK-NEXT: - - - 1.00 - - - - - st1d { z0.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: - - - 8.00 - - - - - st1d { z0.d }, p0, [x0, z0.d, lsl #3] +# CHECK-NEXT: - - - 8.00 - - - - - st1d { z0.d }, p0, [x0, z0.d, sxtw #3] +# CHECK-NEXT: - - - 8.00 - - - - - st1d { z0.d }, p0, [x0, z0.d, sxtw] +# CHECK-NEXT: - - - 8.00 - - - - - st1d { z0.d }, p0, [x0, z0.d, uxtw #3] +# CHECK-NEXT: - - - 8.00 - - - - - st1d { z0.d }, p0, [x0, z0.d, uxtw] +# CHECK-NEXT: - - - 8.00 - - - - - st1d { z0.d }, p0, [x0, z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - st1d { z0.d }, p0, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - st1d { z0.d }, p7, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - st1d { z21.d }, p5, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1d { z31.d }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - st1d { z31.d }, p7, [z31.d, #248] +# CHECK-NEXT: - - - 1.00 - - - - - st1h { z0.d }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 8.00 - - - - - st1h { z0.d }, p0, [x0, z0.d, lsl #1] +# CHECK-NEXT: - - - 8.00 - - - - - st1h { z0.d }, p0, [x0, z0.d, sxtw #1] +# CHECK-NEXT: - - - 8.00 - - - - - st1h { z0.d }, p0, [x0, z0.d, sxtw] +# CHECK-NEXT: - - - 8.00 - - - - - st1h { z0.d }, p0, [x0, z0.d, uxtw #1] +# CHECK-NEXT: - - - 8.00 - - - - - st1h { z0.d }, p0, [x0, z0.d, uxtw] +# CHECK-NEXT: - - - 8.00 - - - - - st1h { z0.d }, p0, [x0, z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - st1h { z0.d }, p0, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - st1h { z0.d }, p7, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - st1h { z0.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - st1h { z0.h }, p0, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - st1h { z0.s }, p0, [x0, x0, lsl #1] +# 
CHECK-NEXT: - - - 8.00 - - - - - st1h { z0.s }, p0, [x0, z0.s, sxtw #1] +# CHECK-NEXT: - - - 8.00 - - - - - st1h { z0.s }, p0, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 8.00 - - - - - st1h { z0.s }, p0, [x0, z0.s, uxtw #1] +# CHECK-NEXT: - - - 8.00 - - - - - st1h { z0.s }, p0, [x0, z0.s, uxtw] +# CHECK-NEXT: - - - 1.00 - - - - - st1h { z0.s }, p0, [x0] +# CHECK-NEXT: - - - 9.00 - - - - - st1h { z0.s }, p7, [z0.s] +# CHECK-NEXT: - - - 1.00 - - - - - st1h { z21.d }, p5, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1h { z21.h }, p5, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1h { z21.s }, p5, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1h { z31.d }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - st1h { z31.d }, p7, [z31.d, #62] +# CHECK-NEXT: - - - 1.00 - - - - - st1h { z31.h }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1h { z31.s }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 9.00 - - - - - st1h { z31.s }, p7, [z31.s, #62] +# CHECK-NEXT: - - - 1.00 - - - - - st1w { z0.d }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 8.00 - - - - - st1w { z0.d }, p0, [x0, z0.d, lsl #2] +# CHECK-NEXT: - - - 8.00 - - - - - st1w { z0.d }, p0, [x0, z0.d, sxtw #2] +# CHECK-NEXT: - - - 8.00 - - - - - st1w { z0.d }, p0, [x0, z0.d, sxtw] +# CHECK-NEXT: - - - 8.00 - - - - - st1w { z0.d }, p0, [x0, z0.d, uxtw #2] +# CHECK-NEXT: - - - 8.00 - - - - - st1w { z0.d }, p0, [x0, z0.d, uxtw] +# CHECK-NEXT: - - - 8.00 - - - - - st1w { z0.d }, p0, [x0, z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - st1w { z0.d }, p0, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - st1w { z0.d }, p7, [z0.d] +# CHECK-NEXT: - - - 1.00 - - - - - st1w { z0.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 8.00 - - - - - st1w { z0.s }, p0, [x0, z0.s, sxtw #2] +# CHECK-NEXT: - - - 8.00 - - - - - st1w { z0.s }, p0, [x0, z0.s, sxtw] +# CHECK-NEXT: - - - 8.00 - - - - - st1w { z0.s }, p0, [x0, z0.s, uxtw #2] +# CHECK-NEXT: - - - 8.00 - - - - - st1w { z0.s }, p0, [x0, z0.s, uxtw] +# 
CHECK-NEXT: - - - 1.00 - - - - - st1w { z0.s }, p0, [x0] +# CHECK-NEXT: - - - 9.00 - - - - - st1w { z0.s }, p7, [z0.s] +# CHECK-NEXT: - - - 1.00 - - - - - st1w { z21.d }, p5, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1w { z21.s }, p5, [x10, #5, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - st1w { z31.d }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - st1w { z31.d }, p7, [z31.d, #124] +# CHECK-NEXT: - - - 1.00 - - - - - st1w { z31.s }, p7, [sp, #-1, mul vl] +# CHECK-NEXT: - - - 9.00 - - - - - st1w { z31.s }, p7, [z31.s, #124] +# CHECK-NEXT: - - - 11.00 - - - - - st2b { z0.b, z1.b }, p0, [x0, x0] +# CHECK-NEXT: - - - 11.00 - - - - - st2b { z0.b, z1.b }, p0, [x0] +# CHECK-NEXT: - - - 11.00 - - - - - st2b { z21.b, z22.b }, p5, [x10, #10, mul vl] +# CHECK-NEXT: - - - 11.00 - - - - - st2b { z23.b, z24.b }, p3, [x13, #-16, mul vl] +# CHECK-NEXT: - - - 11.00 - - - - - st2b { z5.b, z6.b }, p3, [x17, x16] +# CHECK-NEXT: - - - 11.00 - - - - - st2d { z0.d, z1.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: - - - 11.00 - - - - - st2d { z0.d, z1.d }, p0, [x0] +# CHECK-NEXT: - - - 11.00 - - - - - st2d { z21.d, z22.d }, p5, [x10, #10, mul vl] +# CHECK-NEXT: - - - 11.00 - - - - - st2d { z23.d, z24.d }, p3, [x13, #-16, mul vl] +# CHECK-NEXT: - - - 11.00 - - - - - st2d { z5.d, z6.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: - - - 11.00 - - - - - st2h { z0.h, z1.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 11.00 - - - - - st2h { z0.h, z1.h }, p0, [x0] +# CHECK-NEXT: - - - 11.00 - - - - - st2h { z21.h, z22.h }, p5, [x10, #10, mul vl] +# CHECK-NEXT: - - - 11.00 - - - - - st2h { z23.h, z24.h }, p3, [x13, #-16, mul vl] +# CHECK-NEXT: - - - 11.00 - - - - - st2h { z5.h, z6.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: - - - 11.00 - - - - - st2w { z0.s, z1.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 11.00 - - - - - st2w { z0.s, z1.s }, p0, [x0] +# CHECK-NEXT: - - - 11.00 - - - - - st2w { z21.s, z22.s }, p5, [x10, #10, mul vl] +# CHECK-NEXT: - - - 11.00 - - - - - st2w { 
z23.s, z24.s }, p3, [x13, #-16, mul vl] +# CHECK-NEXT: - - - 11.00 - - - - - st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: - - - 25.00 - - - - - st3b { z0.b - z2.b }, p0, [x0, x0] +# CHECK-NEXT: - - - 25.00 - - - - - st3b { z0.b - z2.b }, p0, [x0] +# CHECK-NEXT: - - - 25.00 - - - - - st3b { z21.b - z23.b }, p5, [x10, #15, mul vl] +# CHECK-NEXT: - - - 25.00 - - - - - st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: - - - 25.00 - - - - - st3b { z5.b - z7.b }, p3, [x17, x16] +# CHECK-NEXT: - - - 14.00 - - - - - st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: - - - 14.00 - - - - - st3d { z0.d - z2.d }, p0, [x0] +# CHECK-NEXT: - - - 14.00 - - - - - st3d { z21.d - z23.d }, p5, [x10, #15, mul vl] +# CHECK-NEXT: - - - 14.00 - - - - - st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: - - - 14.00 - - - - - st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: - - - 25.00 - - - - - st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 25.00 - - - - - st3h { z0.h - z2.h }, p0, [x0] +# CHECK-NEXT: - - - 25.00 - - - - - st3h { z21.h - z23.h }, p5, [x10, #15, mul vl] +# CHECK-NEXT: - - - 25.00 - - - - - st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: - - - 25.00 - - - - - st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: - - - 25.00 - - - - - st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 25.00 - - - - - st3w { z0.s - z2.s }, p0, [x0] +# CHECK-NEXT: - - - 25.00 - - - - - st3w { z21.s - z23.s }, p5, [x10, #15, mul vl] +# CHECK-NEXT: - - - 25.00 - - - - - st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: - - - 25.00 - - - - - st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: - - - 50.00 - - - - - st4b { z0.b - z3.b }, p0, [x0, x0] +# CHECK-NEXT: - - - 50.00 - - - - - st4b { z0.b - z3.b }, p0, [x0] +# CHECK-NEXT: - - - 50.00 - - - - - st4b { z21.b - z24.b }, p5, [x10, #20, mul vl] +# CHECK-NEXT: - - - 50.00 - - - - - st4b { z23.b - z26.b }, p3, [x13, 
#-32, mul vl] +# CHECK-NEXT: - - - 50.00 - - - - - st4b { z5.b - z8.b }, p3, [x17, x16] +# CHECK-NEXT: - - - 25.00 - - - - - st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: - - - 25.00 - - - - - st4d { z0.d - z3.d }, p0, [x0] +# CHECK-NEXT: - - - 25.00 - - - - - st4d { z21.d - z24.d }, p5, [x10, #20, mul vl] +# CHECK-NEXT: - - - 25.00 - - - - - st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: - - - 25.00 - - - - - st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: - - - 50.00 - - - - - st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 50.00 - - - - - st4h { z0.h - z3.h }, p0, [x0] +# CHECK-NEXT: - - - 50.00 - - - - - st4h { z21.h - z24.h }, p5, [x10, #20, mul vl] +# CHECK-NEXT: - - - 50.00 - - - - - st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: - - - 50.00 - - - - - st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: - - - 50.00 - - - - - st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 50.00 - - - - - st4w { z0.s - z3.s }, p0, [x0] +# CHECK-NEXT: - - - 50.00 - - - - - st4w { z21.s - z24.s }, p5, [x10, #20, mul vl] +# CHECK-NEXT: - - - 50.00 - - - - - st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: - - - 50.00 - - - - - st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1b { z0.b }, p0, [x0, x0] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1b { z0.b }, p0, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - stnt1b { z0.d }, p0, [z1.d] +# CHECK-NEXT: - - - 9.00 - - - - - stnt1b { z0.s }, p0, [z1.s] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1b { z21.b }, p5, [x10, #7, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1b { z23.b }, p3, [x13, #-8, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - stnt1b { z31.d }, p7, [z31.d, x0] +# CHECK-NEXT: - - - 7.00 - - - - - stnt1b { z31.d }, p7, [z31.d] +# CHECK-NEXT: - - - 9.00 - - - - - stnt1b { z31.s }, p7, [z31.s, x0] +# CHECK-NEXT: - - - 9.00 - - - - - stnt1b { z31.s }, p7, [z31.s] +# CHECK-NEXT: - - - 1.00 - - - - 
- stnt1d { z0.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1d { z0.d }, p0, [x0] +# CHECK-NEXT: - - - 7.00 - - - - - stnt1d { z0.d }, p0, [z1.d] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1d { z21.d }, p5, [x10, #7, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1d { z23.d }, p3, [x13, #-8, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - stnt1d { z31.d }, p7, [z31.d, x0] +# CHECK-NEXT: - - - 7.00 - - - - - stnt1d { z31.d }, p7, [z31.d] +# CHECK-NEXT: - - - 7.00 - - - - - stnt1h { z0.d }, p0, [z1.d] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1h { z0.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1h { z0.h }, p0, [x0] +# CHECK-NEXT: - - - 9.00 - - - - - stnt1h { z0.s }, p0, [z1.s] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1h { z21.h }, p5, [x10, #7, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1h { z23.h }, p3, [x13, #-8, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - stnt1h { z31.d }, p7, [z31.d, x0] +# CHECK-NEXT: - - - 7.00 - - - - - stnt1h { z31.d }, p7, [z31.d] +# CHECK-NEXT: - - - 9.00 - - - - - stnt1h { z31.s }, p7, [z31.s, x0] +# CHECK-NEXT: - - - 9.00 - - - - - stnt1h { z31.s }, p7, [z31.s] +# CHECK-NEXT: - - - 7.00 - - - - - stnt1w { z0.d }, p0, [z1.d] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1w { z0.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1w { z0.s }, p0, [x0] +# CHECK-NEXT: - - - 9.00 - - - - - stnt1w { z0.s }, p0, [z1.s] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1w { z21.s }, p5, [x10, #7, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - stnt1w { z23.s }, p3, [x13, #-8, mul vl] +# CHECK-NEXT: - - - 7.00 - - - - - stnt1w { z31.d }, p7, [z31.d, x0] +# CHECK-NEXT: - - - 7.00 - - - - - stnt1w { z31.d }, p7, [z31.d] +# CHECK-NEXT: - - - 9.00 - - - - - stnt1w { z31.s }, p7, [z31.s, x0] +# CHECK-NEXT: - - - 9.00 - - - - - stnt1w { z31.s }, p7, [z31.s] +# CHECK-NEXT: - - - 1.00 - - - - - str p0, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - str p15, [sp, #-256, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - str p5, [x10, 
#255, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - str z0, [x0] +# CHECK-NEXT: - - - 1.00 - - - - - str z21, [x10, #-256, mul vl] +# CHECK-NEXT: - - - 1.00 - - - - - str z31, [sp, #255, mul vl] +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.b, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.b, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.d, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.d, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.h, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.h, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.s, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - sub z0.s, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - sub z21.b, p5/m, z21.b, z10.b +# CHECK-NEXT: - - - - - - 1.00 - - sub z21.b, z10.b, z21.b +# CHECK-NEXT: - - - - - - 1.00 - - sub z21.d, p5/m, z21.d, z10.d +# CHECK-NEXT: - - - - - - 1.00 - - sub z21.d, z10.d, z21.d +# CHECK-NEXT: - - - - - - 1.00 - - sub z21.h, p5/m, z21.h, z10.h +# CHECK-NEXT: - - - - - - 1.00 - - sub z21.h, z10.h, z21.h +# CHECK-NEXT: - - - - - - 1.00 - - sub z21.s, p5/m, z21.s, z10.s +# CHECK-NEXT: - - - - - - 1.00 - - sub z21.s, z10.s, z21.s +# CHECK-NEXT: - - - - - - 1.00 - - sub z23.b, p3/m, z23.b, z13.b +# CHECK-NEXT: - - - - - - 1.00 - - sub z23.b, z13.b, z8.b +# CHECK-NEXT: - - - - - - 1.00 - - sub z23.d, p3/m, z23.d, z13.d +# CHECK-NEXT: - - - - - - 1.00 - - sub z23.d, z13.d, z8.d +# CHECK-NEXT: - - - - - - 1.00 - - sub z23.h, p3/m, z23.h, z13.h +# CHECK-NEXT: - - - - - - 1.00 - - sub z23.h, z13.h, z8.h +# CHECK-NEXT: - - - - - - 1.00 - - 
sub z23.s, p3/m, z23.s, z13.s +# CHECK-NEXT: - - - - - - 1.00 - - sub z23.s, z13.s, z8.s +# CHECK-NEXT: - - - - - - 1.00 - - sub z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - sub z31.b, z31.b, #255 +# CHECK-NEXT: - - - - - - 1.00 - - sub z31.b, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - sub z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sub z31.d, z31.d, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - sub z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sub z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sub z31.h, z31.h, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - sub z31.h, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sub z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sub z31.s, z31.s, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - sub z31.s, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - subhnb z0.b, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - subhnb z0.h, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - subhnb z0.s, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - subhnt z0.b, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - subhnt z0.h, z1.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - subhnt z0.s, z1.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - subr z0.b, p0/m, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - subr z0.b, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - subr z0.d, p0/m, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - subr z0.d, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - subr z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - subr z0.h, p0/m, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - subr z0.h, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - subr z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - subr z0.s, p0/m, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - subr z0.s, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - subr z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - subr z31.b, z31.b, #255 +# CHECK-NEXT: - - - - - - 1.00 - - subr z31.d, z31.d, #65280 +# 
CHECK-NEXT: - - - - - - 1.00 - - subr z31.h, z31.h, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - subr z31.s, z31.s, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - sunpkhi z31.d, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sunpkhi z31.h, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - sunpkhi z31.s, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sunpklo z31.d, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sunpklo z31.h, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - sunpklo z31.s, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - suqadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - suqadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - suqadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - suqadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - sxtb z0.d, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - sxtb z0.h, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - sxtb z0.s, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - sxtb z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sxtb z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - sxtb z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sxth z0.d, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - sxth z0.s, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - sxth z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - sxth z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - sxtw z0.d, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - sxtw z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - tbl z28.b, { z29.b, z30.b }, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - tbl z28.d, { z29.d, z30.d }, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - tbl z28.h, { z29.h, z30.h }, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - tbl z28.s, { z29.s, z30.s }, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - tbl z31.b, { z31.b }, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - tbl z31.d, { z31.d }, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - tbl z31.h, { z31.h }, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - tbl z31.s, { z31.s }, z31.s +# CHECK-NEXT: - - 
- - - - 1.00 - - tbx z31.b, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - tbx z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - tbx z31.h, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - tbx z31.s, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - trn1 p15.b, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - trn1 p15.d, p15.d, p15.d +# CHECK-NEXT: - - - - - - 1.00 - - trn1 p15.h, p15.h, p15.h +# CHECK-NEXT: - - - - - - 1.00 - - trn1 p15.s, p15.s, p15.s +# CHECK-NEXT: - - - - - - 1.00 - - trn1 z31.b, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - trn1 z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - trn1 z31.h, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - trn1 z31.s, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - trn2 p15.b, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - trn2 p15.d, p15.d, p15.d +# CHECK-NEXT: - - - - - - 1.00 - - trn2 p15.h, p15.h, p15.h +# CHECK-NEXT: - - - - - - 1.00 - - trn2 p15.s, p15.s, p15.s +# CHECK-NEXT: - - - - - - 1.00 - - trn2 z31.b, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - trn2 z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - trn2 z31.h, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - trn2 z31.s, z31.s, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - uaba z0.b, z1.b, z31.b +# CHECK-NEXT: - - - - - - 2.00 - - uaba z0.d, z1.d, z31.d +# CHECK-NEXT: - - - - - - 2.00 - - uaba z0.h, z1.h, z31.h +# CHECK-NEXT: - - - - - - 2.00 - - uaba z0.s, z1.s, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - uabalb z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - uabalb z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - 2.00 - - uabalb z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - 2.00 - - uabalt z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - uabalt z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - 2.00 - - uabalt z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uabd z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - uabd z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - uabd z31.h, p7/m, z31.h, z31.h 
+# CHECK-NEXT: - - - - - - 1.00 - - uabd z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uabdlb z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - uabdlb z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uabdlb z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uabdlt z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - uabdlt z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uabdlt z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - uadalp z0.h, p0/m, z1.b +# CHECK-NEXT: - - - - - - 2.00 - - uadalp z29.s, p0/m, z30.h +# CHECK-NEXT: - - - - - - 2.00 - - uadalp z30.d, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uaddlb z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - uaddlb z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uaddlb z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uaddlt z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - uaddlt z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uaddlt z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uaddv d0, p7, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - uaddv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - uaddv d0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uaddv d0, p7, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uaddwb z0.h, z1.h, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - uaddwb z29.s, z30.s, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uaddwb z31.d, z31.d, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uaddwt z0.h, z1.h, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - uaddwt z29.s, z30.s, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uaddwt z31.d, z31.d, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf z0.d, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf z0.d, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf z0.h, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf z0.h, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf z0.h, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf z0.s, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - ucvtf z0.s, p0/m, 
z0.s +# CHECK-NEXT: - - - - - - - - 23.00 udiv z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - - - 12.00 udiv z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - - - 23.00 udivr z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - - - 12.00 udivr z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - udot z0.d, z1.h, z15.h[1] +# CHECK-NEXT: - - - - - - - 1.00 - udot z0.d, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - udot z0.s, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - udot z0.s, z1.b, z7.b[3] +# CHECK-NEXT: - - - - - - 1.00 - - uhadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - uhadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - uhadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - uhadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - uhsub z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - uhsub z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - uhsub z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - uhsub z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - uhsubr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - uhsubr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - uhsubr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - uhsubr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - umax z0.b, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - umax z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - umax z31.b, z31.b, #255 +# CHECK-NEXT: - - - - - - 1.00 - - umax z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - umax z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - umax z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - umaxp z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - umaxp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - umaxp z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - umaxp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - 
umaxv b0, p7, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - umaxv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - umaxv h0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - umaxv s0, p7, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - umin z0.b, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - umin z31.b, p7/m, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - umin z31.b, z31.b, #255 +# CHECK-NEXT: - - - - - - 1.00 - - umin z31.d, p7/m, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - umin z31.h, p7/m, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - umin z31.s, p7/m, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uminp z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - uminp z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - uminp z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - uminp z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - uminv b0, p7, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - uminv d0, p7, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - uminv h0, p7, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uminv s0, p7, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - umlalb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - umlalb z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - umlalb z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - umlalb z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - umlalb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - umlalt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - umlalt z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - umlalt z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - umlalt z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - umlalt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - umlslb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - umlslb z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - umlslb z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - umlslb z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - umlslb z0.s, z1.h, z7.h[7] +# 
CHECK-NEXT: - - - - - - - 1.00 - umlslt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - umlslt z0.d, z1.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - umlslt z0.h, z1.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - umlslt z0.s, z1.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - umlslt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - ummla z0.s, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - umulh z0.b, p7/m, z0.b, z31.b +# CHECK-NEXT: - - - - - - - 1.00 - umulh z0.b, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - umulh z0.d, p7/m, z0.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - umulh z0.h, p7/m, z0.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - umulh z0.h, z1.h, z2.h +# CHECK-NEXT: - - - - - - - 1.00 - umulh z0.s, p7/m, z0.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - umulh z29.s, z30.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - umulh z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - umullb z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - umullb z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - umullb z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - umullb z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - umullb z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - - 1.00 - umullt z0.d, z1.s, z15.s[1] +# CHECK-NEXT: - - - - - - - 1.00 - umullt z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - - 1.00 - umullt z0.s, z1.h, z7.h[7] +# CHECK-NEXT: - - - - - - - 1.00 - umullt z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - - 1.00 - umullt z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z0.b, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z0.b, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z0.d, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z0.d, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z0.h, z0.h, #0 +# CHECK-NEXT: - 
- - - - - 1.00 - - uqadd z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z0.h, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z0.s, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z0.s, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z31.b, z31.b, #255 +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z31.d, z31.d, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z31.h, z31.h, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - uqadd z31.s, z31.s, #65280 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecb w0 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecb w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecb w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecb w0, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecb x0 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecb x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecb x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecb x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecb x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecd w0 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecd w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecd w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecd w0, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecd x0 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecd x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecd x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecd x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecd x0, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - uqdecd z0.d +# CHECK-NEXT: - - - - - - 1.00 - - uqdecd z0.d, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqdecd z0.d, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - uqdecd z0.d, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqdech w0 +# CHECK-NEXT: 1.00 - - - - - - - - uqdech w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - 
- uqdech w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqdech w0, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqdech x0 +# CHECK-NEXT: 1.00 - - - - - - - - uqdech x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - uqdech x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqdech x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqdech x0, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - uqdech z0.h +# CHECK-NEXT: - - - - - - 1.00 - - uqdech z0.h, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqdech z0.h, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - uqdech z0.h, pow2, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqdecp wzr, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - uqdecp wzr, p15.d +# CHECK-NEXT: - - - - - - 1.00 - - uqdecp wzr, p15.h +# CHECK-NEXT: - - - - - - 1.00 - - uqdecp wzr, p15.s +# CHECK-NEXT: - - - - - - 1.00 - - uqdecp x0, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - uqdecp x0, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - uqdecp x0, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - uqdecp x0, p0.s +# CHECK-NEXT: - - - - - - 1.00 - - uqdecp z0.d, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - uqdecp z0.h, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - uqdecp z0.s, p0.s +# CHECK-NEXT: 1.00 - - - - - - - - uqdecw w0 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecw w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecw w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecw w0, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecw x0 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecw x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecw x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecw x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqdecw x0, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - uqdecw z0.s +# CHECK-NEXT: - - - - - - 1.00 - - uqdecw z0.s, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqdecw z0.s, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - uqdecw z0.s, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqincb w0 +# CHECK-NEXT: 1.00 - - - - - - - - uqincb w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - 
uqincb w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqincb w0, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqincb x0 +# CHECK-NEXT: 1.00 - - - - - - - - uqincb x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - uqincb x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqincb x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqincb x0, vl1 +# CHECK-NEXT: 1.00 - - - - - - - - uqincd w0 +# CHECK-NEXT: 1.00 - - - - - - - - uqincd w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqincd w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqincd w0, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqincd x0 +# CHECK-NEXT: 1.00 - - - - - - - - uqincd x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - uqincd x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqincd x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqincd x0, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - uqincd z0.d +# CHECK-NEXT: - - - - - - 1.00 - - uqincd z0.d, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqincd z0.d, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - uqincd z0.d, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqinch w0 +# CHECK-NEXT: 1.00 - - - - - - - - uqinch w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqinch w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqinch w0, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqinch x0 +# CHECK-NEXT: 1.00 - - - - - - - - uqinch x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - uqinch x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqinch x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqinch x0, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - uqinch z0.h +# CHECK-NEXT: - - - - - - 1.00 - - uqinch z0.h, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqinch z0.h, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - uqinch z0.h, pow2, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqincp wzr, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - uqincp wzr, p15.d +# CHECK-NEXT: - - - - - - 1.00 - - uqincp wzr, p15.h +# CHECK-NEXT: - - - - - - 1.00 - - uqincp wzr, p15.s +# CHECK-NEXT: - - - - - - 1.00 - 
- uqincp x0, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - uqincp x0, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - uqincp x0, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - uqincp x0, p0.s +# CHECK-NEXT: - - - - - - 1.00 - - uqincp z0.d, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - uqincp z0.h, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - uqincp z0.s, p0.s +# CHECK-NEXT: 1.00 - - - - - - - - uqincw w0 +# CHECK-NEXT: 1.00 - - - - - - - - uqincw w0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqincw w0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqincw w0, pow2, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqincw x0 +# CHECK-NEXT: 1.00 - - - - - - - - uqincw x0, #14 +# CHECK-NEXT: 1.00 - - - - - - - - uqincw x0, all, mul #16 +# CHECK-NEXT: 1.00 - - - - - - - - uqincw x0, pow2 +# CHECK-NEXT: 1.00 - - - - - - - - uqincw x0, vl1 +# CHECK-NEXT: - - - - - - 1.00 - - uqincw z0.s +# CHECK-NEXT: - - - - - - 1.00 - - uqincw z0.s, all, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqincw z0.s, pow2 +# CHECK-NEXT: - - - - - - 1.00 - - uqincw z0.s, pow2, mul #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshl z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - uqrshl z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - uqrshl z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - uqrshl z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - uqrshlr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - uqrshlr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - uqrshlr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - uqrshlr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrnb z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrnb z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrnb z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrnb z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrnb z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrnb z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrnt 
z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrnt z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrnt z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrnt z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrnt z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqrshrnt z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl z0.b, p0/m, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - uqshl z0.d, p0/m, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl z0.h, p0/m, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - uqshl z0.s, p0/m, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - uqshl z31.b, p0/m, z31.b, #7 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl z31.d, p0/m, z31.d, #63 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - uqshl z31.h, p0/m, z31.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - uqshl z31.s, p0/m, z31.s, #31 +# CHECK-NEXT: - - - - - - 1.00 - - uqshlr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - uqshlr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - uqshlr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - uqshlr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - uqshrnb z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrnb z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrnb z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrnb z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrnb z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrnb z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrnt z0.b, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrnt z0.h, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrnt z0.s, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrnt z31.b, z31.h, #8 +# CHECK-NEXT: - - - - - 
- 1.00 - - uqshrnt z31.h, z31.s, #16 +# CHECK-NEXT: - - - - - - 1.00 - - uqshrnt z31.s, z31.d, #32 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.b, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.b, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.d, z0.d, #0 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.d, z0.d, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.d, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.h, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.h, z0.h, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.h, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.s, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.s, z0.s, #0, lsl #8 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z0.s, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z31.b, z31.b, #255 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z31.d, z31.d, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z31.h, z31.h, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - uqsub z31.s, z31.s, #65280 +# CHECK-NEXT: - - - - - - 1.00 - - uqsubr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - uqsubr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - uqsubr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - uqsubr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - uqxtnb z0.b, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uqxtnb z0.h, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uqxtnb z0.s, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - uqxtnt z0.b, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uqxtnt z0.h, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uqxtnt z0.s, z31.d +# CHECK-NEXT: - - - - - - - 1.00 - urecpe z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - urhadd z0.b, p0/m, z0.b, 
z1.b +# CHECK-NEXT: - - - - - - 1.00 - - urhadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - urhadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - urhadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - urshl z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - urshl z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - urshl z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - urshl z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - urshlr z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - urshlr z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - urshlr z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - urshlr z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - urshr z0.b, p0/m, z0.b, #1 +# CHECK-NEXT: - - - - - - 1.00 - - urshr z0.d, p0/m, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - urshr z0.h, p0/m, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - urshr z0.s, p0/m, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - urshr z31.b, p0/m, z31.b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - urshr z31.d, p0/m, z31.d, #64 +# CHECK-NEXT: - - - - - - 1.00 - - urshr z31.h, p0/m, z31.h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - urshr z31.s, p0/m, z31.s, #32 +# CHECK-NEXT: - - - - - - - 1.00 - ursqrte z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 2.00 - - ursra z0.b, z0.b, #1 +# CHECK-NEXT: - - - - - - 2.00 - - ursra z0.d, z0.d, #1 +# CHECK-NEXT: - - - - - - 2.00 - - ursra z0.h, z0.h, #1 +# CHECK-NEXT: - - - - - - 2.00 - - ursra z0.s, z0.s, #1 +# CHECK-NEXT: - - - - - - 2.00 - - ursra z31.b, z31.b, #8 +# CHECK-NEXT: - - - - - - 2.00 - - ursra z31.d, z31.d, #64 +# CHECK-NEXT: - - - - - - 2.00 - - ursra z31.h, z31.h, #16 +# CHECK-NEXT: - - - - - - 2.00 - - ursra z31.s, z31.s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - ushllb z0.d, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - ushllb z0.h, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - ushllb z0.s, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - 
ushllb z31.d, z31.s, #31 +# CHECK-NEXT: - - - - - - 1.00 - - ushllb z31.h, z31.b, #7 +# CHECK-NEXT: - - - - - - 1.00 - - ushllb z31.s, z31.h, #15 +# CHECK-NEXT: - - - - - - 1.00 - - ushllt z0.d, z0.s, #0 +# CHECK-NEXT: - - - - - - 1.00 - - ushllt z0.h, z0.b, #0 +# CHECK-NEXT: - - - - - - 1.00 - - ushllt z0.s, z0.h, #0 +# CHECK-NEXT: - - - - - - 1.00 - - ushllt z31.d, z31.s, #31 +# CHECK-NEXT: - - - - - - 1.00 - - ushllt z31.h, z31.b, #7 +# CHECK-NEXT: - - - - - - 1.00 - - ushllt z31.s, z31.h, #15 +# CHECK-NEXT: - - - - - - - 1.00 - usmmla z0.s, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - usqadd z0.b, p0/m, z0.b, z1.b +# CHECK-NEXT: - - - - - - 1.00 - - usqadd z0.h, p0/m, z0.h, z1.h +# CHECK-NEXT: - - - - - - 1.00 - - usqadd z29.s, p7/m, z29.s, z30.s +# CHECK-NEXT: - - - - - - 1.00 - - usqadd z31.d, p7/m, z31.d, z30.d +# CHECK-NEXT: - - - - - - 1.00 - - usra z0.b, z0.b, #1 +# CHECK-NEXT: - - - - - - 1.00 - - usra z0.d, z0.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - usra z0.h, z0.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - usra z0.s, z0.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - usra z31.b, z31.b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - usra z31.d, z31.d, #64 +# CHECK-NEXT: - - - - - - 1.00 - - usra z31.h, z31.h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - usra z31.s, z31.s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - usublb z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - usublb z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - usublb z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - usublt z0.h, z1.b, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - usublt z29.s, z30.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - usublt z31.d, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - usubwb z0.h, z1.h, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - usubwb z29.s, z30.s, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - usubwb z31.d, z31.d, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - usubwt z0.h, z1.h, z2.b +# CHECK-NEXT: - - - - - - 1.00 - - usubwt z29.s, z30.s, z31.h +# CHECK-NEXT: - - - - - - 
1.00 - - usubwt z31.d, z31.d, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uunpkhi z31.d, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uunpkhi z31.h, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - uunpkhi z31.s, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uunpklo z31.d, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uunpklo z31.h, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - uunpklo z31.s, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uxtb z0.d, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - uxtb z0.h, p0/m, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - uxtb z0.s, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - uxtb z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - uxtb z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uxtb z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uxth z0.d, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - uxth z0.s, p0/m, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - uxth z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - uxth z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uxtw z0.d, p0/m, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - uxtw z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 p15.b, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 p15.d, p15.d, p15.d +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 p15.h, p15.h, p15.h +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 p15.s, p15.s, p15.s +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 z31.b, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 z31.h, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uzp1 z31.s, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 p15.b, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 p15.d, p15.d, p15.d +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 p15.h, p15.h, p15.h +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 p15.s, p15.s, p15.s +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 z31.b, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 z31.h, z31.h, 
z31.h +# CHECK-NEXT: - - - - - - 1.00 - - uzp2 z31.s, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - whilege p15.b, w0, wzr +# CHECK-NEXT: - - - - - - 1.00 - - whilege p15.b, wzr, w0 +# CHECK-NEXT: - - - - - - 1.00 - - whilege p15.b, x0, xzr +# CHECK-NEXT: - - - - - - 1.00 - - whilege p15.b, xzr, x0 +# CHECK-NEXT: - - - - - - 1.00 - - whilege p15.d, w0, wzr +# CHECK-NEXT: - - - - - - 1.00 - - whilege p15.d, x0, xzr +# CHECK-NEXT: - - - - - - 1.00 - - whilege p15.h, w0, wzr +# CHECK-NEXT: - - - - - - 1.00 - - whilege p15.h, x0, xzr +# CHECK-NEXT: - - - - - - 1.00 - - whilege p15.s, w0, wzr +# CHECK-NEXT: - - - - - - 1.00 - - whilege p15.s, x0, xzr +# CHECK-NEXT: - - - - - - 1.00 - - whilerw p15.b, x30, x30 +# CHECK-NEXT: - - - - - - 1.00 - - whilerw p15.d, x30, x30 +# CHECK-NEXT: - - - - - - 1.00 - - whilerw p15.h, x30, x30 +# CHECK-NEXT: - - - - - - 1.00 - - whilerw p15.s, x30, x30 +# CHECK-NEXT: - - - - - - 1.00 - - whilewr p15.b, x30, x30 +# CHECK-NEXT: - - - - - - 1.00 - - whilewr p15.d, x30, x30 +# CHECK-NEXT: - - - - - - 1.00 - - whilewr p15.h, x30, x30 +# CHECK-NEXT: - - - - - - 1.00 - - whilewr p15.s, x30, x30 +# CHECK-NEXT: 1.00 - - - - - - - - wrffr p0.b +# CHECK-NEXT: 1.00 - - - - - - - - wrffr p15.b +# CHECK-NEXT: - - - - - - 1.00 - - xar z0.b, z0.b, z1.b, #1 +# CHECK-NEXT: - - - - - - 1.00 - - xar z0.d, z0.d, z1.d, #1 +# CHECK-NEXT: - - - - - - 1.00 - - xar z0.h, z0.h, z1.h, #1 +# CHECK-NEXT: - - - - - - 1.00 - - xar z0.s, z0.s, z1.s, #1 +# CHECK-NEXT: - - - - - - 1.00 - - xar z31.b, z31.b, z30.b, #8 +# CHECK-NEXT: - - - - - - 1.00 - - xar z31.d, z31.d, z30.d, #64 +# CHECK-NEXT: - - - - - - 1.00 - - xar z31.h, z31.h, z30.h, #16 +# CHECK-NEXT: - - - - - - 1.00 - - xar z31.s, z31.s, z30.s, #32 +# CHECK-NEXT: - - - - - - 1.00 - - zip1 p0.b, p0.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - zip1 p0.d, p0.d, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - zip1 p0.h, p0.h, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - zip1 p0.s, p0.s, p0.s +# CHECK-NEXT: - - - - - - 1.00 
- - zip1 p15.b, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - zip1 p15.d, p15.d, p15.d +# CHECK-NEXT: - - - - - - 1.00 - - zip1 p15.h, p15.h, p15.h +# CHECK-NEXT: - - - - - - 1.00 - - zip1 p15.s, p15.s, p15.s +# CHECK-NEXT: - - - - - - 1.00 - - zip1 z0.b, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - zip1 z0.d, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - zip1 z0.h, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - zip1 z0.s, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - zip1 z31.b, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - zip1 z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - zip1 z31.h, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - zip1 z31.s, z31.s, z31.s +# CHECK-NEXT: - - - - - - 1.00 - - zip2 p0.b, p0.b, p0.b +# CHECK-NEXT: - - - - - - 1.00 - - zip2 p0.d, p0.d, p0.d +# CHECK-NEXT: - - - - - - 1.00 - - zip2 p0.h, p0.h, p0.h +# CHECK-NEXT: - - - - - - 1.00 - - zip2 p0.s, p0.s, p0.s +# CHECK-NEXT: - - - - - - 1.00 - - zip2 p15.b, p15.b, p15.b +# CHECK-NEXT: - - - - - - 1.00 - - zip2 p15.d, p15.d, p15.d +# CHECK-NEXT: - - - - - - 1.00 - - zip2 p15.h, p15.h, p15.h +# CHECK-NEXT: - - - - - - 1.00 - - zip2 p15.s, p15.s, p15.s +# CHECK-NEXT: - - - - - - 1.00 - - zip2 z0.b, z0.b, z0.b +# CHECK-NEXT: - - - - - - 1.00 - - zip2 z0.d, z0.d, z0.d +# CHECK-NEXT: - - - - - - 1.00 - - zip2 z0.h, z0.h, z0.h +# CHECK-NEXT: - - - - - - 1.00 - - zip2 z0.s, z0.s, z0.s +# CHECK-NEXT: - - - - - - 1.00 - - zip2 z31.b, z31.b, z31.b +# CHECK-NEXT: - - - - - - 1.00 - - zip2 z31.d, z31.d, z31.d +# CHECK-NEXT: - - - - - - 1.00 - - zip2 z31.h, z31.h, z31.h +# CHECK-NEXT: - - - - - - 1.00 - - zip2 z31.s, z31.s, z31.s -- cgit v1.2.3 From a1c2a712939897251729b6fc436a2db7db6f03fc Mon Sep 17 00:00:00 2001 From: Andrei Golubev Date: Wed, 18 Jun 2025 14:38:58 +0200 Subject: [mlir][bufferization] Use Type instead of Value in unknown conversion (#144658) Generally, bufferization should be able to create a memref from a tensor without needing to know more than just a mlir::Type. 
Thus, change BufferizationOptions::UnknownTypeConverterFn to accept just a type (mlir::TensorType for now) instead of mlir::Value. Additionally, apply the same rationale to getMemRefType() helper function. Both changes are prerequisites to enable custom types support in one-shot bufferization. --- .../Bufferization/IR/BufferizableOpInterface.h | 9 +++++---- .../Bufferization/IR/BufferizableOpInterface.cpp | 19 +++++++++---------- .../Dialect/Bufferization/Transforms/Bufferize.cpp | 4 ++-- .../Transforms/SparsificationAndBufferizationPass.cpp | 6 +++--- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index adccbef754ec..2fb795f16ae2 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -265,9 +265,9 @@ struct BufferizationOptions { std::function; /// Tensor -> MemRef type converter. - /// Parameters: Value, memory space, bufferization options + /// Parameters: tensor type, memory space, bufferization options using UnknownTypeConverterFn = std::function; + TensorType, Attribute memorySpace, const BufferizationOptions &)>; // Produce a MemorySpace attribute from a tensor type using DefaultMemorySpaceFn = std::function(TensorType t)>; @@ -655,7 +655,7 @@ OpTy replaceOpWithNewBufferizedOp(RewriterBase &rewriter, Operation *op, return newOp; } -/// Return a MemRefType to which the type of the given value can be bufferized. +/// Return a MemRefType to which the TensorType can be bufferized. /// /// If possible, op bufferization implementations should not use this function /// and instead infer precise memref types for tensor results by themselves. 
@@ -667,7 +667,8 @@ OpTy replaceOpWithNewBufferizedOp(RewriterBase &rewriter, Operation *op, /// Note: Canonicalization patterns could clean up layout maps and infer more /// precise layout maps after bufferization. However, many possible /// canonicalizations are currently not implemented. -BaseMemRefType getMemRefType(Value value, const BufferizationOptions &options, +BaseMemRefType getMemRefType(TensorType tensorType, + const BufferizationOptions &options, MemRefLayoutAttrInterface layout = {}, Attribute memorySpace = nullptr); diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index 1d6e1bdaf80f..dd43647682ea 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -345,10 +345,9 @@ defaultFunctionArgTypeConverter(TensorType type, Attribute memorySpace, } /// Default unknown type converter: Use a fully dynamic layout map. BaseMemRefType -defaultUnknownTypeConverter(Value value, Attribute memorySpace, +defaultUnknownTypeConverter(TensorType tensorType, Attribute memorySpace, const BufferizationOptions &options) { - return getMemRefTypeWithFullyDynamicLayout( - llvm::cast(value.getType()), memorySpace); + return getMemRefTypeWithFullyDynamicLayout(tensorType, memorySpace); } } // namespace @@ -724,7 +723,8 @@ bufferization::getBufferType(Value value, const BufferizationOptions &options, if (!memSpace.has_value()) return op->emitError("could not infer memory space"); - return getMemRefType(value, options, /*layout=*/{}, *memSpace); + return getMemRefType(cast(value.getType()), options, + /*layout=*/{}, *memSpace); } bool bufferization::hasTensorSemantics(Operation *op) { @@ -797,12 +797,10 @@ LogicalResult BufferizationOptions::createMemCpy(OpBuilder &b, Location loc, // Bufferization-specific IRMapping support with debugging. 
//===----------------------------------------------------------------------===// -BaseMemRefType bufferization::getMemRefType(Value value, +BaseMemRefType bufferization::getMemRefType(TensorType tensorType, const BufferizationOptions &options, MemRefLayoutAttrInterface layout, Attribute memorySpace) { - auto tensorType = llvm::cast(value.getType()); - // Case 1: Unranked memref type. if (auto unrankedTensorType = llvm::dyn_cast(tensorType)) { @@ -819,7 +817,7 @@ BaseMemRefType bufferization::getMemRefType(Value value, memorySpace); } - return options.unknownTypeConverterFn(value, memorySpace, options); + return options.unknownTypeConverterFn(tensorType, memorySpace, options); } BaseMemRefType @@ -955,10 +953,11 @@ FailureOr bufferization::detail::defaultGetBufferType( const BufferizationState &bufferizationState, SmallVector &invocationStack) { assert(llvm::isa(value.getType()) && "expected tensor type"); + auto tensorType = cast(value.getType()); // No further analysis is possible for a block argument. if (llvm::isa(value)) - return bufferization::getMemRefType(value, options); + return bufferization::getMemRefType(tensorType, options); // Value is an OpResult. 
Operation *op = getOwnerOfValue(value); @@ -981,7 +980,7 @@ FailureOr bufferization::detail::defaultGetBufferType( if (!memSpace.has_value()) return op->emitError("could not infer memory space"); - return getMemRefType(value, options, /*layout=*/{}, *memSpace); + return getMemRefType(tensorType, options, /*layout=*/{}, *memSpace); } bool bufferization::detail::defaultIsRepetitiveRegion( diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index c7681d309a4a..7e9b9119ce94 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -109,9 +109,9 @@ struct OneShotBufferizePass "'unknown-type-conversion'"); return signalPassFailure(); } - opt.unknownTypeConverterFn = [=](Value value, Attribute memorySpace, + opt.unknownTypeConverterFn = [=](TensorType tensorType, + Attribute memorySpace, const BufferizationOptions &options) { - auto tensorType = cast(value.getType()); if (unknownTypeConversionOption == LayoutMapOption::IdentityLayoutMap) return bufferization::getMemRefTypeWithStaticIdentityLayout( tensorType, memorySpace); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp index a3ab53d81811..15e5102462ad 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp @@ -223,10 +223,10 @@ mlir::getBufferizationOptionsForSparsification(bool analysisOnly) { OneShotBufferizationOptions options; options.bufferizeFunctionBoundaries = true; options.setFunctionBoundaryTypeConversion(LayoutMapOption::IdentityLayoutMap); - options.unknownTypeConverterFn = [](Value value, Attribute memorySpace, + options.unknownTypeConverterFn = [](TensorType tensorType, + Attribute memorySpace, const 
BufferizationOptions &options) { - return getMemRefTypeWithStaticIdentityLayout( - cast(value.getType()), memorySpace); + return getMemRefTypeWithStaticIdentityLayout(tensorType, memorySpace); }; if (analysisOnly) { options.testAnalysisOnly = true; -- cgit v1.2.3 From 66580f77b826e71a9727f1d6287bec6a6101f620 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 18 Jun 2025 14:42:09 +0200 Subject: [mlir][Transforms][NFC] Dialect Conversion: Keep `unresolvedMaterializations` up to date (#144254) `unresolvedMaterializations` is a mapping from `UnrealizedConversionCastOp` to `UnresolvedMaterializationRewrite`. This mapping is needed to find the correct type converter for an unresolved materialization. With this commit, `unresolvedMaterializations` is updated immediately when an op is being erased. This also cleans up the code base a bit: `SingleEraseRewriter` is now used only during the "cleanup" phase and no longer needed as a field of `ConversionRewriterImpl`. This commit is in preparation of the One-Shot Dialect Conversion refactoring: `allowPatternRollback = false` will in the future trigger immediate materialization of all IR changes. 
--- mlir/lib/Transforms/Utils/DialectConversion.cpp | 33 +++++++++++++++---------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 7de26d7cfa84..c4b85ec4f67d 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -848,7 +848,7 @@ namespace detail { struct ConversionPatternRewriterImpl : public RewriterBase::Listener { explicit ConversionPatternRewriterImpl(MLIRContext *ctx, const ConversionConfig &config) - : context(ctx), eraseRewriter(ctx), config(config) {} + : context(ctx), config(config) {} //===--------------------------------------------------------------------===// // State Management @@ -981,8 +981,11 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { /// no new IR is created between calls to `eraseOp`/`eraseBlock`. struct SingleEraseRewriter : public RewriterBase, RewriterBase::Listener { public: - SingleEraseRewriter(MLIRContext *context) - : RewriterBase(context, /*listener=*/this) {} + SingleEraseRewriter( + MLIRContext *context, + std::function opErasedCallback = nullptr) + : RewriterBase(context, /*listener=*/this), + opErasedCallback(opErasedCallback) {} /// Erase the given op (unless it was already erased). void eraseOp(Operation *op) override { @@ -1003,13 +1006,20 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { bool wasErased(void *ptr) const { return erased.contains(ptr); } - void notifyOperationErased(Operation *op) override { erased.insert(op); } + void notifyOperationErased(Operation *op) override { + erased.insert(op); + if (opErasedCallback) + opErasedCallback(op); + } void notifyBlockErased(Block *block) override { erased.insert(block); } private: /// Pointers to all erased operations and blocks. DenseSet erased; + + /// A callback that is invoked when an operation is erased. 
+ std::function opErasedCallback; }; //===--------------------------------------------------------------------===// @@ -1019,11 +1029,6 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { /// MLIR context. MLIRContext *context; - /// A rewriter that keeps track of ops/block that were already erased and - /// skips duplicate op/block erasures. This rewriter is used during the - /// "cleanup" phase. - SingleEraseRewriter eraseRewriter; - // Mapping between replaced values that differ in type. This happens when // replacing a value with one of a different type. ConversionValueMapping mapping; @@ -1195,6 +1200,11 @@ void ConversionPatternRewriterImpl::applyRewrites() { rewrites[i]->commit(rewriter); // Clean up all rewrites. + SingleEraseRewriter eraseRewriter( + context, /*opErasedCallback=*/[&](Operation *op) { + if (auto castOp = dyn_cast(op)) + unresolvedMaterializations.erase(castOp); + }); for (auto &rewrite : rewrites) rewrite->cleanup(eraseRewriter); } @@ -2714,11 +2724,8 @@ LogicalResult OperationConverter::convertOperations(ArrayRef ops) { SmallVector allCastOps; const DenseMap &materializations = rewriterImpl.unresolvedMaterializations; - for (auto it : materializations) { - if (rewriterImpl.eraseRewriter.wasErased(it.first)) - continue; + for (auto it : materializations) allCastOps.push_back(it.first); - } // Reconcile all UnrealizedConversionCastOps that were inserted by the // dialect conversion frameworks. (Not the one that were inserted by -- cgit v1.2.3 From 4b2ab1494bc07493087252dff4e5e19808703048 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 18 Jun 2025 07:46:49 -0500 Subject: [flang][OpenMP] Don't crash on iterator modifier in declare mapper (#144359) Both the declare mapper directive argument, and the iterator modifier can contain declaration-type-spec, so make sure that the processing of one ends before processing of the other begins in semantic analysis. 
--- flang/lib/Semantics/resolve-names.cpp | 2 +- flang/test/Lower/OpenMP/Todo/declare-mapper-iterator.f90 | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 flang/test/Lower/OpenMP/Todo/declare-mapper-iterator.f90 diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index f66918e5c140..9e465f8ff3e1 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1800,9 +1800,9 @@ void OmpVisitor::ProcessMapperSpecifier(const parser::OmpMapperSpecifier &spec, Walk(std::get(spec.t)); auto &varName{std::get(spec.t)}; DeclareObjectEntity(varName); + EndDeclTypeSpec(); Walk(clauses); - EndDeclTypeSpec(); PopScope(); } diff --git a/flang/test/Lower/OpenMP/Todo/declare-mapper-iterator.f90 b/flang/test/Lower/OpenMP/Todo/declare-mapper-iterator.f90 new file mode 100644 index 000000000000..dacd6d624659 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/declare-mapper-iterator.f90 @@ -0,0 +1,11 @@ +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=52 -o - %s 2>&1 | FileCheck %s + +!CHECK: Support for iterator modifiers is not implemented yet +subroutine f(arg) + type :: s + integer :: a(10) + end type + type(s) :: arg(:) + + !$omp declare mapper(m: s :: v) map(mapper(m), iterator(i = 1:10): v%a(i)) +end -- cgit v1.2.3 From a83d3362f686725bac76bfb9562663908de25f15 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Wed, 18 Jun 2025 14:02:11 +0100 Subject: [flang][OpenMP] Don't allow DO CONCURRENT inside of a loop nest (#144506) I don't think DO CONCURRENT fits the definition of a Canonical Loop Nest (OpenMP 6.0 section 6.4.1). It is however explicitly allowed for the LOOP construct (6.0 section 13.8). There's some obscure language in OpenMP 6.0 for the LOOP construct: > If the collapsed loop is a DO CONCURRENT loop, neither the > data-sharing attribute clauses nor the collapse clause may be specified. 
From the surrounding context, I think "collapsed loop" just means the loop that the LOOP construct applies to. So I will interpret this to mean that DO CONCURRENT can only be used with the LOOP construct if it does not contain the COLLAPSE clause. This also fixes a bug where the associated clause was never cleared after it was set. Fixes #144178 --- flang/lib/Semantics/resolve-directives.cpp | 38 +++++++++++++++++---- flang/test/Lower/OpenMP/Todo/omp-doconcurrent.f90 | 10 ------ .../Semantics/OpenMP/do-concurrent-collapse.f90 | 39 ++++++++++++++++++++++ 3 files changed, 71 insertions(+), 16 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/Todo/omp-doconcurrent.f90 create mode 100644 flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 282660684e78..57db76e2160d 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -23,6 +23,7 @@ #include "flang/Semantics/openmp-modifiers.h" #include "flang/Semantics/symbol.h" #include "flang/Semantics/tools.h" +#include "llvm/Frontend/OpenMP/OMP.h.inc" #include "llvm/Support/Debug.h" #include #include @@ -740,9 +741,7 @@ public: } const parser::OmpClause *associatedClause{nullptr}; - void SetAssociatedClause(const parser::OmpClause &c) { - associatedClause = &c; - } + void SetAssociatedClause(const parser::OmpClause *c) { associatedClause = c; } const parser::OmpClause *GetAssociatedClause() { return associatedClause; } private: @@ -1919,12 +1918,17 @@ std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses( } if (orderedLevel && (!collapseLevel || orderedLevel >= collapseLevel)) { - SetAssociatedClause(*ordClause); + SetAssociatedClause(ordClause); return orderedLevel; } else if (!orderedLevel && collapseLevel) { - SetAssociatedClause(*collClause); + SetAssociatedClause(collClause); return collapseLevel; - } // orderedLevel < collapseLevel is an error 
handled in structural checks + } else { + SetAssociatedClause(nullptr); + } + // orderedLevel < collapseLevel is an error handled in structural + // checks + return 1; // default is outermost loop } @@ -1952,9 +1956,31 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( ivDSA = Symbol::Flag::OmpLastPrivate; } + bool isLoopConstruct{ + GetContext().directive == llvm::omp::Directive::OMPD_loop}; + const parser::OmpClause *clause{GetAssociatedClause()}; + bool hasCollapseClause{ + clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false}; + const auto &outer{std::get>(x.t)}; if (outer.has_value()) { for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { + if (loop->IsDoConcurrent()) { + // DO CONCURRENT is explicitly allowed for the LOOP construct so long as + // there isn't a COLLAPSE clause + if (isLoopConstruct) { + if (hasCollapseClause) { + // hasCollapseClause implies clause != nullptr + context_.Say(clause->source, + "DO CONCURRENT loops cannot be used with the COLLAPSE clause."_err_en_US); + } + } else { + auto &stmt = + std::get>(loop->t); + context_.Say(stmt.source, + "DO CONCURRENT loops cannot form part of a loop nest."_err_en_US); + } + } // go through all the nested do-loops and resolve index variables const parser::Name *iv{GetLoopIndex(*loop)}; if (iv) { diff --git a/flang/test/Lower/OpenMP/Todo/omp-doconcurrent.f90 b/flang/test/Lower/OpenMP/Todo/omp-doconcurrent.f90 deleted file mode 100644 index a6d70fa44592..000000000000 --- a/flang/test/Lower/OpenMP/Todo/omp-doconcurrent.f90 +++ /dev/null @@ -1,10 +0,0 @@ -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s - -! 
CHECK: not yet implemented: Do Concurrent in Worksharing loop construct -subroutine sb() - !$omp do - do concurrent(i=1:10) - print *, i - end do -end subroutine diff --git a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 new file mode 100644 index 000000000000..bb1929249183 --- /dev/null +++ b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 @@ -0,0 +1,39 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp + +integer :: i, j +!$omp parallel do collapse(2) +do i = 1, 1 + ! ERROR: DO CONCURRENT loops cannot form part of a loop nest. + do concurrent (j = 1:2) + print *, j + end do +end do + +!$omp parallel do +do i = 1, 1 + ! This should not lead to an error because it is not part of a loop nest: + do concurrent (j = 1:2) + print *, j + end do +end do + +!$omp parallel do +! ERROR: DO CONCURRENT loops cannot form part of a loop nest. +do concurrent (j = 1:2) + print *, j +end do + +!$omp loop +! Do concurrent is explicitly allowed inside of omp loop +do concurrent (j = 1:2) + print *, j +end do + +! ERROR: DO CONCURRENT loops cannot be used with the COLLAPSE clause. 
+!$omp loop collapse(2) +do i = 1, 1 + do concurrent (j = 1:2) + print *, j + end do +end do +end -- cgit v1.2.3 From 8584abb05a84d3bf4e84cdfe4154d7ade8bdfd04 Mon Sep 17 00:00:00 2001 From: Frank Schlimbach Date: Wed, 18 Jun 2025 15:04:55 +0200 Subject: [mlir] mlir/test/lit.local.cfg -> mlir/test/Target/SPIRV/lit.local.cfg (#144685) renamed: mlir/test/lit.local.cfg -> mlir/test/Target/SPIRV/lit.local.cfg --- mlir/test/Target/SPIRV/lit.local.cfg | 7 +++++++ mlir/test/lit.local.cfg | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) create mode 100644 mlir/test/Target/SPIRV/lit.local.cfg delete mode 100644 mlir/test/lit.local.cfg diff --git a/mlir/test/Target/SPIRV/lit.local.cfg b/mlir/test/Target/SPIRV/lit.local.cfg new file mode 100644 index 000000000000..167c454db518 --- /dev/null +++ b/mlir/test/Target/SPIRV/lit.local.cfg @@ -0,0 +1,7 @@ +if not "SPIRV" in config.root.targets: + config.unsupported = True + +if config.spirv_tools_tests: + config.available_features.add("spirv-tools") + config.substitutions.append(("spirv-as", os.path.join(config.llvm_tools_dir, "spirv-as"))) + config.substitutions.append(("spirv-val", os.path.join(config.llvm_tools_dir, "spirv-val"))) diff --git a/mlir/test/lit.local.cfg b/mlir/test/lit.local.cfg deleted file mode 100644 index 167c454db518..000000000000 --- a/mlir/test/lit.local.cfg +++ /dev/null @@ -1,7 +0,0 @@ -if not "SPIRV" in config.root.targets: - config.unsupported = True - -if config.spirv_tools_tests: - config.available_features.add("spirv-tools") - config.substitutions.append(("spirv-as", os.path.join(config.llvm_tools_dir, "spirv-as"))) - config.substitutions.append(("spirv-val", os.path.join(config.llvm_tools_dir, "spirv-val"))) -- cgit v1.2.3 From 68471d29eed2c49f9b439e505b3f24d387d54f97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Wed, 18 Jun 2025 15:17:53 +0200 Subject: =?UTF-8?q?Revert=20"Reapply=20"[clang][bytecode]=20Allocate=20Int?= 
=?UTF-8?q?egralAP=20and=20Floating=20types=20usi=E2=80=A6=20(#144676)"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 7c15edb306932e41c159f3d69c161ed0d89d47b7. This still breaks clang-armv8-quick: https://lab.llvm.org/buildbot/#/builders/154/builds/17587 --- clang/lib/AST/ByteCode/Compiler.cpp | 112 +++---- clang/lib/AST/ByteCode/Compiler.h | 1 - clang/lib/AST/ByteCode/Descriptor.cpp | 2 +- clang/lib/AST/ByteCode/Disasm.cpp | 60 ++-- clang/lib/AST/ByteCode/Floating.h | 252 +++++---------- clang/lib/AST/ByteCode/Integral.h | 3 - clang/lib/AST/ByteCode/IntegralAP.h | 234 ++++++-------- clang/lib/AST/ByteCode/Interp.cpp | 106 +------ clang/lib/AST/ByteCode/Interp.h | 337 ++++----------------- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 55 +--- clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp | 4 +- clang/lib/AST/ByteCode/InterpState.h | 30 -- clang/lib/AST/ByteCode/Opcodes.td | 14 +- clang/lib/AST/ByteCode/PrimType.h | 17 -- clang/lib/AST/ByteCode/Program.h | 24 +- .../AST/ByteCode/builtin-bit-cast-long-double.cpp | 10 +- clang/test/AST/ByteCode/builtin-functions.cpp | 12 +- 17 files changed, 343 insertions(+), 930 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 3f884ed8d094..9fe4803ce98e 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -748,8 +748,7 @@ bool Compiler::VisitFloatingLiteral(const FloatingLiteral *E) { if (DiscardResult) return true; - APFloat F = E->getValue(); - return this->emitFloat(F, E); + return this->emitConstFloat(E->getValue(), E); } template @@ -4186,10 +4185,8 @@ bool Compiler::visitZeroInitializer(PrimType T, QualType QT, nullptr, E); case PT_MemberPtr: return this->emitNullMemberPtr(0, nullptr, E); - case PT_Float: { - APFloat F = APFloat::getZero(Ctx.getFloatSemantics(QT)); - return this->emitFloat(F, E); - } + case PT_Float: + return 
this->emitConstFloat(APFloat::getZero(Ctx.getFloatSemantics(QT)), E); case PT_FixedPoint: { auto Sem = Ctx.getASTContext().getFixedPointSemantics(E->getType()); return this->emitConstFixedPoint(FixedPoint::zero(Sem), E); @@ -4677,7 +4674,10 @@ VarCreationState Compiler::visitVarDecl(const VarDecl *VD, if (!visitInitializer(Init)) return false; - return this->emitFinishInitGlobal(Init); + if (!this->emitFinishInit(Init)) + return false; + + return this->emitPopPtr(Init); }; DeclScope LocalScope(this, VD); @@ -4698,45 +4698,51 @@ VarCreationState Compiler::visitVarDecl(const VarDecl *VD, return false; return !Init || (checkDecl() && initGlobal(*GlobalIndex)); - } - // Local variables. - InitLinkScope ILS(this, InitLink::Decl(VD)); + } else { + InitLinkScope ILS(this, InitLink::Decl(VD)); - if (VarT) { - unsigned Offset = this->allocateLocalPrimitive( - VD, *VarT, VD->getType().isConstQualified(), nullptr, ScopeKind::Block, - IsConstexprUnknown); - if (Init) { - // If this is a toplevel declaration, create a scope for the - // initializer. - if (Toplevel) { - LocalScope Scope(this); - if (!this->visit(Init)) - return false; - return this->emitSetLocal(*VarT, Offset, VD) && Scope.destroyLocals(); - } else { - if (!this->visit(Init)) - return false; - return this->emitSetLocal(*VarT, Offset, VD); + if (VarT) { + unsigned Offset = this->allocateLocalPrimitive( + VD, *VarT, VD->getType().isConstQualified(), nullptr, + ScopeKind::Block, IsConstexprUnknown); + if (Init) { + // If this is a toplevel declaration, create a scope for the + // initializer. 
+ if (Toplevel) { + LocalScope Scope(this); + if (!this->visit(Init)) + return false; + return this->emitSetLocal(*VarT, Offset, VD) && Scope.destroyLocals(); + } else { + if (!this->visit(Init)) + return false; + return this->emitSetLocal(*VarT, Offset, VD); + } } - } - } else { - if (std::optional Offset = this->allocateLocal( - VD, VD->getType(), nullptr, ScopeKind::Block, IsConstexprUnknown)) { - if (!Init) - return true; + } else { + if (std::optional Offset = + this->allocateLocal(VD, VD->getType(), nullptr, ScopeKind::Block, + IsConstexprUnknown)) { + if (!Init) + return true; - if (!this->emitGetPtrLocal(*Offset, Init)) - return false; + if (!this->emitGetPtrLocal(*Offset, Init)) + return false; - if (!visitInitializer(Init)) - return false; + if (!visitInitializer(Init)) + return false; + + if (!this->emitFinishInit(Init)) + return false; - return this->emitFinishInitPop(Init); + return this->emitPopPtr(Init); + } + return false; } - return false; + return true; } - return true; + + return false; } template @@ -4745,10 +4751,8 @@ bool Compiler::visitAPValue(const APValue &Val, PrimType ValType, assert(!DiscardResult); if (Val.isInt()) return this->emitConst(Val.getInt(), ValType, E); - else if (Val.isFloat()) { - APFloat F = Val.getFloat(); - return this->emitFloat(F, E); - } + else if (Val.isFloat()) + return this->emitConstFloat(Val.getFloat(), E); if (Val.isLValue()) { if (Val.isNullPointer()) @@ -6129,10 +6133,8 @@ bool Compiler::VisitUnaryOperator(const UnaryOperator *E) { const auto &TargetSemantics = Ctx.getFloatSemantics(E->getType()); if (!this->emitLoadFloat(E)) return false; - APFloat F(TargetSemantics, 1); - if (!this->emitFloat(F, E)) + if (!this->emitConstFloat(llvm::APFloat(TargetSemantics, 1), E)) return false; - if (!this->emitAddf(getFPOptions(E), E)) return false; if (!this->emitStoreFloat(E)) @@ -6174,10 +6176,8 @@ bool Compiler::VisitUnaryOperator(const UnaryOperator *E) { const auto &TargetSemantics = 
Ctx.getFloatSemantics(E->getType()); if (!this->emitLoadFloat(E)) return false; - APFloat F(TargetSemantics, 1); - if (!this->emitFloat(F, E)) + if (!this->emitConstFloat(llvm::APFloat(TargetSemantics, 1), E)) return false; - if (!this->emitSubf(getFPOptions(E), E)) return false; if (!this->emitStoreFloat(E)) @@ -6953,20 +6953,6 @@ bool Compiler::emitDummyPtr(const DeclTy &D, const Expr *E) { return true; } -template -bool Compiler::emitFloat(const APFloat &F, const Expr *E) { - assert(!DiscardResult && "Should've been checked before"); - - if (Floating::singleWord(F.getSemantics())) - return this->emitConstFloat(Floating(F), E); - - APInt I = F.bitcastToAPInt(); - return this->emitConstFloat( - Floating(const_cast(I.getRawData()), - llvm::APFloatBase::SemanticsToEnum(F.getSemantics())), - E); -} - // This function is constexpr if and only if To, From, and the types of // all subobjects of To and From are types T such that... // (3.1) - is_union_v is false; diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index a1d068cc7e0a..ac3ad84766dc 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -391,7 +391,6 @@ private: bool emitRecordDestruction(const Record *R, SourceInfo Loc); bool emitDestruction(const Descriptor *Desc, SourceInfo Loc); bool emitDummyPtr(const DeclTy &D, const Expr *E); - bool emitFloat(const APFloat &F, const Expr *E); unsigned collectBaseOffset(const QualType BaseType, const QualType DerivedType); bool emitLambdaStaticInvokerBody(const CXXMethodDecl *MD); diff --git a/clang/lib/AST/ByteCode/Descriptor.cpp b/clang/lib/AST/ByteCode/Descriptor.cpp index 46e4d0d940b3..5531295dfa2f 100644 --- a/clang/lib/AST/ByteCode/Descriptor.cpp +++ b/clang/lib/AST/ByteCode/Descriptor.cpp @@ -368,7 +368,7 @@ Descriptor::Descriptor(const DeclTy &D, PrimType Type, MetadataSize MD, bool IsTemporary, bool IsConst, UnknownSize) : Source(D), ElemSize(primSize(Type)), Size(UnknownSizeMark), 
MDSize(MD.value_or(0)), - AllocSize(MDSize + sizeof(InitMapPtr) + alignof(void *)), PrimT(Type), + AllocSize(MDSize + sizeof(InitMapPtr) + alignof(void *)), IsConst(IsConst), IsMutable(false), IsTemporary(IsTemporary), IsArray(true), CtorFn(getCtorArrayPrim(Type)), DtorFn(getDtorArrayPrim(Type)), MoveFn(getMoveArrayPrim(Type)) { diff --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp index 7c6b78386b14..846dc2fe92a7 100644 --- a/clang/lib/AST/ByteCode/Disasm.cpp +++ b/clang/lib/AST/ByteCode/Disasm.cpp @@ -50,56 +50,34 @@ inline static std::string printArg(Program &P, CodePtr &OpPC) { } template <> inline std::string printArg(Program &P, CodePtr &OpPC) { - auto Sem = Floating::deserializeSemantics(*OpPC); - - unsigned BitWidth = llvm::APFloatBase::semanticsSizeInBits( - llvm::APFloatBase::EnumToSemantics(Sem)); - auto Memory = - std::make_unique(llvm::APInt::getNumWords(BitWidth)); - Floating Result(Memory.get(), Sem); - Floating::deserialize(*OpPC, &Result); - - OpPC += align(Result.bytesToSerialize()); + auto F = Floating::deserialize(*OpPC); + OpPC += align(F.bytesToSerialize()); - std::string S; - llvm::raw_string_ostream SS(S); - SS << Result; - return S; + std::string Result; + llvm::raw_string_ostream SS(Result); + SS << F; + return Result; } template <> inline std::string printArg>(Program &P, CodePtr &OpPC) { - using T = IntegralAP; - unsigned BitWidth = T::deserializeSize(*OpPC); - auto Memory = - std::make_unique(llvm::APInt::getNumWords(BitWidth)); - - T Result(Memory.get(), BitWidth); - T::deserialize(*OpPC, &Result); - - OpPC += Result.bytesToSerialize(); - std::string Str; - llvm::raw_string_ostream SS(Str); - SS << Result; - return Str; -} + auto F = IntegralAP::deserialize(*OpPC); + OpPC += align(F.bytesToSerialize()); + std::string Result; + llvm::raw_string_ostream SS(Result); + SS << F; + return Result; +} template <> inline std::string printArg>(Program &P, CodePtr &OpPC) { - using T = IntegralAP; - unsigned BitWidth = 
T::deserializeSize(*OpPC); - auto Memory = - std::make_unique(llvm::APInt::getNumWords(BitWidth)); - - T Result(Memory.get(), BitWidth); - T::deserialize(*OpPC, &Result); - - std::string Str; - llvm::raw_string_ostream SS(Str); - SS << Result; + auto F = IntegralAP::deserialize(*OpPC); + OpPC += align(F.bytesToSerialize()); - OpPC += Result.bytesToSerialize(); - return Str; + std::string Result; + llvm::raw_string_ostream SS(Result); + SS << F; + return Result; } template <> inline std::string printArg(Program &P, CodePtr &OpPC) { diff --git a/clang/lib/AST/ByteCode/Floating.h b/clang/lib/AST/ByteCode/Floating.h index 659892e720ab..3750568fc23c 100644 --- a/clang/lib/AST/ByteCode/Floating.h +++ b/clang/lib/AST/ByteCode/Floating.h @@ -17,79 +17,63 @@ #include "clang/AST/APValue.h" #include "llvm/ADT/APFloat.h" -// XXX This is just a debugging help. Setting this to 1 will heap-allocate ALL -// floating values. -#define ALLOCATE_ALL 0 - namespace clang { namespace interp { using APFloat = llvm::APFloat; using APSInt = llvm::APSInt; -using APInt = llvm::APInt; -/// If a Floating is constructed from Memory, it DOES NOT OWN THAT MEMORY. -/// It will NOT copy the memory (unless, of course, copy() is called) and it -/// won't alllocate anything. The allocation should happen via InterpState or -/// Program. class Floating final { private: - union { - uint64_t Val = 0; - uint64_t *Memory; - }; - llvm::APFloatBase::Semantics Semantics; - - APFloat getValue() const { - unsigned BitWidth = bitWidth(); - if (singleWord()) - return APFloat(getSemantics(), APInt(BitWidth, Val)); - unsigned NumWords = numWords(); - return APFloat(getSemantics(), APInt(BitWidth, NumWords, Memory)); - } + // The underlying value storage. + APFloat F; public: - Floating() = default; - Floating(llvm::APFloatBase::Semantics Semantics) - : Val(0), Semantics(Semantics) {} - Floating(const APFloat &F) { + /// Zero-initializes a Floating. 
+ Floating() : F(0.0f) {} + Floating(const APFloat &F) : F(F) {} - Semantics = llvm::APFloatBase::SemanticsToEnum(F.getSemantics()); - this->copy(F); + // Static constructors for special floating point values. + static Floating getInf(const llvm::fltSemantics &Sem) { + return Floating(APFloat::getInf(Sem)); } - Floating(uint64_t *Memory, llvm::APFloatBase::Semantics Semantics) - : Memory(Memory), Semantics(Semantics) {} - - APFloat getAPFloat() const { return getValue(); } + const APFloat &getAPFloat() const { return F; } - bool operator<(Floating RHS) const { return getValue() < RHS.getValue(); } - bool operator>(Floating RHS) const { return getValue() > RHS.getValue(); } - bool operator<=(Floating RHS) const { return getValue() <= RHS.getValue(); } - bool operator>=(Floating RHS) const { return getValue() >= RHS.getValue(); } + bool operator<(Floating RHS) const { return F < RHS.F; } + bool operator>(Floating RHS) const { return F > RHS.F; } + bool operator<=(Floating RHS) const { return F <= RHS.F; } + bool operator>=(Floating RHS) const { return F >= RHS.F; } + bool operator==(Floating RHS) const { return F == RHS.F; } + bool operator!=(Floating RHS) const { return F != RHS.F; } + Floating operator-() const { return Floating(-F); } APFloat::opStatus convertToInteger(APSInt &Result) const { bool IsExact; - return getValue().convertToInteger(Result, llvm::APFloat::rmTowardZero, - &IsExact); + return F.convertToInteger(Result, llvm::APFloat::rmTowardZero, &IsExact); } - void toSemantics(const llvm::fltSemantics *Sem, llvm::RoundingMode RM, - Floating *Result) const { - APFloat Copy = getValue(); + Floating toSemantics(const llvm::fltSemantics *Sem, + llvm::RoundingMode RM) const { + APFloat Copy = F; bool LosesInfo; Copy.convert(*Sem, RM, &LosesInfo); (void)LosesInfo; - Result->copy(Copy); + return Floating(Copy); + } + + /// Convert this Floating to one with the same semantics as \Other. 
+ Floating toSemantics(const Floating &Other, llvm::RoundingMode RM) const { + return toSemantics(&Other.F.getSemantics(), RM); } APSInt toAPSInt(unsigned NumBits = 0) const { - return APSInt(getValue().bitcastToAPInt()); + return APSInt(F.bitcastToAPInt()); } - APValue toAPValue(const ASTContext &) const { return APValue(getValue()); } + APValue toAPValue(const ASTContext &) const { return APValue(F); } void print(llvm::raw_ostream &OS) const { // Can't use APFloat::print() since it appends a newline. SmallVector Buffer; - getValue().toString(Buffer); + F.toString(Buffer); OS << Buffer; } std::string toDiagnosticString(const ASTContext &Ctx) const { @@ -99,62 +83,25 @@ public: return NameStr; } - unsigned bitWidth() const { - return llvm::APFloatBase::semanticsSizeInBits(getSemantics()); - } - unsigned numWords() const { return llvm::APInt::getNumWords(bitWidth()); } - bool singleWord() const { -#if ALLOCATE_ALL - return false; -#endif - return numWords() == 1; - } - static bool singleWord(const llvm::fltSemantics &Sem) { -#if ALLOCATE_ALL - return false; -#endif - return APInt::getNumWords(llvm::APFloatBase::getSizeInBits(Sem)) == 1; - } - const llvm::fltSemantics &getSemantics() const { - return llvm::APFloatBase::EnumToSemantics(Semantics); - } - - void copy(const APFloat &F) { - if (singleWord()) { - Val = F.bitcastToAPInt().getZExtValue(); - } else { - assert(Memory); - std::memcpy(Memory, F.bitcastToAPInt().getRawData(), - numWords() * sizeof(uint64_t)); - } - } - - void take(uint64_t *NewMemory) { - if (singleWord()) - return; - - if (Memory) - std::memcpy(NewMemory, Memory, numWords() * sizeof(uint64_t)); - Memory = NewMemory; - } + unsigned bitWidth() const { return F.semanticsSizeInBits(F.getSemantics()); } bool isSigned() const { return true; } - bool isNegative() const { return getValue().isNegative(); } - bool isZero() const { return getValue().isZero(); } - bool isNonZero() const { return getValue().isNonZero(); } - bool isMin() const { return 
getValue().isSmallest(); } - bool isMinusOne() const { return getValue().isExactlyValue(-1.0); } - bool isNan() const { return getValue().isNaN(); } - bool isSignaling() const { return getValue().isSignaling(); } - bool isInf() const { return getValue().isInfinity(); } - bool isFinite() const { return getValue().isFinite(); } - bool isNormal() const { return getValue().isNormal(); } - bool isDenormal() const { return getValue().isDenormal(); } - llvm::FPClassTest classify() const { return getValue().classify(); } - APFloat::fltCategory getCategory() const { return getValue().getCategory(); } + bool isNegative() const { return F.isNegative(); } + bool isZero() const { return F.isZero(); } + bool isNonZero() const { return F.isNonZero(); } + bool isMin() const { return F.isSmallest(); } + bool isMinusOne() const { return F.isExactlyValue(-1.0); } + bool isNan() const { return F.isNaN(); } + bool isSignaling() const { return F.isSignaling(); } + bool isInf() const { return F.isInfinity(); } + bool isFinite() const { return F.isFinite(); } + bool isNormal() const { return F.isNormal(); } + bool isDenormal() const { return F.isDenormal(); } + llvm::FPClassTest classify() const { return F.classify(); } + APFloat::fltCategory getCategory() const { return F.getCategory(); } ComparisonCategoryResult compare(const Floating &RHS) const { - llvm::APFloatBase::cmpResult CmpRes = getValue().compare(RHS.getValue()); + llvm::APFloatBase::cmpResult CmpRes = F.compare(RHS.F); switch (CmpRes) { case llvm::APFloatBase::cmpLessThan: return ComparisonCategoryResult::Less; @@ -171,130 +118,97 @@ public: static APFloat::opStatus fromIntegral(APSInt Val, const llvm::fltSemantics &Sem, llvm::RoundingMode RM, - Floating *Result) { + Floating &Result) { APFloat F = APFloat(Sem); APFloat::opStatus Status = F.convertFromAPInt(Val, Val.isSigned(), RM); - Result->copy(F); + Result = Floating(F); return Status; } - static void bitcastFromMemory(const std::byte *Buff, - const llvm::fltSemantics 
&Sem, - Floating *Result) { + static Floating bitcastFromMemory(const std::byte *Buff, + const llvm::fltSemantics &Sem) { size_t Size = APFloat::semanticsSizeInBits(Sem); llvm::APInt API(Size, true); llvm::LoadIntFromMemory(API, (const uint8_t *)Buff, Size / 8); - Result->copy(APFloat(Sem, API)); + + return Floating(APFloat(Sem, API)); } void bitcastToMemory(std::byte *Buff) const { - llvm::APInt API = getValue().bitcastToAPInt(); + llvm::APInt API = F.bitcastToAPInt(); llvm::StoreIntToMemory(API, (uint8_t *)Buff, bitWidth() / 8); } // === Serialization support === size_t bytesToSerialize() const { - return sizeof(Semantics) + (numWords() * sizeof(uint64_t)); + return sizeof(llvm::fltSemantics *) + + (APFloat::semanticsSizeInBits(F.getSemantics()) / 8); } void serialize(std::byte *Buff) const { - std::memcpy(Buff, &Semantics, sizeof(Semantics)); - if (singleWord()) { - std::memcpy(Buff + sizeof(Semantics), &Val, sizeof(uint64_t)); - } else { - std::memcpy(Buff + sizeof(Semantics), Memory, - numWords() * sizeof(uint64_t)); - } + // Semantics followed by an APInt. 
+ *reinterpret_cast(Buff) = &F.getSemantics(); + + llvm::APInt API = F.bitcastToAPInt(); + llvm::StoreIntToMemory(API, (uint8_t *)(Buff + sizeof(void *)), + bitWidth() / 8); } - static llvm::APFloatBase::Semantics - deserializeSemantics(const std::byte *Buff) { - return *reinterpret_cast(Buff); + static Floating deserialize(const std::byte *Buff) { + const llvm::fltSemantics *Sem; + std::memcpy((void *)&Sem, Buff, sizeof(void *)); + return bitcastFromMemory(Buff + sizeof(void *), *Sem); } - static void deserialize(const std::byte *Buff, Floating *Result) { - llvm::APFloatBase::Semantics Semantics; - std::memcpy(&Semantics, Buff, sizeof(Semantics)); - - unsigned BitWidth = llvm::APFloat::semanticsSizeInBits( - llvm::APFloatBase::EnumToSemantics(Semantics)); - unsigned NumWords = llvm::APInt::getNumWords(BitWidth); - - Result->Semantics = Semantics; - if (NumWords == 1 && !ALLOCATE_ALL) { - std::memcpy(&Result->Val, Buff + sizeof(Semantics), sizeof(uint64_t)); - } else { - assert(Result->Memory); - std::memcpy(Result->Memory, Buff + sizeof(Semantics), - NumWords * sizeof(uint64_t)); - } + static Floating abs(const Floating &F) { + APFloat V = F.F; + if (V.isNegative()) + V.changeSign(); + return Floating(V); } // ------- static APFloat::opStatus add(const Floating &A, const Floating &B, llvm::RoundingMode RM, Floating *R) { - APFloat LHS = A.getValue(); - APFloat RHS = B.getValue(); - - auto Status = LHS.add(RHS, RM); - R->copy(LHS); - return Status; + *R = Floating(A.F); + return R->F.add(B.F, RM); } static APFloat::opStatus increment(const Floating &A, llvm::RoundingMode RM, Floating *R) { - APFloat One(A.getSemantics(), 1); - APFloat LHS = A.getValue(); - - auto Status = LHS.add(One, RM); - R->copy(LHS); - return Status; + APFloat One(A.F.getSemantics(), 1); + *R = Floating(A.F); + return R->F.add(One, RM); } static APFloat::opStatus sub(const Floating &A, const Floating &B, llvm::RoundingMode RM, Floating *R) { - APFloat LHS = A.getValue(); - APFloat RHS = 
B.getValue(); - - auto Status = LHS.subtract(RHS, RM); - R->copy(LHS); - return Status; + *R = Floating(A.F); + return R->F.subtract(B.F, RM); } static APFloat::opStatus decrement(const Floating &A, llvm::RoundingMode RM, Floating *R) { - APFloat One(A.getSemantics(), 1); - APFloat LHS = A.getValue(); - - auto Status = LHS.subtract(One, RM); - R->copy(LHS); - return Status; + APFloat One(A.F.getSemantics(), 1); + *R = Floating(A.F); + return R->F.subtract(One, RM); } static APFloat::opStatus mul(const Floating &A, const Floating &B, llvm::RoundingMode RM, Floating *R) { - - APFloat LHS = A.getValue(); - APFloat RHS = B.getValue(); - - auto Status = LHS.multiply(RHS, RM); - R->copy(LHS); - return Status; + *R = Floating(A.F); + return R->F.multiply(B.F, RM); } static APFloat::opStatus div(const Floating &A, const Floating &B, llvm::RoundingMode RM, Floating *R) { - APFloat LHS = A.getValue(); - APFloat RHS = B.getValue(); - - auto Status = LHS.divide(RHS, RM); - R->copy(LHS); - return Status; + *R = Floating(A.F); + return R->F.divide(B.F, RM); } static bool neg(const Floating &A, Floating *R) { - R->copy(-A.getValue()); + *R = -A; return false; } }; diff --git a/clang/lib/AST/ByteCode/Integral.h b/clang/lib/AST/ByteCode/Integral.h index af5cd2d13ecc..13fdb5369f2b 100644 --- a/clang/lib/AST/ByteCode/Integral.h +++ b/clang/lib/AST/ByteCode/Integral.h @@ -99,9 +99,6 @@ public: bool operator>=(Integral RHS) const { return V >= RHS.V; } bool operator==(Integral RHS) const { return V == RHS.V; } bool operator!=(Integral RHS) const { return V != RHS.V; } - bool operator>=(unsigned RHS) const { - return static_cast(V) >= RHS; - } bool operator>(unsigned RHS) const { return V >= 0 && static_cast(V) > RHS; diff --git a/clang/lib/AST/ByteCode/IntegralAP.h b/clang/lib/AST/ByteCode/IntegralAP.h index 61cbd14ad174..8ee08dfb5cfe 100644 --- a/clang/lib/AST/ByteCode/IntegralAP.h +++ b/clang/lib/AST/ByteCode/IntegralAP.h @@ -28,19 +28,12 @@ namespace interp { using APInt = 
llvm::APInt; using APSInt = llvm::APSInt; +template class Integral; -/// If an IntegralAP is constructed from Memory, it DOES NOT OWN THAT MEMORY. -/// It will NOT copy the memory (unless, of course, copy() is called) and it -/// won't alllocate anything. The allocation should happen via InterpState or -/// Program. template class IntegralAP final { -public: - union { - uint64_t *Memory = nullptr; - uint64_t Val; - }; - uint32_t BitWidth = 0; +private: friend IntegralAP; + APInt V; template static T truncateCast(const APInt &V) { @@ -59,86 +52,52 @@ public: : V.trunc(BitSize).getZExtValue(); } - APInt getValue() const { - if (singleWord()) - return APInt(BitWidth, Val, Signed); - unsigned NumWords = llvm::APInt::getNumWords(BitWidth); - return llvm::APInt(BitWidth, NumWords, Memory); - } - public: using AsUnsigned = IntegralAP; - void take(uint64_t *NewMemory) { - assert(!singleWord()); - std::memcpy(NewMemory, Memory, numWords() * sizeof(uint64_t)); - Memory = NewMemory; - } + template + IntegralAP(T Value, unsigned BitWidth) + : V(APInt(BitWidth, static_cast(Value), Signed)) {} - void copy(const APInt &V) { - assert(BitWidth == V.getBitWidth()); - assert(numWords() == V.getNumWords()); + IntegralAP(APInt V) : V(V) {} + /// Arbitrary value for uninitialized variables. + IntegralAP() : IntegralAP(Signed ? -1 : 7, 3) {} - if (V.isSingleWord()) { - if constexpr (Signed) - Val = V.getSExtValue(); - else - Val = V.getZExtValue(); - return; - } - assert(Memory); - std::memcpy(Memory, V.getRawData(), V.getNumWords() * sizeof(uint64_t)); - } - - // Constructors. - IntegralAP() = default; - IntegralAP(unsigned BitWidth) : BitWidth(BitWidth) {} - IntegralAP(uint64_t *Memory, unsigned BitWidth) - : Memory(Memory), BitWidth(BitWidth) {} - IntegralAP(const APInt &V) : BitWidth(V.getBitWidth()) { - if (V.isSingleWord()) { - Val = Signed ? 
V.getSExtValue() : V.getZExtValue(); - } else { - Memory = const_cast(V.getRawData()); - } - } - - IntegralAP operator-() const { return IntegralAP(-getValue()); } + IntegralAP operator-() const { return IntegralAP(-V); } IntegralAP operator-(const IntegralAP &Other) const { - return IntegralAP(getValue() - Other.getValue()); + return IntegralAP(V - Other.V); } bool operator>(const IntegralAP &RHS) const { if constexpr (Signed) - return getValue().sgt(RHS.getValue()); - return getValue().ugt(RHS.getValue()); + return V.ugt(RHS.V); + return V.sgt(RHS.V); } - bool operator>=(unsigned RHS) const { + bool operator>=(IntegralAP RHS) const { if constexpr (Signed) - return getValue().sge(RHS); - return getValue().uge(RHS); + return V.uge(RHS.V); + return V.sge(RHS.V); } bool operator<(IntegralAP RHS) const { if constexpr (Signed) - return getValue().slt(RHS.getValue()); - return getValue().ult(RHS.getValue()); + return V.slt(RHS.V); + return V.slt(RHS.V); + } + bool operator<=(IntegralAP RHS) const { + if constexpr (Signed) + return V.ult(RHS.V); + return V.ult(RHS.V); } template >> explicit operator Ty() const { - return truncateCast(getValue()); + return truncateCast(V); } template static IntegralAP from(T Value, unsigned NumBits = 0) { - if (NumBits == 0) - NumBits = sizeof(T) * 8; assert(NumBits > 0); APInt Copy = APInt(NumBits, static_cast(Value), Signed); - assert(false); - return IntegralAP(Copy); - } - static IntegralAP from(const APInt &Value) { - return IntegralAP(Value); + return IntegralAP(Copy); } template @@ -147,45 +106,52 @@ public: NumBits = V.bitWidth(); if constexpr (InputSigned) - return IntegralAP(V.getValue().sextOrTrunc(NumBits)); - return IntegralAP(V.getValue().zextOrTrunc(NumBits)); + return IntegralAP(V.V.sextOrTrunc(NumBits)); + return IntegralAP(V.V.zextOrTrunc(NumBits)); + } + + template + static IntegralAP from(Integral I, unsigned BitWidth) { + return IntegralAP(I.toAPInt(BitWidth)); + } + + static IntegralAP zero(int32_t BitWidth) { + 
APInt V = APInt(BitWidth, 0LL, Signed); + return IntegralAP(V); } - constexpr unsigned bitWidth() const { return BitWidth; } - constexpr unsigned numWords() const { return APInt::getNumWords(BitWidth); } - constexpr bool singleWord() const { return numWords() == 1; } + constexpr unsigned bitWidth() const { return V.getBitWidth(); } APSInt toAPSInt(unsigned Bits = 0) const { if (Bits == 0) Bits = bitWidth(); - APInt V = getValue(); if constexpr (Signed) - return APSInt(getValue().sext(Bits), !Signed); + return APSInt(V.sext(Bits), !Signed); else - return APSInt(getValue().zext(Bits), !Signed); + return APSInt(V.zext(Bits), !Signed); } APValue toAPValue(const ASTContext &) const { return APValue(toAPSInt()); } - bool isZero() const { return getValue().isZero(); } + bool isZero() const { return V.isZero(); } bool isPositive() const { if constexpr (Signed) - return getValue().isNonNegative(); + return V.isNonNegative(); return true; } bool isNegative() const { if constexpr (Signed) - return !getValue().isNonNegative(); + return !V.isNonNegative(); return false; } - bool isMin() const { return getValue().isMinValue(); } - bool isMax() const { return getValue().isMaxValue(); } + bool isMin() const { return V.isMinValue(); } + bool isMax() const { return V.isMaxValue(); } static constexpr bool isSigned() { return Signed; } - bool isMinusOne() const { return Signed && getValue().isAllOnes(); } + bool isMinusOne() const { return Signed && V == -1; } - unsigned countLeadingZeros() const { return getValue().countl_zero(); } + unsigned countLeadingZeros() const { return V.countl_zero(); } - void print(llvm::raw_ostream &OS) const { getValue().print(OS, Signed); } + void print(llvm::raw_ostream &OS) const { V.print(OS, Signed);} std::string toDiagnosticString(const ASTContext &Ctx) const { std::string NameStr; llvm::raw_string_ostream OS(NameStr); @@ -195,57 +161,53 @@ public: IntegralAP truncate(unsigned BitWidth) const { if constexpr (Signed) - return IntegralAP( - 
getValue().trunc(BitWidth).sextOrTrunc(this->bitWidth())); + return IntegralAP(V.trunc(BitWidth).sextOrTrunc(this->bitWidth())); else - return IntegralAP( - getValue().trunc(BitWidth).zextOrTrunc(this->bitWidth())); + return IntegralAP(V.trunc(BitWidth).zextOrTrunc(this->bitWidth())); } IntegralAP toUnsigned() const { - return IntegralAP(Memory, BitWidth); + APInt Copy = V; + return IntegralAP(Copy); } void bitcastToMemory(std::byte *Dest) const { - llvm::StoreIntToMemory(getValue(), (uint8_t *)Dest, bitWidth() / 8); + llvm::StoreIntToMemory(V, (uint8_t *)Dest, bitWidth() / 8); } - static void bitcastFromMemory(const std::byte *Src, unsigned BitWidth, - IntegralAP *Result) { + static IntegralAP bitcastFromMemory(const std::byte *Src, unsigned BitWidth) { APInt V(BitWidth, static_cast(0), Signed); llvm::LoadIntFromMemory(V, (const uint8_t *)Src, BitWidth / 8); - Result->copy(V); + return IntegralAP(V); } ComparisonCategoryResult compare(const IntegralAP &RHS) const { assert(Signed == RHS.isSigned()); assert(bitWidth() == RHS.bitWidth()); - APInt V1 = getValue(); - APInt V2 = RHS.getValue(); if constexpr (Signed) { - if (V1.slt(V2)) + if (V.slt(RHS.V)) return ComparisonCategoryResult::Less; - if (V1.sgt(V2)) + if (V.sgt(RHS.V)) return ComparisonCategoryResult::Greater; return ComparisonCategoryResult::Equal; } assert(!Signed); - if (V1.ult(V2)) + if (V.ult(RHS.V)) return ComparisonCategoryResult::Less; - if (V1.ugt(V2)) + if (V.ugt(RHS.V)) return ComparisonCategoryResult::Greater; return ComparisonCategoryResult::Equal; } static bool increment(IntegralAP A, IntegralAP *R) { - APSInt One(APInt(A.bitWidth(), 1ull, Signed), !Signed); - return add(A, IntegralAP(One), A.bitWidth() + 1, R); + IntegralAP One(1, A.bitWidth()); + return add(A, One, A.bitWidth() + 1, R); } static bool decrement(IntegralAP A, IntegralAP *R) { - APSInt One(APInt(A.bitWidth(), 1ull, Signed), !Signed); - return sub(A, IntegralAP(One), A.bitWidth() + 1, R); + IntegralAP One(1, A.bitWidth()); + 
return sub(A, One, A.bitWidth() + 1, R); } static bool add(IntegralAP A, IntegralAP B, unsigned OpBits, IntegralAP *R) { @@ -262,97 +224,87 @@ public: static bool rem(IntegralAP A, IntegralAP B, unsigned OpBits, IntegralAP *R) { if constexpr (Signed) - R->copy(A.getValue().srem(B.getValue())); + *R = IntegralAP(A.V.srem(B.V)); else - R->copy(A.getValue().urem(B.getValue())); + *R = IntegralAP(A.V.urem(B.V)); return false; } static bool div(IntegralAP A, IntegralAP B, unsigned OpBits, IntegralAP *R) { if constexpr (Signed) - R->copy(A.getValue().sdiv(B.getValue())); + *R = IntegralAP(A.V.sdiv(B.V)); else - R->copy(A.getValue().udiv(B.getValue())); + *R = IntegralAP(A.V.udiv(B.V)); return false; } static bool bitAnd(IntegralAP A, IntegralAP B, unsigned OpBits, IntegralAP *R) { - R->copy(A.getValue() & B.getValue()); + *R = IntegralAP(A.V & B.V); return false; } static bool bitOr(IntegralAP A, IntegralAP B, unsigned OpBits, IntegralAP *R) { - R->copy(A.getValue() | B.getValue()); + *R = IntegralAP(A.V | B.V); return false; } static bool bitXor(IntegralAP A, IntegralAP B, unsigned OpBits, IntegralAP *R) { - R->copy(A.getValue() ^ B.getValue()); + *R = IntegralAP(A.V ^ B.V); return false; } static bool neg(const IntegralAP &A, IntegralAP *R) { - APInt AI = A.getValue(); + APInt AI = A.V; AI.negate(); - R->copy(AI); + *R = IntegralAP(AI); return false; } static bool comp(IntegralAP A, IntegralAP *R) { - R->copy(~A.getValue()); + *R = IntegralAP(~A.V); return false; } static void shiftLeft(const IntegralAP A, const IntegralAP B, unsigned OpBits, IntegralAP *R) { - *R = IntegralAP(A.getValue().shl(B.getValue().getZExtValue())); + *R = IntegralAP(A.V.shl(B.V.getZExtValue())); } static void shiftRight(const IntegralAP A, const IntegralAP B, unsigned OpBits, IntegralAP *R) { - unsigned ShiftAmount = B.getValue().getZExtValue(); + unsigned ShiftAmount = B.V.getZExtValue(); if constexpr (Signed) - R->copy(A.getValue().ashr(ShiftAmount)); + *R = 
IntegralAP(A.V.ashr(ShiftAmount)); else - R->copy(A.getValue().lshr(ShiftAmount)); + *R = IntegralAP(A.V.lshr(ShiftAmount)); } // === Serialization support === size_t bytesToSerialize() const { - assert(BitWidth != 0); - uint64_t NumWords = llvm::APInt::getNumWords(bitWidth()); - return sizeof(uint32_t) + (NumWords * sizeof(uint64_t)); + // 4 bytes for the BitWidth followed by N bytes for the actual APInt. + return sizeof(uint32_t) + (V.getBitWidth() / CHAR_BIT); } void serialize(std::byte *Buff) const { - std::memcpy(Buff, &BitWidth, sizeof(uint32_t)); - if (singleWord()) - std::memcpy(Buff + sizeof(uint32_t), &Val, sizeof(uint64_t)); - else { - uint64_t NumWords = llvm::APInt::getNumWords(bitWidth()); - std::memcpy(Buff + sizeof(uint32_t), Memory, NumWords * sizeof(uint64_t)); - } - } + assert(V.getBitWidth() < std::numeric_limits::max()); + uint32_t BitWidth = V.getBitWidth(); - static uint64_t deserializeSize(const std::byte *Buff) { - return *reinterpret_cast(Buff); + std::memcpy(Buff, &BitWidth, sizeof(uint32_t)); + llvm::StoreIntToMemory(V, (uint8_t *)(Buff + sizeof(uint32_t)), + BitWidth / CHAR_BIT); } - static void deserialize(const std::byte *Buff, IntegralAP *Result) { - uint64_t BitWidth = Result->BitWidth; - uint64_t NumWords = llvm::APInt::getNumWords(BitWidth); - assert(BitWidth == Result->BitWidth); + static IntegralAP deserialize(const std::byte *Buff) { + uint32_t BitWidth; + std::memcpy(&BitWidth, Buff, sizeof(uint32_t)); + IntegralAP Val(APInt(BitWidth, 0ull, !Signed)); - if (NumWords == 1) - std::memcpy(&Result->Val, Buff + sizeof(uint32_t), sizeof(uint64_t)); - else { - assert(Result->Memory); - std::memcpy(Result->Memory, Buff + sizeof(uint32_t), - NumWords * sizeof(uint64_t)); - } + llvm::LoadIntFromMemory(Val.V, (const uint8_t *)Buff + sizeof(uint32_t), + BitWidth / CHAR_BIT); + return Val; } private: @@ -360,7 +312,7 @@ private: static bool CheckAddSubMulUB(const IntegralAP &A, const IntegralAP &B, unsigned BitWidth, IntegralAP *R) { if 
constexpr (!Signed) { - R->copy(Op{}(A.getValue(), B.getValue())); + R->V = Op{}(A.V, B.V); return false; } @@ -368,7 +320,7 @@ private: const APSInt &RHS = B.toAPSInt(); APSInt Value = Op{}(LHS.extend(BitWidth), RHS.extend(BitWidth)); APSInt Result = Value.trunc(LHS.getBitWidth()); - R->copy(Result); + R->V = Result; return Result.extend(BitWidth) != Value; } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 1e2032feabb6..5c8abffb3a99 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1935,10 +1935,8 @@ bool CastPointerIntegralAP(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { if (!CheckPointerToIntegralCast(S, OpPC, Ptr, BitWidth)) return false; - auto Result = S.allocAP>(BitWidth); - Result.copy(APInt(BitWidth, Ptr.getIntegerRepresentation())); - - S.Stk.push>(Result); + S.Stk.push>( + IntegralAP::from(Ptr.getIntegerRepresentation(), BitWidth)); return true; } @@ -1948,10 +1946,8 @@ bool CastPointerIntegralAPS(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { if (!CheckPointerToIntegralCast(S, OpPC, Ptr, BitWidth)) return false; - auto Result = S.allocAP>(BitWidth); - Result.copy(APInt(BitWidth, Ptr.getIntegerRepresentation())); - - S.Stk.push>(Result); + S.Stk.push>( + IntegralAP::from(Ptr.getIntegerRepresentation(), BitWidth)); return true; } @@ -2057,100 +2053,6 @@ bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS, return Shorter == Longer.take_front(Shorter.size()); } -static void copyPrimitiveMemory(InterpState &S, const Pointer &Ptr, - PrimType T) { - - if (T == PT_IntAPS) { - auto &Val = Ptr.deref>(); - if (!Val.singleWord()) { - uint64_t *NewMemory = new (S.P) uint64_t[Val.numWords()]; - Val.take(NewMemory); - } - } else if (T == PT_IntAP) { - auto &Val = Ptr.deref>(); - if (!Val.singleWord()) { - uint64_t *NewMemory = new (S.P) uint64_t[Val.numWords()]; - Val.take(NewMemory); - } - } else if (T == PT_Float) { - auto &Val = Ptr.deref(); - if 
(!Val.singleWord()) { - uint64_t *NewMemory = new (S.P) uint64_t[Val.numWords()]; - Val.take(NewMemory); - } - } -} - -template -static void copyPrimitiveMemory(InterpState &S, const Pointer &Ptr) { - assert(needsAlloc()); - auto &Val = Ptr.deref(); - if (!Val.singleWord()) { - uint64_t *NewMemory = new (S.P) uint64_t[Val.numWords()]; - Val.take(NewMemory); - } -} - -static void finishGlobalRecurse(InterpState &S, const Pointer &Ptr) { - if (const Record *R = Ptr.getRecord()) { - for (const Record::Field &Fi : R->fields()) { - if (Fi.Desc->isPrimitive()) { - TYPE_SWITCH_ALLOC(Fi.Desc->getPrimType(), { - copyPrimitiveMemory(S, Ptr.atField(Fi.Offset)); - }); - copyPrimitiveMemory(S, Ptr.atField(Fi.Offset), Fi.Desc->getPrimType()); - } else - finishGlobalRecurse(S, Ptr.atField(Fi.Offset)); - } - return; - } - - if (const Descriptor *D = Ptr.getFieldDesc(); D && D->isArray()) { - unsigned NumElems = D->getNumElems(); - if (NumElems == 0) - return; - - if (D->isPrimitiveArray()) { - PrimType PT = D->getPrimType(); - if (!needsAlloc(PT)) - return; - assert(NumElems >= 1); - const Pointer EP = Ptr.atIndex(0); - bool AllSingleWord = true; - TYPE_SWITCH_ALLOC(PT, { - if (!EP.deref().singleWord()) { - copyPrimitiveMemory(S, EP); - AllSingleWord = false; - } - }); - if (AllSingleWord) - return; - for (unsigned I = 1; I != D->getNumElems(); ++I) { - const Pointer EP = Ptr.atIndex(I); - copyPrimitiveMemory(S, EP, PT); - } - } else { - assert(D->isCompositeArray()); - for (unsigned I = 0; I != D->getNumElems(); ++I) { - const Pointer EP = Ptr.atIndex(I).narrow(); - finishGlobalRecurse(S, EP); - } - } - } -} - -bool FinishInitGlobal(InterpState &S, CodePtr OpPC) { - const Pointer &Ptr = S.Stk.pop(); - - finishGlobalRecurse(S, Ptr); - if (Ptr.canBeInitialized()) { - Ptr.initialize(); - Ptr.activate(); - } - - return true; -} - // https://github.com/llvm/llvm-project/issues/102513 #if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG) #pragma optimize("", off) diff --git 
a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index 66d3e6d79e8b..ae3d4a441a79 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -189,7 +189,7 @@ bool CheckShift(InterpState &S, CodePtr OpPC, const LT &LHS, const RT &RHS, // C++11 [expr.shift]p1: Shift width must be less than the bit width of // the shifted type. - if (Bits > 1 && RHS >= Bits) { + if (Bits > 1 && RHS >= RT::from(Bits, RHS.bitWidth())) { const Expr *E = S.Current->getExpr(OpPC); const APSInt Val = RHS.toAPSInt(); QualType Ty = E->getType(); @@ -370,9 +370,6 @@ bool AddSubMulHelper(InterpState &S, CodePtr OpPC, unsigned Bits, const T &LHS, const T &RHS) { // Fast path - add the numbers with fixed width. T Result; - if constexpr (needsAlloc()) - Result = S.allocAP(LHS.bitWidth()); - if (!OpFW(LHS, RHS, Bits, &Result)) { S.Stk.push(Result); return true; @@ -411,7 +408,6 @@ bool Add(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); const unsigned Bits = RHS.bitWidth() + 1; - return AddSubMulHelper(S, OpPC, Bits, LHS, RHS); } @@ -427,7 +423,7 @@ inline bool Addf(InterpState &S, CodePtr OpPC, uint32_t FPOI) { const Floating &LHS = S.Stk.pop(); FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - Floating Result = S.allocFloat(LHS.getSemantics()); + Floating Result; auto Status = Floating::add(LHS, RHS, getRoundingMode(FPO), &Result); S.Stk.push(Result); return CheckFloatResult(S, OpPC, Result, Status, FPO); @@ -438,7 +434,6 @@ bool Sub(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); const unsigned Bits = RHS.bitWidth() + 1; - return AddSubMulHelper(S, OpPC, Bits, LHS, RHS); } @@ -447,7 +442,7 @@ inline bool Subf(InterpState &S, CodePtr OpPC, uint32_t FPOI) { const Floating &LHS = S.Stk.pop(); FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - Floating Result = S.allocFloat(LHS.getSemantics()); + Floating Result; auto Status = Floating::sub(LHS, RHS, 
getRoundingMode(FPO), &Result); S.Stk.push(Result); return CheckFloatResult(S, OpPC, Result, Status, FPO); @@ -458,7 +453,6 @@ bool Mul(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); const unsigned Bits = RHS.bitWidth() * 2; - return AddSubMulHelper(S, OpPC, Bits, LHS, RHS); } @@ -467,10 +461,8 @@ inline bool Mulf(InterpState &S, CodePtr OpPC, uint32_t FPOI) { const Floating &LHS = S.Stk.pop(); FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - Floating Result = S.allocFloat(LHS.getSemantics()); - + Floating Result; auto Status = Floating::mul(LHS, RHS, getRoundingMode(FPO), &Result); - S.Stk.push(Result); return CheckFloatResult(S, OpPC, Result, Status, FPO); } @@ -492,14 +484,9 @@ inline bool Mulc(InterpState &S, CodePtr OpPC) { HandleComplexComplexMul(A, B, C, D, ResR, ResI); // Copy into the result. - Floating RA = S.allocFloat(A.getSemantics()); - RA.copy(ResR); - Result.atIndex(0).deref() = RA; // Floating(ResR); + Result.atIndex(0).deref() = Floating(ResR); Result.atIndex(0).initialize(); - - Floating RI = S.allocFloat(A.getSemantics()); - RI.copy(ResI); - Result.atIndex(1).deref() = RI; // Floating(ResI); + Result.atIndex(1).deref() = Floating(ResI); Result.atIndex(1).initialize(); Result.initialize(); } else { @@ -552,20 +539,10 @@ inline bool Divc(InterpState &S, CodePtr OpPC) { HandleComplexComplexDiv(A, B, C, D, ResR, ResI); // Copy into the result. 
- // Result.atIndex(0).deref() = Floating(ResR); - // Result.atIndex(0).initialize(); - // Result.atIndex(1).deref() = Floating(ResI); - // Result.atIndex(1).initialize(); - - Floating RA = S.allocFloat(A.getSemantics()); - RA.copy(ResR); - Result.atIndex(0).deref() = RA; // Floating(ResR); + Result.atIndex(0).deref() = Floating(ResR); Result.atIndex(0).initialize(); - - Floating RI = S.allocFloat(A.getSemantics()); - RI.copy(ResI); - Result.atIndex(1).deref() = RI; // Floating(ResI); - + Result.atIndex(1).deref() = Floating(ResI); + Result.atIndex(1).initialize(); Result.initialize(); } else { // Integer element type. @@ -631,12 +608,9 @@ template ::T> bool BitAnd(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); - unsigned Bits = RHS.bitWidth(); + unsigned Bits = RHS.bitWidth(); T Result; - if constexpr (needsAlloc()) - Result = S.allocAP(Bits); - if (!T::bitAnd(LHS, RHS, Bits, &Result)) { S.Stk.push(Result); return true; @@ -651,12 +625,9 @@ template ::T> bool BitOr(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); - unsigned Bits = RHS.bitWidth(); + unsigned Bits = RHS.bitWidth(); T Result; - if constexpr (needsAlloc()) - Result = S.allocAP(Bits); - if (!T::bitOr(LHS, RHS, Bits, &Result)) { S.Stk.push(Result); return true; @@ -673,11 +644,7 @@ bool BitXor(InterpState &S, CodePtr OpPC) { const T &LHS = S.Stk.pop(); unsigned Bits = RHS.bitWidth(); - T Result; - if constexpr (needsAlloc()) - Result = S.allocAP(Bits); - if (!T::bitXor(LHS, RHS, Bits, &Result)) { S.Stk.push(Result); return true; @@ -692,15 +659,12 @@ template ::T> bool Rem(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); - const unsigned Bits = RHS.bitWidth() * 2; if (!CheckDivRem(S, OpPC, LHS, RHS)) return false; + const unsigned Bits = RHS.bitWidth() * 2; T Result; - if constexpr (needsAlloc()) - Result = S.allocAP(LHS.bitWidth()); - if (!T::rem(LHS, RHS, Bits, &Result)) { 
S.Stk.push(Result); return true; @@ -715,15 +679,12 @@ template ::T> bool Div(InterpState &S, CodePtr OpPC) { const T &RHS = S.Stk.pop(); const T &LHS = S.Stk.pop(); - const unsigned Bits = RHS.bitWidth() * 2; if (!CheckDivRem(S, OpPC, LHS, RHS)) return false; + const unsigned Bits = RHS.bitWidth() * 2; T Result; - if constexpr (needsAlloc()) - Result = S.allocAP(LHS.bitWidth()); - if (!T::div(LHS, RHS, Bits, &Result)) { S.Stk.push(Result); return true; @@ -746,10 +707,8 @@ inline bool Divf(InterpState &S, CodePtr OpPC, uint32_t FPOI) { return false; FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - - Floating Result = S.allocFloat(LHS.getSemantics()); + Floating Result; auto Status = Floating::div(LHS, RHS, getRoundingMode(FPO), &Result); - S.Stk.push(Result); return CheckFloatResult(S, OpPC, Result, Status, FPO); } @@ -771,44 +730,31 @@ inline bool Inv(InterpState &S, CodePtr OpPC) { template ::T> bool Neg(InterpState &S, CodePtr OpPC) { const T &Value = S.Stk.pop(); + T Result; - if constexpr (std::is_same_v) { - T Result = S.allocFloat(Value.getSemantics()); - - if (!T::neg(Value, &Result)) { - S.Stk.push(Result); - return true; - } - return false; - } else { - T Result; - if constexpr (needsAlloc()) - Result = S.allocAP(Value.bitWidth()); - - if (!T::neg(Value, &Result)) { - S.Stk.push(Result); - return true; - } - - assert(isIntegralType(Name) && - "don't expect other types to fail at constexpr negation"); + if (!T::neg(Value, &Result)) { S.Stk.push(Result); + return true; + } - APSInt NegatedValue = -Value.toAPSInt(Value.bitWidth() + 1); - if (S.checkingForUndefinedBehavior()) { - const Expr *E = S.Current->getExpr(OpPC); - QualType Type = E->getType(); - SmallString<32> Trunc; - NegatedValue.trunc(Result.bitWidth()) - .toString(Trunc, 10, Result.isSigned(), /*formatAsCLiteral=*/false, - /*UpperCase=*/true, /*InsertSeparators=*/true); - S.report(E->getExprLoc(), diag::warn_integer_constant_overflow) - << Trunc << Type << E->getSourceRange(); - return 
true; - } + assert(isIntegralType(Name) && + "don't expect other types to fail at constexpr negation"); + S.Stk.push(Result); - return handleOverflow(S, OpPC, NegatedValue); + APSInt NegatedValue = -Value.toAPSInt(Value.bitWidth() + 1); + if (S.checkingForUndefinedBehavior()) { + const Expr *E = S.Current->getExpr(OpPC); + QualType Type = E->getType(); + SmallString<32> Trunc; + NegatedValue.trunc(Result.bitWidth()) + .toString(Trunc, 10, Result.isSigned(), /*formatAsCLiteral=*/false, + /*UpperCase=*/true, /*InsertSeparators=*/true); + S.report(E->getExprLoc(), diag::warn_integer_constant_overflow) + << Trunc << Type << E->getSourceRange(); + return true; } + + return handleOverflow(S, OpPC, NegatedValue); } enum class PushVal : bool { @@ -837,8 +783,6 @@ bool IncDecHelper(InterpState &S, CodePtr OpPC, const Pointer &Ptr, const T &Value = Ptr.deref(); T Result; - if constexpr (needsAlloc()) - Result = S.allocAP(Value.bitWidth()); if constexpr (DoPush == PushVal::Yes) S.Stk.push(Value); @@ -946,6 +890,7 @@ bool PreDec(InterpState &S, CodePtr OpPC, bool CanOverflow) { const Pointer &Ptr = S.Stk.peek(); if (!CheckLoad(S, OpPC, Ptr, AK_Decrement)) return false; + return IncDecHelper(S, OpPC, Ptr, CanOverflow); } @@ -953,7 +898,7 @@ template bool IncDecFloatHelper(InterpState &S, CodePtr OpPC, const Pointer &Ptr, uint32_t FPOI) { Floating Value = Ptr.deref(); - Floating Result = S.allocFloat(Value.getSemantics()); + Floating Result; if constexpr (DoPush == PushVal::Yes) S.Stk.push(Value); @@ -1007,15 +952,12 @@ inline bool DecfPop(InterpState &S, CodePtr OpPC, uint32_t FPOI) { template ::T> bool Comp(InterpState &S, CodePtr OpPC) { const T &Val = S.Stk.pop(); - T Result; - if constexpr (needsAlloc()) - Result = S.allocAP(Val.bitWidth()); - if (!T::comp(Val, &Result)) { S.Stk.push(Result); return true; } + return false; } @@ -1383,23 +1325,10 @@ bool Flip(InterpState &S, CodePtr OpPC) { template ::T> bool Const(InterpState &S, CodePtr OpPC, const T &Arg) { - if constexpr 
(needsAlloc()) { - T Result = S.allocAP(Arg.bitWidth()); - Result.copy(Arg.toAPSInt()); - S.Stk.push(Result); - return true; - } S.Stk.push(Arg); return true; } -inline bool ConstFloat(InterpState &S, CodePtr OpPC, const Floating &F) { - Floating Result = S.allocFloat(F.getSemantics()); - Result.copy(F.getAPFloat()); - S.Stk.push(Result); - return true; -} - //===----------------------------------------------------------------------===// // Get/Set Local/Param/Global/This //===----------------------------------------------------------------------===// @@ -1554,24 +1483,7 @@ bool SetGlobal(InterpState &S, CodePtr OpPC, uint32_t I) { template ::T> bool InitGlobal(InterpState &S, CodePtr OpPC, uint32_t I) { const Pointer &P = S.P.getGlobal(I); - P.deref() = S.Stk.pop(); - - if constexpr (std::is_same_v) { - auto &Val = P.deref(); - if (!Val.singleWord()) { - uint64_t *NewMemory = new (S.P) uint64_t[Val.numWords()]; - Val.take(NewMemory); - } - - } else if constexpr (needsAlloc()) { - auto &Val = P.deref(); - if (!Val.singleWord()) { - uint64_t *NewMemory = new (S.P) uint64_t[Val.numWords()]; - Val.take(NewMemory); - } - } - P.initialize(); return true; } @@ -1673,22 +1585,7 @@ bool InitBitField(InterpState &S, CodePtr OpPC, const Record::Field *F) { assert(F->isBitField()); const T &Value = S.Stk.pop(); const Pointer &Field = S.Stk.peek().atField(F->Offset); - - if constexpr (needsAlloc()) { - T Result = S.allocAP(Value.bitWidth()); - if (T::isSigned()) - Result.copy(Value.toAPSInt() - .trunc(F->Decl->getBitWidthValue()) - .sextOrTrunc(Value.bitWidth())); - else - Result.copy(Value.toAPSInt() - .trunc(F->Decl->getBitWidthValue()) - .zextOrTrunc(Value.bitWidth())); - - Field.deref() = Result; - } else { - Field.deref() = Value.truncate(F->Decl->getBitWidthValue()); - } + Field.deref() = Value.truncate(F->Decl->getBitWidthValue()); Field.activate(); Field.initialize(); return true; @@ -1868,8 +1765,6 @@ inline bool FinishInit(InterpState &S, CodePtr OpPC) { return true; 
} -bool FinishInitGlobal(InterpState &S, CodePtr OpPC); - inline bool Dump(InterpState &S, CodePtr OpPC) { S.Stk.dump(); return true; @@ -2376,8 +2271,7 @@ template bool Cast(InterpState &S, CodePtr OpPC) { inline bool CastFP(InterpState &S, CodePtr OpPC, const llvm::fltSemantics *Sem, llvm::RoundingMode RM) { Floating F = S.Stk.pop(); - Floating Result = S.allocFloat(*Sem); - F.toSemantics(Sem, RM, &Result); + Floating Result = F.toSemantics(Sem, RM); S.Stk.push(Result); return true; } @@ -2401,25 +2295,15 @@ inline bool CastFixedPoint(InterpState &S, CodePtr OpPC, uint32_t FPS) { /// to know what bitwidth the result should be. template ::T> bool CastAP(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { - auto Result = S.allocAP>(BitWidth); - // Copy data. - { - APInt Source = S.Stk.pop().toAPSInt().extOrTrunc(BitWidth); - Result.copy(Source); - } - S.Stk.push>(Result); + S.Stk.push>( + IntegralAP::from(S.Stk.pop(), BitWidth)); return true; } template ::T> bool CastAPS(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { - auto Result = S.allocAP>(BitWidth); - // Copy data. 
- { - APInt Source = S.Stk.pop().toAPSInt().extOrTrunc(BitWidth); - Result.copy(Source); - } - S.Stk.push>(Result); + S.Stk.push>( + IntegralAP::from(S.Stk.pop(), BitWidth)); return true; } @@ -2428,11 +2312,11 @@ bool CastIntegralFloating(InterpState &S, CodePtr OpPC, const llvm::fltSemantics *Sem, uint32_t FPOI) { const T &From = S.Stk.pop(); APSInt FromAP = From.toAPSInt(); + Floating Result; FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - Floating Result = S.allocFloat(*Sem); auto Status = - Floating::fromIntegral(FromAP, *Sem, getRoundingMode(FPO), &Result); + Floating::fromIntegral(FromAP, *Sem, getRoundingMode(FPO), Result); S.Stk.push(Result); return CheckFloatResult(S, OpPC, Result, Status, FPO); @@ -2481,12 +2365,7 @@ static inline bool CastFloatingIntegralAP(InterpState &S, CodePtr OpPC, return handleOverflow(S, OpPC, F.getAPFloat()); FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - - auto ResultAP = S.allocAP>(BitWidth); - ResultAP.copy(Result); - - S.Stk.push>(ResultAP); - + S.Stk.push>(IntegralAP(Result)); return CheckFloatResult(S, OpPC, F, Status, FPO); } @@ -2502,12 +2381,7 @@ static inline bool CastFloatingIntegralAPS(InterpState &S, CodePtr OpPC, return handleOverflow(S, OpPC, F.getAPFloat()); FPOptions FPO = FPOptions::getFromOpaqueInt(FPOI); - - auto ResultAP = S.allocAP>(BitWidth); - ResultAP.copy(Result); - - S.Stk.push>(ResultAP); - + S.Stk.push>(IntegralAP(Result)); return CheckFloatResult(S, OpPC, F, Status, FPO); } @@ -2567,9 +2441,8 @@ static inline bool CastFloatingFixedPoint(InterpState &S, CodePtr OpPC, static inline bool CastFixedPointFloating(InterpState &S, CodePtr OpPC, const llvm::fltSemantics *Sem) { const auto &Fixed = S.Stk.pop(); - Floating Result = S.allocFloat(*Sem); - Result.copy(Fixed.toFloat(Sem)); - S.Stk.push(Result); + + S.Stk.push(Fixed.toFloat(Sem)); return true; } @@ -2633,18 +2506,12 @@ bool Zero(InterpState &S, CodePtr OpPC) { } static inline bool ZeroIntAP(InterpState &S, CodePtr OpPC, uint32_t 
BitWidth) { - auto Result = S.allocAP>(BitWidth); - if (!Result.singleWord()) - std::memset(Result.Memory, 0, Result.numWords() * sizeof(uint64_t)); - S.Stk.push>(Result); + S.Stk.push>(IntegralAP::zero(BitWidth)); return true; } static inline bool ZeroIntAPS(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { - auto Result = S.allocAP>(BitWidth); - if (!Result.singleWord()) - std::memset(Result.Memory, 0, Result.numWords() * sizeof(uint64_t)); - S.Stk.push>(Result); + S.Stk.push>(IntegralAP::zero(BitWidth)); return true; } @@ -2711,9 +2578,7 @@ inline bool RVOPtr(InterpState &S, CodePtr OpPC) { //===----------------------------------------------------------------------===// template -inline bool DoShift(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS, - LT *Result) { - +inline bool DoShift(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS) { const unsigned Bits = LHS.bitWidth(); // OpenCL 6.3j: shift values are effectively % word size of LHS. @@ -2731,7 +2596,7 @@ inline bool DoShift(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS, RHS = -RHS; return DoShift( - S, OpPC, LHS, RHS, Result); + S, OpPC, LHS, RHS); } if (!CheckShift(S, OpPC, LHS, RHS, Bits)) @@ -2779,7 +2644,6 @@ inline bool DoShift(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS, // Do the shift on potentially signed LT, then convert to unsigned type. LT A; LT::shiftRight(LHS, LT::from(RHS, Bits), Bits, &A); - // LT::shiftRight(LHS, LT(RHSTemp), Bits, &A); R = LT::AsUnsigned::from(A); } } @@ -2788,48 +2652,6 @@ inline bool DoShift(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS, return true; } -/// A version of DoShift that works on IntegralAP. -template -inline bool DoShiftAP(InterpState &S, CodePtr OpPC, LT &LHS, RT &RHS, - LT *Result) { - const unsigned Bits = LHS.bitWidth(); - const APSInt &LHSAP = LHS.toAPSInt(); - APSInt RHSAP = RHS.toAPSInt(); - - // OpenCL 6.3j: shift values are effectively % word size of LHS. 
- if (S.getLangOpts().OpenCL) - RHSAP &= APSInt(llvm::APInt(RHSAP.getBitWidth(), - static_cast(LHSAP.getBitWidth() - 1)), - RHSAP.isUnsigned()); - - if (RHS.isNegative()) { - // During constant-folding, a negative shift is an opposite shift. Such a - // shift is not a constant expression. - const SourceInfo &Loc = S.Current->getSource(OpPC); - S.CCEDiag(Loc, diag::note_constexpr_negative_shift) << RHS.toAPSInt(); - if (!S.noteUndefinedBehavior()) - return false; - RHS = -RHS; - return DoShiftAP( - S, OpPC, LHS, RHS, Result); - } - - if (!CheckShift(S, OpPC, LHS, RHS, Bits)) - return false; - - if constexpr (Dir == ShiftDir::Left) { - unsigned SA = (unsigned)RHSAP.getLimitedValue(LHS.bitWidth() - 1); - Result->copy(LHSAP << SA); - } else { - unsigned SA = (unsigned)RHSAP.getLimitedValue(LHS.bitWidth() - 1); - Result->copy(LHSAP >> SA); - } - - S.Stk.push(*Result); - return true; -} - template inline bool Shr(InterpState &S, CodePtr OpPC) { using LT = typename PrimConv::T; @@ -2837,13 +2659,7 @@ inline bool Shr(InterpState &S, CodePtr OpPC) { auto RHS = S.Stk.pop(); auto LHS = S.Stk.pop(); - if constexpr (needsAlloc()) { - LT Result = S.allocAP(LHS.bitWidth()); - return DoShiftAP(S, OpPC, LHS, RHS, &Result); - } else { - LT Result; - return DoShift(S, OpPC, LHS, RHS, &Result); - } + return DoShift(S, OpPC, LHS, RHS); } template @@ -2852,13 +2668,8 @@ inline bool Shl(InterpState &S, CodePtr OpPC) { using RT = typename PrimConv::T; auto RHS = S.Stk.pop(); auto LHS = S.Stk.pop(); - if constexpr (needsAlloc()) { - LT Result = S.allocAP(LHS.bitWidth()); - return DoShiftAP(S, OpPC, LHS, RHS, &Result); - } else { - LT Result; - return DoShift(S, OpPC, LHS, RHS, &Result); - } + + return DoShift(S, OpPC, LHS, RHS); } static inline bool ShiftFixedPoint(InterpState &S, CodePtr OpPC, bool Left) { @@ -3441,15 +3252,7 @@ inline bool BitCastPrim(InterpState &S, CodePtr OpPC, bool TargetIsUCharOrByte, if constexpr (std::is_same_v) { assert(Sem); - Floating Result = 
S.allocFloat(*Sem); - Floating::bitcastFromMemory(Buff.data(), *Sem, &Result); - S.Stk.push(Result); - - // S.Stk.push(T::bitcastFromMemory(Buff.data(), *Sem)); - } else if constexpr (needsAlloc()) { - T Result = S.allocAP(ResultBitWidth); - T::bitcastFromMemory(Buff.data(), ResultBitWidth, &Result); - S.Stk.push(Result); + S.Stk.push(T::bitcastFromMemory(Buff.data(), *Sem)); } else { assert(!Sem); S.Stk.push(T::bitcastFromMemory(Buff.data(), ResultBitWidth)); @@ -3507,11 +3310,7 @@ template inline T ReadArg(InterpState &S, CodePtr &OpPC) { } template <> inline Floating ReadArg(InterpState &S, CodePtr &OpPC) { - auto &Semantics = - llvm::APFloatBase::EnumToSemantics(Floating::deserializeSemantics(*OpPC)); - - auto F = S.allocFloat(Semantics); - Floating::deserialize(*OpPC, &F); + Floating F = Floating::deserialize(*OpPC); OpPC += align(F.bytesToSerialize()); return F; } @@ -3519,25 +3318,17 @@ template <> inline Floating ReadArg(InterpState &S, CodePtr &OpPC) { template <> inline IntegralAP ReadArg>(InterpState &S, CodePtr &OpPC) { - uint32_t BitWidth = IntegralAP::deserializeSize(*OpPC); - auto Result = S.allocAP>(BitWidth); - assert(Result.bitWidth() == BitWidth); - - IntegralAP::deserialize(*OpPC, &Result); - OpPC += align(Result.bytesToSerialize()); - return Result; + IntegralAP I = IntegralAP::deserialize(*OpPC); + OpPC += align(I.bytesToSerialize()); + return I; } template <> inline IntegralAP ReadArg>(InterpState &S, CodePtr &OpPC) { - uint32_t BitWidth = IntegralAP::deserializeSize(*OpPC); - auto Result = S.allocAP>(BitWidth); - assert(Result.bitWidth() == BitWidth); - - IntegralAP::deserialize(*OpPC, &Result); - OpPC += align(Result.bytesToSerialize()); - return Result; + IntegralAP I = IntegralAP::deserialize(*OpPC); + OpPC += align(I.bytesToSerialize()); + return I; } template <> diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 5304bd77f2c0..d01e3d042a8b 100644 --- 
a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -57,21 +57,6 @@ static void pushInteger(InterpState &S, const APSInt &Val, QualType QT) { assert(T); unsigned BitWidth = S.getASTContext().getTypeSize(QT); - - if (T == PT_IntAPS) { - auto Result = S.allocAP>(BitWidth); - Result.copy(Val); - S.Stk.push>(Result); - return; - } - - if (T == PT_IntAP) { - auto Result = S.allocAP>(BitWidth); - Result.copy(Val); - S.Stk.push>(Result); - return; - } - if (QT->isSignedIntegerOrEnumerationType()) { int64_t V = Val.getSExtValue(); INT_TYPE_SWITCH(*T, { S.Stk.push(T::from(V, BitWidth)); }); @@ -342,13 +327,13 @@ static bool interp__builtin_nan(InterpState &S, CodePtr OpPC, S.getASTContext().getFloatTypeSemantics( Call->getDirectCallee()->getReturnType()); - Floating Result = S.allocFloat(TargetSemantics); + Floating Result; if (S.getASTContext().getTargetInfo().isNan2008()) { if (Signaling) - Result.copy( + Result = Floating( llvm::APFloat::getSNaN(TargetSemantics, /*Negative=*/false, &Fill)); else - Result.copy( + Result = Floating( llvm::APFloat::getQNaN(TargetSemantics, /*Negative=*/false, &Fill)); } else { // Prior to IEEE 754-2008, architectures were allowed to choose whether @@ -357,10 +342,10 @@ static bool interp__builtin_nan(InterpState &S, CodePtr OpPC, // 2008 revisions, MIPS interpreted sNaN-2008 as qNan and qNaN-2008 as // sNaN. This is now known as "legacy NaN" encoding. 
if (Signaling) - Result.copy( + Result = Floating( llvm::APFloat::getQNaN(TargetSemantics, /*Negative=*/false, &Fill)); else - Result.copy( + Result = Floating( llvm::APFloat::getSNaN(TargetSemantics, /*Negative=*/false, &Fill)); } @@ -375,9 +360,7 @@ static bool interp__builtin_inf(InterpState &S, CodePtr OpPC, S.getASTContext().getFloatTypeSemantics( Call->getDirectCallee()->getReturnType()); - Floating Result = S.allocFloat(TargetSemantics); - Result.copy(APFloat::getInf(TargetSemantics)); - S.Stk.push(Result); + S.Stk.push(Floating::getInf(TargetSemantics)); return true; } @@ -385,12 +368,10 @@ static bool interp__builtin_copysign(InterpState &S, CodePtr OpPC, const InterpFrame *Frame) { const Floating &Arg2 = S.Stk.pop(); const Floating &Arg1 = S.Stk.pop(); - Floating Result = S.allocFloat(Arg1.getSemantics()); APFloat Copy = Arg1.getAPFloat(); Copy.copySign(Arg2.getAPFloat()); - Result.copy(Copy); - S.Stk.push(Result); + S.Stk.push(Floating(Copy)); return true; } @@ -399,13 +380,11 @@ static bool interp__builtin_fmin(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, bool IsNumBuiltin) { const Floating &RHS = S.Stk.pop(); const Floating &LHS = S.Stk.pop(); - Floating Result = S.allocFloat(LHS.getSemantics()); if (IsNumBuiltin) - Result.copy(llvm::minimumnum(LHS.getAPFloat(), RHS.getAPFloat())); + S.Stk.push(llvm::minimumnum(LHS.getAPFloat(), RHS.getAPFloat())); else - Result.copy(minnum(LHS.getAPFloat(), RHS.getAPFloat())); - S.Stk.push(Result); + S.Stk.push(minnum(LHS.getAPFloat(), RHS.getAPFloat())); return true; } @@ -413,13 +392,11 @@ static bool interp__builtin_fmax(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, bool IsNumBuiltin) { const Floating &RHS = S.Stk.pop(); const Floating &LHS = S.Stk.pop(); - Floating Result = S.allocFloat(LHS.getSemantics()); if (IsNumBuiltin) - Result.copy(llvm::maximumnum(LHS.getAPFloat(), RHS.getAPFloat())); + S.Stk.push(llvm::maximumnum(LHS.getAPFloat(), RHS.getAPFloat())); else - 
Result.copy(maxnum(LHS.getAPFloat(), RHS.getAPFloat())); - S.Stk.push(Result); + S.Stk.push(maxnum(LHS.getAPFloat(), RHS.getAPFloat())); return true; } @@ -594,16 +571,8 @@ static bool interp__builtin_fpclassify(InterpState &S, CodePtr OpPC, static bool interp__builtin_fabs(InterpState &S, CodePtr OpPC, const InterpFrame *Frame) { const Floating &Val = S.Stk.pop(); - APFloat F = Val.getAPFloat(); - if (!F.isNegative()) { - S.Stk.push(Val); - return true; - } - Floating Result = S.allocFloat(Val.getSemantics()); - F.changeSign(); - Result.copy(F); - S.Stk.push(Result); + S.Stk.push(Floating::abs(Val)); return true; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp index 2569cac018b3..239b3104e89f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltinBitCast.cpp @@ -402,9 +402,7 @@ bool clang::interp::DoBitCastPtr(InterpState &S, CodePtr OpPC, if (llvm::sys::IsBigEndianHost) swapBytes(M.get(), NumBits.roundToBytes()); - Floating R = S.allocFloat(Semantics); - Floating::bitcastFromMemory(M.get(), Semantics, &R); - P.deref() = R; + P.deref() = Floating::bitcastFromMemory(M.get(), Semantics); P.initialize(); return true; } diff --git a/clang/lib/AST/ByteCode/InterpState.h b/clang/lib/AST/ByteCode/InterpState.h index 08765561985e..e8dc6f0483d6 100644 --- a/clang/lib/AST/ByteCode/InterpState.h +++ b/clang/lib/AST/ByteCode/InterpState.h @@ -15,7 +15,6 @@ #include "Context.h" #include "DynamicAllocator.h" -#include "Floating.h" #include "Function.h" #include "InterpFrame.h" #include "InterpStack.h" @@ -127,33 +126,6 @@ public: StdAllocatorCaller getStdAllocatorCaller(StringRef Name) const; - void *allocate(size_t Size, unsigned Align = 8) const { - return Allocator.Allocate(Size, Align); - } - template T *allocate(size_t Num = 1) const { - return static_cast(allocate(Num * sizeof(T), alignof(T))); - } - - template T allocAP(unsigned BitWidth) { - unsigned NumWords 
= APInt::getNumWords(BitWidth); - if (NumWords == 1) - return T(BitWidth); - uint64_t *Mem = (uint64_t *)this->allocate(NumWords * sizeof(uint64_t)); - // std::memset(Mem, 0, NumWords * sizeof(uint64_t)); // Debug - return T(Mem, BitWidth); - } - - Floating allocFloat(const llvm::fltSemantics &Sem) { - if (Floating::singleWord(Sem)) - return Floating(llvm::APFloatBase::SemanticsToEnum(Sem)); - - unsigned NumWords = - APInt::getNumWords(llvm::APFloatBase::getSizeInBits(Sem)); - uint64_t *Mem = (uint64_t *)this->allocate(NumWords * sizeof(uint64_t)); - // std::memset(Mem, 0, NumWords * sizeof(uint64_t)); // Debug - return Floating(Mem, llvm::APFloatBase::SemanticsToEnum(Sem)); - } - private: friend class EvaluationResult; friend class InterpStateCCOverride; @@ -189,8 +161,6 @@ public: llvm::SmallVector< std::pair> SeenGlobalTemporaries; - - mutable llvm::BumpPtrAllocator Allocator; }; class InterpStateCCOverride final { diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td index 57e01f7bd9da..c76ac5f8ae86 100644 --- a/clang/lib/AST/ByteCode/Opcodes.td +++ b/clang/lib/AST/ByteCode/Opcodes.td @@ -48,7 +48,6 @@ def ArgUint64 : ArgType { let Name = "uint64_t"; } def ArgIntAP : ArgType { let Name = "IntegralAP"; let AsRef = true; } def ArgIntAPS : ArgType { let Name = "IntegralAP"; let AsRef = true; } def ArgFloat : ArgType { let Name = "Floating"; let AsRef = true; } - def ArgBool : ArgType { let Name = "bool"; } def ArgFixedPoint : ArgType { let Name = "FixedPoint"; let AsRef = true; } @@ -89,9 +88,6 @@ def IntegerAndFixedTypeClass : TypeClass { Uint32, Sint64, Uint64, IntAP, IntAPS, FixedPoint]; } -def IntegralTypeClass : TypeClass { - let Types = !listconcat(IntegerTypeClass.Types, [Bool]); -} def FixedSizeIntegralTypeClass : TypeClass { let Types = [Sint8, Uint8, Sint16, Uint16, Sint32, Uint32, Sint64, Uint64, Bool]; @@ -269,13 +265,12 @@ def ConstSint32 : ConstOpcode; def ConstUint32 : ConstOpcode; def ConstSint64 : ConstOpcode; def 
ConstUint64 : ConstOpcode; -def ConstIntAP : ConstOpcode; -def ConstIntAPS : ConstOpcode; +def ConstFloat : ConstOpcode; +def constIntAP : ConstOpcode; +def constIntAPS : ConstOpcode; def ConstBool : ConstOpcode; def ConstFixedPoint : ConstOpcode; -def ConstFloat : Opcode { let Args = [ArgFloat]; } - // [] -> [Integer] def Zero : Opcode { let Types = [FixedSizeIntegralTypeClass]; @@ -333,7 +328,6 @@ def GetMemberPtrBasePop : Opcode { def FinishInitPop : Opcode; def FinishInit : Opcode; -def FinishInitGlobal : Opcode; def GetPtrDerivedPop : Opcode { let Args = [ArgUint32, ArgBool, ArgTypePtr]; } @@ -395,7 +389,7 @@ class AccessOpcode : Opcode { } class BitFieldOpcode : Opcode { - let Types = [IntegralTypeClass]; + let Types = [AluTypeClass]; let Args = [ArgRecordField]; let HasGroup = 1; } diff --git a/clang/lib/AST/ByteCode/PrimType.h b/clang/lib/AST/ByteCode/PrimType.h index a156cccbb3c1..6152fbfbe3a7 100644 --- a/clang/lib/AST/ByteCode/PrimType.h +++ b/clang/lib/AST/ByteCode/PrimType.h @@ -76,13 +76,6 @@ inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, } constexpr bool isIntegralType(PrimType T) { return T <= PT_FixedPoint; } -template constexpr bool needsAlloc() { - return std::is_same_v> || - std::is_same_v> || std::is_same_v; -} -constexpr bool needsAlloc(PrimType T) { - return T == PT_IntAP || T == PT_IntAPS || T == PT_Float; -} /// Mapping from primitive types to their representation. 
template struct PrimConv; @@ -216,16 +209,6 @@ static inline bool aligned(const void *P) { } \ } while (0) -#define TYPE_SWITCH_ALLOC(Expr, B) \ - do { \ - switch (Expr) { \ - TYPE_SWITCH_CASE(PT_Float, B) \ - TYPE_SWITCH_CASE(PT_IntAP, B) \ - TYPE_SWITCH_CASE(PT_IntAPS, B) \ - default:; \ - } \ - } while (0) - #define COMPOSITE_TYPE_SWITCH(Expr, B, D) \ do { \ switch (Expr) { \ diff --git a/clang/lib/AST/ByteCode/Program.h b/clang/lib/AST/ByteCode/Program.h index 5d9c42244749..23ba1bbd193b 100644 --- a/clang/lib/AST/ByteCode/Program.h +++ b/clang/lib/AST/ByteCode/Program.h @@ -132,14 +132,6 @@ public: bool IsMutable = false, bool IsVolatile = false, const Expr *Init = nullptr); - void *Allocate(size_t Size, unsigned Align = 8) const { - return Allocator.Allocate(Size, Align); - } - template T *Allocate(size_t Num = 1) const { - return static_cast(Allocate(Num * sizeof(T), alignof(T))); - } - void Deallocate(void *Ptr) const {} - /// Context to manage declaration lifetimes. class DeclScope { public: @@ -212,7 +204,7 @@ private: }; /// Allocator for globals. - mutable PoolAllocTy Allocator; + PoolAllocTy Allocator; /// Global objects. 
std::vector Globals; @@ -246,18 +238,4 @@ public: } // namespace interp } // namespace clang -inline void *operator new(size_t Bytes, const clang::interp::Program &C, - size_t Alignment = 8) { - return C.Allocate(Bytes, Alignment); -} - -inline void operator delete(void *Ptr, const clang::interp::Program &C, - size_t) { - C.Deallocate(Ptr); -} -inline void *operator new[](size_t Bytes, const clang::interp::Program &C, - size_t Alignment = 8) { - return C.Allocate(Bytes, Alignment); -} - #endif diff --git a/clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp b/clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp index 1013a771d13b..710612bef8fd 100644 --- a/clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp +++ b/clang/test/AST/ByteCode/builtin-bit-cast-long-double.cpp @@ -21,9 +21,6 @@ template constexpr To bit_cast(const From &from) { static_assert(sizeof(To) == sizeof(From)); return __builtin_bit_cast(To, from); -#if __x86_64 - // both-note@-2 {{indeterminate value can only initialize an object of type}} -#endif } template @@ -41,8 +38,11 @@ constexpr Init round_trip(const Init &init) { namespace test_long_double { #if __x86_64 -constexpr __int128_t test_cast_to_int128 = bit_cast<__int128_t>((long double)0); // both-error{{must be initialized by a constant expression}}\ - // both-note{{in call}} +/// FIXME: We could enable this, but since it aborts, it causes the usual mempory leak. 
+#if 0 +constexpr __int128_t test_cast_to_int128 = bit_cast<__int128_t>((long double)0); // expected-error{{must be initialized by a constant expression}}\ + // expected-note{{in call}} +#endif constexpr long double ld = 3.1425926539; struct bytes { diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index 174c1ffa79a4..21dca15a4577 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -208,7 +208,7 @@ namespace nan { constexpr double NaN3 = __builtin_nan("foo"); // both-error {{must be initialized by a constant expression}} constexpr float NaN4 = __builtin_nanf(""); - constexpr long double NaN5 = __builtin_nanf128(""); + //constexpr long double NaN5 = __builtin_nanf128(""); /// FIXME: This should be accepted by the current interpreter as well. constexpr char f[] = {'0', 'x', 'A', 'E', '\0'}; @@ -655,6 +655,8 @@ void test_noexcept(int *i) { } // end namespace test_launder +/// FIXME: The commented out tests here use a IntAP value and fail. +/// This currently means we will leak the IntAP value since nothing cleans it up. namespace clz { char clz1[__builtin_clz(1) == BITSIZE(int) - 1 ? 1 : -1]; char clz2[__builtin_clz(7) == BITSIZE(int) - 3 ? 1 : -1]; @@ -707,7 +709,7 @@ namespace clz { char clz48[__builtin_clzg(1ULL << (BITSIZE(long long) - 1)) == 0 ? 1 : -1]; char clz49[__builtin_clzg(1ULL << (BITSIZE(long long) - 1), 42) == 0 ? 1 : -1]; #ifdef __SIZEOF_INT128__ - int clz50 = __builtin_clzg((unsigned __int128)0); + // int clz50 = __builtin_clzg((unsigned __int128)0); char clz51[__builtin_clzg((unsigned __int128)0, 42) == 42 ? 1 : -1]; char clz52[__builtin_clzg((unsigned __int128)0x1) == BITSIZE(__int128) - 1 ? 1 : -1]; char clz53[__builtin_clzg((unsigned __int128)0x1, 42) == BITSIZE(__int128) - 1 ? 1 : -1]; @@ -715,7 +717,7 @@ namespace clz { char clz55[__builtin_clzg((unsigned __int128)0xf, 42) == BITSIZE(__int128) - 4 ? 
1 : -1]; #endif #ifndef __AVR__ - int clz58 = __builtin_clzg((unsigned _BitInt(128))0); + // int clz58 = __builtin_clzg((unsigned _BitInt(128))0); char clz59[__builtin_clzg((unsigned _BitInt(128))0, 42) == 42 ? 1 : -1]; char clz60[__builtin_clzg((unsigned _BitInt(128))0x1) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1]; char clz61[__builtin_clzg((unsigned _BitInt(128))0x1, 42) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1]; @@ -773,7 +775,7 @@ namespace ctz { char ctz46[__builtin_ctzg(1ULL << (BITSIZE(long long) - 1)) == BITSIZE(long long) - 1 ? 1 : -1]; char ctz47[__builtin_ctzg(1ULL << (BITSIZE(long long) - 1), 42) == BITSIZE(long long) - 1 ? 1 : -1]; #ifdef __SIZEOF_INT128__ - int ctz48 = __builtin_ctzg((unsigned __int128)0); + // int ctz48 = __builtin_ctzg((unsigned __int128)0); char ctz49[__builtin_ctzg((unsigned __int128)0, 42) == 42 ? 1 : -1]; char ctz50[__builtin_ctzg((unsigned __int128)0x1) == 0 ? 1 : -1]; char ctz51[__builtin_ctzg((unsigned __int128)0x1, 42) == 0 ? 1 : -1]; @@ -783,7 +785,7 @@ namespace ctz { char ctz55[__builtin_ctzg((unsigned __int128)1 << (BITSIZE(__int128) - 1), 42) == BITSIZE(__int128) - 1 ? 1 : -1]; #endif #ifndef __AVR__ - int ctz56 = __builtin_ctzg((unsigned _BitInt(128))0); + // int ctz56 = __builtin_ctzg((unsigned _BitInt(128))0); char ctz57[__builtin_ctzg((unsigned _BitInt(128))0, 42) == 42 ? 1 : -1]; char ctz58[__builtin_ctzg((unsigned _BitInt(128))0x1) == 0 ? 1 : -1]; char ctz59[__builtin_ctzg((unsigned _BitInt(128))0x1, 42) == 0 ? 
1 : -1]; -- cgit v1.2.3 From 6729da647afa2b0ee040ccd4f06153e45d6ca738 Mon Sep 17 00:00:00 2001 From: Kunwar Grover Date: Wed, 18 Jun 2025 14:19:17 +0100 Subject: [mlir][amdgpu][nfc] Add PatternBenefit to populate methods (#144663) --- mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h | 10 +++++++--- mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp | 5 +++-- mlir/lib/Dialect/AMDGPU/Transforms/ResolveStridedMetadata.cpp | 4 ++-- mlir/lib/Dialect/AMDGPU/Transforms/TransferReadToLoad.cpp | 4 ++-- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h index 94dd9e3a2933..a52ee2ee89ca 100644 --- a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h @@ -14,6 +14,7 @@ #define MLIR_DIALECT_AMDGPU_TRANSFORMS_PASSES_H_ #include "mlir/Dialect/AMDGPU/Utils/Chipset.h" +#include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" namespace mlir { @@ -28,11 +29,14 @@ namespace amdgpu { void populateAmdgpuEmulateAtomicsPatterns(ConversionTarget &target, RewritePatternSet &patterns, - Chipset chipset); + Chipset chipset, + PatternBenefit benefit = 1); -void populateAmdgpuResolveStridedMetadataPatterns(RewritePatternSet &patterns); +void populateAmdgpuResolveStridedMetadataPatterns(RewritePatternSet &patterns, + PatternBenefit benefit = 1); -void populateAmdgpuTransferReadToLoadPatterns(RewritePatternSet &patterns); +void populateAmdgpuTransferReadToLoadPatterns(RewritePatternSet &patterns, + PatternBenefit benefit = 1); } // namespace amdgpu } // namespace mlir diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp index 7dd4be66d2bd..fd2ba0683786 100644 --- a/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp +++ b/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp @@ -164,7 +164,8 @@ LogicalResult 
RawBufferAtomicByCasPattern::matchAndRewrite( } void mlir::amdgpu::populateAmdgpuEmulateAtomicsPatterns( - ConversionTarget &target, RewritePatternSet &patterns, Chipset chipset) { + ConversionTarget &target, RewritePatternSet &patterns, Chipset chipset, + PatternBenefit benefit) { // gfx10 has no atomic adds. if (chipset.majorVersion == 10 || chipset < Chipset(9, 0, 8)) { target.addIllegalOp(); @@ -204,7 +205,7 @@ void mlir::amdgpu::populateAmdgpuEmulateAtomicsPatterns( RawBufferAtomicByCasPattern, RawBufferAtomicByCasPattern, RawBufferAtomicByCasPattern>( - patterns.getContext()); + patterns.getContext(), benefit); } void AmdgpuEmulateAtomicsPass::runOnOperation() { diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/ResolveStridedMetadata.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/ResolveStridedMetadata.cpp index 4b3d94b4ce2a..195f59d62555 100644 --- a/mlir/lib/Dialect/AMDGPU/Transforms/ResolveStridedMetadata.cpp +++ b/mlir/lib/Dialect/AMDGPU/Transforms/ResolveStridedMetadata.cpp @@ -66,9 +66,9 @@ struct ExtractStridedMetadataOnFatRawBufferCastFolder final } // namespace void mlir::amdgpu::populateAmdgpuResolveStridedMetadataPatterns( - RewritePatternSet &patterns) { + RewritePatternSet &patterns, PatternBenefit benefit) { patterns.add( - patterns.getContext()); + patterns.getContext(), benefit); } void AmdgpuResolveStridedMetadataPass::runOnOperation() { diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/TransferReadToLoad.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/TransferReadToLoad.cpp index 96925dbf9f28..f5b12a9524cc 100644 --- a/mlir/lib/Dialect/AMDGPU/Transforms/TransferReadToLoad.cpp +++ b/mlir/lib/Dialect/AMDGPU/Transforms/TransferReadToLoad.cpp @@ -222,8 +222,8 @@ struct TransferReadLowering final : OpRewritePattern { } // namespace void mlir::amdgpu::populateAmdgpuTransferReadToLoadPatterns( - RewritePatternSet &patterns) { - patterns.add(patterns.getContext()); + RewritePatternSet &patterns, PatternBenefit benefit) { + patterns.add(patterns.getContext(), 
benefit); } struct AmdgpuTransferReadToLoadPass final -- cgit v1.2.3 From c4d99704e22097703c57ee67baea96fdabfd68ab Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Wed, 18 Jun 2025 18:53:45 +0530 Subject: =?UTF-8?q?Revert=20"Reland=20[Driver]=20Add=20support=20for=20GCC?= =?UTF-8?q?=20installation=20detection=20in=E2=80=A6=20(#144684)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … Baremetal toolchain (#144640)" This reverts commit 45ea46c44636094e9fcdbbeabfd11f9d0fad5e38. --- clang/docs/Toolchain.rst | 5 - clang/include/clang/Basic/DiagnosticDriverKinds.td | 3 - clang/lib/Driver/ToolChains/BareMetal.cpp | 235 ++++++--------------- clang/lib/Driver/ToolChains/BareMetal.h | 19 +- .../aarch64-none-elf/include/c++/8.2.1/.keep | 0 .../aarch64-none-elf/lib/.keep | 0 .../aarch64-none-elf/lib/crt0.o | 0 .../basic_aarch64_gcc_tree/bin/aarch64-none-elf-ld | 1 - .../lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o | 0 .../lib/gcc/aarch64-none-elf/8.2.1/crtend.o | 0 .../aarch64-none-elf/lib/crt0.o | 0 .../aarch64-none-elf/lib/crtbegin.o | 0 .../aarch64-none-elf/lib/crtend.o | 0 .../bin/aarch64-none-elf-ld | 1 - .../armv6m-none-eabi/include/c++/8.2.1/.keep | 0 .../basic_arm_gcc_tree/armv6m-none-eabi/lib/.keep | 0 .../basic_arm_gcc_tree/armv6m-none-eabi/lib/crt0.o | 0 .../basic_arm_gcc_tree/bin/armv6m-none-eabi-ld | 1 - .../lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o | 0 .../lib/gcc/armv6m-none-eabi/8.2.1/crtend.o | 0 .../armv6m-none-eabi/lib/crt0.o | 0 .../armv6m-none-eabi/lib/crtbegin.o | 0 .../armv6m-none-eabi/lib/crtend.o | 0 .../basic_arm_nogcc_tree/bin/armv6m-none-eabi-ld | 1 - clang/test/Driver/aarch64-gnutools.c | 4 - clang/test/Driver/aarch64-toolchain-extra.c | 29 --- clang/test/Driver/aarch64-toolchain.c | 62 ------ clang/test/Driver/arm-gnutools.c | 6 - clang/test/Driver/arm-toolchain-extra.c | 30 --- clang/test/Driver/arm-toolchain.c | 63 ------ clang/test/Driver/baremetal.cpp | 16 -- 
clang/test/Driver/check-no-multlib-warning.c | 10 - 32 files changed, 64 insertions(+), 422 deletions(-) delete mode 100644 clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1/.keep delete mode 100644 clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/.keep delete mode 100644 clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o delete mode 100755 clang/test/Driver/Inputs/basic_aarch64_gcc_tree/bin/aarch64-none-elf-ld delete mode 100644 clang/test/Driver/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o delete mode 100644 clang/test/Driver/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtend.o delete mode 100644 clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crt0.o delete mode 100644 clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crtbegin.o delete mode 100644 clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crtend.o delete mode 100755 clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/bin/aarch64-none-elf-ld delete mode 100644 clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/8.2.1/.keep delete mode 100644 clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/.keep delete mode 100644 clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/crt0.o delete mode 100755 clang/test/Driver/Inputs/basic_arm_gcc_tree/bin/armv6m-none-eabi-ld delete mode 100644 clang/test/Driver/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o delete mode 100644 clang/test/Driver/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtend.o delete mode 100644 clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crt0.o delete mode 100644 clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crtbegin.o delete mode 100644 clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crtend.o delete mode 100755 
clang/test/Driver/Inputs/basic_arm_nogcc_tree/bin/armv6m-none-eabi-ld delete mode 100644 clang/test/Driver/aarch64-gnutools.c delete mode 100644 clang/test/Driver/aarch64-toolchain-extra.c delete mode 100644 clang/test/Driver/aarch64-toolchain.c delete mode 100644 clang/test/Driver/arm-gnutools.c delete mode 100644 clang/test/Driver/arm-toolchain-extra.c delete mode 100644 clang/test/Driver/arm-toolchain.c delete mode 100644 clang/test/Driver/check-no-multlib-warning.c diff --git a/clang/docs/Toolchain.rst b/clang/docs/Toolchain.rst index d56b21d74c7e..958199eb7a2e 100644 --- a/clang/docs/Toolchain.rst +++ b/clang/docs/Toolchain.rst @@ -347,8 +347,3 @@ workarounds for issues discovered in libstdc++, and these are removed as fixed libstdc++ becomes sufficiently old. You can instruct Clang to use libstdc++ with the ``-stdlib=libstdc++`` flag. - -GCC Installation -================= -Users can point to their GCC installation by using the ``-gcc-toolchain`` or by -using ``-gcc-install-dir`` flag. diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 94224e103875..29f6480ba935 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -847,9 +847,6 @@ def note_drv_available_multilibs : Note< "available multilibs are:%0">; def err_drv_multilib_custom_error : Error< "multilib configuration error: %0">; -def warn_drv_multilib_not_available_for_target: Warning< - "no multilib structure encoded for Arm, Aarch64 and PPC targets">, - InGroup>; def err_drv_experimental_crel : Error< "-Wa,--allow-experimental-crel must be specified to use -Wa,--crel. 
" diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 0fbfe6c77f34..d8168ed15feb 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -31,40 +31,6 @@ using namespace clang::driver; using namespace clang::driver::tools; using namespace clang::driver::toolchains; -/// Is the triple {aarch64.aarch64_be}-none-elf? -static bool isAArch64BareMetal(const llvm::Triple &Triple) { - if (Triple.getArch() != llvm::Triple::aarch64 && - Triple.getArch() != llvm::Triple::aarch64_be) - return false; - - if (Triple.getVendor() != llvm::Triple::UnknownVendor) - return false; - - if (Triple.getOS() != llvm::Triple::UnknownOS) - return false; - - return Triple.getEnvironmentName() == "elf"; -} - -static bool isRISCVBareMetal(const llvm::Triple &Triple) { - if (!Triple.isRISCV()) - return false; - - if (Triple.getVendor() != llvm::Triple::UnknownVendor) - return false; - - if (Triple.getOS() != llvm::Triple::UnknownOS) - return false; - - return Triple.getEnvironmentName() == "elf"; -} - -/// Is the triple powerpc[64][le]-*-none-eabi? -static bool isPPCBareMetal(const llvm::Triple &Triple) { - return Triple.isPPC() && Triple.getOS() == llvm::Triple::UnknownOS && - Triple.getEnvironment() == llvm::Triple::EABI; -} - static bool findRISCVMultilibs(const Driver &D, const llvm::Triple &TargetTriple, const ArgList &Args, DetectedMultilibs &Result) { @@ -129,8 +95,7 @@ static bool findRISCVMultilibs(const Driver &D, return false; } -static std::string computeClangRuntimesSysRoot(const Driver &D, - bool IncludeTriple) { +static std::string computeBaseSysRoot(const Driver &D, bool IncludeTriple) { if (!D.SysRoot.empty()) return D.SysRoot; @@ -143,123 +108,56 @@ static std::string computeClangRuntimesSysRoot(const Driver &D, return std::string(SysRootDir); } -// Only consider the GCC toolchain based on the values provided through the -// `--gcc-toolchain` and `--gcc-install-dir` flags. 
The function below returns -// whether the GCC toolchain was initialized successfully. -bool BareMetal::initGCCInstallation(const llvm::Triple &Triple, - const llvm::opt::ArgList &Args) { - if (Args.getLastArg(options::OPT_gcc_toolchain) || - Args.getLastArg(clang::driver::options::OPT_gcc_install_dir_EQ)) { - GCCInstallation.init(Triple, Args); - return GCCInstallation.isValid(); +BareMetal::BareMetal(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args) + : ToolChain(D, Triple, Args), + SysRoot(computeBaseSysRoot(D, /*IncludeTriple=*/true)) { + getProgramPaths().push_back(getDriver().Dir); + + findMultilibs(D, Triple, Args); + SmallString<128> SysRoot(computeSysRoot()); + if (!SysRoot.empty()) { + for (const Multilib &M : getOrderedMultilibs()) { + SmallString<128> Dir(SysRoot); + llvm::sys::path::append(Dir, M.osSuffix(), "lib"); + getFilePaths().push_back(std::string(Dir)); + getLibraryPaths().push_back(std::string(Dir)); + } } - return false; } -// This logic is adapted from RISCVToolChain.cpp as part of the ongoing effort -// to merge RISCVToolChain into the Baremetal toolchain. It infers the presence -// of a valid GCC toolchain by checking whether the `crt0.o` file exists in the -// `bin/..//lib` directory. -static bool detectGCCToolchainAdjacent(const Driver &D) { - SmallString<128> GCCDir; - llvm::sys::path::append(GCCDir, D.Dir, "..", D.getTargetTriple(), - "lib/crt0.o"); - return llvm::sys::fs::exists(GCCDir); -} +/// Is the triple {aarch64.aarch64_be}-none-elf? +static bool isAArch64BareMetal(const llvm::Triple &Triple) { + if (Triple.getArch() != llvm::Triple::aarch64 && + Triple.getArch() != llvm::Triple::aarch64_be) + return false; -// If no sysroot is provided the driver will first attempt to infer it from the -// values of `--gcc-install-dir` or `--gcc-toolchain`, which specify the -// location of a GCC toolchain. 
-// If neither flag is used, the sysroot defaults to either: -//    - `bin/../` -//    - `bin/../lib/clang-runtimes/` -// -// To use the `clang-runtimes` path, ensure that `..//lib/crt0.o` -// does not exist relative to the driver. -std::string BareMetal::computeSysRoot() const { - // Use Baremetal::sysroot if it has already been set. - if (!SysRoot.empty()) - return SysRoot; - - // Use the sysroot specified via the `--sysroot` command-line flag, if - // provided. - const Driver &D = getDriver(); - if (!D.SysRoot.empty()) - return D.SysRoot; + if (Triple.getVendor() != llvm::Triple::UnknownVendor) + return false; - // Attempt to infer sysroot from a valid GCC installation. - // If no valid GCC installation, check for a GCC toolchain alongside Clang. - SmallString<128> inferredSysRoot; - if (IsGCCInstallationValid) { - llvm::sys::path::append(inferredSysRoot, GCCInstallation.getParentLibPath(), - "..", GCCInstallation.getTriple().str()); - } else if (detectGCCToolchainAdjacent(D)) { - // Use the triple as provided to the driver. Unlike the parsed triple - // this has not been normalized to always contain every field. - llvm::sys::path::append(inferredSysRoot, D.Dir, "..", D.getTargetTriple()); - } - // If a valid sysroot was inferred and exists, use it - if (!inferredSysRoot.empty() && llvm::sys::fs::exists(inferredSysRoot)) - return std::string(inferredSysRoot); + if (Triple.getOS() != llvm::Triple::UnknownOS) + return false; - // Use the clang-runtimes path. 
- return computeClangRuntimesSysRoot(D, /*IncludeTriple*/ true); + return Triple.getEnvironmentName() == "elf"; } -static void addMultilibsFilePaths(const Driver &D, const MultilibSet &Multilibs, - const Multilib &Multilib, - StringRef InstallPath, - ToolChain::path_list &Paths) { - if (const auto &PathsCallback = Multilibs.filePathsCallback()) - for (const auto &Path : PathsCallback(Multilib)) - addPathIfExists(D, InstallPath + Path, Paths); +static bool isRISCVBareMetal(const llvm::Triple &Triple) { + if (!Triple.isRISCV()) + return false; + + if (Triple.getVendor() != llvm::Triple::UnknownVendor) + return false; + + if (Triple.getOS() != llvm::Triple::UnknownOS) + return false; + + return Triple.getEnvironmentName() == "elf"; } -// GCC mutltilibs will only work for those targets that have their multlib -// structure encoded into GCCInstallation. Baremetal toolchain supports ARM, -// AArch64, RISCV and PPC and of these only RISCV have GCC multilibs hardcoded -// in GCCInstallation. -BareMetal::BareMetal(const Driver &D, const llvm::Triple &Triple, - const ArgList &Args) - : Generic_ELF(D, Triple, Args) { - IsGCCInstallationValid = initGCCInstallation(Triple, Args); - std::string ComputedSysRoot = computeSysRoot(); - if (IsGCCInstallationValid) { - if (!isRISCVBareMetal(Triple)) - D.Diag(clang::diag::warn_drv_multilib_not_available_for_target); - - Multilibs = GCCInstallation.getMultilibs(); - SelectedMultilibs.assign({GCCInstallation.getMultilib()}); - - path_list &Paths = getFilePaths(); - // Add toolchain/multilib specific file paths. - addMultilibsFilePaths(D, Multilibs, SelectedMultilibs.back(), - GCCInstallation.getInstallPath(), Paths); - // Adding filepath for locating crt{begin,end}.o files. - Paths.push_back(GCCInstallation.getInstallPath().str()); - // Adding filepath for locating crt0.o file. 
- Paths.push_back(ComputedSysRoot + "/lib"); - - ToolChain::path_list &PPaths = getProgramPaths(); - // Multilib cross-compiler GCC installations put ld in a triple-prefixed - // directory off of the parent of the GCC installation. - PPaths.push_back(Twine(GCCInstallation.getParentLibPath() + "/../" + - GCCInstallation.getTriple().str() + "/bin") - .str()); - PPaths.push_back((GCCInstallation.getParentLibPath() + "/../bin").str()); - } else { - getProgramPaths().push_back(getDriver().Dir); - findMultilibs(D, Triple, Args); - const SmallString<128> SysRootDir(computeSysRoot()); - if (!SysRootDir.empty()) { - for (const Multilib &M : getOrderedMultilibs()) { - SmallString<128> Dir(SysRootDir); - llvm::sys::path::append(Dir, M.osSuffix(), "lib"); - getFilePaths().push_back(std::string(Dir)); - getLibraryPaths().push_back(std::string(Dir)); - } - } - } +/// Is the triple powerpc[64][le]-*-none-eabi? +static bool isPPCBareMetal(const llvm::Triple &Triple) { + return Triple.isPPC() && Triple.getOS() == llvm::Triple::UnknownOS && + Triple.getEnvironment() == llvm::Triple::EABI; } static void @@ -318,7 +216,7 @@ getMultilibConfigPath(const Driver &D, const llvm::Triple &Triple, return {}; } } else { - MultilibPath = computeClangRuntimesSysRoot(D, /*IncludeTriple=*/false); + MultilibPath = computeBaseSysRoot(D, /*IncludeTriple=*/false); llvm::sys::path::append(MultilibPath, MultilibFilename); } return MultilibPath; @@ -336,7 +234,7 @@ void BareMetal::findMultilibs(const Driver &D, const llvm::Triple &Triple, if (D.getVFS().exists(*MultilibPath)) { // If multilib.yaml is found, update sysroot so it doesn't use a target // specific suffix - SysRoot = computeClangRuntimesSysRoot(D, /*IncludeTriple=*/false); + SysRoot = computeBaseSysRoot(D, /*IncludeTriple=*/false); SmallVector CustomFlagMacroDefines; findMultilibsFromYAML(*this, D, *MultilibPath, Args, Result, CustomFlagMacroDefines); @@ -344,7 +242,7 @@ void BareMetal::findMultilibs(const Driver &D, const llvm::Triple 
&Triple, Multilibs = Result.Multilibs; MultilibMacroDefines.append(CustomFlagMacroDefines.begin(), CustomFlagMacroDefines.end()); - } else if (isRISCVBareMetal(Triple) && !detectGCCToolchainAdjacent(D)) { + } else if (isRISCVBareMetal(Triple)) { if (findRISCVMultilibs(D, Triple, Args, Result)) { SelectedMultilibs = Result.SelectedMultilibs; Multilibs = Result.Multilibs; @@ -365,6 +263,8 @@ Tool *BareMetal::buildStaticLibTool() const { return new tools::baremetal::StaticLibTool(*this); } +std::string BareMetal::computeSysRoot() const { return SysRoot; } + BareMetal::OrderedMultilibs BareMetal::getOrderedMultilibs() const { // Get multilibs in reverse order because they're ordered most-specific last. if (!SelectedMultilibs.empty()) @@ -392,10 +292,10 @@ void BareMetal::AddClangSystemIncludeArgs(const ArgList &DriverArgs, if (std::optional Path = getStdlibIncludePath()) addSystemInclude(DriverArgs, CC1Args, *Path); - const SmallString<128> SysRootDir(computeSysRoot()); - if (!SysRootDir.empty()) { + const SmallString<128> SysRoot(computeSysRoot()); + if (!SysRoot.empty()) { for (const Multilib &M : getOrderedMultilibs()) { - SmallString<128> Dir(SysRootDir); + SmallString<128> Dir(SysRoot); llvm::sys::path::append(Dir, M.includeSuffix()); llvm::sys::path::append(Dir, "include"); addSystemInclude(DriverArgs, CC1Args, Dir.str()); @@ -409,19 +309,6 @@ void BareMetal::addClangTargetOptions(const ArgList &DriverArgs, CC1Args.push_back("-nostdsysteminc"); } -void BareMetal::addLibStdCxxIncludePaths( - const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args) const { - if (!IsGCCInstallationValid) - return; - const GCCVersion &Version = GCCInstallation.getVersion(); - StringRef TripleStr = GCCInstallation.getTriple().str(); - const Multilib &Multilib = GCCInstallation.getMultilib(); - addLibStdCXXIncludePaths(computeSysRoot() + "/include/c++/" + Version.Text, - TripleStr, Multilib.includeSuffix(), DriverArgs, - CC1Args); -} - void 
BareMetal::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { if (DriverArgs.hasArg(options::OPT_nostdinc, options::OPT_nostdlibinc, @@ -452,23 +339,23 @@ void BareMetal::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, }; switch (GetCXXStdlibType(DriverArgs)) { - case ToolChain::CST_Libcxx: { - SmallString<128> P(D.Dir); - llvm::sys::path::append(P, "..", "include"); - AddCXXIncludePath(P); - break; - } - case ToolChain::CST_Libstdcxx: - addLibStdCxxIncludePaths(DriverArgs, CC1Args); - break; + case ToolChain::CST_Libcxx: { + SmallString<128> P(D.Dir); + llvm::sys::path::append(P, "..", "include"); + AddCXXIncludePath(P); + break; + } + case ToolChain::CST_Libstdcxx: + // We only support libc++ toolchain installation. + break; } - std::string SysRootDir(computeSysRoot()); - if (SysRootDir.empty()) + std::string SysRoot(computeSysRoot()); + if (SysRoot.empty()) return; for (const Multilib &M : getOrderedMultilibs()) { - SmallString<128> Dir(SysRootDir); + SmallString<128> Dir(SysRoot); llvm::sys::path::append(Dir, M.gccSuffix()); switch (GetCXXStdlibType(DriverArgs)) { case ToolChain::CST_Libcxx: { diff --git a/clang/lib/Driver/ToolChains/BareMetal.h b/clang/lib/Driver/ToolChains/BareMetal.h index 930f8584e643..f6295bda0a6a 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.h +++ b/clang/lib/Driver/ToolChains/BareMetal.h @@ -9,7 +9,6 @@ #ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_BAREMETAL_H #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_BAREMETAL_H -#include "ToolChains/Gnu.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" @@ -20,7 +19,7 @@ namespace driver { namespace toolchains { -class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF { +class LLVM_LIBRARY_VISIBILITY BareMetal : public ToolChain { public: BareMetal(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args); @@ -36,8 +35,7 @@ protected: Tool *buildStaticLibTool() const override; public: - bool initGCCInstallation(const 
llvm::Triple &Triple, - const llvm::opt::ArgList &Args); + bool useIntegratedAs() const override { return true; } bool isBareMetal() const override { return true; } bool isCrossCompiling() const override { return true; } bool HasNativeLLVMSupport() const override { return true; } @@ -50,15 +48,9 @@ public: StringRef getOSLibName() const override { return "baremetal"; } - UnwindTableLevel - getDefaultUnwindTableLevel(const llvm::opt::ArgList &Args) const override { - return UnwindTableLevel::None; - } - RuntimeLibType GetDefaultRuntimeLibType() const override { return ToolChain::RLT_CompilerRT; } - CXXStdlibType GetDefaultCXXStdlibType() const override { return ToolChain::CST_Libcxx; } @@ -75,9 +67,6 @@ public: void AddClangCXXStdlibIncludeArgs( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; - void - addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args) const override; std::string computeSysRoot() const override; SanitizerMask getSupportedSanitizers() const override; @@ -91,8 +80,6 @@ private: std::string SysRoot; - bool IsGCCInstallationValid; - SmallVector MultilibMacroDefines; }; @@ -117,7 +104,7 @@ public: class LLVM_LIBRARY_VISIBILITY Linker final : public Tool { public: - Linker(const ToolChain &TC) : Tool("baremetal::Linker", "linker", TC) {} + Linker(const ToolChain &TC) : Tool("baremetal::Linker", "ld.lld", TC) {} bool isLinkJob() const override { return true; } bool hasIntegratedCPP() const override { return false; } void ConstructJob(Compilation &C, const JobAction &JA, diff --git a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1/.keep b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1/.keep deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/.keep 
b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/.keep deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/bin/aarch64-none-elf-ld b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/bin/aarch64-none-elf-ld deleted file mode 100755 index b23e55619b2f..000000000000 --- a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/bin/aarch64-none-elf-ld +++ /dev/null @@ -1 +0,0 @@ -#!/bin/true diff --git a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtend.o b/clang/test/Driver/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtend.o deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crt0.o b/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crt0.o deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crtbegin.o b/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crtbegin.o deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crtend.o b/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf/lib/crtend.o deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/bin/aarch64-none-elf-ld 
b/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/bin/aarch64-none-elf-ld deleted file mode 100755 index b23e55619b2f..000000000000 --- a/clang/test/Driver/Inputs/basic_aarch64_nogcc_tree/bin/aarch64-none-elf-ld +++ /dev/null @@ -1 +0,0 @@ -#!/bin/true diff --git a/clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/8.2.1/.keep b/clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/8.2.1/.keep deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/.keep b/clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/.keep deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/crt0.o b/clang/test/Driver/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/crt0.o deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_arm_gcc_tree/bin/armv6m-none-eabi-ld b/clang/test/Driver/Inputs/basic_arm_gcc_tree/bin/armv6m-none-eabi-ld deleted file mode 100755 index b23e55619b2f..000000000000 --- a/clang/test/Driver/Inputs/basic_arm_gcc_tree/bin/armv6m-none-eabi-ld +++ /dev/null @@ -1 +0,0 @@ -#!/bin/true diff --git a/clang/test/Driver/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o b/clang/test/Driver/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtend.o b/clang/test/Driver/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtend.o deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crt0.o b/clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crt0.o deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git 
a/clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crtbegin.o b/clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crtbegin.o deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crtend.o b/clang/test/Driver/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi/lib/crtend.o deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/clang/test/Driver/Inputs/basic_arm_nogcc_tree/bin/armv6m-none-eabi-ld b/clang/test/Driver/Inputs/basic_arm_nogcc_tree/bin/armv6m-none-eabi-ld deleted file mode 100755 index b23e55619b2f..000000000000 --- a/clang/test/Driver/Inputs/basic_arm_nogcc_tree/bin/armv6m-none-eabi-ld +++ /dev/null @@ -1 +0,0 @@ -#!/bin/true diff --git a/clang/test/Driver/aarch64-gnutools.c b/clang/test/Driver/aarch64-gnutools.c deleted file mode 100644 index 0214639ed380..000000000000 --- a/clang/test/Driver/aarch64-gnutools.c +++ /dev/null @@ -1,4 +0,0 @@ -// RUN: %clang --target=aarch64-none-elf --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree -fno-integrated-as %s -### -c \ -// RUN: 2>&1 | FileCheck %s - -// CHECK: "{{.*}}as{{(.exe)?}}" diff --git a/clang/test/Driver/aarch64-toolchain-extra.c b/clang/test/Driver/aarch64-toolchain-extra.c deleted file mode 100644 index eb8c741ae1ad..000000000000 --- a/clang/test/Driver/aarch64-toolchain-extra.c +++ /dev/null @@ -1,29 +0,0 @@ -// A basic clang -cc1 command-line, and simple environment check. - -// The tests here are similar to those in aarch64-toolchain.c, however -// these tests need to create symlinks to test directory trees in order to -// set up the environment and therefore shell support is required. -// XFAIL: target={{.*}}-fuchsia{{.*}} -// REQUIRES: shell -// UNSUPPORTED: system-windows - -// If there is no GCC install detected then the driver searches for executables -// and runtime starting from the directory tree above the driver itself. 
-// The test below checks that the driver correctly finds the linker and -// runtime if and only if they exist. -// -// RUN: rm -rf %t -// RUN: mkdir -p %t/aarch64-nogcc/bin -// RUN: ln -s %clang %t/aarch64-nogcc/bin/clang -// RUN: ln -s %S/Inputs/basic_aarch64_nogcc_tree/aarch64-none-elf %t/aarch64-nogcc/aarch64-none-elf -// RUN: %t/aarch64-nogcc/bin/clang %s -### -no-canonical-prefixes \ -// RUN: --gcc-toolchain=%t/aarch64-nogcc/invalid \ -// RUN: --target=aarch64-none-elf --rtlib=libgcc -fuse-ld=ld 2>&1 \ -// RUN: | FileCheck -check-prefix=C-ARM-BAREMETAL-NOGCC %s - -// RUN: %t/aarch64-nogcc/bin/clang %s -### -no-canonical-prefixes \ -// RUN: --sysroot=%t/aarch64-nogcc/bin/../aarch64-none-elf \ -// RUN: --target=aarch64-none-elf --rtlib=libgcc -fuse-ld=ld 2>&1 \ -// RUN: | FileCheck -check-prefix=C-ARM-BAREMETAL-NOGCC %s - -// C-ARM-BAREMETAL-NOGCC: "-internal-isystem" "{{.*}}/aarch64-nogcc/bin/../aarch64-none-elf/include" diff --git a/clang/test/Driver/aarch64-toolchain.c b/clang/test/Driver/aarch64-toolchain.c deleted file mode 100644 index 74841eec598b..000000000000 --- a/clang/test/Driver/aarch64-toolchain.c +++ /dev/null @@ -1,62 +0,0 @@ -// XFAIL: target={{.*}}-fuchsia{{.*}} -// UNSUPPORTED: system-windows - -// RUN: %clang -### %s -fuse-ld= \ -// RUN: --target=aarch64-none-elf --rtlib=libgcc \ -// RUN: --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree \ -// RUN: --sysroot=%S/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf 2>&1 \ -// RUN: | FileCheck -check-prefix=C-AARCH64-BAREMETAL %s - -// C-AARCH64-BAREMETAL: "-cc1" "-triple" "aarch64-unknown-none-elf" -// C-AARCH64-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" -// C-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" - -// RUN: %clang -### %s -fuse-ld= \ -// RUN: --target=aarch64-none-elf --rtlib=libgcc \ -// RUN: --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree \ -// RUN: --sysroot= 2>&1 \ -// RUN: | FileCheck 
-check-prefix=C-AARCH64-BAREMETAL-NOSYSROOT %s - -// C-AARCH64-BAREMETAL-NOSYSROOT: "-cc1" "-triple" "aarch64-unknown-none-elf" -// C-AARCH64-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include" - -// RUN: %clangxx -### %s -fuse-ld= \ -// RUN: --target=aarch64-none-elf -stdlib=libstdc++ --rtlib=libgcc \ -// RUN: --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree \ -// RUN: --sysroot=%S/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf 2>&1 \ -// RUN: | FileCheck -check-prefix=CXX-AARCH64-BAREMETAL %s - -// CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1/aarch64-none-elf" -// CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1/backward" -// CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1" -// CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" - -// RUN: %clangxx -### %s -fuse-ld= \ -// RUN: --target=aarch64-none-elf -stdlib=libstdc++ --rtlib=libgcc \ -// RUN: --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree \ -// RUN: --sysroot= 2>&1 \ -// RUN: | FileCheck -check-prefix=CXX-AARCH64-BAREMETAL-NOSYSROOT %s - -// CXX-AARCH64-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include/c++/8.2.1/aarch64-none-elf" -// CXX-AARCH64-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include/c++/8.2.1/backward" -// CXX-AARCH64-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include/c++/8.2.1" -// CXX-AARCH64-BAREMETAL-NOSYSROOT: "-internal-isystem" 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include" - -// RUN: %clangxx -### %s -fuse-ld= \ -// RUN: --target=aarch64-none-elf -stdlib=libc++ --rtlib=libgcc \ -// RUN: --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree \ -// RUN: --sysroot=%S/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf 2>&1 \ -// RUN: | FileCheck -check-prefix=CXX-AARCH64-BAREMETAL-LIBCXX %s - -// CXX-AARCH64-BAREMETAL-LIBCXX: "-isysroot" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf" -// CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/v1" -// CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include" - -// RUN: %clangxx -### %s -fuse-ld= \ -// RUN: --target=aarch64-none-elf -stdlib=libc++ --rtlib=libgcc \ -// RUN: --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree \ -// RUN: --sysroot= 2>&1 \ -// RUN: | FileCheck -check-prefix=CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX %s - -// CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include/c++/v1" -// CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include" diff --git a/clang/test/Driver/arm-gnutools.c b/clang/test/Driver/arm-gnutools.c deleted file mode 100644 index 6e107f19dabc..000000000000 --- a/clang/test/Driver/arm-gnutools.c +++ /dev/null @@ -1,6 +0,0 @@ -// check that gnu assembler is invoked with arm baremetal as well - -// RUN: %clang --target=armv6m-none-eabi --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree -fno-integrated-as %s -### -c \ -// RUN: 2>&1 | FileCheck %s - -// CHECK: "{{.*}}as{{(.exe)?}}" diff --git a/clang/test/Driver/arm-toolchain-extra.c b/clang/test/Driver/arm-toolchain-extra.c deleted file mode 100644 index 67206818f211..000000000000 
--- a/clang/test/Driver/arm-toolchain-extra.c +++ /dev/null @@ -1,30 +0,0 @@ -// A basic clang -cc1 command-line, and simple environment check. - -// The tests here are similar to those in arm-toolchain.c, however -// these tests need to create symlinks to test directory trees in order to -// set up the environment and therefore shell support is required. -// XFAIL: target={{.*}}-fuchsia{{.*}} -// REQUIRES: shell -// UNSUPPORTED: system-windows - -// If there is no GCC install detected then the driver searches for executables -// and runtime starting from the directory tree above the driver itself. -// The test below checks that the driver correctly finds the linker and -// runtime if and only if they exist. -// -// RUN: rm -rf %t -// RUN: mkdir -p %t/arm-nogcc/bin -// RUN: ln -s %clang %t/arm-nogcc/bin/clang -// RUN: ln -s %S/Inputs/basic_arm_nogcc_tree/armv6m-none-eabi %t/arm-nogcc/armv6m-none-eabi -// RUN: %t/arm-nogcc/bin/clang %s -### -no-canonical-prefixes \ -// RUN: --gcc-toolchain=%t/arm-nogcc/invalid \ -// RUN: --target=armv6m-none-eabi --rtlib=libgcc -fuse-ld=ld 2>&1 \ -// RUN: | FileCheck -check-prefix=C-ARM-BAREMETAL-NOGCC %s - -// RUN: %t/arm-nogcc/bin/clang %s -### -no-canonical-prefixes \ -// RUN: --sysroot=%t/arm-nogcc/bin/../armv6m-none-eabi \ -// RUN: --target=armv6m-none-eabi --rtlib=libgcc -fuse-ld=ld 2>&1 \ -// RUN: | FileCheck -check-prefix=C-ARM-BAREMETAL-NOGCC %s - -// C-ARM-BAREMETAL-NOGCC: "-internal-isystem" "{{.*}}/arm-nogcc/bin/../armv6m-none-eabi/include" - diff --git a/clang/test/Driver/arm-toolchain.c b/clang/test/Driver/arm-toolchain.c deleted file mode 100644 index 56a0e0de7ba7..000000000000 --- a/clang/test/Driver/arm-toolchain.c +++ /dev/null @@ -1,63 +0,0 @@ -// XFAIL: target={{.*}}-fuchsia{{.*}} -// UNSUPPORTED: system-windows - -// RUN: %clang -### %s -fuse-ld= \ -// RUN: --target=armv6m-none-eabi --rtlib=libgcc \ -// RUN: --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree \ -// RUN: 
--sysroot=%S/Inputs/basic_arm_gcc_tree/armv6m-none-eabi 2>&1 \ -// RUN: | FileCheck -check-prefix=C-ARM-BAREMETAL %s - -// C-ARM-BAREMETAL: "-cc1" "-triple" "thumbv6m-unknown-none-eabi" -// C-ARM-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi" -// C-ARM-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include" - -// RUN: %clang -### %s -fuse-ld= \ -// RUN: --target=armv6m-none-eabi --rtlib=libgcc \ -// RUN: --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree \ -// RUN: --sysroot= 2>&1 \ -// RUN: | FileCheck -check-prefix=C-ARM-BAREMETAL-NOSYSROOT %s - -// C-ARM-BAREMETAL-NOSYSROOT: "-cc1" "-triple" "thumbv6m-unknown-none-eabi" -// C-ARM-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include" - -// RUN: %clangxx -### %s -fuse-ld= \ -// RUN: --target=armv6m-none-eabi -stdlib=libstdc++ --rtlib=libgcc \ -// RUN: --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree \ -// RUN: --sysroot=%S/Inputs/basic_arm_gcc_tree/armv6m-none-eabi 2>&1 \ -// RUN: | FileCheck -check-prefix=CXX-ARM-BAREMETAL %s - -// CXX-ARM-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi" -// CXX-ARM-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/8.2.1/armv6m-none-eabi" -// CXX-ARM-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/8.2.1/backward" -// CXX-ARM-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/8.2.1" -// CXX-ARM-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include" - -// RUN: %clangxx -### %s -fuse-ld= \ -// RUN: --target=armv6m-none-eabi -stdlib=libstdc++ --rtlib=libgcc \ -// RUN: --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree \ -// RUN: --sysroot= 2>&1 \ -// RUN: | FileCheck -check-prefix=CXX-ARM-BAREMETAL-NOSYSROOT %s - -// CXX-ARM-BAREMETAL-NOSYSROOT: 
"-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include/c++/8.2.1/armv6m-none-eabi" -// CXX-ARM-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include/c++/8.2.1/backward" -// CXX-ARM-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include/c++/8.2.1" -// CXX-ARM-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include" - -// RUN: %clangxx -### %s -fuse-ld= \ -// RUN: --target=armv6m-none-eabi -stdlib=libc++ --rtlib=libgcc \ -// RUN: --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree \ -// RUN: --sysroot=%S/Inputs/basic_arm_gcc_tree/armv6m-none-eabi 2>&1 \ -// RUN: | FileCheck -check-prefix=CXX-ARM-BAREMETAL-LIBCXX %s - -// CXX-ARM-BAREMETAL-LIBCXX: "-isysroot" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi" -// CXX-ARM-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include/c++/v1" -// CXX-ARM-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include" - -// RUN: %clangxx -### %s -fuse-ld= \ -// RUN: --target=armv6m-none-eabi -stdlib=libc++ --rtlib=libgcc \ -// RUN: --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree \ -// RUN: --sysroot= 2>&1 \ -// RUN: | FileCheck -check-prefix=CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX %s - -// CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include/c++/v1" -// CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include diff --git a/clang/test/Driver/baremetal.cpp b/clang/test/Driver/baremetal.cpp index 2ac83402dda3..a80aa9b43711 100644 --- 
a/clang/test/Driver/baremetal.cpp +++ b/clang/test/Driver/baremetal.cpp @@ -196,22 +196,6 @@ // CHECK-AARCH64-NO-HOST-INC-SAME: "-internal-isystem" "[[RESOURCE]]{{[/\\]+}}include" // CHECK-AARCH64-NO-HOST-INC-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include" -// RUN: %clang -no-canonical-prefixes %s -### --target=riscv32-unknown-elf 2>&1 \ -// RUN: | FileCheck --check-prefix=CHECK-RISCV32-NO-HOST-INC %s -// CHECK-RISCV32-NO-HOST-INC: InstalledDir: [[INSTALLEDDIR:.+]] -// CHECK-RISCV32-NO-HOST-INC: "-resource-dir" "[[RESOURCE:[^"]+]]" -// CHECK-RISCV32-NO-HOST-INC-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include{{[/\\]+}}c++{{[/\\]+}}v1" -// CHECK-RISCV32-NO-HOST-INC-SAME: "-internal-isystem" "[[RESOURCE]]{{[/\\]+}}include" -// CHECK-RISCV32-NO-HOST-INC-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include" - -// RUN: %clang -no-canonical-prefixes %s -### --target=riscv64-unknown-elf 2>&1 \ -// RUN: | FileCheck --check-prefix=CHECK-RISCV64-NO-HOST-INC %s -// CHECK-RISCV64-NO-HOST-INC: InstalledDir: [[INSTALLEDDIR:.+]] -// CHECK-RISCV64-NO-HOST-INC: "-resource-dir" "[[RESOURCE:[^"]+]]" -// CHECK-RISCV64-NO-HOST-INC-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include{{[/\\]+}}c++{{[/\\]+}}v1" -// CHECK-RISCV64-NO-HOST-INC-SAME: "-internal-isystem" "[[RESOURCE]]{{[/\\]+}}include" -// CHECK-RISCV64-NO-HOST-INC-SAME: "-internal-isystem" "[[INSTALLEDDIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}clang-runtimes{{[/\\]+[^"]*}}include" - // RUN: %clang %s -### --target=riscv64-unknown-elf -o %t.out -L some/directory/user/asked/for \ // RUN: --sysroot=%S/Inputs/basic_riscv64_tree/riscv64-unknown-elf 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-RV64 %s diff --git a/clang/test/Driver/check-no-multlib-warning.c 
b/clang/test/Driver/check-no-multlib-warning.c deleted file mode 100644 index 9a0d7cee450a..000000000000 --- a/clang/test/Driver/check-no-multlib-warning.c +++ /dev/null @@ -1,10 +0,0 @@ -// UNSUPPORTED: system-windows - - -// RUN: %clang --target=armv6m-none-eabi --gcc-toolchain=%S/Inputs/basic_arm_gcc_tree -### 2>&1 | FileCheck %s -// RUN: %clang --target=aarch64-none-elf --gcc-toolchain=%S/Inputs/basic_aarch64_gcc_tree -### 2>&1 | FileCheck %s -// RUN: %clang --target=riscv32-unknown-elf --gcc-toolchain=%S/Inputs/basic_riscv32_tree -### 2>&1 | FileCheck --check-prefix=NOCHECK %s -// RUN: %clang --target=riscv64-unknown-elf --gcc-toolchain=%S/Inputs/basic_riscv64_tree -### 2>&1 | FileCheck --check-prefix=NOCHECK %s - -// CHECK: warning: no multilib structure encoded for Arm, Aarch64 and PPC targets -// NOCHECK-NOT: warning: no multilib structure encoded for Arm, Aarch64 and PPC targets -- cgit v1.2.3 From 1f34d68c4f086e7ea6ef9a529f9606476b38bbbb Mon Sep 17 00:00:00 2001 From: Tobias Stadler Date: Wed, 18 Jun 2025 14:25:41 +0100 Subject: [Remarks] Remove yaml-strtab format (#144527) Background: The yaml-strtab format looks just like the yaml format, except that the values in the key/value pairs of the remarks are deduplicated and replaced by indices into a string table (see removed test cases for examples). The motivation behind this format was to reduce size of the remarks files. However, it was quickly superseded by the bitstream format. Therefore, remove the yaml-strtab format, as it doesn't have a good usecase anymore: - It isn't particularly efficient - It isn't human-readable - It isn't straightforward to parse in external tools that can't use the remarks library. We don't even support it in opt-viewer. llvm-remarkutil is also missing options to parse/convert yaml-strtab, so the chance that anyone is actually using this format is low. 
--- llvm/docs/CommandGuide/llvm-opt-report.rst | 1 - llvm/docs/Remarks.rst | 32 +--- llvm/include/llvm/Remarks/RemarkFormat.h | 2 +- llvm/include/llvm/Remarks/RemarkParser.h | 5 - llvm/include/llvm/Remarks/YAMLRemarkSerializer.h | 41 ------ llvm/lib/Remarks/BitstreamRemarkParser.cpp | 7 +- llvm/lib/Remarks/BitstreamRemarkParser.h | 7 +- llvm/lib/Remarks/RemarkFormat.cpp | 4 +- llvm/lib/Remarks/RemarkLinker.cpp | 2 +- llvm/lib/Remarks/RemarkParser.cpp | 40 +---- llvm/lib/Remarks/RemarkSerializer.cpp | 5 - llvm/lib/Remarks/RemarkStreamer.cpp | 4 +- llvm/lib/Remarks/YAMLRemarkParser.cpp | 71 +-------- llvm/lib/Remarks/YAMLRemarkParser.h | 19 +-- llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 103 +++---------- llvm/test/CodeGen/X86/remarks-section.ll | 8 - llvm/unittests/Remarks/RemarksLinkingTest.cpp | 35 ++--- llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp | 133 +---------------- .../Remarks/YAMLRemarksSerializerTest.cpp | 163 --------------------- 19 files changed, 64 insertions(+), 618 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-opt-report.rst b/llvm/docs/CommandGuide/llvm-opt-report.rst index 4a666a4aa7af..ba10ba34578a 100644 --- a/llvm/docs/CommandGuide/llvm-opt-report.rst +++ b/llvm/docs/CommandGuide/llvm-opt-report.rst @@ -94,7 +94,6 @@ be sent to standard output. The Argument is one of the following: - yaml - - yaml-strtab - bitstream .. option:: --no-demangle diff --git a/llvm/docs/Remarks.rst b/llvm/docs/Remarks.rst index b6cec12b326f..3be66e5adac9 100644 --- a/llvm/docs/Remarks.rst +++ b/llvm/docs/Remarks.rst @@ -112,7 +112,6 @@ following options: Supported formats: * :ref:`yaml ` (default) - * :ref:`yaml-strtab ` * :ref:`bitstream ` ``Content configuration`` @@ -213,30 +212,6 @@ fields are required: * ```` * ```` -.. _yamlstrtabremarks: - -YAML with a string table ------------------------- - -The YAML serialization supports the usage of a string table by using the -``yaml-strtab`` format. 
- -This format replaces strings in the YAML output with integers representing the -index in the string table that can be provided separately through metadata. - -The following entries can take advantage of the string table while respecting -YAML rules: - -* ```` -* ```` -* ```` -* ```` -* ```` -* ```` - -Currently, none of the tools in :ref:`the opt-viewer directory ` -support this format. - .. _optviewer: YAML metadata @@ -246,9 +221,9 @@ The metadata used together with the YAML format is: * a magic number: "REMARKS\\0" * the version number: a little-endian uint64_t -* the total size of the string table (the size itself excluded): - little-endian uint64_t -* a list of null-terminated strings +* 8 zero bytes. This space was previously used to encode the size of a string + table. String table support for YAML remarks has been removed, use the + bitstream format instead. Optional: @@ -584,7 +559,6 @@ Emitting remark diagnostics in the object file A section containing metadata on remark diagnostics will be emitted for the following formats: -* ``yaml-strtab`` * ``bitstream`` This can be overridden by using the flag ``-remarks-section=``. diff --git a/llvm/include/llvm/Remarks/RemarkFormat.h b/llvm/include/llvm/Remarks/RemarkFormat.h index 64d08bcc9b8a..a39a013dcf90 100644 --- a/llvm/include/llvm/Remarks/RemarkFormat.h +++ b/llvm/include/llvm/Remarks/RemarkFormat.h @@ -23,7 +23,7 @@ namespace remarks { constexpr StringLiteral Magic("REMARKS"); /// The format used for serializing/deserializing remarks. -enum class Format { Unknown, YAML, YAMLStrTab, Bitstream }; +enum class Format { Unknown, YAML, Bitstream }; /// Parse and validate a string for the remark format. 
LLVM_ABI Expected parseFormat(StringRef FormatStr); diff --git a/llvm/include/llvm/Remarks/RemarkParser.h b/llvm/include/llvm/Remarks/RemarkParser.h index abb1fb86a87e..e3df74436348 100644 --- a/llvm/include/llvm/Remarks/RemarkParser.h +++ b/llvm/include/llvm/Remarks/RemarkParser.h @@ -80,13 +80,8 @@ struct ParsedStringTable { LLVM_ABI Expected> createRemarkParser(Format ParserFormat, StringRef Buf); -LLVM_ABI Expected> -createRemarkParser(Format ParserFormat, StringRef Buf, - ParsedStringTable StrTab); - LLVM_ABI Expected> createRemarkParserFromMeta( Format ParserFormat, StringRef Buf, - std::optional StrTab = std::nullopt, std::optional ExternalFilePrependPath = std::nullopt); } // end namespace remarks diff --git a/llvm/include/llvm/Remarks/YAMLRemarkSerializer.h b/llvm/include/llvm/Remarks/YAMLRemarkSerializer.h index a2214c349e1c..d80464c0fe74 100644 --- a/llvm/include/llvm/Remarks/YAMLRemarkSerializer.h +++ b/llvm/include/llvm/Remarks/YAMLRemarkSerializer.h @@ -63,47 +63,6 @@ struct LLVM_ABI YAMLMetaSerializer : public MetaSerializer { void emit() override; }; -/// Serialize the remarks to YAML using a string table. An remark entry looks -/// like the regular YAML remark but instead of string entries it's using -/// numbers that map to an index in the string table. -struct LLVM_ABI YAMLStrTabRemarkSerializer : public YAMLRemarkSerializer { - /// Wether we already emitted the metadata in standalone mode. - /// This should be set to true after the first invocation of `emit`. - bool DidEmitMeta = false; - - YAMLStrTabRemarkSerializer(raw_ostream &OS, SerializerMode Mode) - : YAMLRemarkSerializer(Format::YAMLStrTab, OS, Mode) { - // We always need a string table for this type of serializer. - StrTab.emplace(); - } - YAMLStrTabRemarkSerializer(raw_ostream &OS, SerializerMode Mode, - StringTable StrTab) - : YAMLRemarkSerializer(Format::YAMLStrTab, OS, Mode, std::move(StrTab)) {} - - /// Override to emit the metadata if necessary. 
- void emit(const Remark &Remark) override; - - std::unique_ptr metaSerializer( - raw_ostream &OS, - std::optional ExternalFilename = std::nullopt) override; - - static bool classof(const RemarkSerializer *S) { - return S->SerializerFormat == Format::YAMLStrTab; - } -}; - -struct LLVM_ABI YAMLStrTabMetaSerializer : public YAMLMetaSerializer { - /// The string table is part of the metadata. - const StringTable &StrTab; - - YAMLStrTabMetaSerializer(raw_ostream &OS, - std::optional ExternalFilename, - const StringTable &StrTab) - : YAMLMetaSerializer(OS, ExternalFilename), StrTab(StrTab) {} - - void emit() override; -}; - } // end namespace remarks } // end namespace llvm diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.cpp b/llvm/lib/Remarks/BitstreamRemarkParser.cpp index 6dd032f07e72..312886013598 100644 --- a/llvm/lib/Remarks/BitstreamRemarkParser.cpp +++ b/llvm/lib/Remarks/BitstreamRemarkParser.cpp @@ -308,8 +308,7 @@ static Error advanceToMetaBlock(BitstreamParserHelper &Helper) { Expected> remarks::createBitstreamParserFromMeta( - StringRef Buf, std::optional StrTab, - std::optional ExternalFilePrependPath) { + StringRef Buf, std::optional ExternalFilePrependPath) { BitstreamParserHelper Helper(Buf); Expected> MagicNumber = Helper.parseMagic(); if (!MagicNumber) @@ -319,9 +318,7 @@ remarks::createBitstreamParserFromMeta( StringRef(MagicNumber->data(), MagicNumber->size()))) return std::move(E); - auto Parser = - StrTab ? 
std::make_unique(Buf, std::move(*StrTab)) - : std::make_unique(Buf); + auto Parser = std::make_unique(Buf); if (ExternalFilePrependPath) Parser->ExternalFilePrependPath = std::string(*ExternalFilePrependPath); diff --git a/llvm/lib/Remarks/BitstreamRemarkParser.h b/llvm/lib/Remarks/BitstreamRemarkParser.h index fc786fc57622..f6f79ef199f7 100644 --- a/llvm/lib/Remarks/BitstreamRemarkParser.h +++ b/llvm/lib/Remarks/BitstreamRemarkParser.h @@ -48,11 +48,6 @@ struct BitstreamRemarkParser : public RemarkParser { explicit BitstreamRemarkParser(StringRef Buf) : RemarkParser(Format::Bitstream), ParserHelper(Buf) {} - /// Create a parser that uses a pre-parsed string table. - BitstreamRemarkParser(StringRef Buf, ParsedStringTable StrTab) - : RemarkParser(Format::Bitstream), ParserHelper(Buf), - StrTab(std::move(StrTab)) {} - Expected> next() override; static bool classof(const RemarkParser *P) { @@ -77,7 +72,7 @@ private: }; Expected> createBitstreamParserFromMeta( - StringRef Buf, std::optional StrTab = std::nullopt, + StringRef Buf, std::optional ExternalFilePrependPath = std::nullopt); } // end namespace remarks diff --git a/llvm/lib/Remarks/RemarkFormat.cpp b/llvm/lib/Remarks/RemarkFormat.cpp index 5006421a3c63..800f5bffe70d 100644 --- a/llvm/lib/Remarks/RemarkFormat.cpp +++ b/llvm/lib/Remarks/RemarkFormat.cpp @@ -20,7 +20,6 @@ using namespace llvm::remarks; Expected llvm::remarks::parseFormat(StringRef FormatStr) { auto Result = StringSwitch(FormatStr) .Cases("", "yaml", Format::YAML) - .Case("yaml-strtab", Format::YAMLStrTab) .Case("bitstream", Format::Bitstream) .Default(Format::Unknown); @@ -36,7 +35,8 @@ Expected llvm::remarks::magicToFormat(StringRef MagicStr) { auto Result = StringSwitch(MagicStr) .StartsWith("--- ", Format::YAML) // This is only an assumption. 
- .StartsWith(remarks::Magic, Format::YAMLStrTab) + .StartsWith(remarks::Magic, + Format::YAML) // Needed for remark meta section .StartsWith(remarks::ContainerMagic, Format::Bitstream) .Default(Format::Unknown); diff --git a/llvm/lib/Remarks/RemarkLinker.cpp b/llvm/lib/Remarks/RemarkLinker.cpp index b70b06d706bd..b8395aa135d8 100644 --- a/llvm/lib/Remarks/RemarkLinker.cpp +++ b/llvm/lib/Remarks/RemarkLinker.cpp @@ -76,7 +76,7 @@ Error RemarkLinker::link(StringRef Buffer, std::optional RemarkFormat) { Expected> MaybeParser = createRemarkParserFromMeta( - *RemarkFormat, Buffer, /*StrTab=*/std::nullopt, + *RemarkFormat, Buffer, PrependPath ? std::optional(StringRef(*PrependPath)) : std::optional()); if (!MaybeParser) diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp index 7fccb94014b9..5c1690aaa0fe 100644 --- a/llvm/lib/Remarks/RemarkParser.cpp +++ b/llvm/lib/Remarks/RemarkParser.cpp @@ -53,10 +53,6 @@ llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf) { switch (ParserFormat) { case Format::YAML: return std::make_unique(Buf); - case Format::YAMLStrTab: - return createStringError( - std::make_error_code(std::errc::invalid_argument), - "The YAML with string table format requires a parsed string table."); case Format::Bitstream: return std::make_unique(Buf); case Format::Unknown: @@ -66,38 +62,15 @@ llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf) { llvm_unreachable("unhandled ParseFormat"); } -Expected> -llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf, - ParsedStringTable StrTab) { - switch (ParserFormat) { - case Format::YAML: - return createStringError(std::make_error_code(std::errc::invalid_argument), - "The YAML format can't be used with a string " - "table. 
Use yaml-strtab instead."); - case Format::YAMLStrTab: - return std::make_unique(Buf, std::move(StrTab)); - case Format::Bitstream: - return std::make_unique(Buf, std::move(StrTab)); - case Format::Unknown: - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Unknown remark parser format."); - } - llvm_unreachable("unhandled ParseFormat"); -} - Expected> llvm::remarks::createRemarkParserFromMeta( - Format ParserFormat, StringRef Buf, std::optional StrTab, + Format ParserFormat, StringRef Buf, std::optional ExternalFilePrependPath) { switch (ParserFormat) { - // Depending on the metadata, the format can be either yaml or yaml-strtab, - // regardless of the input argument. case Format::YAML: - case Format::YAMLStrTab: - return createYAMLParserFromMeta(Buf, std::move(StrTab), - std::move(ExternalFilePrependPath)); + return createYAMLParserFromMeta(Buf, std::move(ExternalFilePrependPath)); case Format::Bitstream: - return createBitstreamParserFromMeta(Buf, std::move(StrTab), + return createBitstreamParserFromMeta(Buf, std::move(ExternalFilePrependPath)); case Format::Unknown: return createStringError(std::make_error_code(std::errc::invalid_argument), @@ -112,11 +85,8 @@ struct CParser { std::unique_ptr TheParser; std::optional Err; - CParser(Format ParserFormat, StringRef Buf, - std::optional StrTab = std::nullopt) - : TheParser(cantFail( - StrTab ? 
createRemarkParser(ParserFormat, Buf, std::move(*StrTab)) - : createRemarkParser(ParserFormat, Buf))) {} + CParser(Format ParserFormat, StringRef Buf) + : TheParser(cantFail(createRemarkParser(ParserFormat, Buf))) {} void handleError(Error E) { Err.emplace(toString(std::move(E))); } bool hasError() const { return Err.has_value(); } diff --git a/llvm/lib/Remarks/RemarkSerializer.cpp b/llvm/lib/Remarks/RemarkSerializer.cpp index ab19c84bbadb..cc10b91f287a 100644 --- a/llvm/lib/Remarks/RemarkSerializer.cpp +++ b/llvm/lib/Remarks/RemarkSerializer.cpp @@ -26,8 +26,6 @@ remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode, "Unknown remark serializer format."); case Format::YAML: return std::make_unique(OS, Mode); - case Format::YAMLStrTab: - return std::make_unique(OS, Mode); case Format::Bitstream: return std::make_unique(OS, Mode); } @@ -43,9 +41,6 @@ remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode, "Unknown remark serializer format."); case Format::YAML: return std::make_unique(OS, Mode, std::move(StrTab)); - case Format::YAMLStrTab: - return std::make_unique(OS, Mode, - std::move(StrTab)); case Format::Bitstream: return std::make_unique(OS, Mode, std::move(StrTab)); diff --git a/llvm/lib/Remarks/RemarkStreamer.cpp b/llvm/lib/Remarks/RemarkStreamer.cpp index 9f4676ce37ab..bb62c8b5c2fd 100644 --- a/llvm/lib/Remarks/RemarkStreamer.cpp +++ b/llvm/lib/Remarks/RemarkStreamer.cpp @@ -21,7 +21,7 @@ static cl::opt EnableRemarksSection( "remarks-section", cl::desc( "Emit a section containing remark diagnostics metadata. 
By default, " - "this is enabled for the following formats: yaml-strtab, bitstream."), + "this is enabled for the following formats: bitstream."), cl::init(cl::BOU_UNSET), cl::Hidden); RemarkStreamer::RemarkStreamer( @@ -63,9 +63,7 @@ bool RemarkStreamer::needsSection() const { // Only some formats need a section: // * bitstream - // * yaml-strtab switch (RemarkSerializer->SerializerFormat) { - case remarks::Format::YAMLStrTab: case remarks::Format::Bitstream: return true; default: diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp index a287ef574255..5ff42fe6b9a9 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.cpp +++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp @@ -95,21 +95,8 @@ static Expected parseStrTabSize(StringRef &Buf) { return StrTabSize; } -static Expected parseStrTab(StringRef &Buf, - uint64_t StrTabSize) { - if (Buf.size() < StrTabSize) - return createStringError(std::errc::illegal_byte_sequence, - "Expecting string table."); - - // Attach the string table to the parser. - ParsedStringTable Result(StringRef(Buf.data(), StrTabSize)); - Buf = Buf.drop_front(StrTabSize); - return Expected(std::move(Result)); -} - Expected> remarks::createYAMLParserFromMeta( - StringRef Buf, std::optional StrTab, - std::optional ExternalFilePrependPath) { + StringRef Buf, std::optional ExternalFilePrependPath) { // We now have a magic number. The metadata has to be correct. Expected isMeta = parseMagic(Buf); if (!isMeta) @@ -125,15 +112,9 @@ Expected> remarks::createYAMLParserFromMeta( if (!StrTabSize) return StrTabSize.takeError(); - // If the size of string table is not 0, try to build one. 
if (*StrTabSize != 0) { - if (StrTab) - return createStringError(std::errc::illegal_byte_sequence, - "String table already provided."); - Expected MaybeStrTab = parseStrTab(Buf, *StrTabSize); - if (!MaybeStrTab) - return MaybeStrTab.takeError(); - StrTab = std::move(*MaybeStrTab); + return createStringError(std::errc::illegal_byte_sequence, + "String table unsupported for YAML format."); } // If it starts with "---", there is no external file. if (!Buf.starts_with("---")) { @@ -157,21 +138,15 @@ Expected> remarks::createYAMLParserFromMeta( } std::unique_ptr Result = - StrTab - ? std::make_unique(Buf, std::move(*StrTab)) - : std::make_unique(Buf); + std::make_unique(Buf); if (SeparateBuf) Result->SeparateBuf = std::move(SeparateBuf); return std::move(Result); } YAMLRemarkParser::YAMLRemarkParser(StringRef Buf) - : YAMLRemarkParser(Buf, std::nullopt) {} - -YAMLRemarkParser::YAMLRemarkParser(StringRef Buf, - std::optional StrTab) - : RemarkParser{Format::YAML}, StrTab(std::move(StrTab)), - SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {} + : RemarkParser{Format::YAML}, SM(setupSM(LastErrorMessage)), + Stream(Buf, SM), YAMLIt(Stream.begin()) {} Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) { return make_error(Message, SM, Stream, Node); @@ -208,8 +183,8 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) { Expected T = parseType(*Root); if (!T) return T.takeError(); - else - TheRemark.RemarkType = *T; + + TheRemark.RemarkType = *T; // Then, parse the fields, one by one. for (yaml::KeyValueNode &RemarkField : *Root) { @@ -428,33 +403,3 @@ Expected> YAMLRemarkParser::next() { return std::move(*MaybeResult); } - -Expected YAMLStrTabRemarkParser::parseStr(yaml::KeyValueNode &Node) { - auto *Value = dyn_cast(Node.getValue()); - yaml::BlockScalarNode *ValueBlock; - StringRef Result; - if (!Value) { - // Try to parse the value as a block node. 
- ValueBlock = dyn_cast(Node.getValue()); - if (!ValueBlock) - return error("expected a value of scalar type.", Node); - Result = ValueBlock->getValue(); - } else - Result = Value->getRawValue(); - // If we have a string table, parse it as an unsigned. - unsigned StrID = 0; - if (Expected MaybeStrID = parseUnsigned(Node)) - StrID = *MaybeStrID; - else - return MaybeStrID.takeError(); - - if (Expected Str = (*StrTab)[StrID]) - Result = *Str; - else - return Str.takeError(); - - Result.consume_front("\'"); - Result.consume_back("\'"); - - return Result; -} diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h index 8ef72e16be74..9a30e9e295cb 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.h +++ b/llvm/lib/Remarks/YAMLRemarkParser.h @@ -46,8 +46,6 @@ private: /// Regular YAML to Remark parser. struct YAMLRemarkParser : public RemarkParser { - /// The string table used for parsing strings. - std::optional StrTab; /// Last error message that can come from the YAML parser diagnostics. /// We need this for catching errors in the constructor. std::string LastErrorMessage; @@ -70,7 +68,6 @@ struct YAMLRemarkParser : public RemarkParser { } protected: - YAMLRemarkParser(StringRef Buf, std::optional StrTab); /// Create a YAMLParseError error from an existing error generated by the YAML /// parser. /// If there is no error, this returns Success. @@ -93,22 +90,8 @@ protected: Expected parseArg(yaml::Node &Node); }; -/// YAML with a string table to Remark parser. -struct YAMLStrTabRemarkParser : public YAMLRemarkParser { - YAMLStrTabRemarkParser(StringRef Buf, ParsedStringTable StrTab) - : YAMLRemarkParser(Buf, std::move(StrTab)) {} - - static bool classof(const RemarkParser *P) { - return P->ParserFormat == Format::YAMLStrTab; - } - -protected: - /// Parse one value to a string. 
- Expected parseStr(yaml::KeyValueNode &Node) override; -}; - Expected> createYAMLParserFromMeta( - StringRef Buf, std::optional StrTab = std::nullopt, + StringRef Buf, std::optional ExternalFilePrependPath = std::nullopt); } // end namespace remarks diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp index 68285c3dde1b..846a72182d8f 100644 --- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp +++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp @@ -21,11 +21,10 @@ using namespace llvm::remarks; // Use the same keys whether we use a string table or not (respectively, T is an // unsigned or a StringRef). -template -static void mapRemarkHeader(yaml::IO &io, T PassName, T RemarkName, - std::optional RL, T FunctionName, - std::optional Hotness, - ArrayRef Args) { +static void +mapRemarkHeader(yaml::IO &io, StringRef PassName, StringRef RemarkName, + std::optional RL, StringRef FunctionName, + std::optional Hotness, ArrayRef Args) { io.mapRequired("Pass", PassName); io.mapRequired("Name", RemarkName); io.mapOptional("DebugLoc", RL); @@ -58,19 +57,8 @@ template <> struct MappingTraits { else llvm_unreachable("Unknown remark type"); - if (auto *Serializer = dyn_cast( - reinterpret_cast(io.getContext()))) { - assert(Serializer->StrTab && "YAMLStrTabSerializer with no StrTab."); - StringTable &StrTab = *Serializer->StrTab; - unsigned PassID = StrTab.add(Remark->PassName).first; - unsigned NameID = StrTab.add(Remark->RemarkName).first; - unsigned FunctionID = StrTab.add(Remark->FunctionName).first; - mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID, - Remark->Hotness, Remark->Args); - } else { - mapRemarkHeader(io, Remark->PassName, Remark->RemarkName, Remark->Loc, - Remark->FunctionName, Remark->Hotness, Remark->Args); - } + mapRemarkHeader(io, Remark->PassName, Remark->RemarkName, Remark->Loc, + Remark->FunctionName, Remark->Hotness, Remark->Args); } }; @@ -82,15 +70,7 @@ template <> struct MappingTraits { unsigned Line = 
RL.SourceLine; unsigned Col = RL.SourceColumn; - if (auto *Serializer = dyn_cast( - reinterpret_cast(io.getContext()))) { - assert(Serializer->StrTab && "YAMLStrTabSerializer with no StrTab."); - StringTable &StrTab = *Serializer->StrTab; - unsigned FileID = StrTab.add(File).first; - io.mapRequired("File", FileID); - } else { - io.mapRequired("File", File); - } + io.mapRequired("File", File); io.mapRequired("Line", Line); io.mapRequired("Column", Col); @@ -136,13 +116,7 @@ template <> struct MappingTraits { static void mapping(IO &io, Argument &A) { assert(io.outputting() && "input not yet implemented"); - if (auto *Serializer = dyn_cast( - reinterpret_cast(io.getContext()))) { - assert(Serializer->StrTab && "YAMLStrTabSerializer with no StrTab."); - StringTable &StrTab = *Serializer->StrTab; - auto ValueID = StrTab.add(A.Val).first; - io.mapRequired(A.Key.data(), ValueID); - } else if (StringRef(A.Val).count('\n') > 1) { + if (StringRef(A.Val).count('\n') > 1) { StringBlockVal S(A.Val); io.mapRequired(A.Key.data(), S); } else { @@ -159,12 +133,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(Argument) YAMLRemarkSerializer::YAMLRemarkSerializer(raw_ostream &OS, SerializerMode Mode, std::optional StrTabIn) - : YAMLRemarkSerializer(Format::YAML, OS, Mode, std::move(StrTabIn)) {} - -YAMLRemarkSerializer::YAMLRemarkSerializer(Format SerializerFormat, - raw_ostream &OS, SerializerMode Mode, - std::optional StrTabIn) - : RemarkSerializer(SerializerFormat, OS, Mode), + : RemarkSerializer(Format::YAML, OS, Mode), YAMLOutput(OS, reinterpret_cast(this)) { StrTab = std::move(StrTabIn); } @@ -172,7 +141,7 @@ YAMLRemarkSerializer::YAMLRemarkSerializer(Format SerializerFormat, void YAMLRemarkSerializer::emit(const Remark &Remark) { // Again, YAMLTraits expect a non-const object for inputting, but we're not // using that here. 
- auto R = const_cast(&Remark); + auto *R = const_cast(&Remark); YAMLOutput << R; } @@ -181,27 +150,6 @@ std::unique_ptr YAMLRemarkSerializer::metaSerializer( return std::make_unique(OS, ExternalFilename); } -void YAMLStrTabRemarkSerializer::emit(const Remark &Remark) { - // In standalone mode, for the serializer with a string table, emit the - // metadata first and set DidEmitMeta to avoid emitting it again. - if (Mode == SerializerMode::Standalone && !DidEmitMeta) { - std::unique_ptr MetaSerializer = - metaSerializer(OS, /*ExternalFilename=*/std::nullopt); - MetaSerializer->emit(); - DidEmitMeta = true; - } - - // Then do the usual remark emission. - YAMLRemarkSerializer::emit(Remark); -} - -std::unique_ptr YAMLStrTabRemarkSerializer::metaSerializer( - raw_ostream &OS, std::optional ExternalFilename) { - assert(StrTab); - return std::make_unique(OS, ExternalFilename, - *StrTab); -} - static void emitMagic(raw_ostream &OS) { // Emit the magic number. OS << remarks::Magic; @@ -216,20 +164,6 @@ static void emitVersion(raw_ostream &OS) { OS.write(Version.data(), Version.size()); } -static void emitStrTab(raw_ostream &OS, - std::optional StrTab) { - // Emit the string table in the section. - uint64_t StrTabSize = StrTab ? (*StrTab)->SerializedSize : 0; - // Emit the total size of the string table (the size itself excluded): - // little-endian uint64_t. - // Note: even if no string table is used, emit 0. - std::array StrTabSizeBuf; - support::endian::write64le(StrTabSizeBuf.data(), StrTabSize); - OS.write(StrTabSizeBuf.data(), StrTabSizeBuf.size()); - if (StrTab) - (*StrTab)->serialize(OS); -} - static void emitExternalFile(raw_ostream &OS, StringRef Filename) { // Emit the null-terminated absolute path to the remark file. 
SmallString<128> FilenameBuf = Filename; @@ -242,15 +176,16 @@ static void emitExternalFile(raw_ostream &OS, StringRef Filename) { void YAMLMetaSerializer::emit() { emitMagic(OS); emitVersion(OS); - emitStrTab(OS, std::nullopt); - if (ExternalFilename) - emitExternalFile(OS, *ExternalFilename); -} -void YAMLStrTabMetaSerializer::emit() { - emitMagic(OS); - emitVersion(OS); - emitStrTab(OS, &StrTab); + // Emit StringTable with size 0. This is left over after removing StringTable + // support from the YAML format. For now, don't unnecessarily change how the + // the metadata is serialized. When changing the format, we should think about + // just reusing the bitstream remark meta for this. + uint64_t StrTabSize = 0; + std::array StrTabSizeBuf; + support::endian::write64le(StrTabSizeBuf.data(), StrTabSize); + + OS.write(StrTabSizeBuf.data(), StrTabSizeBuf.size()); if (ExternalFilename) emitExternalFile(OS, *ExternalFilename); } diff --git a/llvm/test/CodeGen/X86/remarks-section.ll b/llvm/test/CodeGen/X86/remarks-section.ll index dba20d428a69..e67c3579b759 100644 --- a/llvm/test/CodeGen/X86/remarks-section.ll +++ b/llvm/test/CodeGen/X86/remarks-section.ll @@ -1,8 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-darwin -remarks-section -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN -DPATH=%/t.yaml %s -; RUN: llc < %s -mtriple=x86_64-darwin --pass-remarks-format=yaml-strtab -remarks-section -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN-STRTAB -DPATH=%/t.yaml %s ; RUN: llc < %s -mtriple=x86_64-darwin -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN-DEFAULT %s -; RUN: llc < %s -mtriple=x86_64-darwin --pass-remarks-format=yaml-strtab -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN-DEFAULT-YAML-STRTAB %s ; RUN: llc < %s -mtriple=x86_64-darwin --pass-remarks-format=bitstream -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN-DEFAULT-BITSTREAM %s ; RUN: llc < %s 
-mtriple=x86_64-darwin --pass-remarks-format=bitstream -remarks-section=false -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN-OVERRIDE-BITSTREAM %s ; RUN: llc < %s -mtriple=x86_64-darwin --pass-remarks-format=yaml -remarks-section=true -pass-remarks-output=%/t.yaml | FileCheck --check-prefix=CHECK-DARWIN-OVERRIDE-YAML %s @@ -10,15 +8,9 @@ ; CHECK-DARWIN: .section __LLVM,__remarks,regular,debug ; CHECK-DARWIN-NEXT: .byte -; CHECK-DARWIN-STRTAB: .section __LLVM,__remarks,regular,debug -; CHECK-DARWIN-STRTAB-NEXT: .byte - ; By default, the format is YAML which does not need a section. ; CHECK-DARWIN-DEFAULT-NOT: .section __LLVM,__remarks -; yaml-strtab needs a section. -; CHECK-DARWIN-DEFAULT-YAML-STRTAB: .section __LLVM,__remarks - ; bitstream needs a section. ; CHECK-DARWIN-DEFAULT-BITSTREAM: .section __LLVM,__remarks diff --git a/llvm/unittests/Remarks/RemarksLinkingTest.cpp b/llvm/unittests/Remarks/RemarksLinkingTest.cpp index ff2aec669f2f..dcd598aaeb5c 100644 --- a/llvm/unittests/Remarks/RemarksLinkingTest.cpp +++ b/llvm/unittests/Remarks/RemarksLinkingTest.cpp @@ -207,22 +207,22 @@ TEST(Remarks, LinkingGoodStrTab) { "DebugLoc: { File: file.c, Line: 3, Column: 12 }\n" "Function: foo\n" "...\n", - remarks::Format::YAMLStrTab, - StringRef("REMARKS\0\0\0\0\0\0\0\0\0\x22\0\0\0\0\0\0\0" - "inline\0NoDefinition\0foo\0file.c\0Ok\0" - "--- !Passed\n" - "Pass: 0\n" - "Name: 4\n" - "DebugLoc: { File: 3, Line: 3, Column: 12 }\n" - "Function: 2\n" - "...\n" - "--- !Missed\n" - "Pass: 0\n" - "Name: 1\n" - "DebugLoc: { File: 3, Line: 3, Column: 12 }\n" - "Function: 2\n" - "...\n", - 304)); + remarks::Format::Bitstream, + "\n" + "\n" + " \n" + " \n" + " blob data = " + "'inline\\x00NoDefinition\\x00foo\\x00file.c\\x00Ok\\x00'\n" + "\n" + "\n" + " \n" + " \n" + "\n" + "\n" + " \n" + " \n" + "\n"); } // Check that we propagate parsing errors. 
@@ -241,11 +241,12 @@ TEST(Remarks, LinkingError) { { // Check that the prepend path is propagated and fails with the full path. + // Also ensures that the remark format is correctly auto-detected. RL.setExternalFilePrependPath("/baddir/"); Error E = RL.link( StringRef("REMARKS\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0badfile.opt.yaml", 40), - remarks::Format::YAMLStrTab); + /*RemarkFormat=*/std::nullopt); EXPECT_TRUE(static_cast(E)); std::string ErrorMessage = toString(std::move(E)); EXPECT_EQ(StringRef(ErrorMessage).lower(), diff --git a/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp b/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp index 3c740ddc8a55..824813aa5af7 100644 --- a/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp +++ b/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp @@ -77,7 +77,6 @@ void parseExpectErrorMeta( Expected> MaybeParser = remarks::createRemarkParserFromMeta(remarks::Format::YAML, Buf, - /*StrTab=*/std::nullopt, std::move(ExternalFilePrependPath)); handleAllErrors(MaybeParser.takeError(), [&](const ErrorInfoBase &EIB) { EIB.log(Stream); }); @@ -558,124 +557,6 @@ TEST(YAMLRemarks, ContentsCAPI) { LLVMRemarkParserDispose(Parser); } -TEST(YAMLRemarks, ContentsStrTab) { - StringRef Buf = "\n" - "--- !Missed\n" - "Pass: 0\n" - "Name: 1\n" - "DebugLoc: { File: 2, Line: 3, Column: 12 }\n" - "Function: 3\n" - "Hotness: 4\n" - "Args:\n" - " - Callee: 5\n" - " - String: 7\n" - " - Caller: 3\n" - " DebugLoc: { File: 2, Line: 2, Column: 0 }\n" - " - String: 8\n" - "\n"; - - StringRef StrTabBuf = - StringRef("inline\0NoDefinition\0file.c\0foo\0Callee\0bar\0String\0 " - "will not be inlined into \0 because its definition is " - "unavailable", - 115); - - remarks::ParsedStringTable StrTab(StrTabBuf); - Expected> MaybeParser = - remarks::createRemarkParser(remarks::Format::YAMLStrTab, Buf, - std::move(StrTab)); - EXPECT_FALSE(errorToBool(MaybeParser.takeError())); - EXPECT_TRUE(*MaybeParser != nullptr); - - remarks::RemarkParser &Parser = **MaybeParser; - 
Expected> MaybeRemark = Parser.next(); - EXPECT_FALSE( - errorToBool(MaybeRemark.takeError())); // Check for parsing errors. - EXPECT_TRUE(*MaybeRemark != nullptr); // At least one remark. - - const remarks::Remark &Remark = **MaybeRemark; - EXPECT_EQ(Remark.RemarkType, remarks::Type::Missed); - EXPECT_EQ(checkStr(Remark.PassName, 6), "inline"); - EXPECT_EQ(checkStr(Remark.RemarkName, 12), "NoDefinition"); - EXPECT_EQ(checkStr(Remark.FunctionName, 3), "foo"); - EXPECT_TRUE(Remark.Loc); - const remarks::RemarkLocation &RL = *Remark.Loc; - EXPECT_EQ(checkStr(RL.SourceFilePath, 6), "file.c"); - EXPECT_EQ(RL.SourceLine, 3U); - EXPECT_EQ(RL.SourceColumn, 12U); - EXPECT_TRUE(Remark.Hotness); - EXPECT_EQ(*Remark.Hotness, 4U); - EXPECT_EQ(Remark.Args.size(), 4U); - - unsigned ArgID = 0; - for (const remarks::Argument &Arg : Remark.Args) { - switch (ArgID) { - case 0: - EXPECT_EQ(checkStr(Arg.Key, 6), "Callee"); - EXPECT_EQ(checkStr(Arg.Val, 3), "bar"); - EXPECT_FALSE(Arg.Loc); - break; - case 1: - EXPECT_EQ(checkStr(Arg.Key, 6), "String"); - EXPECT_EQ(checkStr(Arg.Val, 26), " will not be inlined into "); - EXPECT_FALSE(Arg.Loc); - break; - case 2: { - EXPECT_EQ(checkStr(Arg.Key, 6), "Caller"); - EXPECT_EQ(checkStr(Arg.Val, 3), "foo"); - EXPECT_TRUE(Arg.Loc); - const remarks::RemarkLocation &RL = *Arg.Loc; - EXPECT_EQ(checkStr(RL.SourceFilePath, 6), "file.c"); - EXPECT_EQ(RL.SourceLine, 2U); - EXPECT_EQ(RL.SourceColumn, 0U); - break; - } - case 3: - EXPECT_EQ(checkStr(Arg.Key, 6), "String"); - EXPECT_EQ(checkStr(Arg.Val, 38), - " because its definition is unavailable"); - EXPECT_FALSE(Arg.Loc); - break; - default: - break; - } - ++ArgID; - } - - MaybeRemark = Parser.next(); - Error E = MaybeRemark.takeError(); - EXPECT_TRUE(E.isA()); - EXPECT_TRUE(errorToBool(std::move(E))); // Check for parsing errors. 
-} - -TEST(YAMLRemarks, ParsingBadStringTableIndex) { - StringRef Buf = "\n" - "--- !Missed\n" - "Pass: 50\n" - "\n"; - - StringRef StrTabBuf = StringRef("inline"); - - remarks::ParsedStringTable StrTab(StrTabBuf); - Expected> MaybeParser = - remarks::createRemarkParser(remarks::Format::YAMLStrTab, Buf, - std::move(StrTab)); - EXPECT_FALSE(errorToBool(MaybeParser.takeError())); - EXPECT_TRUE(*MaybeParser != nullptr); - - remarks::RemarkParser &Parser = **MaybeParser; - Expected> MaybeRemark = Parser.next(); - EXPECT_FALSE(MaybeRemark); // Expect an error here. - - std::string ErrorStr; - raw_string_ostream Stream(ErrorStr); - handleAllErrors(MaybeRemark.takeError(), - [&](const ErrorInfoBase &EIB) { EIB.log(Stream); }); - EXPECT_TRUE( - StringRef(Stream.str()) - .contains("String with index 50 is out of bounds (size = 1).")); -} - TEST(YAMLRemarks, ParsingGoodMeta) { // No metadata should also work. parseGoodMeta("--- !Missed\n" @@ -692,17 +573,6 @@ TEST(YAMLRemarks, ParsingGoodMeta) { "Name: NoDefinition\n" "Function: foo\n", 82)); - - // Use the string table from the metadata. 
- parseGoodMeta(StringRef("REMARKS\0" - "\0\0\0\0\0\0\0\0" - "\x02\0\0\0\0\0\0\0" - "a\0" - "--- !Missed\n" - "Pass: 0\n" - "Name: 0\n" - "Function: 0\n", - 66)); } TEST(YAMLRemarks, ParsingBadMeta) { @@ -727,7 +597,8 @@ TEST(YAMLRemarks, ParsingBadMeta) { "\0\0\0\0\0\0\0\0" "\x01\0\0\0\0\0\0\0", 24), - "Expecting string table.", CmpType::Equal); + "String table unsupported for YAML format.", + CmpType::Equal); parseExpectErrorMeta(StringRef("REMARKS\0" "\0\0\0\0\0\0\0\0" diff --git a/llvm/unittests/Remarks/YAMLRemarksSerializerTest.cpp b/llvm/unittests/Remarks/YAMLRemarksSerializerTest.cpp index 442c24b9fd95..7e994ac4d58b 100644 --- a/llvm/unittests/Remarks/YAMLRemarksSerializerTest.cpp +++ b/llvm/unittests/Remarks/YAMLRemarksSerializerTest.cpp @@ -131,78 +131,6 @@ TEST(YAMLRemarks, SerializerRemarkStandalone) { "...\n")); } -TEST(YAMLRemarks, SerializerRemarkStrTab) { - remarks::Remark R; - R.RemarkType = remarks::Type::Missed; - R.PassName = "pass"; - R.RemarkName = "name"; - R.FunctionName = "func"; - R.Loc = remarks::RemarkLocation{"path", 3, 4}; - R.Hotness = 5; - R.Args.emplace_back(); - R.Args.back().Key = "key"; - R.Args.back().Val = "value"; - R.Args.emplace_back(); - R.Args.back().Key = "keydebug"; - R.Args.back().Val = "valuedebug"; - R.Args.back().Loc = remarks::RemarkLocation{"argpath", 6, 7}; - check(remarks::Format::YAMLStrTab, R, - "--- !Missed\n" - "Pass: 0\n" - "Name: 1\n" - "DebugLoc: { File: 3, Line: 3, Column: 4 }\n" - "Function: 2\n" - "Hotness: 5\n" - "Args:\n" - " - key: 4\n" - " - keydebug: 5\n" - " DebugLoc: { File: 6, Line: 6, Column: 7 }\n" - "...\n", - StringRef("REMARKS\0" - "\0\0\0\0\0\0\0\0" - "\x2d\0\0\0\0\0\0\0" - "pass\0name\0func\0path\0value\0valuedebug\0argpath" - "\0" EXTERNALFILETESTPATH "\0", - 83)); -} - -TEST(YAMLRemarks, SerializerRemarkParsedStrTab) { - StringRef StrTab("pass\0name\0func\0path\0value\0valuedebug\0argpath\0", 45); - remarks::Remark R; - R.RemarkType = remarks::Type::Missed; - R.PassName = "pass"; - 
R.RemarkName = "name"; - R.FunctionName = "func"; - R.Loc = remarks::RemarkLocation{"path", 3, 4}; - R.Hotness = 5; - R.Args.emplace_back(); - R.Args.back().Key = "key"; - R.Args.back().Val = "value"; - R.Args.emplace_back(); - R.Args.back().Key = "keydebug"; - R.Args.back().Val = "valuedebug"; - R.Args.back().Loc = remarks::RemarkLocation{"argpath", 6, 7}; - check(remarks::Format::YAMLStrTab, R, - "--- !Missed\n" - "Pass: 0\n" - "Name: 1\n" - "DebugLoc: { File: 3, Line: 3, Column: 4 }\n" - "Function: 2\n" - "Hotness: 5\n" - "Args:\n" - " - key: 4\n" - " - keydebug: 5\n" - " DebugLoc: { File: 6, Line: 6, Column: 7 }\n" - "...\n", - StringRef("REMARKS\0" - "\0\0\0\0\0\0\0\0" - "\x2d\0\0\0\0\0\0\0" - "pass\0name\0func\0path\0value\0valuedebug\0argpath" - "\0" EXTERNALFILETESTPATH "\0", - 83), - remarks::StringTable(remarks::ParsedStringTable(StrTab))); -} - TEST(YAMLRemarks, SerializerRemarkParsedStrTabStandaloneNoStrTab) { // Check that we don't use the string table even if it was provided. 
StringRef StrTab("pass\0name\0func\0path\0value\0valuedebug\0argpath\0", 45); @@ -237,94 +165,3 @@ TEST(YAMLRemarks, SerializerRemarkParsedStrTabStandaloneNoStrTab) { "...\n"), std::move(PreFilledStrTab)); } - -TEST(YAMLRemarks, SerializerRemarkParsedStrTabStandalone) { - StringRef StrTab("pass\0name\0func\0path\0value\0valuedebug\0argpath\0", 45); - remarks::ParsedStringTable ParsedStrTab(StrTab); - remarks::StringTable PreFilledStrTab(ParsedStrTab); - remarks::Remark R; - R.RemarkType = remarks::Type::Missed; - R.PassName = "pass"; - R.RemarkName = "name"; - R.FunctionName = "func"; - R.Loc = remarks::RemarkLocation{"path", 3, 4}; - R.Hotness = 5; - R.Args.emplace_back(); - R.Args.back().Key = "key"; - R.Args.back().Val = "value"; - R.Args.emplace_back(); - R.Args.back().Key = "keydebug"; - R.Args.back().Val = "valuedebug"; - R.Args.back().Loc = remarks::RemarkLocation{"argpath", 6, 7}; - checkStandalone( - remarks::Format::YAMLStrTab, R, - StringRef("REMARKS\0" - "\0\0\0\0\0\0\0\0" - "\x2d\0\0\0\0\0\0\0" - "pass\0name\0func\0path\0value\0valuedebug\0argpath\0" - "--- !Missed\n" - "Pass: 0\n" - "Name: 1\n" - "DebugLoc: { File: 3, Line: 3, Column: 4 }\n" - "Function: 2\n" - "Hotness: 5\n" - "Args:\n" - " - key: 4\n" - " - keydebug: 5\n" - " DebugLoc: { File: 6, Line: 6, Column: 7 }\n" - "...\n", - 315), - std::move(PreFilledStrTab)); -} - -TEST(YAMLRemarks, SerializerRemarkParsedStrTabStandaloneMultipleRemarks) { - StringRef StrTab("pass\0name\0func\0path\0value\0valuedebug\0argpath\0", 45); - remarks::ParsedStringTable ParsedStrTab(StrTab); - remarks::StringTable PreFilledStrTab(ParsedStrTab); - SmallVector Rs; - remarks::Remark R; - R.RemarkType = remarks::Type::Missed; - R.PassName = "pass"; - R.RemarkName = "name"; - R.FunctionName = "func"; - R.Loc = remarks::RemarkLocation{"path", 3, 4}; - R.Hotness = 5; - R.Args.emplace_back(); - R.Args.back().Key = "key"; - R.Args.back().Val = "value"; - R.Args.emplace_back(); - R.Args.back().Key = "keydebug"; - 
R.Args.back().Val = "valuedebug"; - R.Args.back().Loc = remarks::RemarkLocation{"argpath", 6, 7}; - Rs.emplace_back(R.clone()); - Rs.emplace_back(std::move(R)); - check(remarks::Format::YAMLStrTab, remarks::SerializerMode::Standalone, Rs, - StringRef("REMARKS\0" - "\0\0\0\0\0\0\0\0" - "\x2d\0\0\0\0\0\0\0" - "pass\0name\0func\0path\0value\0valuedebug\0argpath\0" - "--- !Missed\n" - "Pass: 0\n" - "Name: 1\n" - "DebugLoc: { File: 3, Line: 3, Column: 4 }\n" - "Function: 2\n" - "Hotness: 5\n" - "Args:\n" - " - key: 4\n" - " - keydebug: 5\n" - " DebugLoc: { File: 6, Line: 6, Column: 7 }\n" - "...\n" - "--- !Missed\n" - "Pass: 0\n" - "Name: 1\n" - "DebugLoc: { File: 3, Line: 3, Column: 4 }\n" - "Function: 2\n" - "Hotness: 5\n" - "Args:\n" - " - key: 4\n" - " - keydebug: 5\n" - " DebugLoc: { File: 6, Line: 6, Column: 7 }\n" - "...\n", - 561), - /*ExpectedMeta=*/std::nullopt, std::move(PreFilledStrTab)); -} -- cgit v1.2.3 From 671caef379c603d2bcc428a00e3535b230162941 Mon Sep 17 00:00:00 2001 From: Jack Styles Date: Wed, 18 Jun 2025 14:35:53 +0100 Subject: [Flang][OpenMP] Update relevant warnings to emit when OMP >= v5.2 (#144492) There has been a number of deprecation warnings that have been added to Flang, however these features are only deprecated when the OpenMP Version being used is 5.2 or later. Previously, flang did not consider the version with the warnings so would always be emitted. Flang now ensures warnings are emitted for the appropriate version of OpenMP, and tests are updated to reflect this change. 
--- flang/lib/Semantics/resolve-directives.cpp | 20 +++++++++++++------- flang/test/Semantics/OpenMP/allocate-align01.f90 | 2 +- flang/test/Semantics/OpenMP/allocate01.f90 | 2 +- flang/test/Semantics/OpenMP/allocate02.f90 | 2 -- flang/test/Semantics/OpenMP/allocate03.f90 | 1 - flang/test/Semantics/OpenMP/allocate05.f90 | 2 -- flang/test/Semantics/OpenMP/allocate06.f90 | 1 - flang/test/Semantics/OpenMP/allocate09.f90 | 5 ----- flang/test/Semantics/OpenMP/clause-validity01.f90 | 9 ++++++++- flang/test/Semantics/OpenMP/deprecation.f90 | 2 +- flang/test/Semantics/OpenMP/flush02.f90 | 1 - flang/test/Semantics/OpenMP/nested-barrier.f90 | 2 -- flang/test/Semantics/OpenMP/nested-master.f90 | 12 ------------ flang/test/Semantics/OpenMP/nested-teams.f90 | 1 - flang/test/Semantics/OpenMP/ordered-simd.f90 | 4 ---- flang/test/Semantics/OpenMP/parallel-master-goto.f90 | 1 - 16 files changed, 24 insertions(+), 43 deletions(-) diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 57db76e2160d..885c02e6ec74 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -835,8 +835,8 @@ private: void AddOmpRequiresToScope(Scope &, WithOmpDeclarative::RequiresFlags, std::optional); - void IssueNonConformanceWarning( - llvm::omp::Directive D, parser::CharBlock source); + void IssueNonConformanceWarning(llvm::omp::Directive D, + parser::CharBlock source, unsigned EmitFromVersion); void CreateImplicitSymbols(const Symbol *symbol); @@ -1668,7 +1668,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) { } if (beginDir.v == llvm::omp::Directive::OMPD_master || beginDir.v == llvm::omp::Directive::OMPD_parallel_master) - IssueNonConformanceWarning(beginDir.v, beginDir.source); + IssueNonConformanceWarning(beginDir.v, beginDir.source, 52); ClearDataSharingAttributeObjects(); ClearPrivateDataSharingAttributeObjects(); ClearAllocateNames(); @@ -1791,7 +1791,7 @@ bool 
OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { beginDir.v == llvm::omp::OMPD_parallel_master_taskloop || beginDir.v == llvm::omp::OMPD_parallel_master_taskloop_simd || beginDir.v == llvm::omp::Directive::OMPD_target_loop) - IssueNonConformanceWarning(beginDir.v, beginDir.source); + IssueNonConformanceWarning(beginDir.v, beginDir.source, 52); ClearDataSharingAttributeObjects(); SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList)); @@ -2108,7 +2108,8 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPDispatchConstruct &x) { } bool OmpAttributeVisitor::Pre(const parser::OpenMPExecutableAllocate &x) { - IssueNonConformanceWarning(llvm::omp::Directive::OMPD_allocate, x.source); + IssueNonConformanceWarning(llvm::omp::Directive::OMPD_allocate, x.source, 52); + PushContext(x.source, llvm::omp::Directive::OMPD_allocate); const auto &list{std::get>(x.t)}; if (list) { @@ -3172,11 +3173,16 @@ void OmpAttributeVisitor::AddOmpRequiresToScope(Scope &scope, } while (!scopeIter->IsGlobal()); } -void OmpAttributeVisitor::IssueNonConformanceWarning( - llvm::omp::Directive D, parser::CharBlock source) { +void OmpAttributeVisitor::IssueNonConformanceWarning(llvm::omp::Directive D, + parser::CharBlock source, unsigned EmitFromVersion) { std::string warnStr; llvm::raw_string_ostream warnStrOS(warnStr); unsigned version{context_.langOptions().OpenMPVersion}; + // We only want to emit the warning when the version being used has the + // directive deprecated + if (version < EmitFromVersion) { + return; + } warnStrOS << "OpenMP directive " << parser::ToUpperCaseLetters( llvm::omp::getOpenMPDirectiveName(D, version).str()) diff --git a/flang/test/Semantics/OpenMP/allocate-align01.f90 b/flang/test/Semantics/OpenMP/allocate-align01.f90 index 4974f5e18397..bc17d7047bbb 100644 --- a/flang/test/Semantics/OpenMP/allocate-align01.f90 +++ b/flang/test/Semantics/OpenMP/allocate-align01.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! 
RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags -fopenmp-version=51 +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags -fopenmp-version=52 ! OpenMP Version 5.2 ! The allocate clause's allocator modifier must be of type allocator_handle ! and the align modifier must be constant, positive integer expression diff --git a/flang/test/Semantics/OpenMP/allocate01.f90 b/flang/test/Semantics/OpenMP/allocate01.f90 index 8a680eee743e..b205b2c79d65 100644 --- a/flang/test/Semantics/OpenMP/allocate01.f90 +++ b/flang/test/Semantics/OpenMP/allocate01.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags -fopenmp-version=52 ! OpenMP Version 5.0 ! 2.11.3 allocate Directive ! The allocate directive must appear in the same scope as the declarations of diff --git a/flang/test/Semantics/OpenMP/allocate02.f90 b/flang/test/Semantics/OpenMP/allocate02.f90 index 80ef60b31e70..8f0579e810bb 100644 --- a/flang/test/Semantics/OpenMP/allocate02.f90 +++ b/flang/test/Semantics/OpenMP/allocate02.f90 @@ -16,11 +16,9 @@ use omp_lib !ERROR: At most one ALLOCATOR clause can appear on the ALLOCATE directive !$omp allocate(x, y) allocator(omp_default_mem_alloc) allocator(omp_default_mem_alloc) - !WARNING: OpenMP directive ALLOCATE has been deprecated, please use ALLOCATORS instead. !$omp allocate(darray) allocator(omp_default_mem_alloc) allocate ( darray(a, b) ) - !WARNING: OpenMP directive ALLOCATE has been deprecated, please use ALLOCATORS instead. 
!ERROR: At most one ALLOCATOR clause can appear on the ALLOCATE directive !$omp allocate(darray) allocator(omp_default_mem_alloc) allocator(omp_default_mem_alloc) allocate ( darray(a, b) ) diff --git a/flang/test/Semantics/OpenMP/allocate03.f90 b/flang/test/Semantics/OpenMP/allocate03.f90 index b8c6b8e5dee7..e35115f3897c 100644 --- a/flang/test/Semantics/OpenMP/allocate03.f90 +++ b/flang/test/Semantics/OpenMP/allocate03.f90 @@ -18,7 +18,6 @@ use omp_lib !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the ALLOCATE directive !$omp allocate(my_var%array) - !WARNING: OpenMP directive ALLOCATE has been deprecated, please use ALLOCATORS instead. !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the ALLOCATE directive !$omp allocate(darray, my_var%array) allocator(omp_default_mem_alloc) allocate ( darray(a, b) ) diff --git a/flang/test/Semantics/OpenMP/allocate05.f90 b/flang/test/Semantics/OpenMP/allocate05.f90 index 2c81c4dbc82c..a787e8bb32a4 100644 --- a/flang/test/Semantics/OpenMP/allocate05.f90 +++ b/flang/test/Semantics/OpenMP/allocate05.f90 @@ -13,13 +13,11 @@ use omp_lib real, dimension (:,:), allocatable :: darray !$omp target - !WARNING: OpenMP directive ALLOCATE has been deprecated, please use ALLOCATORS instead. !$omp allocate allocator(omp_default_mem_alloc) allocate ( darray(a, b) ) !$omp end target !$omp target - !WARNING: OpenMP directive ALLOCATE has been deprecated, please use ALLOCATORS instead. 
!ERROR: ALLOCATE directives that appear in a TARGET region must specify an allocator clause !$omp allocate allocate ( darray(a, b) ) diff --git a/flang/test/Semantics/OpenMP/allocate06.f90 b/flang/test/Semantics/OpenMP/allocate06.f90 index 7196bcac2b9b..e14134cd0730 100644 --- a/flang/test/Semantics/OpenMP/allocate06.f90 +++ b/flang/test/Semantics/OpenMP/allocate06.f90 @@ -14,7 +14,6 @@ use omp_lib !ERROR: List items specified in the ALLOCATE directive must not have the ALLOCATABLE attribute unless the directive is associated with an ALLOCATE statement !$omp allocate(darray) allocator(omp_default_mem_alloc) - !WARNING: OpenMP directive ALLOCATE has been deprecated, please use ALLOCATORS instead. !$omp allocate(darray) allocator(omp_default_mem_alloc) allocate(darray(a, b)) diff --git a/flang/test/Semantics/OpenMP/allocate09.f90 b/flang/test/Semantics/OpenMP/allocate09.f90 index 645e97a3a33f..0f93a340fe1e 100644 --- a/flang/test/Semantics/OpenMP/allocate09.f90 +++ b/flang/test/Semantics/OpenMP/allocate09.f90 @@ -12,28 +12,23 @@ use omp_lib integer, dimension(:), allocatable :: a, b, c, d, e, f, & g, h, i, j, k, l - !WARNING: OpenMP directive ALLOCATE has been deprecated, please use ALLOCATORS instead. !$omp allocate(a) allocator(omp_default_mem_alloc) allocate(a(1), b(2)) - !WARNING: OpenMP directive ALLOCATE has been deprecated, please use ALLOCATORS instead. !$omp allocate(c, d) allocator(omp_default_mem_alloc) allocate(c(3), d(4)) !$omp allocate(e) allocator(omp_default_mem_alloc) !$omp allocate(f, g) allocator(omp_default_mem_alloc) - !WARNING: OpenMP directive ALLOCATE has been deprecated, please use ALLOCATORS instead. !$omp allocate allocate(e(5), f(6), g(7)) - !WARNING: OpenMP directive ALLOCATE has been deprecated, please use ALLOCATORS instead. 
!ERROR: Object 'i' in ALLOCATE directive not found in corresponding ALLOCATE statement !$omp allocate(h, i) allocator(omp_default_mem_alloc) allocate(h(8)) !ERROR: Object 'j' in ALLOCATE directive not found in corresponding ALLOCATE statement !$omp allocate(j, k) allocator(omp_default_mem_alloc) - !WARNING: OpenMP directive ALLOCATE has been deprecated, please use ALLOCATORS instead. !$omp allocate(l) allocator(omp_default_mem_alloc) allocate(k(9), l(10)) diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90 index 5e0d91914c44..6989a183e83e 100644 --- a/flang/test/Semantics/OpenMP/clause-validity01.f90 +++ b/flang/test/Semantics/OpenMP/clause-validity01.f90 @@ -1,6 +1,6 @@ ! REQUIRES: openmp_runtime -! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags %openmp_module_flag -fopenmp-version=51 +! RUN: %python %S/../test_errors.py %s %flang_fc1 %openmp_flags %openmp_module_flag -fopenmp-version=52 use omp_lib ! Check OpenMP clause validity for the following directives: ! @@ -502,6 +502,7 @@ use omp_lib !$omp taskyield !$omp barrier !$omp taskwait + !WARNING: SOURCE dependence type is deprecated in OpenMP v5.2 !ERROR: The SINK and SOURCE dependence types can only be used with the ORDERED directive, used here in the TASKWAIT construct !$omp taskwait depend(source) ! !$omp taskwait depend(sink:i-1) @@ -509,12 +510,18 @@ use omp_lib ! !$omp target update from(arrayA) to(arrayB) ! !$omp target exit data map(from:arrayA) map(delete:arrayB) !$omp flush (c) + !WARNING: The syntax "FLUSH clause (object, ...)" has been deprecated, use "FLUSH(object, ...) clause" instead !$omp flush acq_rel + !WARNING: The syntax "FLUSH clause (object, ...)" has been deprecated, use "FLUSH(object, ...) clause" instead !$omp flush release + !WARNING: The syntax "FLUSH clause (object, ...)" has been deprecated, use "FLUSH(object, ...) 
clause" instead !$omp flush acquire + !WARNING: The syntax "FLUSH clause (object, ...)" has been deprecated, use "FLUSH(object, ...) clause" instead !ERROR: If memory-order-clause is RELEASE, ACQUIRE, or ACQ_REL, list items must not be specified on the FLUSH directive !$omp flush release (c) + !WARNING: The syntax "FLUSH clause (object, ...)" has been deprecated, use "FLUSH(object, ...) clause" instead !$omp flush seq_cst + !WARNING: The syntax "FLUSH clause (object, ...)" has been deprecated, use "FLUSH(object, ...) clause" instead !ERROR: RELAXED clause is not allowed on the FLUSH directive !$omp flush relaxed diff --git a/flang/test/Semantics/OpenMP/deprecation.f90 b/flang/test/Semantics/OpenMP/deprecation.f90 index e04f43026bbc..df15c3bcc0b1 100644 --- a/flang/test/Semantics/OpenMP/deprecation.f90 +++ b/flang/test/Semantics/OpenMP/deprecation.f90 @@ -1,4 +1,4 @@ -! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -Werror +! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -Werror -fopenmp-version=52 ! Check for deprecation of master directive and its combined/composite variants diff --git a/flang/test/Semantics/OpenMP/flush02.f90 b/flang/test/Semantics/OpenMP/flush02.f90 index 615332c6cf31..a7b170d58db5 100644 --- a/flang/test/Semantics/OpenMP/flush02.f90 +++ b/flang/test/Semantics/OpenMP/flush02.f90 @@ -78,7 +78,6 @@ use omp_lib !$omp parallel num_threads(4) array = (/1, 2, 3, 4, 5, 6, 7, 8, 9, 10/) - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master !$omp flush (array) !$omp end master diff --git a/flang/test/Semantics/OpenMP/nested-barrier.f90 b/flang/test/Semantics/OpenMP/nested-barrier.f90 index 5f51363d59e5..8565a09a18cd 100644 --- a/flang/test/Semantics/OpenMP/nested-barrier.f90 +++ b/flang/test/Semantics/OpenMP/nested-barrier.f90 @@ -75,7 +75,6 @@ program omp_nest_barrier end do !$omp end critical - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. 
!$omp master do i = 1, 10 k = k + 1 @@ -108,7 +107,6 @@ program omp_nest_barrier end do !$omp end ordered - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master do i = 1, 10 !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region. diff --git a/flang/test/Semantics/OpenMP/nested-master.f90 b/flang/test/Semantics/OpenMP/nested-master.f90 index d51e366eb584..7e4bb32bb7be 100644 --- a/flang/test/Semantics/OpenMP/nested-master.f90 +++ b/flang/test/Semantics/OpenMP/nested-master.f90 @@ -9,7 +9,6 @@ program omp_nest_master !$omp do do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 @@ -17,7 +16,6 @@ program omp_nest_master end do !$omp sections - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master do i = 1, 10 @@ -27,7 +25,6 @@ program omp_nest_master !$omp end sections !$omp single - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master do i = 1, 10 @@ -41,7 +38,6 @@ program omp_nest_master !$omp task do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 @@ -52,7 +48,6 @@ program omp_nest_master !$omp taskloop do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. 
!ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 @@ -63,7 +58,6 @@ program omp_nest_master !$omp target parallel do simd do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: The only OpenMP constructs that can be encountered during execution of a 'SIMD' region are the `ATOMIC` construct, the `LOOP` construct, the `SIMD` construct, the `SCAN` construct and the `ORDERED` construct with the `SIMD` clause. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master @@ -75,7 +69,6 @@ program omp_nest_master !$omp critical do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master j = j -1 !$omp end master @@ -85,7 +78,6 @@ program omp_nest_master !$omp ordered do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master j = j -1 !$omp end master @@ -99,7 +91,6 @@ program omp_nest_master !$omp distribute do k =1, 10 print *, "hello" - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master j = j -1 !$omp end master @@ -116,7 +107,6 @@ program omp_nest_master !$omp distribute do k =1, 10 print *, "hello" - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master j = j -1 !$omp end master @@ -133,7 +123,6 @@ program omp_nest_master !$omp distribute do k =1, 10 print *, "hello" - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. 
!$omp master j = j -1 @@ -151,7 +140,6 @@ program omp_nest_master !$omp distribute do k =1, 10 print *, "hello" - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 diff --git a/flang/test/Semantics/OpenMP/nested-teams.f90 b/flang/test/Semantics/OpenMP/nested-teams.f90 index 974172ee9717..3c193ee00b95 100644 --- a/flang/test/Semantics/OpenMP/nested-teams.f90 +++ b/flang/test/Semantics/OpenMP/nested-teams.f90 @@ -42,7 +42,6 @@ program main !$omp end teams end do - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master !ERROR: TEAMS region can only be strictly nested within the implicit parallel region or TARGET region !$omp teams diff --git a/flang/test/Semantics/OpenMP/ordered-simd.f90 b/flang/test/Semantics/OpenMP/ordered-simd.f90 index c90ffb3bd1c5..50560139ea24 100644 --- a/flang/test/Semantics/OpenMP/ordered-simd.f90 +++ b/flang/test/Semantics/OpenMP/ordered-simd.f90 @@ -95,7 +95,6 @@ SUBROUTINE ORDERED_BAD(N) !$OMP CRITICAL C = C - A * B - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$OMP MASTER DO I = 1,N !ERROR: `ORDERED` region may not be closely nested inside of `CRITICAL`, `ORDERED`, explicit `TASK` or `TASKLOOP` region. @@ -108,7 +107,6 @@ SUBROUTINE ORDERED_BAD(N) !$OMP ORDERED C = C - A * B - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$OMP MASTER DO I = 1,N !ERROR: `ORDERED` region may not be closely nested inside of `CRITICAL`, `ORDERED`, explicit `TASK` or `TASKLOOP` region. @@ -121,7 +119,6 @@ SUBROUTINE ORDERED_BAD(N) !$OMP TASK C = C - A * B - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. 
!$OMP MASTER DO I = 1,N @@ -136,7 +133,6 @@ SUBROUTINE ORDERED_BAD(N) !$OMP TASKLOOP DO J= 1,N C = C - A * B - !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$OMP MASTER DO I = 1,N diff --git a/flang/test/Semantics/OpenMP/parallel-master-goto.f90 b/flang/test/Semantics/OpenMP/parallel-master-goto.f90 index 72c8002ab4c5..01d14aaa46d3 100644 --- a/flang/test/Semantics/OpenMP/parallel-master-goto.f90 +++ b/flang/test/Semantics/OpenMP/parallel-master-goto.f90 @@ -7,7 +7,6 @@ do i = 1, 2 !ERROR: invalid branch leaving an OpenMP structured block goto 10 end do -!WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master 10 print *, i !$omp end master -- cgit v1.2.3 From fda6b751f1b1356e65816f85fbc5b98e78337940 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Wed, 18 Jun 2025 09:30:18 -0400 Subject: Fix libc++ restarter job. A while ago, the test workflow was updated with a new preemption regex, however it was only applied to the test job, and not the job that's actually restarting the failed libc++ test runs. This fix should correct the issue and get the restarter working again. 
--- .github/workflows/libcxx-restart-preempted-jobs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/libcxx-restart-preempted-jobs.yaml b/.github/workflows/libcxx-restart-preempted-jobs.yaml index 7b341d7f22e4..9706f0459922 100644 --- a/.github/workflows/libcxx-restart-preempted-jobs.yaml +++ b/.github/workflows/libcxx-restart-preempted-jobs.yaml @@ -33,7 +33,7 @@ jobs: with: script: | const failure_regex = /Process completed with exit code 1./ - const preemption_regex = /The runner has received a shutdown signal/ + const preemption_regex = /(The runner has received a shutdown signal)|(The operation was canceled)/ const wf_run = context.payload.workflow_run core.notice(`Running on "${wf_run.display_title}" by @${wf_run.actor.login} (event: ${wf_run.event})\nWorkflow run URL: ${wf_run.html_url}`) -- cgit v1.2.3 From bdac9580f3bc341ccbeeb743ecca656756f5aaec Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 18 Jun 2025 06:40:06 -0700 Subject: [nfc][jt] Drop `std::optional` pointers (#144548) The `std::optional` didn't add any semantics that couldn't be modeled with the pointers being `nullptr`. 
--- llvm/include/llvm/Transforms/Scalar/JumpThreading.h | 7 +++---- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 14 +++++++------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h index 75b5cf2371fd..a03a38466b27 100644 --- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -85,8 +85,8 @@ class JumpThreadingPass : public PassInfoMixin { LazyValueInfo *LVI = nullptr; AAResults *AA = nullptr; std::unique_ptr DTU; - std::optional BFI; - std::optional BPI; + BlockFrequencyInfo *BFI = nullptr; + BranchProbabilityInfo *BPI = nullptr; bool ChangedSinceLastAnalysisUpdate = false; bool HasGuards = false; #ifndef LLVM_ENABLE_ABI_BREAKING_CHECKS @@ -110,8 +110,7 @@ public: TargetLibraryInfo *TLI, TargetTransformInfo *TTI, LazyValueInfo *LVI, AAResults *AA, std::unique_ptr DTU, - std::optional BFI, - std::optional BPI); + BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI); LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 37b85bf9de81..b5dbef13289a 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -249,7 +249,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, runImpl(F, &AM, &TLI, &TTI, &LVI, &AA, std::make_unique( &DT, nullptr, DomTreeUpdater::UpdateStrategy::Lazy), - std::nullopt, std::nullopt); + nullptr, nullptr); if (!Changed) return PreservedAnalyses::all(); @@ -283,8 +283,8 @@ bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_, TargetTransformInfo *TTI_, LazyValueInfo *LVI_, AliasAnalysis *AA_, std::unique_ptr DTU_, - std::optional BFI_, - std::optional BPI_) { + BlockFrequencyInfo *BFI_, + BranchProbabilityInfo *BPI_) { LLVM_DEBUG(dbgs() << "Jump threading 
on function '" << F_.getName() << "'\n"); F = &F_; FAM = FAM_; @@ -3215,7 +3215,7 @@ BranchProbabilityInfo *JumpThreadingPass::getBPI() { assert(FAM && "Can't create BPI without FunctionAnalysisManager"); BPI = FAM->getCachedResult(*F); } - return *BPI; + return BPI; } BlockFrequencyInfo *JumpThreadingPass::getBFI() { @@ -3223,7 +3223,7 @@ BlockFrequencyInfo *JumpThreadingPass::getBFI() { assert(FAM && "Can't create BFI without FunctionAnalysisManager"); BFI = FAM->getCachedResult(*F); } - return *BFI; + return BFI; } // Important note on validity of BPI/BFI. JumpThreading tries to preserve @@ -3237,7 +3237,7 @@ BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) { if (Force) BPI = runExternalAnalysis(); - return *BPI; + return BPI; } BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) { @@ -3248,5 +3248,5 @@ BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) { if (Force) BFI = runExternalAnalysis(); - return *BFI; + return BFI; } -- cgit v1.2.3 From c5613dc8635000bc0e8396b8156d5639195776ab Mon Sep 17 00:00:00 2001 From: lorenzo chelini Date: Wed, 18 Jun 2025 15:49:00 +0200 Subject: [MLIR] Mark LLVM::FMAOp as legal (#144671) Mark LLVM::FMAOp as legal in configureGpuToNVVMConversionLegality, since we can handle intrinsic lowering in the NVPTX backend and emit fma.rn.f32. 
--- mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 8 ++++---- mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 2 +- mlir/test/Integration/GPU/CUDA/dump-ptx.mlir | 14 +++++++++++++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 958d0d085fce..cef250232daf 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -429,10 +429,10 @@ void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) { target.addLegalDialect<::mlir::NVVM::NVVMDialect>(); target.addIllegalDialect(); target.addIllegalOp(); + LLVM::FAbsOp, LLVM::FCeilOp, LLVM::FFloorOp, LLVM::FRemOp, + LLVM::LogOp, LLVM::Log10Op, LLVM::Log2Op, LLVM::PowOp, + LLVM::RoundEvenOp, LLVM::RoundOp, LLVM::SinOp, + LLVM::SqrtOp>(); // TODO: Remove once we support replacing non-root ops. target.addLegalOp(); diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index 6d4555e815b6..ef06af3ad316 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -1027,7 +1027,7 @@ module attributes {transform.with_named_sequence} { legal_ops = ["func.func", "gpu.module", "gpu.yield"], illegal_dialects = ["gpu"], illegal_ops = ["llvm.copysign", "llvm.cos", "llvm.exp", "llvm.exp2", "llvm.fabs", "llvm.fceil", - "llvm.ffloor", "llvm.fma", "llvm.frem", "llvm.log", "llvm.log10", "llvm.log2", "llvm.pow", + "llvm.ffloor", "llvm.frem", "llvm.log", "llvm.log10", "llvm.log2", "llvm.pow", "llvm.roundeven", "llvm.round", "llvm.sin", "llvm.sqrt"], partial_conversion } : !transform.any_op diff --git a/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir b/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir index 0cc5d8645bb3..27ec1ec435fe 100644 --- a/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir +++ 
b/mlir/test/Integration/GPU/CUDA/dump-ptx.mlir @@ -2,7 +2,7 @@ // RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=serialize-to-isa \ // RUN: 2>&1 | FileCheck %s -// CHECK: Generated by LLVM NVPTX Back-End +// CHECK-LABEL: Generated by LLVM NVPTX Back-End // CHECK: .visible .func kernel_a() // CHECK: ret; gpu.module @bar { @@ -11,3 +11,15 @@ gpu.module @bar { llvm.return } } + +// CHECK-LABEL: Generated by LLVM NVPTX Back-End +// CHECK: .visible .func ({{.+}}) fma( +// CHECK: fma.rn.f32 + +gpu.module @foo { + llvm.func @fma(%arg0: f32, %arg1: f32) -> f32 + attributes { gpu.kernel } { + %res = llvm.intr.fma (%arg0, %arg1, %arg1) : (f32, f32, f32) -> f32 + llvm.return %res : f32 + } +} -- cgit v1.2.3 From 1d6f1029f7e8cf5468309078da3e85201844b625 Mon Sep 17 00:00:00 2001 From: Sergei Lebedev <185856+superbobry@users.noreply.github.com> Date: Wed, 18 Jun 2025 14:53:20 +0100 Subject: [mlir] [python] Fixed the return type of `MemRefType.get_strides_and_offset` (#144523) Previously, the return type for `offset` was `list[int]`, which clearly is not right. --- mlir/python/mlir/_mlir_libs/_mlir/ir.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi b/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi index 6c5f91d757cd..70bca3c75d84 100644 --- a/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi +++ b/mlir/python/mlir/_mlir_libs/_mlir/ir.pyi @@ -2119,7 +2119,7 @@ class MemRefType(ShapedType): """ @property def typeid(self) -> TypeID: ... - def get_strides_and_offset(self) -> tuple[list[int], list[int]]: + def get_strides_and_offset(self) -> tuple[list[int], int]: """ The strides and offset of the MemRef type. 
""" -- cgit v1.2.3 From 9db7502d229b48817521429c2a5d3fb84543fdf9 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 18 Jun 2025 15:55:06 +0200 Subject: [libc++] Move __has_iterator_typedefs to the up-to-C++17 implementation of iterator_traits (#144265) `__has_iterator_typedefs` is only used in the up-to-C++17 implementation of `iterator_traits`. To make that clearer the struct is moved into that code block. --- libcxx/include/__iterator/iterator_traits.h | 34 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/libcxx/include/__iterator/iterator_traits.h b/libcxx/include/__iterator/iterator_traits.h index 221d36614db0..f727e8ff36df 100644 --- a/libcxx/include/__iterator/iterator_traits.h +++ b/libcxx/include/__iterator/iterator_traits.h @@ -71,23 +71,6 @@ struct random_access_iterator_tag : public bidirectional_iterator_tag {}; struct contiguous_iterator_tag : public random_access_iterator_tag {}; #endif -template -struct __has_iterator_typedefs { -private: - template - static false_type __test(...); - template - static true_type - __test(__void_t* = nullptr, - __void_t* = nullptr, - __void_t* = nullptr, - __void_t* = nullptr, - __void_t* = nullptr); - -public: - static const bool value = decltype(__test<_Tp>(nullptr, nullptr, nullptr, nullptr, nullptr))::value; -}; - #if _LIBCPP_STD_VER >= 20 // The `cpp17-*-iterator` exposition-only concepts have very similar names to the `Cpp17*Iterator` named requirements @@ -322,6 +305,23 @@ struct __iterator_traits<_Iter, true> is_convertible::value || is_convertible::value > {}; +template +struct __has_iterator_typedefs { +private: + template + static false_type __test(...); + template + static true_type + __test(__void_t* = nullptr, + __void_t* = nullptr, + __void_t* = nullptr, + __void_t* = nullptr, + __void_t* = nullptr); + +public: + static const bool value = decltype(__test<_Tp>(nullptr, nullptr, nullptr, nullptr, nullptr))::value; +}; + // iterator_traits will only have the 
nested types if Iterator::iterator_category // exists. Else iterator_traits will be an empty class. This is a // conforming extension which allows some programs to compile and behave as -- cgit v1.2.3 From 40d2f392106f43a60eea79f433b47a5ce44fc4a4 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Wed, 18 Jun 2025 07:08:32 -0700 Subject: [Sema][ObjC] Loosen restrictions on reinterpret_cast involving indirect ARC-managed pointers (#144458) Allow using reinterpret_cast for conversions between indirect ARC pointers and other pointer types. rdar://152905399 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Sema/SemaObjC.h | 3 ++- clang/lib/Sema/SemaCast.cpp | 11 +++++++---- clang/lib/Sema/SemaExprObjC.cpp | 12 ++++++++---- clang/test/SemaObjCXX/arc-type-conversion.mm | 23 +++++++++++++++++++++-- 5 files changed, 41 insertions(+), 11 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 12816eed2e8b..18234188101f 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -327,6 +327,9 @@ Non-comprehensive list of changes in this release ``__reference_constructs_from_temporary`` should be used instead. (#GH44056) - Added `__builtin_get_vtable_pointer` to directly load the primary vtable pointer from a polymorphic object. +- Clang no longer rejects reinterpret_cast conversions between indirect + ARC-managed pointers and other pointer types. The prior behavior was overly + strict and inconsistent with the ARC specification. 
New Compiler Flags ------------------ diff --git a/clang/include/clang/Sema/SemaObjC.h b/clang/include/clang/Sema/SemaObjC.h index b629c6d29140..ed08ff0acf89 100644 --- a/clang/include/clang/Sema/SemaObjC.h +++ b/clang/include/clang/Sema/SemaObjC.h @@ -812,7 +812,8 @@ public: CheckedConversionKind CCK, bool Diagnose = true, bool DiagnoseCFAudited = false, - BinaryOperatorKind Opc = BO_PtrMemD); + BinaryOperatorKind Opc = BO_PtrMemD, + bool IsReinterpretCast = false); Expr *stripARCUnbridgedCast(Expr *e); void diagnoseARCUnbridgedCast(Expr *e); diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 14e16bc39eb3..e15a43c11651 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -161,12 +161,14 @@ namespace { Self.CheckCastAlign(SrcExpr.get(), DestType, OpRange); } - void checkObjCConversion(CheckedConversionKind CCK) { + void checkObjCConversion(CheckedConversionKind CCK, + bool IsReinterpretCast = false) { assert(Self.getLangOpts().allowsNonTrivialObjCLifetimeQualifiers()); Expr *src = SrcExpr.get(); - if (Self.ObjC().CheckObjCConversion(OpRange, DestType, src, CCK) == - SemaObjC::ACR_unbridged) + if (Self.ObjC().CheckObjCConversion( + OpRange, DestType, src, CCK, true, false, BO_PtrMemD, + IsReinterpretCast) == SemaObjC::ACR_unbridged) IsARCUnbridgedCast = true; SrcExpr = src; } @@ -1263,7 +1265,8 @@ void CastOperation::CheckReinterpretCast() { if (isValidCast(tcr)) { if (Self.getLangOpts().allowsNonTrivialObjCLifetimeQualifiers()) - checkObjCConversion(CheckedConversionKind::OtherCast); + checkObjCConversion(CheckedConversionKind::OtherCast, + /*IsReinterpretCast=*/true); DiagnoseReinterpretUpDownCast(Self, SrcExpr.get(), DestType, OpRange); if (unsigned DiagID = checkCastFunctionType(Self, SrcExpr, DestType)) diff --git a/clang/lib/Sema/SemaExprObjC.cpp b/clang/lib/Sema/SemaExprObjC.cpp index 3505d9f38d23..395f2f340dbd 100644 --- a/clang/lib/Sema/SemaExprObjC.cpp +++ b/clang/lib/Sema/SemaExprObjC.cpp @@ -4390,7 
+4390,7 @@ SemaObjC::ARCConversionResult SemaObjC::CheckObjCConversion(SourceRange castRange, QualType castType, Expr *&castExpr, CheckedConversionKind CCK, bool Diagnose, bool DiagnoseCFAudited, - BinaryOperatorKind Opc) { + BinaryOperatorKind Opc, bool IsReinterpretCast) { ASTContext &Context = getASTContext(); QualType castExprType = castExpr->getType(); @@ -4450,13 +4450,17 @@ SemaObjC::CheckObjCConversion(SourceRange castRange, QualType castType, // must be explicit. // Allow conversions between pointers to lifetime types and coreFoundation // pointers too, but only when the conversions are explicit. + // Allow conversions requested with a reinterpret_cast that converts an + // expression of type T* to type U*. if (exprACTC == ACTC_indirectRetainable && (castACTC == ACTC_voidPtr || - (castACTC == ACTC_coreFoundation && SemaRef.isCast(CCK)))) + (castACTC == ACTC_coreFoundation && SemaRef.isCast(CCK)) || + (IsReinterpretCast && effCastType->isAnyPointerType()))) return ACR_okay; if (castACTC == ACTC_indirectRetainable && - (exprACTC == ACTC_voidPtr || exprACTC == ACTC_coreFoundation) && - SemaRef.isCast(CCK)) + (((exprACTC == ACTC_voidPtr || exprACTC == ACTC_coreFoundation) && + SemaRef.isCast(CCK)) || + (IsReinterpretCast && castExprType->isAnyPointerType()))) return ACR_okay; switch (ARCCastChecker(Context, exprACTC, castACTC, false).Visit(castExpr)) { diff --git a/clang/test/SemaObjCXX/arc-type-conversion.mm b/clang/test/SemaObjCXX/arc-type-conversion.mm index 64cfd02ec18c..0d281bf3e5c4 100644 --- a/clang/test/SemaObjCXX/arc-type-conversion.mm +++ b/clang/test/SemaObjCXX/arc-type-conversion.mm @@ -1,5 +1,8 @@ // RUN: %clang_cc1 -fobjc-runtime-has-weak -fsyntax-only -fobjc-arc -verify -fblocks %s +@class NSString; +typedef unsigned __INTPTR_TYPE__ uintptr_t; + void * cvt(id arg) // expected-note{{candidate function not viable: cannot convert argument of incomplete type 'void *' to '__strong id'}} { void* voidp_val; @@ -72,6 +75,24 @@ void 
test_reinterpret_cast(__strong id *sip, __weak id *wip, (void)reinterpret_cast<__weak id *>(cwip); // expected-error{{reinterpret_cast from '__weak id const *' to '__weak id *' casts away qualifiers}} (void)reinterpret_cast<__weak id *>(csip); // expected-error{{reinterpret_cast from '__strong id const *' to '__weak id *' casts away qualifiers}} (void)reinterpret_cast<__strong id *>(cwip); // expected-error{{reinterpret_cast from '__weak id const *' to '__strong id *' casts away qualifiers}} + + auto *ul = reinterpret_cast(sip); + (void)reinterpret_cast<__strong id *>(ul); + auto *wp = reinterpret_cast<__weak NSString *>(sip); + (void)reinterpret_cast<__strong id *>(wp); + (void)reinterpret_cast(csip); // expected-error {{reinterpret_cast from '__strong id const *' to 'unsigned long *' casts away qualifiers}} + (void)reinterpret_cast(csip); + const unsigned long *cul = nullptr; + (void)reinterpret_cast<__strong id *>(cul); // expected-error {{reinterpret_cast from 'const unsigned long *' to '__strong id *' casts away qualifiers}} + (void)reinterpret_cast(cul); + volatile __strong id *vsip = nullptr; + (void)reinterpret_cast(vsip); // expected-error {{reinterpret_cast from '__strong id volatile *' to 'unsigned long *' casts away qualifiers}} + (void)reinterpret_cast(vsip); + volatile unsigned long *vul = nullptr; + (void)reinterpret_cast<__strong id *>(vul); // expected-error {{reinterpret_cast from 'volatile unsigned long *' to '__strong id *' casts away qualifiers}} + (void)reinterpret_cast(vul); + auto uip = reinterpret_cast(sip); + (void)reinterpret_cast<__strong id *>(uip); // expected-error {{to '__strong id *' is disallowed with ARC}} } void test_cstyle_cast(__strong id *sip, __weak id *wip, @@ -194,8 +215,6 @@ typedef void (^Block)(); typedef void (^Block_strong)() __strong; typedef void (^Block_autoreleasing)() __autoreleasing; -@class NSString; - void ownership_transfer_in_cast(void *vp, Block *pblk) { __strong NSString **sip2 = 
static_cast(static_cast<__strong id *>(vp)); __strong NSString **&si2pref = static_cast(sip2); -- cgit v1.2.3 From ee070d08163ac09842d9bf0c1315f311df39faf1 Mon Sep 17 00:00:00 2001 From: Andrei Golubev Date: Wed, 18 Jun 2025 16:18:12 +0200 Subject: [mlir][bufferization] Support custom types (1/N) (#142986) Following the addition of TensorLike and BufferLike type interfaces (see 00eaff3e9c897c263a879416d0f151d7ca7eeaff), introduce minimal changes required to bufferize a custom tensor operation into a custom buffer operation. To achieve this, new interface methods are added to TensorLike type interface that abstract away the differences between existing (tensor -> memref) and custom conversions. The scope of the changes is intentionally limited (for example, BufferizableOpInterface is untouched) in order to first understand the basics and reach consensus design-wise. --- Notable changes: * mlir::bufferization::getBufferType() returns BufferLikeType (instead of BaseMemRefType) * ToTensorOp / ToBufferOp operate on TensorLikeType / BufferLikeType. 
Operation argument "memref" renamed to "buffer" * ToTensorOp's tensor type inferring builder is dropped (users now need to provide the tensor type explicitly) --- .../Bufferization/IR/BufferizableOpInterface.h | 18 +++++- .../Dialect/Bufferization/IR/BufferizationOps.td | 59 ++++++++++---------- .../Bufferization/IR/BufferizationTypeInterfaces.h | 7 +++ .../IR/BufferizationTypeInterfaces.td | 26 ++++++++- .../Bufferization/IR/UnstructuredControlFlow.h | 5 +- .../Transforms/BufferizableOpInterfaceImpl.cpp | 14 +++-- .../Bufferization/IR/BufferizableOpInterface.cpp | 65 +++++++++++++--------- .../Bufferization/IR/BufferizationDialect.cpp | 32 ++++++++++- .../Dialect/Bufferization/IR/BufferizationOps.cpp | 26 +++++---- .../IR/BufferizationTypeInterfaces.cpp | 21 +++++++ mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt | 1 + .../Dialect/Bufferization/Transforms/Bufferize.cpp | 8 +-- .../Transforms/FuncBufferizableOpInterfaceImpl.cpp | 8 +-- .../Transforms/ConvertToDestinationStyle.cpp | 9 ++- .../SCF/Transforms/BufferizableOpInterfaceImpl.cpp | 51 +++++++++-------- .../Transforms/BufferizableOpInterfaceImpl.cpp | 2 +- .../SparseTensor/Transforms/SparseGPUCodegen.cpp | 15 +++-- .../Transforms/SparseTensorCodegen.cpp | 3 +- .../Transforms/SparseTensorConversion.cpp | 4 +- .../SparseTensor/Transforms/Utils/CodegenUtils.cpp | 4 +- .../Transforms/BufferizableOpInterfaceImpl.cpp | 14 +++-- .../Transforms/one-shot-bufferize.mlir | 21 ++++++- mlir/test/lib/Dialect/Test/TestOpDefs.cpp | 23 ++++++++ mlir/test/lib/Dialect/Test/TestOps.h | 1 + mlir/test/lib/Dialect/Test/TestOps.td | 58 ++++++++++++++++++- mlir/test/lib/Dialect/Test/TestTypeDefs.td | 9 +++ mlir/test/lib/Dialect/Test/TestTypes.cpp | 20 +++++++ 27 files changed, 389 insertions(+), 135 deletions(-) create mode 100644 mlir/lib/Dialect/Bufferization/IR/BufferizationTypeInterfaces.cpp diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h 
b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index 2fb795f16ae2..c1529a36465a 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -17,6 +17,7 @@ #include #include "mlir/Dialect/Bufferization/IR/BufferizationEnums.h.inc" +#include "mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.h" namespace mlir { class OpBuilder; @@ -615,7 +616,7 @@ FailureOr getBuffer(RewriterBase &rewriter, Value value, /// IR, this function can be used. /// /// This function is a wrapper around BufferizableOpInterface::getBufferType. -FailureOr getBufferType(Value value, +FailureOr getBufferType(Value value, const BufferizationOptions &options, const BufferizationState &state); @@ -629,7 +630,7 @@ FailureOr getBufferType(Value value, /// IR, this function can be used. /// /// This function is a wrapper around `BufferizableOpInterface::getBufferType`. -FailureOr getBufferType(Value value, +FailureOr getBufferType(Value value, const BufferizationOptions &options, const BufferizationState &state, SmallVector &invocationStack); @@ -739,6 +740,19 @@ AliasingValueList unknownGetAliasingValues(OpOperand &opOperand); /// This is the default implementation of /// BufferizableOpInterface::hasTensorSemantics bool defaultHasTensorSemantics(Operation *op); + +/// This is a helper function used when buffer type is guaranteed to be memref. +/// It performs two actions: failure state checking and an explicit llvm::cast<> +/// from the buffer-like type interface to a BaseMemRefType. This allows easier +/// management of differences in C++ types at the API boundaries. Valid buffer +/// type is casted to the memref type. Otherwise, the failure state is +/// propagated i.e. asMemRefType(mlir::failure()) returns mlir::failure(). 
+FailureOr asMemRefType(FailureOr bufferType); + +/// This function is a free-standing helper that relies on +/// bufferization::TensorLikeTypeInterface to verify the types in tensor and +/// buffer worlds match. +bool typesMatchAfterBufferization(Operation &op, Value tensor, Value buffer); } // namespace detail } // namespace bufferization diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td index 6051aea84997..32c53ea9c494 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td @@ -12,6 +12,7 @@ include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.td" include "mlir/Dialect/Bufferization/IR/BufferViewFlowOpInterface.td" include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td" +include "mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.td" include "mlir/Dialect/Bufferization/IR/BufferizationBase.td" include "mlir/Interfaces/DestinationStyleOpInterface.td" include "mlir/Interfaces/InferTypeOpInterface.td" @@ -386,20 +387,31 @@ def Bufferization_DeallocTensorOp : Bufferization_Op<"dealloc_tensor", // ToTensorOp //===----------------------------------------------------------------------===// +class Bufferization_TensorAndBufferMatch : PredOpTrait< + "specified tensor and buffer types match", + CPred< + "::mlir::bufferization::detail::typesMatchAfterBufferization(" + "$_op, $" # tensor # ", $" # buffer #")" + > +>; + def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor", [ BufferizableOpInterface, SameOperandsAndResultShape, SameOperandsAndResultElementType, - AllElementTypesMatch<["memref", "result"]> + Bufferization_TensorAndBufferMatch<"result", "buffer"> ]> { - let summary = "create a tensor from a `memref`"; + let summary = "create a buffer-like type from a tensor-like type"; let description = [{ - An operation that creates a tensor from a `memref`. 
The result value is a - tensor whose shape and element type match the memref operand. + An operation that creates a tensor from a buffer. The result value is a + tensor-like type that must match the corresponding buffer-like operand as + per TensorLikeType::verifyCompatibleBufferType(). For builtins (TensorType + and BaseMemRefType), this means that shapes and element types match between + the tensor and the buffer. The opposite of this op is `to_buffer`. Together, these two ops are useful for source/target materializations when doing type conversions - involving tensors and memrefs. + involving tensors and buffers. Example: @@ -441,19 +453,16 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor", [ away. However, such IR is no longer bufferizable with One-Shot Bufferize. }]; - let arguments = (ins Arg]>:$memref, + [MemReadAt<0, FullEffect>]>:$buffer, UnitAttr:$restrict, UnitAttr:$writable); - let results = (outs AnyTensor:$result); + let results = (outs Bufferization_TensorLikeTypeInterface:$result); let extraClassDeclaration = [{ /// The result of a to_tensor is always a tensor. - TensorType getType() { - Type resultType = getResult().getType(); - if (::llvm::isa(resultType)) - return ::llvm::cast(resultType); - return {}; + ::mlir::bufferization::TensorLikeType getType() { + return getResult().getType(); } //===------------------------------------------------------------------===// @@ -472,22 +481,15 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor", [ FailureOr getBufferType( Value value, const BufferizationOptions &options, const BufferizationState &state, SmallVector &invocationStack) { - return ::llvm::cast(getMemref().getType()); + return ::llvm::cast(getBuffer().getType()); } }]; let assemblyFormat = [{ - $memref (`restrict` $restrict^)? (`writable` $writable^)? attr-dict - `:` type($memref) `to` type($result) + $buffer (`restrict` $restrict^)? (`writable` $writable^)? 
attr-dict + `:` type($buffer) `to` type($result) }]; - let builders = [ - OpBuilder<(ins "Value":$memref, CArg<"bool", "false">:$restrict, CArg<"bool", "false">:$writeable), [{ - auto rtt = memref::getTensorTypeFromMemRefType(memref.getType()); - build($_builder, $_state, rtt, memref, restrict, writeable); - }]> - ]; - let hasCanonicalizer = 1; let hasFolder = 1; } @@ -502,10 +504,9 @@ def Bufferization_ToBufferOp : Bufferization_Op<"to_buffer", [ SameOperandsAndResultShape, SameOperandsAndResultElementType, Pure, - AllShapesMatch<["memref", "tensor"]>, - AllElementTypesMatch<["memref", "tensor"]> + Bufferization_TensorAndBufferMatch<"tensor", "buffer"> ]> { - let summary = "cast a tensor to memref"; + let summary = "cast a tensor-like type to buffer-like type"; let description = [{ An operation that returns the future buffer of a `tensor`. @@ -523,8 +524,8 @@ def Bufferization_ToBufferOp : Bufferization_Op<"to_buffer", [ the returned buffer) will not be written to. }]; - let arguments = (ins AnyTensor:$tensor, UnitAttr:$read_only); - let results = (outs AnyRankedOrUnrankedMemRef:$memref); + let arguments = (ins Bufferization_TensorLikeTypeInterface:$tensor, UnitAttr:$read_only); + let results = (outs Bufferization_BufferLikeTypeInterface:$buffer); let extraClassDeclaration = [{ //===------------------------------------------------------------------===// @@ -559,7 +560,7 @@ def Bufferization_ToBufferOp : Bufferization_Op<"to_buffer", [ }]; let assemblyFormat = [{ - $tensor (`read_only` $read_only^)? attr-dict `:` type($tensor) `to` type($memref) + $tensor (`read_only` $read_only^)? 
attr-dict `:` type($tensor) `to` type($buffer) }]; let hasFolder = 1; diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.h index 5faa1479ee54..cbb6054fcf88 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.h @@ -13,8 +13,15 @@ // Bufferization Type Interfaces //===----------------------------------------------------------------------===// +#include "mlir/IR/Diagnostics.h" #include "mlir/IR/Types.h" +namespace mlir::bufferization { +struct BufferizationOptions; +class BufferizationState; +class BufferLikeType; +} // namespace mlir::bufferization + #include "mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.h.inc" #endif // MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZATIONTYPEINTERFACES_H_ diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.td index f19224a29564..fb6fc4f5ad96 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.td +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.td @@ -21,10 +21,30 @@ def Bufferization_TensorLikeTypeInterface let description = [{ Indicates that this type is a tensor type (similarly to a MLIR builtin tensor) for bufferization purposes. - - The interface currently has no methods as it is used by types to opt into - being supported by the bufferization procedures. }]; + + let methods = [ + InterfaceMethod<[{ + Returns a BufferLike type for this TensorLike type. 
+ }], + /*retTy=*/"::mlir::FailureOr<::mlir::bufferization::BufferLikeType>", + /*methodName=*/"getBufferType", + /*args=*/(ins + "const ::mlir::bufferization::BufferizationOptions &":$options, + "::llvm::function_ref<::mlir::InFlightDiagnostic()>":$emitError + ) + >, + InterfaceMethod<[{ + Returns whether a BufferLike type is compatible to this TensorLike type. + The BufferLike type is assumed to be created by getBufferType(). + }], + /*retTy=*/"::mlir::LogicalResult", + /*methodName=*/"verifyCompatibleBufferType", + /*args=*/(ins + "::mlir::bufferization::BufferLikeType":$bufferType, + "::llvm::function_ref<::mlir::InFlightDiagnostic()>":$emitError) + > + ]; } def Bufferization_BufferLikeTypeInterface diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/UnstructuredControlFlow.h b/mlir/include/mlir/Dialect/Bufferization/IR/UnstructuredControlFlow.h index a441b8b66659..f56c10555f02 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/UnstructuredControlFlow.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/UnstructuredControlFlow.h @@ -65,12 +65,13 @@ struct OpWithUnstructuredControlFlowBufferizableOpInterfaceExternalModel // The operand was already bufferized. Take its type directly. 
callerType = memrefType; } else { - FailureOr maybeCallerType = + FailureOr maybeCallerType = bufferization::getBufferType(opOperand->get(), options, state, invocationStack); if (failed(maybeCallerType)) return failure(); - callerType = *maybeCallerType; + assert(isa(*maybeCallerType) && "expected memref type"); + callerType = cast(*maybeCallerType); } if (!bufferType) { diff --git a/mlir/lib/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.cpp index a57d58ab28d2..85d1b5ac73bf 100644 --- a/mlir/lib/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.cpp @@ -164,8 +164,8 @@ struct SelectOpInterface // buffers have different types, they differ only in their layout map. Cast // both of them to the most dynamic MemRef type. if (trueBuffer.getType() != falseBuffer.getType()) { - auto targetType = - bufferization::getBufferType(selectOp.getResult(), options, state); + auto targetType = bufferization::detail::asMemRefType( + bufferization::getBufferType(selectOp.getResult(), options, state)); if (failed(targetType)) return failure(); if (trueBuffer.getType() != *targetType) @@ -187,10 +187,12 @@ struct SelectOpInterface SmallVector &invocationStack) const { auto selectOp = cast(op); assert(value == selectOp.getResult() && "invalid value"); - auto trueType = bufferization::getBufferType( - selectOp.getTrueValue(), options, state, invocationStack); - auto falseType = bufferization::getBufferType( - selectOp.getFalseValue(), options, state, invocationStack); + auto trueType = + bufferization::detail::asMemRefType(bufferization::getBufferType( + selectOp.getTrueValue(), options, state, invocationStack)); + auto falseType = + bufferization::detail::asMemRefType(bufferization::getBufferType( + selectOp.getFalseValue(), options, state, invocationStack)); if (failed(trueType) || failed(falseType)) return failure(); if (*trueType == 
*falseType) diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index dd43647682ea..2ab182c9b7b2 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -171,7 +171,9 @@ FailureOr bufferization::allocateTensorForShapedValue( if (llvm::isa(shapedValue.getType())) { tensor = shapedValue; } else if (llvm::isa(shapedValue.getType())) { - tensor = b.create(loc, shapedValue); + tensor = b.create( + loc, memref::getTensorTypeFromMemRefType(shapedValue.getType()), + shapedValue); } else if (llvm::isa(shapedValue.getType()) || llvm::isa(shapedValue.getType())) { return getOwnerOfValue(shapedValue) @@ -211,8 +213,8 @@ FailureOr bufferization::allocateTensorForShapedValue( // Add 'memory_space' attribute. Not needed if 'copy' operand is specified. if (copy) return allocTensorOp.getResult(); - FailureOr copyBufferType = - getBufferType(tensor, options, state); + auto copyBufferType = + detail::asMemRefType(getBufferType(tensor, options, state)); if (failed(copyBufferType)) return failure(); std::optional memorySpace = copyBufferType->getMemorySpace(); @@ -672,28 +674,28 @@ FailureOr bufferization::getBuffer(RewriterBase &rewriter, Value value, const BufferizationOptions &options, const BufferizationState &state) { #ifndef NDEBUG - auto tensorType = llvm::dyn_cast(value.getType()); + auto tensorType = llvm::dyn_cast(value.getType()); assert(tensorType && "unexpected non-tensor type"); #endif // NDEBUG // Replace "%t = to_tensor %m" with %m. if (auto toTensorOp = value.getDefiningOp()) - return toTensorOp.getMemref(); + return toTensorOp.getBuffer(); // Insert to_buffer op. 
OpBuilder::InsertionGuard g(rewriter); setInsertionPointAfter(rewriter, value); - FailureOr memrefType = getBufferType(value, options, state); - if (failed(memrefType)) + FailureOr bufferType = getBufferType(value, options, state); + if (failed(bufferType)) return failure(); - ensureToBufferOpIsValid(value, *memrefType); + ensureToBufferOpIsValid(value, *bufferType); return rewriter - .create(value.getLoc(), *memrefType, value) + .create(value.getLoc(), *bufferType, value) .getResult(); } /// Return the buffer type for a given Value (tensor) after bufferization. -FailureOr +FailureOr bufferization::getBufferType(Value value, const BufferizationOptions &options, const BufferizationState &state) { SmallVector invocationStack; @@ -701,11 +703,11 @@ bufferization::getBufferType(Value value, const BufferizationOptions &options, } /// Return the buffer type for a given Value (tensor) after bufferization. -FailureOr +FailureOr bufferization::getBufferType(Value value, const BufferizationOptions &options, const BufferizationState &state, SmallVector &invocationStack) { - assert(llvm::isa(value.getType()) && + assert(llvm::isa(value.getType()) && "unexpected non-tensor type"); invocationStack.push_back(value); auto popFromStack = @@ -718,13 +720,9 @@ bufferization::getBufferType(Value value, const BufferizationOptions &options, return bufferizableOp.getBufferType(value, options, state, invocationStack); // Op is not bufferizable. 
- auto memSpace = - options.defaultMemorySpaceFn(cast(value.getType())); - if (!memSpace.has_value()) - return op->emitError("could not infer memory space"); - - return getMemRefType(cast(value.getType()), options, - /*layout=*/{}, *memSpace); + return cast(value.getType()).getBufferType(options, [&]() { + return op->emitError(); + }); } bool bufferization::hasTensorSemantics(Operation *op) { @@ -744,12 +742,11 @@ void bufferization::replaceOpWithBufferizedValues(RewriterBase &rewriter, SmallVector replacements; for (OpResult opResult : op->getOpResults()) { Value replacement = values[opResult.getResultNumber()]; - if (llvm::isa(opResult.getType())) { + if (llvm::isa(opResult.getType())) { // The OpResult is a tensor. Such values are replaced with memrefs during // bufferization. - assert((llvm::isa(replacement.getType()) || - llvm::isa(replacement.getType())) && - "tensor op result should be replaced with a memref value"); + assert(llvm::isa(replacement.getType()) && + "tensor op result should be replaced with a buffer value"); // The existing uses of the OpResult still expect a tensor. Insert a // ToTensorOp. Throughout bufferization, this ToTensorOp will gradually // loose all of its users and eventually DCE away. @@ -969,8 +966,8 @@ FailureOr bufferization::detail::defaultGetBufferType( // If the OpResult has an equivalent OpOperand, both OpResult and // OpOperand bufferize to the exact same buffer type. 
Value equivalentOperand = aliases.getAliases().front().opOperand->get(); - return getBufferType(equivalentOperand, options, bufferizationState, - invocationStack); + return asMemRefType(getBufferType(equivalentOperand, options, + bufferizationState, invocationStack)); } // If we do not know the memory space and there is no default memory space, @@ -1030,7 +1027,7 @@ bufferization::detail::unknownGetAliasingValues(OpOperand &opOperand) { } bool bufferization::detail::defaultHasTensorSemantics(Operation *op) { - auto isaTensor = [](Type t) { return isa(t); }; + auto isaTensor = [](Type t) { return isa(t); }; bool hasTensorBlockArgument = any_of(op->getRegions(), [&](Region &r) { return any_of(r.getBlocks(), [&](Block &b) { return any_of(b.getArguments(), [&](BlockArgument bbArg) { @@ -1045,3 +1042,19 @@ bool bufferization::detail::defaultHasTensorSemantics(Operation *op) { return true; return any_of(op->getOperandTypes(), isaTensor); } + +FailureOr +bufferization::detail::asMemRefType(FailureOr bufferType) { + if (failed(bufferType)) + return failure(); + return cast(*bufferType); +} + +bool bufferization::detail::typesMatchAfterBufferization(Operation &op, + Value tensor, + Value buffer) { + return mlir::succeeded( + cast(tensor.getType()) + .verifyCompatibleBufferType(cast(buffer.getType()), + [&]() { return op.emitError(); })); +} diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp index d8eac01c2dea..6c08cdfb669f 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp @@ -57,7 +57,37 @@ struct BufferizationInlinerInterface : public DialectInlinerInterface { template struct BuiltinTensorExternalModel : TensorLikeType::ExternalModel, - Tensor> {}; + Tensor> { + llvm::FailureOr getBufferType( + mlir::Type tensor, const BufferizationOptions &options, + llvm::function_ref emitError) const { + auto tensorType 
= cast(tensor); + auto memSpace = options.defaultMemorySpaceFn(tensorType); + if (!memSpace.has_value()) + return emitError() << "could not infer memory space"; + + return cast( + getMemRefType(tensorType, options, /*layout=*/{}, *memSpace)); + } + + mlir::LogicalResult verifyCompatibleBufferType( + mlir::Type tensor, BufferLikeType bufferType, + llvm::function_ref emitError) const { + assert(isa(tensor) && "expected tensor type"); + assert(isa(bufferType) && "expected memref type"); + + auto tensorType = cast(tensor); + auto memrefType = cast(bufferType); + + if (tensorType.getShape() != memrefType.getShape()) + return emitError() << "shapes do not match"; + + if (tensorType.getElementType() != memrefType.getElementType()) + return emitError() << "element types do not match"; + + return mlir::success(); + } +}; template struct BuiltinMemRefExternalModel diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp index dc54ac94aed3..9bd87d66c7d3 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp @@ -90,12 +90,12 @@ LogicalResult mlir::bufferization::foldToBufferToTensorPair( if (!bufferToTensor) return failure(); - Type srcType = bufferToTensor.getMemref().getType(); + Type srcType = bufferToTensor.getBuffer().getType(); Type destType = toBuffer.getType(); // Directly rewrite if the type did not change. if (srcType == destType) { - rewriter.replaceOp(toBuffer, bufferToTensor.getMemref()); + rewriter.replaceOp(toBuffer, bufferToTensor.getBuffer()); return success(); } @@ -106,7 +106,7 @@ LogicalResult mlir::bufferization::foldToBufferToTensorPair( // Ranked memref -> Ranked memref cast. 
if (rankedSrcType && rankedDestType) { FailureOr replacement = castOrReallocMemRefValue( - rewriter, bufferToTensor.getMemref(), rankedDestType, options); + rewriter, bufferToTensor.getBuffer(), rankedDestType, options); if (failed(replacement)) return failure(); @@ -124,7 +124,7 @@ LogicalResult mlir::bufferization::foldToBufferToTensorPair( assert(memref::CastOp::areCastCompatible(srcType, destType) && "expected that types are cast compatible"); rewriter.replaceOpWithNewOp(toBuffer, destType, - bufferToTensor.getMemref()); + bufferToTensor.getBuffer()); return success(); } @@ -233,8 +233,9 @@ AllocTensorOp::getBufferType(Value value, const BufferizationOptions &options, if (getMemorySpace().has_value()) { memorySpace = *getMemorySpace(); } else if (getCopy()) { - auto copyBufferType = bufferization::getBufferType(getCopy(), options, - state, invocationStack); + auto copyBufferType = + bufferization::detail::asMemRefType(bufferization::getBufferType( + getCopy(), options, state, invocationStack)); if (failed(copyBufferType)) return failure(); memorySpace = copyBufferType->getMemorySpace(); @@ -642,8 +643,9 @@ Value MaterializeInDestinationOp::buildSubsetExtraction(OpBuilder &builder, assert(getRestrict() && "expected that ops with memrefs dest have 'restrict'"); setRestrict(false); - return builder.create(loc, getDest(), /*restrict=*/true, - getWritable()); + return builder.create( + loc, memref::getTensorTypeFromMemRefType(getDest().getType()), getDest(), + /*restrict=*/true, getWritable()); } bool MaterializeInDestinationOp::isEquivalentSubset( @@ -744,7 +746,7 @@ bool ToTensorOp::isWritable(Value value, const AnalysisState &state) { } OpFoldResult ToTensorOp::fold(FoldAdaptor) { - if (auto toBuffer = getMemref().getDefiningOp()) + if (auto toBuffer = getBuffer().getDefiningOp()) // Approximate alias analysis by conservatively folding only when no there // is no interleaved operation. 
if (toBuffer->getBlock() == this->getOperation()->getBlock() && @@ -764,7 +766,7 @@ struct DimOfToTensorFolder : public OpRewritePattern { return failure(); rewriter.replaceOpWithNewOp( - dimOp, memrefToTensorOp.getMemref(), dimOp.getIndex()); + dimOp, memrefToTensorOp.getBuffer(), dimOp.getIndex()); return success(); } }; @@ -781,8 +783,8 @@ void ToTensorOp::getCanonicalizationPatterns(RewritePatternSet &results, OpFoldResult ToBufferOp::fold(FoldAdaptor) { if (auto memrefToTensor = getTensor().getDefiningOp()) - if (memrefToTensor.getMemref().getType() == getType()) - return memrefToTensor.getMemref(); + if (memrefToTensor.getBuffer().getType() == getType()) + return memrefToTensor.getBuffer(); return {}; } diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationTypeInterfaces.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationTypeInterfaces.cpp new file mode 100644 index 000000000000..0e973915c6fc --- /dev/null +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationTypeInterfaces.cpp @@ -0,0 +1,21 @@ +//===- BufferizationTypeInterfaces.cpp - Type Interfaces --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.h" + +//===----------------------------------------------------------------------===// +// Bufferization Type Interfaces +//===----------------------------------------------------------------------===// + +namespace mlir { +namespace bufferization { + +#include "mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.cpp.inc" + +} // namespace bufferization +} // namespace mlir diff --git a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt index 63dcc1eb233e..5d8f0060f2c3 100644 --- a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt @@ -6,6 +6,7 @@ add_mlir_dialect_library(MLIRBufferizationDialect BufferizationDialect.cpp BufferViewFlowOpInterface.cpp UnstructuredControlFlow.cpp + BufferizationTypeInterfaces.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Bufferization diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp index 7e9b9119ce94..6472ef3eff2a 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp @@ -412,11 +412,11 @@ bufferization::bufferizeBlockSignature(Block *block, RewriterBase &rewriter, continue; } - FailureOr memrefType = + FailureOr bufferType = bufferization::getBufferType(bbArg, options, state); - if (failed(memrefType)) + if (failed(bufferType)) return failure(); - newTypes.push_back(*memrefType); + newTypes.push_back(*bufferType); } // Change the type of all block arguments. 
@@ -463,7 +463,7 @@ bufferization::bufferizeBlockSignature(Block *block, RewriterBase &rewriter, newOperands.push_back(operand); continue; } - FailureOr operandBufferType = + FailureOr operandBufferType = bufferization::getBufferType(operand, options, state); if (failed(operandBufferType)) return failure(); diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp index a0168da44b7b..453ed43bcadd 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -255,7 +255,7 @@ struct CallOpInterface } // Returning a memref. - FailureOr resultType = + FailureOr resultType = bufferization::getBufferType(result, options, state); if (failed(resultType)) return failure(); @@ -290,13 +290,13 @@ struct CallOpInterface // The called function was not bufferized yet. This can happen when // there cycles in the function call graph. Compute the bufferized // result type. - FailureOr maybeMemRefType = + FailureOr maybeBufferType = bufferization::getBufferType( funcOp.getArgument(opOperand.getOperandNumber()), options, state); - if (failed(maybeMemRefType)) + if (failed(maybeBufferType)) return failure(); - memRefType = *maybeMemRefType; + memRefType = *maybeBufferType; } // Since we don't yet have a clear layout story, to_buffer may diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp index 94a4b9011c16..573420f6a9aa 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp @@ -252,7 +252,8 @@ Value linalg::bufferizeToAllocation( // Create bufferization.to_tensor with "restrict" and "writable". 
The returned // tensor is a new buffer allocation, so it does not alias with any buffer. Value toTensorOp = rewriter.create( - loc, alloc, /*restrict=*/true, /*writable=*/true); + loc, padOp.getResult().getType(), alloc, /*restrict=*/true, + /*writable=*/true); rewriter.replaceOp(padOp, toTensorOp); return alloc; } @@ -340,7 +341,8 @@ Value linalg::bufferizeToAllocation( // Create bufferization.to_tensor with "restrict" and "writable". The returned // tensor is a new buffer allocation, so it does not alias with any buffer. Value toTensorOp = rewriter.create( - loc, alloc, /*restrict=*/true, /*writable=*/true); + loc, allocTensorOp.getResult().getType(), alloc, /*restrict=*/true, + /*writable=*/true); rewriter.replaceOp(allocTensorOp, toTensorOp); return alloc; } @@ -567,7 +569,8 @@ Value linalg::bufferizeToAllocation( createMemcpy(rewriter, op->getLoc(), operand->get(), alloc, options); } rewriter.modifyOpInPlace(op, [&]() { - auto toTensorOp = rewriter.create(op->getLoc(), alloc); + auto toTensorOp = rewriter.create( + op->getLoc(), operand->get().getType(), alloc); operand->set(toTensorOp); if (options.bufferizeDestinationOnly) { rewriter.modifyOpInPlace(toTensorOp, [&]() { diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp index 46fa77a7dc4e..58562536be61 100644 --- a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp @@ -108,7 +108,7 @@ struct ConditionOpInterface getBuffer(rewriter, value, options, state); if (failed(maybeBuffer)) return failure(); - FailureOr resultType = bufferization::getBufferType( + FailureOr resultType = bufferization::getBufferType( whileOp.getAfterArguments()[it.index()], options, state); if (failed(resultType)) return failure(); @@ -292,8 +292,9 @@ struct IfOpInterface // True branch was already bufferized. 
thenBufferType = cast(thenValue.getType()); } else { - auto maybeBufferType = bufferization::getBufferType( - thenValue, options, state, invocationStack); + auto maybeBufferType = + bufferization::detail::asMemRefType(bufferization::getBufferType( + thenValue, options, state, invocationStack)); if (failed(maybeBufferType)) return failure(); thenBufferType = *maybeBufferType; @@ -302,8 +303,9 @@ struct IfOpInterface // False branch was already bufferized. elseBufferType = cast(elseValue.getType()); } else { - auto maybeBufferType = bufferization::getBufferType( - elseValue, options, state, invocationStack); + auto maybeBufferType = + bufferization::detail::asMemRefType(bufferization::getBufferType( + elseValue, options, state, invocationStack)); if (failed(maybeBufferType)) return failure(); elseBufferType = *maybeBufferType; @@ -406,9 +408,7 @@ struct IndexSwitchOpInterface return bufferType; auto maybeBufferType = bufferization::getBufferType( yieldedValue, options, state, invocationStack); - if (failed(maybeBufferType)) - return failure(); - return maybeBufferType; + return bufferization::detail::asMemRefType(maybeBufferType); }; // Compute buffer type of the default case. @@ -527,8 +527,8 @@ static FailureOr computeLoopRegionIterArgBufferType( const BufferizationOptions &options, const BufferizationState &state, SmallVector &invocationStack) { // Determine the buffer type of the init_arg. - auto initArgBufferType = - bufferization::getBufferType(initArg, options, state, invocationStack); + auto initArgBufferType = bufferization::detail::asMemRefType( + bufferization::getBufferType(initArg, options, state, invocationStack)); if (failed(initArgBufferType)) return failure(); @@ -554,8 +554,9 @@ static FailureOr computeLoopRegionIterArgBufferType( } else { // Note: This typically triggers a recursive call for the buffer type of // the iter_arg. 
- auto maybeBufferType = bufferization::getBufferType(yieldedValue, options, - state, invocationStack); + auto maybeBufferType = + bufferization::detail::asMemRefType(bufferization::getBufferType( + yieldedValue, options, state, invocationStack)); if (failed(maybeBufferType)) return failure(); yieldedValueBufferType = *maybeBufferType; @@ -718,8 +719,12 @@ struct ForOpInterface if (auto opResult = dyn_cast(value)) { // The type of an OpResult must match the corresponding iter_arg type. BlockArgument bbArg = forOp.getTiedLoopRegionIterArg(opResult); - return bufferization::getBufferType(bbArg, options, state, - invocationStack); + auto bufferType = + bufferization::getBufferType(bbArg, options, state, invocationStack); + if (failed(bufferType)) + return failure(); + assert(isa(*bufferType) && "expected memref type"); + return cast(*bufferType); } // Compute result/argument number. @@ -1078,8 +1083,8 @@ struct WhileOpInterface // scf.condition was already bufferized. return cast(conditionYieldedVal.getType()); } - return bufferization::getBufferType(conditionYieldedVal, options, state, - invocationStack); + return bufferization::detail::asMemRefType(bufferization::getBufferType( + conditionYieldedVal, options, state, invocationStack)); } /// Assert that yielded values of an scf.while op are equivalent to their @@ -1185,14 +1190,14 @@ struct YieldOpInterface // We may have to cast the value before yielding it. 
if (isa( yieldOp->getParentOp())) { - FailureOr resultType = bufferization::getBufferType( + FailureOr resultType = bufferization::getBufferType( yieldOp->getParentOp()->getResult(it.index()), options, state); if (failed(resultType)) return failure(); buffer = castBuffer(rewriter, buffer, *resultType); } else if (auto whileOp = dyn_cast(yieldOp->getParentOp())) { - FailureOr resultType = bufferization::getBufferType( + FailureOr resultType = bufferization::getBufferType( whileOp.getBeforeArguments()[it.index()], options, state); if (failed(resultType)) return failure(); @@ -1307,15 +1312,15 @@ struct ForallOpInterface if (auto bbArg = dyn_cast(value)) // A tensor block argument has the same bufferized type as the // corresponding output operand. - return bufferization::getBufferType( - forallOp.getTiedOpOperand(bbArg)->get(), options, state, - invocationStack); + return bufferization::detail::asMemRefType( + bufferization::getBufferType(forallOp.getTiedOpOperand(bbArg)->get(), + options, state, invocationStack)); // The bufferized result type is the same as the bufferized type of the // corresponding output operand. 
- return bufferization::getBufferType( + return bufferization::detail::asMemRefType(bufferization::getBufferType( forallOp.getOutputs()[cast(value).getResultNumber()], options, - state, invocationStack); + state, invocationStack)); } bool isRepetitiveRegion(Operation *op, unsigned index) const { diff --git a/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp index dc91117a5193..8a471c12d21e 100644 --- a/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp @@ -67,7 +67,7 @@ struct AssumingOpInterface for (const auto &it : llvm::enumerate(assumingOp->getResultTypes())) { if (isa(it.value())) { newResults.push_back(rewriter.create( - assumingOp.getLoc(), newOp->getResult(it.index()))); + assumingOp.getLoc(), it.value(), newOp->getResult(it.index()))); } else { newResults.push_back(newOp->getResult(it.index())); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp index e5f2418367a5..e89b34d457ff 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp @@ -651,7 +651,7 @@ static LogicalResult rewriteSpMV(PatternRewriter &rewriter, tokens.clear(); // Done. - rewriter.replaceOpWithNewOp(op, memY); + rewriter.replaceOpWithNewOp(op, y.getType(), memY); return success(); } @@ -752,7 +752,7 @@ static LogicalResult rewriteSpMM(PatternRewriter &rewriter, tokens.clear(); // Done. - rewriter.replaceOpWithNewOp(op, bufC); + rewriter.replaceOpWithNewOp(op, c.getType(), bufC); return success(); } @@ -925,9 +925,12 @@ static LogicalResult rewriteSpGEMM(PatternRewriter &rewriter, tokens.clear(); // Done. 
- Value vt = rewriter.create(loc, valH); - Value rt = rewriter.create(loc, rowH); - Value ct = rewriter.create(loc, colH); + Value vt = rewriter.create( + loc, memref::getTensorTypeFromMemRefType(valH.getType()), valH); + Value rt = rewriter.create( + loc, memref::getTensorTypeFromMemRefType(rowH.getType()), rowH); + Value ct = rewriter.create( + loc, memref::getTensorTypeFromMemRefType(colH.getType()), colH); rewriter.replaceOpWithNewOp(op, c.getType(), ValueRange{rt, ct}, vt); return success(); @@ -1043,7 +1046,7 @@ static LogicalResult rewrite2To4SpMM(PatternRewriter &rewriter, tokens.clear(); // Done. - rewriter.replaceOpWithNewOp(op, bufC); + rewriter.replaceOpWithNewOp(op, C.getType(), bufC); return success(); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index e5f9717c3fba..14ced56b8365 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -1471,7 +1471,8 @@ struct SparseDisassembleOpConverter // Converts MemRefs back to Tensors. SmallVector retValues = llvm::to_vector( llvm::map_range(retMem, [&rewriter, loc](Value v) -> Value { - return rewriter.create(loc, v); + return rewriter.create( + loc, memref::getTensorTypeFromMemRefType(v.getType()), v); })); // Appends the actual memory length used in each buffer returned. retValues.append(retLen.begin(), retLen.end()); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index 9ffa64dc821d..7f0b65768744 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -867,7 +867,9 @@ public: // Converts MemRefs back to Tensors. 
assert(retVal.size() + retLen.size() == op.getNumResults()); for (unsigned i = 0, sz = retVal.size(); i < sz; i++) { - auto tensor = rewriter.create(loc, retVal[i]); + auto tensor = rewriter.create( + loc, memref::getTensorTypeFromMemRefType(retVal[i].getType()), + retVal[i]); retVal[i] = rewriter.create(loc, op.getResultTypes()[i], tensor); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/CodegenUtils.cpp index 57291064eba2..1bd9563b3db0 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/CodegenUtils.cpp @@ -549,8 +549,8 @@ TypedValue sparse_tensor::genToMemref(OpBuilder &builder, Location loc, Value tensor) { auto tTp = llvm::cast(tensor.getType()); auto mTp = MemRefType::get(tTp.getShape(), tTp.getElementType()); - return builder.create(loc, mTp, tensor) - .getResult(); + return cast>( + builder.create(loc, mTp, tensor).getResult()); } Value sparse_tensor::createOrFoldSliceOffsetOp(OpBuilder &builder, Location loc, diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp index 4b778b768d13..729c048db456 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp @@ -54,8 +54,9 @@ struct CastOpInterface const BufferizationState &state, SmallVector &invocationStack) const { auto castOp = cast(op); - auto maybeSrcBufferType = bufferization::getBufferType( - castOp.getSource(), options, state, invocationStack); + auto maybeSrcBufferType = + bufferization::detail::asMemRefType(bufferization::getBufferType( + castOp.getSource(), options, state, invocationStack)); if (failed(maybeSrcBufferType)) return failure(); Attribute memorySpace = maybeSrcBufferType->getMemorySpace(); @@ -500,8 +501,8 @@ struct FromElementsOpInterface 
/*copy=*/false); if (failed(tensorAlloc)) return failure(); - FailureOr memrefType = - bufferization::getBufferType(*tensorAlloc, options, state); + FailureOr memrefType = bufferization::detail::asMemRefType( + bufferization::getBufferType(*tensorAlloc, options, state)); if (failed(memrefType)) return failure(); Value buffer = rewriter.create( @@ -758,8 +759,9 @@ struct PadOpInterface SmallVector &invocationStack) const { // Infer memory space from the source tensor. auto padOp = cast(op); - auto maybeSrcBufferType = bufferization::getBufferType( - padOp.getSource(), options, state, invocationStack); + auto maybeSrcBufferType = + bufferization::detail::asMemRefType(bufferization::getBufferType( + padOp.getSource(), options, state, invocationStack)); if (failed(maybeSrcBufferType)) return failure(); MemRefLayoutAttrInterface layout; diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir index cd19e3a5e82a..da3c26ce36ba 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir @@ -268,4 +268,23 @@ func.func @materialize_in_dest_raw(%f: f32, %f2: f32, %idx: index) -> (tensor<5x %r = tensor.extract %dest_filled[%idx] : tensor<5xf32> return %0, %r : tensor<5xf32>, f32 -} \ No newline at end of file +} + +// ----- + +// CHECK-LABEL: func.func @test_dialect_op( +// CHECK-SAME: %[[ARG:.*]]: !test.test_tensor<[32, 64], f64> +// CHECK-SAME: ) -> !test.test_tensor<[32, 128], f64> { +func.func @test_dialect_op(%arg: !test.test_tensor<[32, 64], f64>) + -> !test.test_tensor<[32, 128], f64> { + // CHECK: %[[MEMREF:.*]] = bufferization.to_buffer %[[ARG]] + // CHECK: %[[DUMMY:.*]] = "test.dummy_memref_op"(%[[MEMREF]]) + // CHECK-SAME: : (!test.test_memref<[32, 64], f64>) + // CHECK-SAME: -> !test.test_memref<[32, 128], f64> + // CHECK: %[[OUT:.*]] = bufferization.to_tensor %[[DUMMY]] + %out = 
"test.dummy_tensor_op"(%arg) : (!test.test_tensor<[32, 64], f64>) + -> !test.test_tensor<[32, 128], f64> + + // CHECK: return %[[OUT]] + return %out : !test.test_tensor<[32, 128], f64> +} diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp index b5a8bd10d6b6..78e44c6ec7a9 100644 --- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp +++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp @@ -8,6 +8,7 @@ #include "TestDialect.h" #include "TestOps.h" +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/Verifier.h" #include "mlir/Interfaces/FunctionImplementation.h" @@ -1387,3 +1388,25 @@ TestMultiSlotAlloca::handleDestructuringComplete( const DestructurableMemorySlot &slot, OpBuilder &builder) { return createNewMultiAllocaWithoutSlot(slot, builder, *this); } + +::mlir::LogicalResult test::TestDummyTensorOp::bufferize( + ::mlir::RewriterBase &rewriter, + const ::mlir::bufferization::BufferizationOptions &options, + ::mlir::bufferization::BufferizationState &state) { + auto buffer = + mlir::bufferization::getBuffer(rewriter, getInput(), options, state); + if (mlir::failed(buffer)) + return failure(); + + const auto outType = getOutput().getType(); + const auto bufferizedOutType = test::TestMemrefType::get( + getContext(), outType.getShape(), outType.getElementType(), nullptr); + // replace op with memref analogy + auto dummyMemrefOp = rewriter.create( + getLoc(), bufferizedOutType, *buffer); + + mlir::bufferization::replaceOpWithBufferizedValues(rewriter, getOperation(), + dummyMemrefOp.getResult()); + + return mlir::success(); +} diff --git a/mlir/test/lib/Dialect/Test/TestOps.h b/mlir/test/lib/Dialect/Test/TestOps.h index c2ee5f9ab9a5..b414b47c8742 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.h +++ b/mlir/test/lib/Dialect/Test/TestOps.h @@ -13,6 +13,7 @@ #include "TestInterfaces.h" #include "TestTypes.h" #include "mlir/Bytecode/BytecodeImplementation.h" +#include 
"mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Dialect/DLTI/Traits.h" #include "mlir/Dialect/Func/IR/FuncOps.h" diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 59330fdb1bb2..79bcd9c2e0a9 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -31,7 +31,7 @@ include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/LoopLikeInterface.td" include "mlir/Interfaces/MemorySlotInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" - +include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td" // Include the attribute definitions. include "TestAttrDefs.td" @@ -2825,7 +2825,7 @@ def TestNVVMRequiresSMArchCondOp : let assemblyFormat = "attr-dict"; } -def TestNVVMRequirestSMArchCondMultiOp : +def TestNVVMRequirestSMArchCondMultiOp : TEST_Op<"nvvm_requires_sm_90a_or_sm_100a", [NVVMRequiresSMa<[90, 100]>]> { let arguments = (ins ); let assemblyFormat = "attr-dict"; @@ -3552,4 +3552,58 @@ def TestAllocWithMultipleResults : TEST_Op<"alloc_with_multiple_results"> { }]; } +//===----------------------------------------------------------------------===// +// Test Ops bufferization +//===----------------------------------------------------------------------===// + +def TestDummyTensorOp : TEST_Op<"dummy_tensor_op", [BufferizableOpInterface]> { + let arguments = (ins + Arg:$input + ); + let results = (outs + Arg:$output + ); + let extraClassDeclaration = [{ + // BufferizableOpInterface + bool bufferizesToMemoryRead(mlir::OpOperand&, + const mlir::bufferization::AnalysisState&); + + bool bufferizesToMemoryWrite(mlir::OpOperand&, + const mlir::bufferization::AnalysisState&); + + mlir::bufferization::AliasingValueList getAliasingValues(mlir::OpOperand&, + const mlir::bufferization::AnalysisState&); + + mlir::LogicalResult bufferize( + mlir::RewriterBase& rewriter, + const 
mlir::bufferization::BufferizationOptions& options, + mlir::bufferization::BufferizationState &state); + }]; + + let extraClassDefinition = [{ + bool test::TestDummyTensorOp::bufferizesToMemoryRead(::mlir::OpOperand&, + const ::mlir::bufferization::AnalysisState&) { + return true; + } + bool test::TestDummyTensorOp::bufferizesToMemoryWrite(::mlir::OpOperand&, + const ::mlir::bufferization::AnalysisState&) { + return true; + } + ::mlir::bufferization::AliasingValueList + test::TestDummyTensorOp::getAliasingValues(::mlir::OpOperand&, + const ::mlir::bufferization::AnalysisState&) { + return {}; + } + }]; +} + +def TestDummyMemrefOp : TEST_Op<"dummy_memref_op", []> { + let arguments = (ins + Arg:$input + ); + let results = (outs + Arg:$output + ); +} + #endif // TEST_OPS diff --git a/mlir/test/lib/Dialect/Test/TestTypeDefs.td b/mlir/test/lib/Dialect/Test/TestTypeDefs.td index 09294e84960f..03261f37c815 100644 --- a/mlir/test/lib/Dialect/Test/TestTypeDefs.td +++ b/mlir/test/lib/Dialect/Test/TestTypeDefs.td @@ -428,6 +428,15 @@ def TestTensorType : Test_Type<"TestTensor", return test::TestTensorType::get( getContext(), shape.value_or(getShape()), elementType); } + + // TensorLikeTypeInterface: + ::mlir::FailureOr<::mlir::bufferization::BufferLikeType> + getBufferType(const ::mlir::bufferization::BufferizationOptions& options, + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError); + + ::mlir::LogicalResult verifyCompatibleBufferType( + ::mlir::bufferization::BufferLikeType bufferType, + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError); }]; } diff --git a/mlir/test/lib/Dialect/Test/TestTypes.cpp b/mlir/test/lib/Dialect/Test/TestTypes.cpp index 5c784dcee6e1..2fc2f90ef6bc 100644 --- a/mlir/test/lib/Dialect/Test/TestTypes.cpp +++ b/mlir/test/lib/Dialect/Test/TestTypes.cpp @@ -545,3 +545,23 @@ TestTypeOpAsmTypeInterfaceType::getAlias(::llvm::raw_ostream &os) const { os << "op_asm_type_interface_type"; return 
::mlir::OpAsmDialectInterface::AliasResult::FinalAlias; } + +::mlir::FailureOr<::mlir::bufferization::BufferLikeType> +TestTensorType::getBufferType( + const ::mlir::bufferization::BufferizationOptions &, + ::llvm::function_ref<::mlir::InFlightDiagnostic()>) { + return cast( + TestMemrefType::get(getContext(), getShape(), getElementType(), nullptr)); +} + +::mlir::LogicalResult TestTensorType::verifyCompatibleBufferType( + ::mlir::bufferization::BufferLikeType bufferType, + ::llvm::function_ref<::mlir::InFlightDiagnostic()> emitError) { + auto testMemref = dyn_cast(bufferType); + if (!testMemref) + return emitError() << "expected TestMemrefType"; + + const bool valid = getShape() == testMemref.getShape() && + getElementType() == testMemref.getElementType(); + return mlir::success(valid); +} -- cgit v1.2.3 From 6f4add34801e6ce02a5ebc96df4d1ca479125649 Mon Sep 17 00:00:00 2001 From: Omair Javaid Date: Wed, 18 Jun 2025 19:23:54 +0500 Subject: [compiler-rt] [Fuzzer] Fix ARMv7 test link failure by linking unwinder (#144495) compiler-rt/lib/fuzzer/tests build was failing on armv7, with undefined references to unwinder symbols, such as __aeabi_unwind_cpp_pr0. This occurs because the test is built with `-nostdlib++` but `libunwind` is not explicitly linked to the final test executable. This patch resolves the issue by adding CMake logic to explicitly link the required unwinder to the fuzzer tests, inspired by the same solution used to fix Scudo build failures by https://reviews.llvm.org/D142888. 
--- compiler-rt/lib/fuzzer/tests/CMakeLists.txt | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/compiler-rt/lib/fuzzer/tests/CMakeLists.txt b/compiler-rt/lib/fuzzer/tests/CMakeLists.txt index adfae3d63e64..543f486a9d50 100644 --- a/compiler-rt/lib/fuzzer/tests/CMakeLists.txt +++ b/compiler-rt/lib/fuzzer/tests/CMakeLists.txt @@ -35,6 +35,27 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND COMPILER_RT_LIBCXXABI_PATH) list(APPEND LIBFUZZER_UNITTEST_CFLAGS -nostdinc++ -fno-exceptions) list(APPEND LIBFUZZER_UNITTEST_LINK_FLAGS -nostdlib++ -fno-exceptions) + + # When we use -nostdlib++, we remove the default C++ runtime which normally + # provides the stack unwinding symbols (like __aeabi_unwind_cpp_pr0). + # We must now manually find and link a suitable unwinder library. + set(FUZZER_UNWINDER_LIBS) + if(COMPILER_RT_USE_LLVM_UNWINDER) + # Prefer LLVM's own libunwind. + list(APPEND FUZZER_UNWINDER_LIBS ${COMPILER_RT_UNWINDER_LINK_LIBS}) + elseif(COMPILER_RT_HAS_GCC_S_LIB) + # As a fallback, use the shared libgcc_s library. + list(APPEND FUZZER_UNWINDER_LIBS gcc_s) + elseif(COMPILER_RT_HAS_GCC_LIB) + # As a final fallback, use the static libgcc library. + list(APPEND FUZZER_UNWINDER_LIBS gcc) + elseif(NOT COMPILER_RT_USE_BUILTINS_LIBRARY) + # If no unwinder is found and we aren't using the builtins library + message(FATAL_ERROR "Fuzzer tests require a suitable unwinder, but none was found.") + endif() + # Add the detected unwinder library to our link flags. 
+ list(APPEND LIBFUZZER_UNITTEST_LINK_FLAGS ${FUZZER_UNWINDER_LIBS}) + endif() if ("-fvisibility=hidden" IN_LIST LIBFUZZER_CFLAGS) -- cgit v1.2.3 From 36038a1048b2aab87ed18f982e960c044ad97670 Mon Sep 17 00:00:00 2001 From: Orlando Cazalet-Hyams Date: Wed, 18 Jun 2025 16:04:18 +0100 Subject: [RemoveDIs][NFC] Remove dbg intrinsic handling code from SelectionDAG ISel (#144702) --- llvm/include/llvm/CodeGen/FunctionLoweringInfo.h | 4 +- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 45 ---------------- .../CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 1 - .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 63 ---------------------- .../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 1 - llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 4 -- 6 files changed, 1 insertion(+), 117 deletions(-) diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h index 392da3f512df..b892a0e222a4 100644 --- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -35,7 +35,6 @@ namespace llvm { class Argument; class BasicBlock; class BranchProbabilityInfo; -class DbgDeclareInst; class Function; class Instruction; class MachineFunction; @@ -191,9 +190,8 @@ public: /// The current call site index being processed, if any. 0 if none. unsigned CurCallSite = 0; - /// Collection of dbg.declare instructions handled after argument + /// Collection of dbg_declare instructions handled after argument /// lowering and before ISel proper. 
- SmallPtrSet PreprocessedDbgDeclares; SmallPtrSet PreprocessedDVRDeclares; /// set - Initialize this FunctionLoweringInfo with the given Function diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index e8a3df3366b2..fb9eff942a46 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1395,51 +1395,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // Neither does the llvm.experimental.noalias.scope.decl intrinsic case Intrinsic::experimental_noalias_scope_decl: return true; - case Intrinsic::dbg_declare: { - const DbgDeclareInst *DI = cast(II); - assert(DI->getVariable() && "Missing variable"); - if (FuncInfo.PreprocessedDbgDeclares.contains(DI)) - return true; - - const Value *Address = DI->getAddress(); - if (!lowerDbgDeclare(Address, DI->getExpression(), DI->getVariable(), - MIMD.getDL())) - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI); - - return true; - } - case Intrinsic::dbg_assign: - // A dbg.assign is a dbg.value with more information, typically produced - // during optimisation. If one reaches fastisel then something odd has - // happened (such as an optimised function being always-inlined into an - // optnone function). We will not be using the extra information in the - // dbg.assign in that case, just use its dbg.value fields. - [[fallthrough]]; - case Intrinsic::dbg_value: { - // This form of DBG_VALUE is target-independent. - const DbgValueInst *DI = cast(II); - const Value *V = DI->getValue(); - DIExpression *Expr = DI->getExpression(); - DILocalVariable *Var = DI->getVariable(); - if (DI->hasArgList()) - // Signal that we don't have a location for this. 
- V = nullptr; - - assert(Var->isValidLocationForIntrinsic(MIMD.getDL()) && - "Expected inlined-at fields to agree"); - - if (!lowerDbgValue(V, Expr, Var, MIMD.getDL())) - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); - - return true; - } - case Intrinsic::dbg_label: { - const DbgLabelInst *DI = cast(II); - assert(DI->getLabel() && "Missing label"); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, - TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel()); - return true; - } case Intrinsic::objectsize: llvm_unreachable("llvm.objectsize.* should have been lowered already"); diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index d4ed158729ca..098005b6adfa 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -369,7 +369,6 @@ void FunctionLoweringInfo::clear() { StatepointStackSlots.clear(); StatepointRelocationMaps.clear(); PreferredExtendType.clear(); - PreprocessedDbgDeclares.clear(); PreprocessedDVRDeclares.clear(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ec0c5473b0db..c01f1e792847 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6674,69 +6674,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.setRoot(Res.getValue(1)); return; } - case Intrinsic::dbg_declare: { - const auto &DI = cast(I); - // Debug intrinsics are handled separately in assignment tracking mode. - // Some intrinsics are handled right after Argument lowering. 
- if (AssignmentTrackingEnabled || - FuncInfo.PreprocessedDbgDeclares.count(&DI)) - return; - LLVM_DEBUG(dbgs() << "SelectionDAG visiting dbg_declare: " << DI << "\n"); - DILocalVariable *Variable = DI.getVariable(); - DIExpression *Expression = DI.getExpression(); - dropDanglingDebugInfo(Variable, Expression); - // Assume dbg.declare can not currently use DIArgList, i.e. - // it is non-variadic. - assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList"); - handleDebugDeclare(DI.getVariableLocationOp(0), Variable, Expression, - DI.getDebugLoc()); - return; - } - case Intrinsic::dbg_label: { - const DbgLabelInst &DI = cast(I); - DILabel *Label = DI.getLabel(); - assert(Label && "Missing label"); - - SDDbgLabel *SDV; - SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder); - DAG.AddDbgLabel(SDV); - return; - } - case Intrinsic::dbg_assign: { - // Debug intrinsics are handled separately in assignment tracking mode. - if (AssignmentTrackingEnabled) - return; - // If assignment tracking hasn't been enabled then fall through and treat - // the dbg.assign as a dbg.value. - [[fallthrough]]; - } - case Intrinsic::dbg_value: { - // Debug intrinsics are handled separately in assignment tracking mode. 
- if (AssignmentTrackingEnabled) - return; - const DbgValueInst &DI = cast(I); - assert(DI.getVariable() && "Missing variable"); - - DILocalVariable *Variable = DI.getVariable(); - DIExpression *Expression = DI.getExpression(); - dropDanglingDebugInfo(Variable, Expression); - - if (DI.isKillLocation()) { - handleKillDebugValue(Variable, Expression, DI.getDebugLoc(), SDNodeOrder); - return; - } - - SmallVector Values(DI.getValues()); - if (Values.empty()) - return; - - bool IsVariadic = DI.hasArgList(); - if (!handleDebugValue(Values, Variable, Expression, DI.getDebugLoc(), - SDNodeOrder, IsVariadic)) - addDanglingDebugInfo(Values, Variable, Expression, IsVariadic, - DI.getDebugLoc(), SDNodeOrder); - return; - } case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 35c15bc269d4..1c278076a219 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -56,7 +56,6 @@ class CleanupPadInst; class CleanupReturnInst; class Constant; class ConstrainedFPIntrinsic; -class DbgValueInst; class DataLayout; class DIExpression; class DILocalVariable; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index ac6d25f141ec..4b98d87fcc63 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1592,10 +1592,6 @@ static bool processDbgDeclare(FunctionLoweringInfo &FuncInfo, /// in case the declarations refer to arguments. 
static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) { for (const auto &I : instructions(*FuncInfo.Fn)) { - const auto *DI = dyn_cast(&I); - if (DI && processDbgDeclare(FuncInfo, DI->getAddress(), DI->getExpression(), - DI->getVariable(), DI->getDebugLoc())) - FuncInfo.PreprocessedDbgDeclares.insert(DI); for (const DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { if (DVR.Type == DbgVariableRecord::LocationType::Declare && processDbgDeclare(FuncInfo, DVR.getVariableLocationOp(0), -- cgit v1.2.3 From 8fc20bffabe7fe6cdc4a9ec1bc79202eba5f1f23 Mon Sep 17 00:00:00 2001 From: Karlo Basioli Date: Wed, 18 Jun 2025 16:07:56 +0100 Subject: Fix bazel build issue caused by 142986 (#144721) --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 2 ++ utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 2 ++ 2 files changed, 4 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 0b4441c15794..48f2d0900d3e 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -12630,6 +12630,7 @@ td_library( ":AllocationOpInterfaceTdFiles", ":BufferViewFlowOpInterfaceTdFiles", ":BufferizableOpInterfaceTdFiles", + ":BufferizationTypeInterfacesTdFiles", ":CopyOpInterfaceTdFiles", ":DestinationStyleOpInterfaceTdFiles", ":InferTypeOpInterfaceTdFiles", @@ -12811,6 +12812,7 @@ cc_library( ":BufferDeallocationOpInterfaceIncGen", ":BufferViewFlowOpInterfaceIncGen", ":BufferizableOpInterfaceIncGen", + ":BufferizationTypeInterfacesIncGen", ":BufferizationBaseIncGen", ":BufferizationInterfaces", ":BufferizationOpsIncGen", diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index a2fb5ade7324..0eaf86da7f27 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -111,6 +111,7 @@ 
td_library( name = "TestOpTdFiles", srcs = glob(["lib/Dialect/Test/*.td"]), deps = [ + "//mlir:BufferizableOpInterfaceTdFiles", "//mlir:BufferizationTypeInterfacesTdFiles", "//mlir:BuiltinDialectTdFiles", "//mlir:CallInterfacesTdFiles", @@ -242,6 +243,7 @@ gentbl_cc_library( test = True, deps = [ ":TestOpTdFiles", + "//mlir:BufferizableOpInterfaceTdFiles", "//mlir:BufferizationTypeInterfacesTdFiles", "//mlir:BuiltinDialectTdFiles", ], -- cgit v1.2.3 From e4c3b037bc7f5d9a8089de4c509d3e6034735891 Mon Sep 17 00:00:00 2001 From: amordo Date: Wed, 18 Jun 2025 17:12:31 +0200 Subject: [InstCombine] Fold `tan(x) * cos(x) => sin(x)` (#136319) This patch enables folding `tan(x) * cos(x) -> sin(x)` under the `contract` flag. Fixes https://github.com/llvm/llvm-project/issues/34950. --- .../InstCombine/InstCombineMulDivRem.cpp | 12 ++ llvm/test/Transforms/InstCombine/fmul-tan-cos.ll | 182 +++++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/fmul-tan-cos.ll diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 457199a72510..fcf4613b5d13 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1072,6 +1072,18 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) { return Result; } + // tan(X) * cos(X) -> sin(X) + if (I.hasAllowContract() && + match(&I, + m_c_FMul(m_OneUse(m_Intrinsic(m_Value(X))), + m_OneUse(m_Intrinsic(m_Deferred(X)))))) { + auto *Sin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, &I); + if (auto *Metadata = I.getMetadata(LLVMContext::MD_fpmath)) { + Sin->setMetadata(LLVMContext::MD_fpmath, Metadata); + } + return replaceInstUsesWith(I, Sin); + } + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/fmul-tan-cos.ll b/llvm/test/Transforms/InstCombine/fmul-tan-cos.ll new file mode 100644 index 000000000000..a85661f14670 --- 
/dev/null +++ b/llvm/test/Transforms/InstCombine/fmul-tan-cos.ll @@ -0,0 +1,182 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +define double @fmul_tan_cos(double %a) { +; CHECK-LABEL: define double @fmul_tan_cos( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: [[TAN:%.*]] = call double @llvm.tan.f64(double [[A]]) +; CHECK-NEXT: [[COS:%.*]] = call double @llvm.cos.f64(double [[A]]) +; CHECK-NEXT: [[RES:%.*]] = fmul double [[TAN]], [[COS]] +; CHECK-NEXT: ret double [[RES]] +; + %tan = call double @llvm.tan.f64(double %a) + %cos = call double @llvm.cos.f64(double %a) + %res = fmul double %tan, %cos + ret double %res +} + +define double @fmul_strict_tan_strict_cos_contract(double %a) { +; CHECK-LABEL: define double @fmul_strict_tan_strict_cos_contract( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: [[TAN:%.*]] = call double @llvm.tan.f64(double [[A]]) +; CHECK-NEXT: [[COS:%.*]] = call contract double @llvm.cos.f64(double [[A]]) +; CHECK-NEXT: [[RES:%.*]] = fmul double [[TAN]], [[COS]] +; CHECK-NEXT: ret double [[RES]] +; + %tan = call double @llvm.tan.f64(double %a) + %cos = call contract double @llvm.cos.f64(double %a) + %res = fmul double %tan, %cos + ret double %res +} + +define double @fmul_contract_tan_strict_cos_strict(double %a) { +; CHECK-LABEL: define double @fmul_contract_tan_strict_cos_strict( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call contract double @llvm.sin.f64(double [[A]]) +; CHECK-NEXT: ret double [[RES]] +; + %tan = call double @llvm.tan.f64(double %a) + %cos = call double @llvm.cos.f64(double %a) + %res = fmul contract double %tan, %cos + ret double %res +} + +define double @fmul_contract_tan_contract_cos_strict(double %a) { +; CHECK-LABEL: define double @fmul_contract_tan_contract_cos_strict( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call contract double @llvm.sin.f64(double [[A]]) +; 
CHECK-NEXT: ret double [[RES]] +; + %tan = call contract double @llvm.tan.f64(double %a) + %cos = call double @llvm.cos.f64(double %a) + %res = fmul contract double %tan, %cos + ret double %res +} + +define double @fmul_tan_cos_contract_multiple_uses(double %a) { +; CHECK-LABEL: define double @fmul_tan_cos_contract_multiple_uses( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: [[TAN:%.*]] = call contract double @llvm.tan.f64(double [[A]]) +; CHECK-NEXT: [[COS:%.*]] = call contract double @llvm.cos.f64(double [[A]]) +; CHECK-NEXT: [[RES:%.*]] = fmul contract double [[TAN]], [[COS]] +; CHECK-NEXT: call void @use(double [[COS]]) +; CHECK-NEXT: ret double [[RES]] +; + %tan = call contract double @llvm.tan.f64(double %a) + %cos = call contract double @llvm.cos.f64(double %a) + %res = fmul contract double %tan, %cos + call void @use(double %cos) + ret double %res +} + +define double @fmul_tan_cos_contract(double %a) { +; CHECK-LABEL: define double @fmul_tan_cos_contract( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call contract double @llvm.sin.f64(double [[A]]) +; CHECK-NEXT: ret double [[RES]] +; + %tan = call contract double @llvm.tan.f64(double %a) + %cos = call contract double @llvm.cos.f64(double %a) + %res = fmul contract double %tan, %cos + ret double %res +} + +define float @fmul_tanf_cosf_contract(float %a) { +; CHECK-LABEL: define float @fmul_tanf_cosf_contract( +; CHECK-SAME: float [[A:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call contract float @llvm.sin.f32(float [[A]]) +; CHECK-NEXT: ret float [[RES]] +; + %tan = call contract float @llvm.tan.f32(float %a) + %cos = call contract float @llvm.cos.f32(float %a) + %res = fmul contract float %tan, %cos + ret float %res +} + +define fp128 @fmul_tanfp128_cosfp128_contract(fp128 %a) { +; CHECK-LABEL: define fp128 @fmul_tanfp128_cosfp128_contract( +; CHECK-SAME: fp128 [[A:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call contract fp128 @llvm.sin.f128(fp128 [[A]]) +; CHECK-NEXT: ret fp128 [[RES]] +; + 
%tan = call contract fp128 @llvm.tan.fp128(fp128 %a) + %cos = call contract fp128 @llvm.cos.fp128(fp128 %a) + %res = fmul contract fp128 %tan, %cos + ret fp128 %res +} + + +define double @commutativity_cos_tan(double %a) { +; CHECK-LABEL: define double @commutativity_cos_tan( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call contract double @llvm.sin.f64(double [[A]]) +; CHECK-NEXT: ret double [[RES]] +; + %cos = call contract double @llvm.cos.f64(double %a) + %tan = call contract double @llvm.tan.f64(double %a) + %res = fmul contract double %cos, %tan + ret double %res +} + + +define double @tan_cos_value_mismatch(double %a, double %b) { +; CHECK-LABEL: define double @tan_cos_value_mismatch( +; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) { +; CHECK-NEXT: [[TAN:%.*]] = call contract double @llvm.tan.f64(double [[A]]) +; CHECK-NEXT: [[COS:%.*]] = call contract double @llvm.cos.f64(double [[B]]) +; CHECK-NEXT: [[RES:%.*]] = fmul contract double [[TAN]], [[COS]] +; CHECK-NEXT: ret double [[RES]] +; + %tan = call contract double @llvm.tan.f64(double %a) + %cos = call contract double @llvm.cos.f64(double %b) + %res = fmul contract double %tan, %cos + ret double %res +} + + +define <2 x double> @fmul_tan_cos_vector(<2 x double> %a) { +; CHECK-LABEL: define <2 x double> @fmul_tan_cos_vector( +; CHECK-SAME: <2 x double> [[A:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call contract <2 x double> @llvm.sin.v2f64(<2 x double> [[A]]) +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %tan = call contract <2 x double> @llvm.tan.v2f64(<2 x double> %a) + %cos = call contract <2 x double> @llvm.cos.v2f64(<2 x double> %a) + %res = fmul contract <2 x double> %tan, %cos + ret <2 x double> %res +} + + +define double @fmul_tan_cos_nnan_preservation(double %a) { +; CHECK-LABEL: define double @fmul_tan_cos_nnan_preservation( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call nnan contract double @llvm.sin.f64(double [[A]]) +; CHECK-NEXT: ret double [[RES]] +; 
+ %tan = call contract double @llvm.tan.f64(double %a) + %cos = call contract double @llvm.cos.f64(double %a) + %res = fmul contract nnan double %tan, %cos + ret double %res +} + + +define double @fmul_tan_cos_fpmath_metadata_preservation(double %a) { +; CHECK-LABEL: define double @fmul_tan_cos_fpmath_metadata_preservation( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call contract double @llvm.sin.f64(double [[A]]), !fpmath [[META0:![0-9]+]] +; CHECK-NEXT: ret double [[RES]] +; + %tan = call contract double @llvm.tan.f64(double %a) + %cos = call contract double @llvm.cos.f64(double %a) + %res = fmul contract double %tan, %cos, !fpmath !0 + ret double %res +} + +declare void @use(double) + +!0 = !{ float 2.5 } + + +;. +; CHECK: [[META0]] = !{float 2.500000e+00} +;. -- cgit v1.2.3 From b53c1e4ee810ac21dab5d27413af1f31a6a4cbfa Mon Sep 17 00:00:00 2001 From: John Brawn Date: Wed, 18 Jun 2025 16:16:52 +0100 Subject: [AArch64] Add ISel for postindex ld1/st1 in big-endian (#144387) When big-endian we need to use ld1/st1 for vector loads and stores so that we get the elements in the correct order, but this prevents postindex addressing from being used. Fix this by adding the appropriate ISel patterns, plus the relevant changes in ISelLowering and ISelDAGToDAG to cause postindex addressing to be used. 
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 52 +- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 12 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 84 +- llvm/test/CodeGen/AArch64/vector-ldst-offset.ll | 2108 +++++++++++++++++++++++ llvm/test/CodeGen/AArch64/zext-to-tbl.ll | 30 +- 5 files changed, 2231 insertions(+), 55 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/vector-ldst-offset.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 009d69b2b943..da617b7e1926 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1583,6 +1583,8 @@ bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { EVT DstVT = N->getValueType(0); ISD::MemIndexedMode AM = LD->getAddressingMode(); bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; + ConstantSDNode *OffsetOp = cast(LD->getOffset()); + int OffsetVal = (int)OffsetOp->getZExtValue(); // We're not doing validity checking here. That was done when checking // if we should mark the load as indexed or not. We're just selecting @@ -1637,18 +1639,58 @@ bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; } else if (VT == MVT::f32) { Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; - } else if (VT == MVT::f64 || VT.is64BitVector()) { + } else if (VT == MVT::f64 || + (VT.is64BitVector() && Subtarget->isLittleEndian())) { Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; - } else if (VT.is128BitVector()) { + } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) { Opcode = IsPre ? 
AArch64::LDRQpre : AArch64::LDRQpost; + } else if (VT.is64BitVector()) { + if (IsPre || OffsetVal != 8) + return false; + switch (VT.getScalarSizeInBits()) { + case 8: + Opcode = AArch64::LD1Onev8b_POST; + break; + case 16: + Opcode = AArch64::LD1Onev4h_POST; + break; + case 32: + Opcode = AArch64::LD1Onev2s_POST; + break; + case 64: + Opcode = AArch64::LD1Onev1d_POST; + break; + default: + llvm_unreachable("Expected vector element to be a power of 2"); + } + } else if (VT.is128BitVector()) { + if (IsPre || OffsetVal != 16) + return false; + switch (VT.getScalarSizeInBits()) { + case 8: + Opcode = AArch64::LD1Onev16b_POST; + break; + case 16: + Opcode = AArch64::LD1Onev8h_POST; + break; + case 32: + Opcode = AArch64::LD1Onev4s_POST; + break; + case 64: + Opcode = AArch64::LD1Onev2d_POST; + break; + default: + llvm_unreachable("Expected vector element to be a power of 2"); + } } else return false; SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); - ConstantSDNode *OffsetOp = cast(LD->getOffset()); - int OffsetVal = (int)OffsetOp->getZExtValue(); SDLoc dl(N); - SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); + // LD1 encodes an immediate offset by using XZR as the offset register. + SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian()) + ? 
CurDAG->getRegister(AArch64::XZR, MVT::i64) + : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); SDValue Ops[] = { Base, Offset, Chain }; SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, MVT::Other, Ops); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1169efce3123..22c497d3de64 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2087,12 +2087,18 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) { setOperationAction(ISD::STRICT_FSETCC, VT, Expand); setOperationAction(ISD::STRICT_FSETCCS, VT, Expand); + // When little-endian we can use ordinary d and q register loads/stores for + // vector types, but when big-endian we need to use structure load/store which + // only allow post-index addressing. if (Subtarget->isLittleEndian()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, VT, Legal); setIndexedStoreAction(im, VT, Legal); } + } else { + setIndexedLoadAction(ISD::POST_INC, VT, Legal); + setIndexedStoreAction(ISD::POST_INC, VT, Legal); } if (Subtarget->hasD128()) { @@ -27047,6 +27053,12 @@ bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op, RHSC = -(uint64_t)RHSC; if (!isInt<9>(RHSC)) return false; + // When big-endian VLD1/VST1 are used for vector load and store, and these + // only allow an offset that's equal to the store size. + EVT MemType = cast<MemSDNode>(N)->getMemoryVT(); + if (!Subtarget->isLittleEndian() && MemType.isVector() && + RHSC != MemType.getStoreSize()) + return false; // Always emit pre-inc/post-inc addressing mode. Use negated constant offset // when dealing with subtraction. 
Offset = DAG.getConstant(RHSC, SDLoc(N), RHS->getValueType(0)); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index f90f12b5ac3c..400ffff5d567 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4942,39 +4942,42 @@ def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off), (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), - (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; - -def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : 
Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), - (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. + def : Pat<(post_store(v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + + def : Pat<(post_store(v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + 
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + def : Pat<(post_store(v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +} //===----------------------------------------------------------------------===// // Load/store exclusive instructions. @@ -8925,6 +8928,21 @@ def : St1Pat; def : St1Pat; def : St1Pat; +class St1PostPat + : Pat<(post_store ty:$Vt, GPR64sp:$Rn, (i64 off)), + (INST ty:$Vt, GPR64sp:$Rn, XZR)>; + +let Predicates = [IsBE] in { + def : St1PostPat; + def : St1PostPat; + def : St1PostPat; + def : St1PostPat; + def : St1PostPat; + def : St1PostPat; + def : St1PostPat; + def : St1PostPat; +} + //--- // Single-element //--- diff --git a/llvm/test/CodeGen/AArch64/vector-ldst-offset.ll b/llvm/test/CodeGen/AArch64/vector-ldst-offset.ll new file mode 100644 index 000000000000..b31ba46893bd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/vector-ldst-offset.ll @@ -0,0 +1,2108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 < %s -o - | FileCheck %s --check-prefixes=CHECK-LE +; RUN: llc -mtriple=aarch64_be < %s -o - | FileCheck %s --check-prefixes=CHECK-BE + +; Check that we use the correct offset mode for vector loads and stores, and in +; particular for big-endian we use ld1/st1 which only allows postindex immediate +; offset of the same size as the memory access size. +; FIXME: Currently we fail to make use of postindex register offset ld1/st1. 
+ +define [2 x ptr] @v8i8_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i8_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i8_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.8b }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x i8>, ptr %ldptr, align 2 + store <8 x i8> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i8_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i8_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i8_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #8 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] +; CHECK-BE-NEXT: st1 { v0.8b }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <8 x i8>, ptr %add.ldptr, align 2 + store <8 x i8> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i8_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i8_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i8_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: st1 { v0.8b }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x i8>, ptr %ldptr, align 2 + store <8 x i8> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i8_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i8_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i8_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] +; CHECK-BE-NEXT: st1 { v0.8b }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <8 x i8>, ptr %add.ldptr, align 2 + store <8 x i8> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i8_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v8i8_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i8_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.8b }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x i8>, ptr %ldptr, align 2 + store <8 x i8> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i8_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v8i8_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i8_preidx_reg: +; CHECK-BE: // %bb.0: // 
%entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] +; CHECK-BE-NEXT: st1 { v0.8b }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <8 x i8>, ptr %add.ldptr, align 2 + store <8 x i8> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i16_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i16_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i16_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.4h }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x i16>, ptr %ldptr, align 2 + store <4 x i16> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i16_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i16_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i16_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #8 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: st1 { v0.4h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <4 x i16>, ptr %add.ldptr, align 2 + store <4 x i16> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i16_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i16_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i16_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: st1 { v0.4h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x i16>, ptr %ldptr, align 2 + store <4 x i16> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i16_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i16_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i16_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: st1 { v0.4h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <4 x i16>, ptr %add.ldptr, align 2 + store <4 x i16> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i16_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4i16_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i16_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.4h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x i16>, ptr %ldptr, align 2 + store <4 x i16> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i16_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4i16_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i16_preidx_reg: +; CHECK-BE: // 
%bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: st1 { v0.4h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <4 x i16>, ptr %add.ldptr, align 2 + store <4 x i16> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i32_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i32_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i32_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.2s }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x i32>, ptr %ldptr, align 2 + store <2 x i32> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i32_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i32_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i32_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #8 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: st1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <2 x i32>, ptr %add.ldptr, align 2 + store <2 x i32> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i32_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i32_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i32_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: st1 { v0.2s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x i32>, ptr %ldptr, align 2 + store <2 x i32> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i32_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i32_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i32_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: st1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <2 x i32>, ptr %add.ldptr, align 2 + store <2 x i32> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i32_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2i32_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i32_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.2s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x i32>, ptr %ldptr, align 2 + store <2 x i32> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i32_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2i32_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i32_preidx_reg: +; CHECK-BE: // 
%bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: st1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <2 x i32>, ptr %add.ldptr, align 2 + store <2 x i32> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1i64_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1i64_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1i64_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.1d }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.1d }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <1 x i64>, ptr %ldptr, align 2 + store <1 x i64> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1i64_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1i64_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1i64_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0, #8]! 
+; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: str d0, [x8, #8] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <1 x i64>, ptr %add.ldptr, align 2 + store <1 x i64> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1i64_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1i64_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1i64_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0], #16 +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: str d0, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <1 x i64>, ptr %ldptr, align 2 + store <1 x i64> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1i64_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1i64_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1i64_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0, #16]! 
+; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: str d0, [x8, #16] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <1 x i64>, ptr %add.ldptr, align 2 + store <1 x i64> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1i64_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v1i64_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1i64_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: str d0, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <1 x i64>, ptr %ldptr, align 2 + store <1 x i64> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1i64_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v1i64_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1i64_preidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: ldr d0, [x0, x2] +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: 
str d0, [x8, x2] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <1 x i64>, ptr %add.ldptr, align 2 + store <1 x i64> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f16_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f16_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f16_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.4h }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x half>, ptr %ldptr, align 2 + store <4 x half> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f16_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f16_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f16_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #8 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: st1 { v0.4h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <4 x half>, ptr %add.ldptr, align 2 + store <4 x half> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f16_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f16_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f16_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: st1 { v0.4h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x half>, ptr %ldptr, align 2 + store <4 x half> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f16_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f16_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f16_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: st1 { v0.4h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <4 x half>, ptr %add.ldptr, align 2 + store <4 x half> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f16_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4f16_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f16_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.4h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x half>, ptr %ldptr, align 2 + store <4 x half> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f16_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4f16_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f16_preidx_reg: +; CHECK-BE: 
// %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.4h }, [x0] +; CHECK-BE-NEXT: st1 { v0.4h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <4 x half>, ptr %add.ldptr, align 2 + store <4 x half> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f32_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f32_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f32_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.2s }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x float>, ptr %ldptr, align 2 + store <2 x float> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f32_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f32_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f32_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #8 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: st1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <2 x float>, ptr %add.ldptr, align 2 + store <2 x float> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f32_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f32_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f32_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: st1 { v0.2s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x float>, ptr %ldptr, align 2 + store <2 x float> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f32_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f32_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f32_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: st1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <2 x float>, ptr %add.ldptr, align 2 + store <2 x float> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f32_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2f32_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f32_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.2s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x float>, ptr %ldptr, align 2 + store <2 x float> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f32_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2f32_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f32_preidx_reg: +; 
CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] +; CHECK-BE-NEXT: st1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <2 x float>, ptr %add.ldptr, align 2 + store <2 x float> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1f64_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1f64_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #8 +; CHECK-LE-NEXT: str d0, [x1], #8 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1f64_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.1d }, [x0], #8 +; CHECK-BE-NEXT: st1 { v0.1d }, [x1], #8 +; CHECK-BE-NEXT: ret +entry: + %val = load <1 x double>, ptr %ldptr, align 2 + store <1 x double> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1f64_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1f64_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #8]! +; CHECK-LE-NEXT: str d0, [x1, #8]! +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1f64_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0, #8]! 
+; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x1, x1, #8 +; CHECK-BE-NEXT: str d0, [x8, #8] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 8 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 8 + %val = load <1 x double>, ptr %add.ldptr, align 2 + store <1 x double> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1f64_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1f64_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0], #16 +; CHECK-LE-NEXT: str d0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1f64_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0], #16 +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: str d0, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <1 x double>, ptr %ldptr, align 2 + store <1 x double> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1f64_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v1f64_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0, #16]! +; CHECK-LE-NEXT: str d0, [x1, #16]! +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1f64_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0, #16]! 
+; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: str d0, [x8, #16] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <1 x double>, ptr %add.ldptr, align 2 + store <1 x double> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1f64_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v1f64_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr d0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1f64_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ldr d0, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: str d0, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <1 x double>, ptr %ldptr, align 2 + store <1 x double> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v1f64_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v1f64_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr d0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str d0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v1f64_preidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: ldr d0, [x0, x2] +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; 
CHECK-BE-NEXT: str d0, [x8, x2] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <1 x double>, ptr %add.ldptr, align 2 + store <1 x double> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v16i8_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v16i8_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v16i8_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.16b }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <16 x i8>, ptr %ldptr, align 2 + store <16 x i8> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v16i8_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v16i8_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v16i8_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] +; CHECK-BE-NEXT: st1 { v0.16b }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <16 x i8>, ptr %add.ldptr, align 2 + store <16 x i8> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v16i8_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v16i8_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v16i8_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.16b }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <16 x i8>, ptr %ldptr, align 2 + store <16 x i8> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v16i8_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v16i8_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v16i8_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] +; CHECK-BE-NEXT: st1 { v0.16b }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <16 x i8>, ptr %add.ldptr, align 2 + store <16 x i8> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v16i8_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v16i8_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v16i8_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.16b }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <16 x i8>, ptr %ldptr, align 2 + store <16 x i8> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v16i8_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v16i8_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v16i8_preidx_reg: +; CHECK-BE: 
// %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] +; CHECK-BE-NEXT: st1 { v0.16b }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <16 x i8>, ptr %add.ldptr, align 2 + store <16 x i8> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i16_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i16_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i16_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.8h }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x i16>, ptr %ldptr, align 2 + store <8 x i16> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i16_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i16_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i16_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: st1 { v0.8h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <8 x i16>, ptr %add.ldptr, align 2 + store <8 x i16> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i16_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i16_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i16_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.8h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x i16>, ptr %ldptr, align 2 + store <8 x i16> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i16_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8i16_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i16_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: st1 { v0.8h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <8 x i16>, ptr %add.ldptr, align 2 + store <8 x i16> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i16_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v8i16_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i16_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.8h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x i16>, ptr %ldptr, align 2 + store <8 x i16> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8i16_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v8i16_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8i16_preidx_reg: +; CHECK-BE: // 
%bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: st1 { v0.8h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <8 x i16>, ptr %add.ldptr, align 2 + store <8 x i16> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i32_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i32_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i32_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.4s }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x i32>, ptr %ldptr, align 2 + store <4 x i32> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i32_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i32_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i32_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <4 x i32>, ptr %add.ldptr, align 2 + store <4 x i32> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i32_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i32_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i32_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.4s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x i32>, ptr %ldptr, align 2 + store <4 x i32> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i32_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4i32_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i32_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <4 x i32>, ptr %add.ldptr, align 2 + store <4 x i32> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i32_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4i32_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i32_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.4s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x i32>, ptr %ldptr, align 2 + store <4 x i32> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4i32_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4i32_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4i32_preidx_reg: +; CHECK-BE: // 
%bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <4 x i32>, ptr %add.ldptr, align 2 + store <4 x i32> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i64_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i64_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i64_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.2d }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x i64>, ptr %ldptr, align 2 + store <2 x i64> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i64_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i64_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i64_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <2 x i64>, ptr %add.ldptr, align 2 + store <2 x i64> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i64_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i64_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i64_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.2d }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x i64>, ptr %ldptr, align 2 + store <2 x i64> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i64_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2i64_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i64_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <2 x i64>, ptr %add.ldptr, align 2 + store <2 x i64> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i64_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2i64_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i64_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.2d }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x i64>, ptr %ldptr, align 2 + store <2 x i64> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2i64_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2i64_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2i64_preidx_reg: +; CHECK-BE: // 
%bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <2 x i64>, ptr %add.ldptr, align 2 + store <2 x i64> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8f16_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8f16_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8f16_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.8h }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x half>, ptr %ldptr, align 2 + store <8 x half> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8f16_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8f16_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8f16_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: st1 { v0.8h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <8 x half>, ptr %add.ldptr, align 2 + store <8 x half> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8f16_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8f16_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8f16_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.8h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x half>, ptr %ldptr, align 2 + store <8 x half> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8f16_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v8f16_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8f16_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: st1 { v0.8h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <8 x half>, ptr %add.ldptr, align 2 + store <8 x half> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8f16_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v8f16_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8f16_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.8h }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <8 x half>, ptr %ldptr, align 2 + store <8 x half> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v8f16_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v8f16_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v8f16_preidx_reg: +; CHECK-BE: 
// %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.8h }, [x0] +; CHECK-BE-NEXT: st1 { v0.8h }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <8 x half>, ptr %add.ldptr, align 2 + store <8 x half> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f32_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f32_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f32_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.4s }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x float>, ptr %ldptr, align 2 + store <4 x float> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f32_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f32_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f32_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <4 x float>, ptr %add.ldptr, align 2 + store <4 x float> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f32_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f32_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f32_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.4s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x float>, ptr %ldptr, align 2 + store <4 x float> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f32_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v4f32_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f32_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <4 x float>, ptr %add.ldptr, align 2 + store <4 x float> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f32_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4f32_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f32_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.4s }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <4 x float>, ptr %ldptr, align 2 + store <4 x float> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v4f32_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v4f32_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v4f32_preidx_reg: +; 
CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: st1 { v0.4s }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <4 x float>, ptr %add.ldptr, align 2 + store <4 x float> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f64_postidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f64_postidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #16 +; CHECK-LE-NEXT: str q0, [x1], #16 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f64_postidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0], #16 +; CHECK-BE-NEXT: st1 { v0.2d }, [x1], #16 +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x double>, ptr %ldptr, align 2 + store <2 x double> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f64_preidx_same_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f64_preidx_same_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #16]! +; CHECK-LE-NEXT: str q0, [x1, #16]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f64_preidx_same_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #16 +; CHECK-BE-NEXT: add x1, x1, #16 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 16 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 16 + %val = load <2 x double>, ptr %add.ldptr, align 2 + store <2 x double> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f64_postidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f64_postidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0], #32 +; CHECK-LE-NEXT: str q0, [x1], #32 +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f64_postidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: st1 { v0.2d }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x double>, ptr %ldptr, align 2 + store <2 x double> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f64_preidx_different_size(ptr %ldptr, ptr %stptr) { +; CHECK-LE-LABEL: v2f64_preidx_different_size: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0, #32]! +; CHECK-LE-NEXT: str q0, [x1, #32]! 
+; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f64_preidx_different_size: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: add x1, x1, #32 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 32 + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 32 + %val = load <2 x double>, ptr %add.ldptr, align 2 + store <2 x double> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f64_postidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2f64_postidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: ldr q0, [x0] +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f64_postidx_reg: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: mov x8, x1 +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: st1 { v0.2d }, [x8] +; CHECK-BE-NEXT: ret +entry: + %val = load <2 x double>, ptr %ldptr, align 2 + store <2 x double> %val, ptr %stptr, align 2 + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} + +define [2 x ptr] @v2f64_preidx_reg(ptr %ldptr, ptr %stptr, i64 %off) { +; CHECK-LE-LABEL: v2f64_preidx_reg: +; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: mov x8, x1 +; CHECK-LE-NEXT: ldr q0, [x0, x2] +; CHECK-LE-NEXT: add x0, x0, x2 +; CHECK-LE-NEXT: add x1, x1, x2 +; CHECK-LE-NEXT: str q0, [x8, x2] +; CHECK-LE-NEXT: ret +; +; CHECK-BE-LABEL: v2f64_preidx_reg: +; 
CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: add x0, x0, x2 +; CHECK-BE-NEXT: add x1, x1, x2 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] +; CHECK-BE-NEXT: ret +entry: + %add.ldptr = getelementptr inbounds nuw i8, ptr %ldptr, i64 %off + %add.stptr = getelementptr inbounds nuw i8, ptr %stptr, i64 %off + %val = load <2 x double>, ptr %add.ldptr, align 2 + store <2 x double> %val, ptr %add.stptr, align 2 + %ret1 = insertvalue [2 x ptr] poison, ptr %add.ldptr, 0 + %ret2 = insertvalue [2 x ptr] %ret1, ptr %add.stptr, 1 + ret [2 x ptr] %ret2 +} diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll index 68a9dff81232..2a37183c47d5 100644 --- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll @@ -2835,14 +2835,13 @@ define i32 @test_widening_instr_mull(ptr %p1, ptr %p2, i32 %h) { ; CHECK-BE: // %bb.0: // %entry ; CHECK-BE-NEXT: .LBB24_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: ld1 { v0.16b }, [x1] -; CHECK-BE-NEXT: ld1 { v1.8h }, [x0] +; CHECK-BE-NEXT: ld1 { v0.16b }, [x1], #16 ; CHECK-BE-NEXT: add x8, x0, #16 +; CHECK-BE-NEXT: ld1 { v1.8h }, [x0] ; CHECK-BE-NEXT: ld1 { v3.8h }, [x8] ; CHECK-BE-NEXT: add x9, x0, #48 ; CHECK-BE-NEXT: add x10, x0, #32 ; CHECK-BE-NEXT: subs w2, w2, #1 -; CHECK-BE-NEXT: add x1, x1, #16 ; CHECK-BE-NEXT: ushll v2.8h, v0.8b, #0 ; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0 ; CHECK-BE-NEXT: umull v4.4s, v1.4h, v2.4h @@ -3094,7 +3093,7 @@ define i32 @test_widening_instr_mull_2(ptr %p1, ptr %p2, i32 %h) { ; CHECK-BE-NEXT: ld1 { v3.16b }, [x8] ; CHECK-BE-NEXT: .LBB26_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: ld1 { v4.16b }, [x1] +; CHECK-BE-NEXT: ld1 { v4.16b }, [x1], #16 ; CHECK-BE-NEXT: add x8, x0, #32 ; CHECK-BE-NEXT: ld1 { v16.4s }, [x0] ; CHECK-BE-NEXT: add x9, x0, #48 @@ -3107,7 +3106,6 @@ define i32 @test_widening_instr_mull_2(ptr %p1, ptr %p2, i32 %h) 
{ ; CHECK-BE-NEXT: tbl v6.16b, { v4.16b }, v3.16b ; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v2.16b ; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v0.16b -; CHECK-BE-NEXT: add x1, x1, #16 ; CHECK-BE-NEXT: rev32 v5.16b, v5.16b ; CHECK-BE-NEXT: rev32 v6.16b, v6.16b ; CHECK-BE-NEXT: rev32 v7.16b, v7.16b @@ -3175,19 +3173,18 @@ define i32 @mul_zext_16i8_sext_16i8(ptr %p1, ptr %p2, i32 %h) { ; CHECK-BE: // %bb.0: // %entry ; CHECK-BE-NEXT: .LBB27_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] -; CHECK-BE-NEXT: ld1 { v1.16b }, [x1] +; CHECK-BE-NEXT: ld1 { v0.16b }, [x1], #16 ; CHECK-BE-NEXT: add x8, x0, #48 +; CHECK-BE-NEXT: ld1 { v1.16b }, [x0] ; CHECK-BE-NEXT: subs w2, w2, #1 -; CHECK-BE-NEXT: add x1, x1, #16 -; CHECK-BE-NEXT: sshll2 v2.8h, v0.16b, #0 -; CHECK-BE-NEXT: ushll2 v3.8h, v1.16b, #0 -; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-BE-NEXT: sshll2 v2.8h, v1.16b, #0 +; CHECK-BE-NEXT: ushll2 v3.8h, v0.16b, #0 +; CHECK-BE-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-BE-NEXT: smull2 v4.4s, v2.8h, v3.8h ; CHECK-BE-NEXT: smull v2.4s, v2.4h, v3.4h -; CHECK-BE-NEXT: smull v3.4s, v0.4h, v1.4h -; CHECK-BE-NEXT: smull2 v0.4s, v0.8h, v1.8h +; CHECK-BE-NEXT: smull v3.4s, v1.4h, v0.4h +; CHECK-BE-NEXT: smull2 v0.4s, v1.8h, v0.8h ; CHECK-BE-NEXT: st1 { v4.4s }, [x8] ; CHECK-BE-NEXT: add x8, x0, #32 ; CHECK-BE-NEXT: st1 { v3.4s }, [x0] @@ -3249,14 +3246,13 @@ define i32 @mul_zext_16i8_sext_16i16(ptr %p1, ptr %p2, i32 %h) { ; CHECK-BE: // %bb.0: // %entry ; CHECK-BE-NEXT: .LBB28_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: ld1 { v0.16b }, [x1] -; CHECK-BE-NEXT: ld1 { v1.8h }, [x0] +; CHECK-BE-NEXT: ld1 { v0.16b }, [x1], #16 ; CHECK-BE-NEXT: add x8, x0, #16 +; CHECK-BE-NEXT: ld1 { v1.8h }, [x0] ; CHECK-BE-NEXT: ld1 { v3.8h }, [x8] ; CHECK-BE-NEXT: add x9, x0, #48 ; CHECK-BE-NEXT: add x10, x0, #32 ; CHECK-BE-NEXT: 
subs w2, w2, #1 -; CHECK-BE-NEXT: add x1, x1, #16 ; CHECK-BE-NEXT: ushll v2.8h, v0.8b, #0 ; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0 ; CHECK-BE-NEXT: smull v4.4s, v1.4h, v2.4h -- cgit v1.2.3 From 3af4d4e8100fda2a7e1bd0dbbe0914b584ad08d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Wed, 18 Jun 2025 17:26:40 +0200 Subject: [HLSL][SPIR-V] Fix LinkageAttribute emission for BuiltIn (#144701) BuiltIn variables were missing the visibility attribute, which caused the Linkage capability to be emitted by the backend. --- clang/lib/CodeGen/CGHLSLRuntime.cpp | 1 + .../test/CodeGenHLSL/semantics/SV_Position.ps.hlsl | 2 +- .../CodeGen/SPIRV/linkage/link-attribute-vk.ll | 23 ++++++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/SPIRV/linkage/link-attribute-vk.ll diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 585411bc59e1..34960c34e109 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -375,6 +375,7 @@ static llvm::Value *createSPIRVBuiltinLoad(IRBuilder<> &B, llvm::Module &M, llvm::GlobalVariable::GeneralDynamicTLSModel, /* AddressSpace */ 7, /* isExternallyInitialized= */ true); addSPIRVBuiltinDecoration(GV, BuiltInID); + GV->setVisibility(llvm::GlobalValue::HiddenVisibility); return B.CreateLoad(Ty, GV); } diff --git a/clang/test/CodeGenHLSL/semantics/SV_Position.ps.hlsl b/clang/test/CodeGenHLSL/semantics/SV_Position.ps.hlsl index 58b91fc9264d..bdba38e028ed 100644 --- a/clang/test/CodeGenHLSL/semantics/SV_Position.ps.hlsl +++ b/clang/test/CodeGenHLSL/semantics/SV_Position.ps.hlsl @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -triple spirv-unknown-vulkan1.3-pixel -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s -// CHECK: @sv_position = external thread_local addrspace(7) externally_initialized constant <4 x float>, !spirv.Decorations !0 +// CHECK: @sv_position = external hidden 
thread_local addrspace(7) externally_initialized constant <4 x float>, !spirv.Decorations !0 // CHECK: define void @main() {{.*}} { float4 main(float4 p : SV_Position) { diff --git a/llvm/test/CodeGen/SPIRV/linkage/link-attribute-vk.ll b/llvm/test/CodeGen/SPIRV/linkage/link-attribute-vk.ll new file mode 100644 index 000000000000..d4ba61ff58d3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/linkage/link-attribute-vk.ll @@ -0,0 +1,23 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-vulkan1.3-pixel %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan1.3-pixel %s -o - -filetype=obj | spirv-val --target-env vulkan1.3 %} + +@sv_position = external hidden thread_local local_unnamed_addr addrspace(7) externally_initialized constant <4 x float>, !spirv.Decorations !0 + +; CHECK-NOT: OpDecorate %[[#var]] LinkageAttributes "sv_position" Import + +; CHECK-DAG: %[[#float:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#float4:]] = OpTypeVector %[[#float]] +; CHECK-DAG: %[[#type:]] = OpTypePointer Input %[[#float4]] +; CHECK-DAG: %[[#var:]] = OpVariable %[[#type]] Input + +; CHECK-NOT: OpDecorate %[[#var]] LinkageAttributes "sv_position" Import + +define void @main() #1 { +entry: + ret void +} + +attributes #1 = { "hlsl.shader"="pixel" } + +!0 = !{!1} +!1 = !{i32 11, i32 0} -- cgit v1.2.3 From 8b8a3699dbdbb5d7865b0fe330d972c3fa380f1e Mon Sep 17 00:00:00 2001 From: Graham Hunter Date: Wed, 18 Jun 2025 16:27:29 +0100 Subject: [AArch64] Use dupq (SVE2.1) for segmented lane splats (#144482) Use the dupq instructions (when available) to represent a splat of the same lane within each 128b segment of a wider fixed vector. 
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 37 +++++++ .../test/CodeGen/AArch64/sve2p1-vector-shuffles.ll | 115 +++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 22c497d3de64..0e28ccd0f655 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13392,6 +13392,30 @@ static bool isUZP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult) { return true; } +/// isDUPQMask - matches a splat of equivalent lanes within 128b segments in +/// the first vector operand. +static std::optional isDUPQMask(ArrayRef M, EVT VT) { + assert(VT.getFixedSizeInBits() % 128 == 0 && "Unsupported SVE vector size"); + unsigned Lane = (unsigned)M[0]; + unsigned Segments = VT.getFixedSizeInBits() / 128; + unsigned SegmentElts = VT.getVectorNumElements() / Segments; + + // Make sure there's no size changes. + if (SegmentElts * Segments != M.size()) + return std::nullopt; + + // Check the first index corresponds to one of the lanes in the first segment. + if (Lane >= SegmentElts) + return std::nullopt; + + // Check that all lanes match the first, adjusted for segment. + for (unsigned I = 0; I < M.size(); ++I) + if ((unsigned)M[I] != (Lane + ((I / SegmentElts) * SegmentElts))) + return std::nullopt; + + return Lane; +} + /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 
@@ -29981,6 +30005,19 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE( return convertFromScalableVector( DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1)); } + + if (Subtarget->hasSVE2p1()) { + if (std::optional Lane = isDUPQMask(ShuffleMask, VT)) { + SDValue IID = + DAG.getConstant(Intrinsic::aarch64_sve_dup_laneq, DL, MVT::i64); + return convertFromScalableVector( + DAG, VT, + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT, + {IID, Op1, + DAG.getConstant(*Lane, DL, MVT::i64, + /*isTarget=*/true)})); + } + } } // Try to widen the shuffle before generating a possibly expensive SVE TBL. diff --git a/llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll b/llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll new file mode 100644 index 000000000000..40d4d0ff6014 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll @@ -0,0 +1,115 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s + +define void @dupq_i8_256b(ptr %addr) #0 { +; CHECK-LABEL: dupq_i8_256b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: dupq z0.b, z0.b[15] +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %load = load <32 x i8>, ptr %addr + %splat.lanes = shufflevector <32 x i8> %load, <32 x i8> poison, <32 x i32> + store <32 x i8> %splat.lanes, ptr %addr + ret void +} + +define void @dupq_i16_256b(ptr %addr) #0 { +; CHECK-LABEL: dupq_i16_256b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: dupq z0.h, z0.h[2] +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %load = load <16 x i16>, ptr %addr + %splat.lanes = shufflevector <16 x i16> %load, <16 x i16> poison, <16 x i32> + store <16 x i16> %splat.lanes, ptr %addr + ret void +} + +define void @dupq_i32_256b(ptr %addr) #0 { +; CHECK-LABEL: dupq_i32_256b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: dupq z0.s, z0.s[3] +; CHECK-NEXT: str z0, [x0] +; 
CHECK-NEXT: ret + %load = load <8 x i32>, ptr %addr + %splat.lanes = shufflevector <8 x i32> %load, <8 x i32> poison, <8 x i32> + store <8 x i32> %splat.lanes, ptr %addr + ret void +} + +define void @dupq_i64_256b(ptr %addr) #0 { +; CHECK-LABEL: dupq_i64_256b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: trn1 z0.d, z0.d, z0.d +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %load = load <4 x i64>, ptr %addr + %splat.lanes = shufflevector <4 x i64> %load, <4 x i64> poison, <4 x i32> + store <4 x i64> %splat.lanes, ptr %addr + ret void +} + +define void @dupq_f16_256b(ptr %addr) #0 { +; CHECK-LABEL: dupq_f16_256b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: dupq z0.h, z0.h[2] +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %load = load <16 x half>, ptr %addr + %splat.lanes = shufflevector <16 x half> %load, <16 x half> poison, <16 x i32> + store <16 x half> %splat.lanes, ptr %addr + ret void +} + +define void @dupq_bf16_256b(ptr %addr) #0 { +; CHECK-LABEL: dupq_bf16_256b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: dup v0.8h, v0.h[2] +; CHECK-NEXT: dup v1.8h, v1.h[2] +; CHECK-NEXT: stp q0, q1, [x0] +; CHECK-NEXT: ret + %load = load <16 x bfloat>, ptr %addr + %splat.lanes = shufflevector <16 x bfloat> %load, <16 x bfloat> poison, <16 x i32> + store <16 x bfloat> %splat.lanes, ptr %addr + ret void +} + +define void @dupq_f32_256b(ptr %addr) #0 { +; CHECK-LABEL: dupq_f32_256b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: dupq z0.s, z0.s[3] +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %load = load <8 x float>, ptr %addr + %splat.lanes = shufflevector <8 x float> %load, <8 x float> poison, <8 x i32> + store <8 x float> %splat.lanes, ptr %addr + ret void +} + +define void @dupq_f64_256b(ptr %addr) #0 { +; CHECK-LABEL: dupq_f64_256b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: trn1 z0.d, z0.d, z0.d +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: ret + %load = load <4 x double>, 
ptr %addr + %splat.lanes = shufflevector <4 x double> %load, <4 x double> poison, <4 x i32> + store <4 x double> %splat.lanes, ptr %addr + ret void +} + +attributes #0 = { noinline vscale_range(2,2) "target-features"="+sve2p1,+bf16" } -- cgit v1.2.3 From 9da9d32670ddbf610f0788236e78b2382037f00b Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Wed, 18 Jun 2025 11:30:53 -0400 Subject: [AMDGPU][True16][CodeGen] sext i16 inreg in true16 mode (#144024) update sext pattern in true16, setting up proper vgpr16 reg use --- llvm/lib/Target/AMDGPU/SIInstructions.td | 19 ++ llvm/lib/Target/AMDGPU/VOP3Instructions.td | 24 ++ llvm/test/CodeGen/AMDGPU/idot4s.ll | 69 +++--- llvm/test/CodeGen/AMDGPU/idot4u.ll | 79 +++---- llvm/test/CodeGen/AMDGPU/llvm.frexp.ll | 64 ++--- .../CodeGen/AMDGPU/sext-in-reg-vector-shuffle.ll | 86 +++++++ llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll | 260 ++++++++++++--------- llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll | 260 ++++++++++++--------- 8 files changed, 521 insertions(+), 340 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/sext-in-reg-vector-shuffle.ll diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 897c30948cf0..56b15c11a669 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2623,6 +2623,8 @@ def : GCNPat< (i32 (DivergentSextInreg i32:$src)), (V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>; +foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in +let True16Predicate = p in { def : GCNPat < (i16 (DivergentSextInreg i16:$src)), (V_BFE_I32_e64 $src, (i32 0), (i32 1)) @@ -2632,6 +2634,23 @@ def : GCNPat < (i16 (DivergentSextInreg i16:$src)), (V_BFE_I32_e64 $src, (i32 0), (i32 8)) >; +} + +let True16Predicate = UseRealTrue16Insts in { +def : GCNPat < + (i16 (DivergentSextInreg i16:$src)), + (V_BFE_I32_e64 + (REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16), + (i32 0), (i32 1)) +>; + +def : GCNPat < + (i16 
(DivergentSextInreg i16:$src)), + (V_BFE_I32_e64 + (REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16), + (i32 0), (i32 8)) +>; +} def : GCNPat< (i32 (DivergentSextInreg i32:$src)), diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 2dbc119f65cd..89a9ecc27c6e 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -319,11 +319,21 @@ let SchedRW = [Write64Bit] in { } // End SchedRW = [Write64Bit] } // End isReMaterializable = 1 +foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in +let True16Predicate = p in def : GCNPat< (i32 (DivergentUnaryFrag i16:$src)), (i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10))) >; +let True16Predicate = UseRealTrue16Insts in +def : GCNPat< + (i32 (DivergentUnaryFrag i16:$src)), + (i32 (V_BFE_I32_e64 + (REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16), + (i32 0), (i32 0x10))) +>; + let isReMaterializable = 1 in { let SubtargetPredicate = isGFX6GFX7GFX10Plus in { defm V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile>; @@ -423,6 +433,8 @@ def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1 +foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in +let True16Predicate = p in def : GCNPat< (i64 (DivergentUnaryFrag i16:$src)), (REG_SEQUENCE VReg_64, @@ -432,6 +444,18 @@ def : GCNPat< ), VGPR_32)), sub1) >; +let True16Predicate = UseRealTrue16Insts in +def : GCNPat< + (i64 (DivergentUnaryFrag i16:$src)), + (REG_SEQUENCE VReg_64, + (i32 (V_BFE_I32_e64 + (REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16), + (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10)))), sub0, + (i32 (COPY_TO_REGCLASS + (V_ASHRREV_I32_e32 (S_MOV_B32 (i32 0x1f)), (i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10)))) + ), VGPR_32)), sub1) +>; + let SubtargetPredicate = isGFX8Plus, Uses = [MODE, 
M0, EXEC], OtherPredicates = [isNotGFX90APlus] in { def V_INTERP_P1_F32_e64 : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>; def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>; diff --git a/llvm/test/CodeGen/AMDGPU/idot4s.ll b/llvm/test/CodeGen/AMDGPU/idot4s.ll index 9e7968f1acb8..ab38bd21994e 100644 --- a/llvm/test/CodeGen/AMDGPU/idot4s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4s.ll @@ -1165,35 +1165,32 @@ define amdgpu_kernel void @idot4_acc16_vecMul(ptr addrspace(1) %src1, ; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DL-TRUE16-NEXT: s_clause 0x1 -; GFX11-DL-TRUE16-NEXT: global_load_b32 v1, v0, s[2:3] -; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[0:1] +; GFX11-DL-TRUE16-NEXT: global_load_b32 v1, v0, s[0:1] +; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[2:3] ; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v3, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.l +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v2.l -; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v6.h, 8, v2.l -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.h -; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v8.h, 8, v1.l -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v9.l, v1.h -; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v2.h, 8, v2.h +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v2, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v4.h, 8, v1.l +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h +; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v7.h, 8, v2.l +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v2.h +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 ; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v1.h, 8, v1.h -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v8.l, 
v4.l -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v5.l -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v7, 0, 8 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v6, v6, v8 -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v2, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_ashrrev_i16 v2.h, 8, v2.h +; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v4, v4, v7 +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v6.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.l ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v6.l, v0.l +; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v4.l, v0.l ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v2, v1 -; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v6.h +; GFX11-DL-TRUE16-NEXT: v_pk_mul_lo_u16 v1, v1, v2 +; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v4.h ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l ; GFX11-DL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v1.h @@ -3435,35 +3432,31 @@ define amdgpu_kernel void @idot4_nonstandard_signed(ptr addrspace(1) %src1, ; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[0:1] ; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[2:3] ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2 +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v2 +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 8 ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-DL-TRUE16-NEXT: 
v_lshrrev_b32_e32 v6, 8, v3 -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v2.h -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 24, v2 -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v0, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v1.l ; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v3.l -; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v3 +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v2.h +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v3.h +; GFX11-DL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v1.l, v0.l +; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v5.l +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v7, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 24, v2 ; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v3 ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-DL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v1.l, v0.l -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.h, v2.l, v0.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v3.l, v0.l ; 
GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX11-DL-TRUE16-NEXT: global_store_b32 v1, v0, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/idot4u.ll b/llvm/test/CodeGen/AMDGPU/idot4u.ll index f995f426c637..5e502882a264 100644 --- a/llvm/test/CodeGen/AMDGPU/idot4u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4u.ll @@ -1669,40 +1669,38 @@ define amdgpu_kernel void @notdot4_mixedtypes(ptr addrspace(1) %src1, ; GFX11-DL-TRUE16-LABEL: notdot4_mixedtypes: ; GFX11-DL-TRUE16: ; %bb.0: ; %entry ; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-DL-TRUE16-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v6, 0 +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DL-TRUE16-NEXT: s_clause 0x1 -; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[0:1] -; GFX11-DL-TRUE16-NEXT: global_load_b32 v4, v0, s[2:3] -; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v5, s[4:5] +; GFX11-DL-TRUE16-NEXT: global_load_b32 v4, v0, s[0:1] +; GFX11-DL-TRUE16-NEXT: global_load_b32 v5, v0, s[2:3] +; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v6, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v3 +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v4 ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v4 -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v6.l, v3.l -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.l +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v5 +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8 ; 
GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v2.l -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v7, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v6.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v2.l, v0.l -; GFX11-DL-TRUE16-NEXT: v_perm_b32 v1, v4, v4, 0xc0c0302 -; GFX11-DL-TRUE16-NEXT: v_perm_b32 v2, v3, v3, 0xc0c0302 +; GFX11-DL-TRUE16-NEXT: v_perm_b32 v1, v5, v5, 0xc0c0302 +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v3.l, v0.l +; GFX11-DL-TRUE16-NEXT: v_perm_b32 v2, v4, v4, 0xc0c0302 ; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DL-TRUE16-NEXT: v_dot4_u32_u8 v0, v2, v1, v0 -; GFX11-DL-TRUE16-NEXT: global_store_b16 v5, v0, s[4:5] +; GFX11-DL-TRUE16-NEXT: global_store_b16 v6, v0, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_endpgm ; ; GFX11-DL-FAKE16-LABEL: notdot4_mixedtypes: @@ -1964,44 +1962,41 @@ define amdgpu_kernel void @notdot4_mixedtypes2(ptr addrspace(1) %src1, ; GFX11-DL-TRUE16-LABEL: notdot4_mixedtypes2: ; GFX11-DL-TRUE16: ; %bb.0: ; %entry ; GFX11-DL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX11-DL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-DL-TRUE16-NEXT: 
v_dual_mov_b32 v5, 0 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX11-DL-TRUE16-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 -; GFX11-DL-TRUE16-NEXT: v_mov_b32_e32 v4, 0 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-DL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-DL-TRUE16-NEXT: s_clause 0x1 -; GFX11-DL-TRUE16-NEXT: global_load_b32 v2, v0, s[2:3] -; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[0:1] -; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v4, s[4:5] +; GFX11-DL-TRUE16-NEXT: global_load_b32 v3, v0, s[2:3] +; GFX11-DL-TRUE16-NEXT: global_load_b32 v4, v0, s[0:1] +; GFX11-DL-TRUE16-NEXT: global_load_d16_b16 v0, v5, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(2) -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2 +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v3 ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(1) -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v3.l -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v3 -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.h -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v3 +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v4 +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v6, v4, 0, 8 +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.h ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l -; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v2 -; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l +; GFX11-DL-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v3.l ; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v5.l +; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v6.l +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 24, v3 ; GFX11-DL-TRUE16-NEXT: s_waitcnt vmcnt(0) ; 
GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v0.l -; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l -; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.h +; GFX11-DL-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v3.h ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-DL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 24, v4 +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v2.l, v1.h, v0.l -; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8 -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-DL-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 ; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.h, v0.l +; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-DL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l -; GFX11-DL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-DL-TRUE16-NEXT: v_mad_u16 v0.l, v3.l, v1.l, v0.l -; GFX11-DL-TRUE16-NEXT: global_store_b16 v4, v0, s[4:5] +; GFX11-DL-TRUE16-NEXT: global_store_b16 v5, v0, s[4:5] ; GFX11-DL-TRUE16-NEXT: s_endpgm ; ; GFX11-DL-FAKE16-LABEL: notdot4_mixedtypes2: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll index f44faf4f7edb..3a4bf1c81ed5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll @@ -424,15 +424,15 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { ; GFX11-SDAG-TRUE16-LABEL: test_frexp_v2f16_v2i32: ; GFX11-SDAG-TRUE16: ; %bb.0: ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v1.l, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.h -; GFX11-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.h, v0.h -; GFX11-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.l, v0.l -; 
GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 16 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX11-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.h, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v3.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1.h, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 16 ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 16 ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-SDAG-FAKE16-LABEL: test_frexp_v2f16_v2i32: @@ -457,15 +457,15 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v1.l, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.h -; GFX12-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.h, v0.h -; GFX12-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.l, v0.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 16 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX12-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.h, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v3.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1.h, v1.l +; 
GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 16 ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX12-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 16 ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-FAKE16-LABEL: test_frexp_v2f16_v2i32: @@ -534,15 +534,15 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { ; GFX11-GISEL-TRUE16-LABEL: test_frexp_v2f16_v2i32: ; GFX11-GISEL-TRUE16: ; %bb.0: ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v1.l, v0.l -; GFX11-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.h -; GFX11-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.l, v0.l -; GFX11-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.h, v0.h -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 16 -; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX11-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.l, v0.l +; GFX11-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.h, v0.h +; GFX11-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.l +; GFX11-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v3.l, v0.h +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v1.l, v1.h +; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 16 ; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h +; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 16 ; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-FAKE16-LABEL: test_frexp_v2f16_v2i32: @@ -567,15 +567,15 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { ; GFX12-GISEL-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-GISEL-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-TRUE16-NEXT: 
v_frexp_exp_i16_f16_e32 v1.l, v0.l -; GFX12-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.h -; GFX12-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.l, v0.l -; GFX12-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v0.h, v0.h -; GFX12-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-GISEL-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 16 -; GFX12-GISEL-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX12-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.l, v0.l +; GFX12-GISEL-TRUE16-NEXT: v_frexp_mant_f16_e32 v1.h, v0.h +; GFX12-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v2.l, v0.l +; GFX12-GISEL-TRUE16-NEXT: v_frexp_exp_i16_f16_e32 v3.l, v0.h +; GFX12-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v1.l, v1.h +; GFX12-GISEL-TRUE16-NEXT: v_bfe_i32 v1, v2, 0, 16 ; GFX12-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX12-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h +; GFX12-GISEL-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 16 ; GFX12-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-GISEL-FAKE16-LABEL: test_frexp_v2f16_v2i32: diff --git a/llvm/test/CodeGen/AMDGPU/sext-in-reg-vector-shuffle.ll b/llvm/test/CodeGen/AMDGPU/sext-in-reg-vector-shuffle.ll new file mode 100644 index 000000000000..49dec15f9f7d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sext-in-reg-vector-shuffle.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11-FAKE16 %s + +define amdgpu_kernel void @v_sext_in_reg_i8_i16_shuffle_vector(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { +; +; GFX11-TRUE16-LABEL: v_sext_in_reg_i8_i16_shuffle_vector: +; GFX11-TRUE16: ; %bb.0: +; 
GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-TRUE16-NEXT: global_load_b64 v[1:2], v0, s[2:3] +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v1.h +; GFX11-TRUE16-NEXT: v_bfe_i32 v7, v2, 0, 8 +; GFX11-TRUE16-NEXT: v_ashrrev_i32_e32 v5, 24, v2 +; GFX11-TRUE16-NEXT: v_ashrrev_i32_e32 v6, 24, v1 +; GFX11-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8 +; GFX11-TRUE16-NEXT: v_ashrrev_i16 v0.l, 8, v1.l +; GFX11-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX11-TRUE16-NEXT: v_ashrrev_i16 v0.h, 8, v2.l +; GFX11-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v8.l +; GFX11-TRUE16-NEXT: v_cvt_f16_i16_e32 v0.l, v0.l +; GFX11-TRUE16-NEXT: v_cvt_f16_i16_e32 v0.h, v0.h +; GFX11-TRUE16-NEXT: v_cvt_f16_i16_e32 v2.h, v6.l +; GFX11-TRUE16-NEXT: v_cvt_f16_i16_e32 v4.h, v5.l +; GFX11-TRUE16-NEXT: v_cvt_f16_i16_e32 v1.l, v1.l +; GFX11-TRUE16-NEXT: v_cvt_f16_i16_e32 v1.h, v2.l +; GFX11-TRUE16-NEXT: v_cvt_f16_i16_e32 v2.l, v3.l +; GFX11-TRUE16-NEXT: v_cvt_f16_i16_e32 v4.l, v4.l +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v5, 0 +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v3, v0.l, v1.l +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v1, v0.h, v1.h +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v2, v2.h, v2.l +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v4.h, v4.l +; GFX11-TRUE16-NEXT: global_store_b128 v5, v[0:3], s[0:1] +; GFX11-TRUE16-NEXT: s_endpgm +; +; GFX11-FAKE16-LABEL: v_sext_in_reg_i8_i16_shuffle_vector: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-FAKE16-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-FAKE16-NEXT: global_load_b64 v[0:1], v0, s[2:3] +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; GFX11-FAKE16-NEXT: 
v_lshrrev_b32_e32 v4, 16, v0 +; GFX11-FAKE16-NEXT: v_ashrrev_i32_e32 v2, 24, v1 +; GFX11-FAKE16-NEXT: v_ashrrev_i32_e32 v5, 24, v0 +; GFX11-FAKE16-NEXT: v_ashrrev_i16 v6, 8, v1 +; GFX11-FAKE16-NEXT: v_bfe_i32 v7, v0, 0, 8 +; GFX11-FAKE16-NEXT: v_ashrrev_i16 v0, 8, v0 +; GFX11-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 +; GFX11-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8 +; GFX11-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX11-FAKE16-NEXT: v_cvt_f16_i16_e32 v7, v7 +; GFX11-FAKE16-NEXT: v_cvt_f16_i16_e32 v0, v0 +; GFX11-FAKE16-NEXT: v_cvt_f16_i16_e32 v1, v1 +; GFX11-FAKE16-NEXT: v_cvt_f16_i16_e32 v6, v6 +; GFX11-FAKE16-NEXT: v_cvt_f16_i16_e32 v5, v5 +; GFX11-FAKE16-NEXT: v_cvt_f16_i16_e32 v8, v2 +; GFX11-FAKE16-NEXT: v_cvt_f16_i16_e32 v2, v4 +; GFX11-FAKE16-NEXT: v_cvt_f16_i16_e32 v4, v3 +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v3, v0, v7 +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v1, v6, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v2, v5, v2 +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v8, v4 +; GFX11-FAKE16-NEXT: global_store_b128 v9, v[0:3], s[0:1] +; GFX11-FAKE16-NEXT: s_endpgm + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %in.gep = getelementptr <{ [0 x i8] }>, ptr addrspace(1) %ptr, i64 0, i32 0, i32 %tid + %load = load <8 x i8>, ptr addrspace(1) %in.gep + %shuff = shufflevector <8 x i8> %load, <8 x i8> poison, <8 x i32> + %cast = sitofp <8 x i8> %shuff to <8 x half> + store <8 x half> %cast, ptr addrspace(1) %out + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll index 2d4c881b855e..16fbd1eabb30 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-smax.ll @@ -208,11 +208,16 @@ define i8 @test_vector_reduce_smax_v3i8(<3 x i8> %v) { ; GFX11-SDAG-TRUE16-LABEL: 
test_vector_reduce_smax_v3i8: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v0.l, 0xff80 ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -245,11 +250,16 @@ define i8 @test_vector_reduce_smax_v3i8(<3 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v0.l, 0xff80 ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -790,7 +800,7 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) { ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8 @@ -798,24 +808,27 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) { ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v1.l, v5.l, v3.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8 +; 
GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6 +; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v0, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v1.l, v2.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v5, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v2.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -897,7 +910,7 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8 @@ -905,24 +918,27 @@ define i8 
@test_vector_reduce_smax_v8i8(<8 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v1.l, v5.l, v3.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6 +; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v0, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v1.l, v2.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v5, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v2.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -1291,51 +1307,59 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) { ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v16i8: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v11, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v3, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v15, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v9, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v13, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v1, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v9.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v13.l -; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v17.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v13.l, v16.l -; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v5.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v3.h, v9.l, v11.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8 +; 
GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v1.l, v3.l, v13.l, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v0, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v2, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v14, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v1.l, v3.h, v1.h, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v8.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v12, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v9.l, 8, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v5.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l -; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 8, v9 -; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v1.l, v2.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v10.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l +; GFX11-SDAG-TRUE16-NEXT: 
v_mov_b16_e32 v5.l, v13.l +; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l +; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v8.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v1.l, v2.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v6.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.h, v0.h, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l +; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l +; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7 +; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v2.l, v3.l, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.h, v0.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v0.h ; 
GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l @@ -1444,51 +1468,59 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v11, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v3, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v15, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v9, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v13, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v1, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v9.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v13.l -; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v17.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v13.l, v16.l -; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v5.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v3.h, v9.l, v11.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8 +; 
GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v1.l, v3.l, v13.l, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v0, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v2, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v14, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v1.l, v3.h, v1.h, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v8.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v12, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v9.l, 8, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v5.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l -; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 8, v9 -; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v1.l, v2.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v10.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l +; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l +; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v8.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | 
instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v1.l, v2.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v6.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.h, v0.h, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l +; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l +; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l +; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7 +; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v2.l, v3.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.h, v0.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll index d9d9a6b9a4b1..bb868621c23d 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll +++ 
b/llvm/test/CodeGen/AMDGPU/vector-reduce-smin.ll @@ -208,11 +208,16 @@ define i8 @test_vector_reduce_smin_v3i8(<3 x i8> %v) { ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v3i8: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v0.l, 0x7f ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -245,11 +250,16 @@ define i8 @test_vector_reduce_smin_v3i8(<3 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; 
GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v0.l, 0x7f ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -790,7 +800,7 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) { ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8 @@ -798,24 +808,27 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) { ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v1.l, v5.l, v3.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu 
instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6 +; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v0, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v1.l, v2.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v5, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v2.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -897,7 +910,7 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 
v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8 @@ -905,24 +918,27 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v1.l, v5.l, v3.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6 +; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v0, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v1.l, v2.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; 
GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v5, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v2.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -1291,51 +1307,59 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) { ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v16i8: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v11, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v3, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v15, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v9, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v13, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v1, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v9.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v13.l -; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v17.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v13.l, v16.l -; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v5.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v3.h, v9.l, v11.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l 
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v1.l, v3.l, v13.l, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v0, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v2, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v14, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v1.l, v3.h, v1.h, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v8.l -; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v12, 0, 8 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v9.l, 8, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v5.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l -; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v3.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 8, v9 -; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v1.l, v2.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l -; GFX11-SDAG-TRUE16-NEXT: 
v_mov_b16_e32 v2.l, v10.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l +; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l +; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v8.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v1.l, v2.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v6.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.h, v0.h, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8 +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l +; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l +; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7 +; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v2.l, v3.l, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.h, v0.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l @@ -1444,51 +1468,59 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v11, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v3, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v15, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v9, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v13, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v1, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v4, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v9.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v13.l -; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v17.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v13.l, v16.l -; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v5.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v3.h, v9.l, v11.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8 +; 
GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v1.l, v3.l, v13.l, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v0, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v2, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v14, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v1.l, v3.h, v1.h, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v8.l -; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v12, 0, 8 -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v9.l, 8, v1.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v5.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l -; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v3.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 8, v9 -; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v1.l, v2.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v10.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l +; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l +; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l ; 
GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v8.l -; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v1.l, v2.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v6.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.h, v0.h, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8 +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l +; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l +; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l +; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7 +; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l +; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v2.l, v3.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.h, v0.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l -- cgit v1.2.3 From 5d502aeddf2a5d93c3fd93103054261acf4d92f3 Mon Sep 17 00:00:00 2001 
From: Krzysztof Parzyszek Date: Wed, 18 Jun 2025 10:42:39 -0500 Subject: [flang][OpenMP] Clarify confusing error message (#144707) The message "The atomic variable x should occur exactly once among the arguments of the top-level [...] operator" was intended to convey that (1) an atomic variable should be an argument, and (2) it should be exactly one of the arguments. However, the wording turned out to be sowing confusion instead. Rework the corresponding check, and emit an individual error message for each problematic situation: - "atomic variable cannot be a proper subexpression of an argument", - "atomic variable should appear as an argument", - "atomic variable should be exactly one of the arguments". Fixes https://github.com/llvm/llvm-project/issues/144599 --- flang/lib/Semantics/check-omp-structure.cpp | 57 +++++++++++++------- flang/test/Semantics/OpenMP/atomic-update-only.f90 | 3 +- flang/test/Semantics/OpenMP/atomic03.f90 | 32 ++++++------ flang/test/Semantics/OpenMP/atomic04.f90 | 60 ++++++++++++---------- 4 files changed, 88 insertions(+), 64 deletions(-) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 83f4d1edf3c4..36d4bcb5d99f 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -3510,37 +3510,56 @@ void OmpStructureChecker::CheckAtomicUpdateAssignment( operation::ToString(top.first)); return; } - // Check if `atom` occurs exactly once in the argument list. + // Check how many times `atom` occurs as an argument, if it's a subexpression + // of an argument, and collect the non-atom arguments. 
std::vector nonAtom; - auto unique{[&]() { // -> iterator - auto found{top.second.end()}; - for (auto i{top.second.begin()}, e{top.second.end()}; i != e; ++i) { - if (IsSameOrConvertOf(*i, atom)) { - if (found != top.second.end()) { - return top.second.end(); - } - found = i; + MaybeExpr subExpr; + auto atomCount{[&]() { + int count{0}; + for (const SomeExpr &arg : top.second) { + if (IsSameOrConvertOf(arg, atom)) { + ++count; } else { - nonAtom.push_back(*i); + if (!subExpr && IsSubexpressionOf(atom, arg)) { + subExpr = arg; + } + nonAtom.push_back(arg); } } - return found; + return count; }()}; - if (unique == top.second.end()) { - if (top.first == operation::Operator::Identity) { - // This is "x = y". + bool hasError{false}; + if (subExpr) { + context_.Say(rsrc, + "The atomic variable %s cannot be a proper subexpression of an argument (here: %s) in the update operation"_err_en_US, + atom.AsFortran(), subExpr->AsFortran()); + hasError = true; + } + if (top.first == operation::Operator::Identity) { + // This is "x = y". 
+ assert((atomCount == 0 || atomCount == 1) && "Unexpected count"); + if (atomCount == 0) { context_.Say(rsrc, "The atomic variable %s should appear as an argument in the update operation"_err_en_US, atom.AsFortran()); - } else { - assert(top.first != operation::Operator::Identity && - "Handle this separately"); + hasError = true; + } + } else { + if (atomCount == 0) { + context_.Say(rsrc, + "The atomic variable %s should appear as an argument of the top-level %s operator"_err_en_US, + atom.AsFortran(), operation::ToString(top.first)); + hasError = true; + } else if (atomCount > 1) { context_.Say(rsrc, - "The atomic variable %s should occur exactly once among the arguments of the top-level %s operator"_err_en_US, + "The atomic variable %s should be exactly one of the arguments of the top-level %s operator"_err_en_US, atom.AsFortran(), operation::ToString(top.first)); + hasError = true; } - } else { + } + + if (!hasError) { CheckStorageOverlap(atom, nonAtom, source); } } diff --git a/flang/test/Semantics/OpenMP/atomic-update-only.f90 b/flang/test/Semantics/OpenMP/atomic-update-only.f90 index 28d0e264359c..3c027924a142 100644 --- a/flang/test/Semantics/OpenMP/atomic-update-only.f90 +++ b/flang/test/Semantics/OpenMP/atomic-update-only.f90 @@ -30,7 +30,8 @@ subroutine f03 integer :: x, y !$omp atomic update - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level + operator + !ERROR: The atomic variable x cannot be a proper subexpression of an argument (here: (x+y)) in the update operation + !ERROR: The atomic variable x should appear as an argument of the top-level + operator x = (x + y) + 1 end diff --git a/flang/test/Semantics/OpenMP/atomic03.f90 b/flang/test/Semantics/OpenMP/atomic03.f90 index b3a3c0d5e7a1..691a483e6e80 100644 --- a/flang/test/Semantics/OpenMP/atomic03.f90 +++ b/flang/test/Semantics/OpenMP/atomic03.f90 @@ -25,19 +25,19 @@ program OmpAtomic y = MIN(y, 8) !$omp atomic - !ERROR: The atomic variable z should occur 
exactly once among the arguments of the top-level AND operator + !ERROR: The atomic variable z should appear as an argument of the top-level AND operator z = IAND(y, 4) !$omp atomic - !ERROR: The atomic variable z should occur exactly once among the arguments of the top-level OR operator + !ERROR: The atomic variable z should appear as an argument of the top-level OR operator z = IOR(y, 5) !$omp atomic - !ERROR: The atomic variable z should occur exactly once among the arguments of the top-level NEQV/EOR operator + !ERROR: The atomic variable z should appear as an argument of the top-level NEQV/EOR operator z = IEOR(y, 6) !$omp atomic - !ERROR: The atomic variable z should occur exactly once among the arguments of the top-level MAX operator + !ERROR: The atomic variable z should appear as an argument of the top-level MAX operator z = MAX(y, 7, b, c) !$omp atomic - !ERROR: The atomic variable z should occur exactly once among the arguments of the top-level MIN operator + !ERROR: The atomic variable z should appear as an argument of the top-level MIN operator z = MIN(y, 8, a, d) !$omp atomic @@ -58,19 +58,19 @@ program OmpAtomic y = MIN(y, 8) !$omp atomic update - !ERROR: The atomic variable z should occur exactly once among the arguments of the top-level AND operator + !ERROR: The atomic variable z should appear as an argument of the top-level AND operator z = IAND(y, 4) !$omp atomic update - !ERROR: The atomic variable z should occur exactly once among the arguments of the top-level OR operator + !ERROR: The atomic variable z should appear as an argument of the top-level OR operator z = IOR(y, 5) !$omp atomic update - !ERROR: The atomic variable z should occur exactly once among the arguments of the top-level NEQV/EOR operator + !ERROR: The atomic variable z should appear as an argument of the top-level NEQV/EOR operator z = IEOR(y, 6) !$omp atomic update - !ERROR: The atomic variable z should occur exactly once among the arguments of the top-level MAX operator + 
!ERROR: The atomic variable z should appear as an argument of the top-level MAX operator z = MAX(y, 7) !$omp atomic update - !ERROR: The atomic variable z should occur exactly once among the arguments of the top-level MIN operator + !ERROR: The atomic variable z should appear as an argument of the top-level MIN operator z = MIN(y, 8) !$omp atomic update @@ -90,7 +90,7 @@ subroutine conflicting_types() type(simple) ::s z = 1 !$omp atomic - !ERROR: The atomic variable z should occur exactly once among the arguments of the top-level AND operator + !ERROR: The atomic variable z should appear as an argument of the top-level AND operator z = IAND(s%z, 4) end subroutine @@ -103,22 +103,22 @@ subroutine more_invalid_atomic_update_stmts() type(some_type) :: s !$omp atomic update - !ERROR: The atomic variable a should occur exactly once among the arguments of the top-level MIN operator + !ERROR: The atomic variable a should be exactly one of the arguments of the top-level MIN operator a = min(a, a, b) !$omp atomic - !ERROR: The atomic variable a should occur exactly once among the arguments of the top-level MAX operator + !ERROR: The atomic variable a should be exactly one of the arguments of the top-level MAX operator a = max(b, a, b, a) !$omp atomic a = min(b, a, b) !$omp atomic - !ERROR: The atomic variable a should occur exactly once among the arguments of the top-level MAX operator + !ERROR: The atomic variable a should be exactly one of the arguments of the top-level MAX operator a = max(b, a, b, a, b) !$omp atomic update - !ERROR: The atomic variable y should occur exactly once among the arguments of the top-level MIN operator + !ERROR: The atomic variable y should appear as an argument of the top-level MIN operator y = min(z, x) !$omp atomic @@ -126,7 +126,7 @@ subroutine more_invalid_atomic_update_stmts() !$omp atomic update !ERROR: Atomic variable k should be a scalar - !ERROR: The atomic variable k should occur exactly once among the arguments of the top-level MAX 
operator + !ERROR: The atomic variable k should appear as an argument of the top-level MAX operator k = max(x, y) !$omp atomic diff --git a/flang/test/Semantics/OpenMP/atomic04.f90 b/flang/test/Semantics/OpenMP/atomic04.f90 index 0f69befed141..fb87ca518661 100644 --- a/flang/test/Semantics/OpenMP/atomic04.f90 +++ b/flang/test/Semantics/OpenMP/atomic04.f90 @@ -17,10 +17,10 @@ program OmpAtomic !$omp atomic x = 1 + x !$omp atomic - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level + operator + !ERROR: The atomic variable x should appear as an argument of the top-level + operator x = y + 1 !$omp atomic - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level + operator + !ERROR: The atomic variable x should appear as an argument of the top-level + operator x = 1 + y !$omp atomic @@ -28,10 +28,10 @@ program OmpAtomic !$omp atomic x = 1 - x !$omp atomic - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level - operator + !ERROR: The atomic variable x should appear as an argument of the top-level - operator x = y - 1 !$omp atomic - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level - operator + !ERROR: The atomic variable x should appear as an argument of the top-level - operator x = 1 - y !$omp atomic @@ -50,10 +50,10 @@ program OmpAtomic !$omp atomic x = 1/x !$omp atomic - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level / operator + !ERROR: The atomic variable x should appear as an argument of the top-level / operator x = y/1 !$omp atomic - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level / operator + !ERROR: The atomic variable x should appear as an argument of the top-level / operator x = 1/y !$omp atomic @@ -61,7 +61,7 @@ program OmpAtomic !$omp atomic m = n .AND. 
m !$omp atomic - !ERROR: The atomic variable m should occur exactly once among the arguments of the top-level AND operator + !ERROR: The atomic variable m should appear as an argument of the top-level AND operator m = n .AND. l !$omp atomic @@ -69,7 +69,7 @@ program OmpAtomic !$omp atomic m = n .OR. m !$omp atomic - !ERROR: The atomic variable m should occur exactly once among the arguments of the top-level OR operator + !ERROR: The atomic variable m should appear as an argument of the top-level OR operator m = n .OR. l !$omp atomic @@ -77,7 +77,7 @@ program OmpAtomic !$omp atomic m = n .EQV. m !$omp atomic - !ERROR: The atomic variable m should occur exactly once among the arguments of the top-level EQV operator + !ERROR: The atomic variable m should appear as an argument of the top-level EQV operator m = n .EQV. l !$omp atomic @@ -85,7 +85,7 @@ program OmpAtomic !$omp atomic m = n .NEQV. m !$omp atomic - !ERROR: The atomic variable m should occur exactly once among the arguments of the top-level NEQV/EOR operator + !ERROR: The atomic variable m should appear as an argument of the top-level NEQV/EOR operator m = n .NEQV. 
l !$omp atomic update @@ -93,10 +93,10 @@ program OmpAtomic !$omp atomic update x = 1 + x !$omp atomic update - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level + operator + !ERROR: The atomic variable x should appear as an argument of the top-level + operator x = y + 1 !$omp atomic update - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level + operator + !ERROR: The atomic variable x should appear as an argument of the top-level + operator x = 1 + y !$omp atomic update @@ -104,10 +104,10 @@ program OmpAtomic !$omp atomic update x = 1 - x !$omp atomic update - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level - operator + !ERROR: The atomic variable x should appear as an argument of the top-level - operator x = y - 1 !$omp atomic update - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level - operator + !ERROR: The atomic variable x should appear as an argument of the top-level - operator x = 1 - y !$omp atomic update @@ -126,10 +126,10 @@ program OmpAtomic !$omp atomic update x = 1/x !$omp atomic update - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level / operator + !ERROR: The atomic variable x should appear as an argument of the top-level / operator x = y/1 !$omp atomic update - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level / operator + !ERROR: The atomic variable x should appear as an argument of the top-level / operator x = 1/y !$omp atomic update @@ -137,7 +137,7 @@ program OmpAtomic !$omp atomic update m = n .AND. m !$omp atomic update - !ERROR: The atomic variable m should occur exactly once among the arguments of the top-level AND operator + !ERROR: The atomic variable m should appear as an argument of the top-level AND operator m = n .AND. 
l !$omp atomic update @@ -145,7 +145,7 @@ program OmpAtomic !$omp atomic update m = n .OR. m !$omp atomic update - !ERROR: The atomic variable m should occur exactly once among the arguments of the top-level OR operator + !ERROR: The atomic variable m should appear as an argument of the top-level OR operator m = n .OR. l !$omp atomic update @@ -153,7 +153,7 @@ program OmpAtomic !$omp atomic update m = n .EQV. m !$omp atomic update - !ERROR: The atomic variable m should occur exactly once among the arguments of the top-level EQV operator + !ERROR: The atomic variable m should appear as an argument of the top-level EQV operator m = n .EQV. l !$omp atomic update @@ -161,7 +161,7 @@ program OmpAtomic !$omp atomic update m = n .NEQV. m !$omp atomic update - !ERROR: The atomic variable m should occur exactly once among the arguments of the top-level NEQV/EOR operator + !ERROR: The atomic variable m should appear as an argument of the top-level NEQV/EOR operator m = n .NEQV. l end program OmpAtomic @@ -184,27 +184,30 @@ subroutine more_invalid_atomic_update_stmts() x = 1 !$omp atomic update - !ERROR: Within atomic operation a and a*b access the same storage + !ERROR: The atomic variable a cannot be a proper subexpression of an argument (here: a*b) in the update operation a = a * b + a !$omp atomic - !ERROR: The atomic variable a should occur exactly once among the arguments of the top-level * operator + !ERROR: The atomic variable a cannot be a proper subexpression of an argument (here: (a+9_4)) in the update operation + !ERROR: The atomic variable a should appear as an argument of the top-level * operator a = b * (a + 9) !$omp atomic update - !ERROR: Within atomic operation a and (a+b) access the same storage + !ERROR: The atomic variable a cannot be a proper subexpression of an argument (here: (a+b)) in the update operation a = a * (a + b) !$omp atomic - !ERROR: Within atomic operation a and (b+a) access the same storage + !ERROR: The atomic variable a cannot be a 
proper subexpression of an argument (here: (b+a)) in the update operation a = (b + a) * a !$omp atomic - !ERROR: The atomic variable a should occur exactly once among the arguments of the top-level + operator + !ERROR: The atomic variable a cannot be a proper subexpression of an argument (here: a*b) in the update operation + !ERROR: The atomic variable a should appear as an argument of the top-level + operator a = a * b + c !$omp atomic update - !ERROR: The atomic variable a should occur exactly once among the arguments of the top-level + operator + !ERROR: The atomic variable a cannot be a proper subexpression of an argument (here: a+b) in the update operation + !ERROR: The atomic variable a should appear as an argument of the top-level + operator a = a + b + c !$omp atomic @@ -219,11 +222,12 @@ subroutine more_invalid_atomic_update_stmts() !$omp atomic update !ERROR: No intrinsic or user-defined ASSIGNMENT(=) matches scalar REAL(4) and rank 1 array of REAL(4) - !ERROR: The atomic variable x should occur exactly once among the arguments of the top-level / operator + !ERROR: The atomic variable x cannot be a proper subexpression of an argument (here: x*y) in the update operation + !ERROR: The atomic variable x should appear as an argument of the top-level / operator x = x * y / z !$omp atomic - !ERROR: The atomic variable p%m should occur exactly once among the arguments of the top-level + operator + !ERROR: The atomic variable p%m should appear as an argument of the top-level + operator p%m = x + y !$omp atomic update -- cgit v1.2.3 From b5aaf9d988ff2dc652c86271b181bf0497eb97cb Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 18 Jun 2025 08:53:45 -0700 Subject: [InstCombine] Implement vp.reverse reordering/elimination through binop/unop (#143963) This simply copies the structure of the vector.reverse patterns from just above, and reimplements them for the vp.reverse intrinsics when the mask is all ones and the EVLs exactly match. 
It's unfortunate that we have three different ways to represent a reverse (shuffle, vector.reverse, and vp.reverse) but I don't see an obvious way to remove any of them because the semantics are slightly different. This significantly improves vectorization in TSVC_2's s112 and s1112 loops when using EVL tail folding. --- .../Transforms/InstCombine/InstCombineCalls.cpp | 19 ++++++ .../InstCombine/InstructionCombining.cpp | 33 +++++++++++ llvm/test/Transforms/InstCombine/vp-reverse.ll | 68 ++++++++++++++-------- 3 files changed, 97 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 03897117861f..b6ed1dc4331d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3571,6 +3571,25 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } break; } + case Intrinsic::experimental_vp_reverse: { + Value *X; + Value *Vec = II->getArgOperand(0); + Value *Mask = II->getArgOperand(1); + if (!match(Mask, m_AllOnes())) + break; + Value *EVL = II->getArgOperand(2); + // rev(unop rev(X)) --> unop X + if (match(Vec, + m_OneUse(m_UnOp(m_Intrinsic( + m_Value(X), m_AllOnes(), m_Specific(EVL)))))) { + auto *OldUnOp = cast(Vec); + auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags( + OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(), + II->getIterator()); + return replaceInstUsesWith(CI, NewUnOp); + } + break; + } case Intrinsic::vector_reduce_or: case Intrinsic::vector_reduce_and: { // Canonicalize logical or/and reductions: diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index e2cd2a59fab9..afd3359e22ff 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2231,6 +2231,39 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator
&Inst) { else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2))))) return createBinOpReverse(LHS, V2); + auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) { + Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName()); + if (auto *BO = dyn_cast(V)) + BO->copyIRFlags(&Inst); + + ElementCount EC = cast(V->getType())->getElementCount(); + Value *AllTrueMask = Builder.CreateVectorSplat(EC, Builder.getTrue()); + Module *M = Inst.getModule(); + Function *F = Intrinsic::getOrInsertDeclaration( + M, Intrinsic::experimental_vp_reverse, V->getType()); + return CallInst::Create(F, {V, AllTrueMask, EVL}); + }; + + Value *EVL; + if (match(LHS, m_Intrinsic( + m_Value(V1), m_AllOnes(), m_Value(EVL)))) { + // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2)) + if (match(RHS, m_Intrinsic( + m_Value(V2), m_AllOnes(), m_Specific(EVL))) && + (LHS->hasOneUse() || RHS->hasOneUse() || + (LHS == RHS && LHS->hasNUses(2)))) + return createBinOpVPReverse(V1, V2, EVL); + + // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat)) + if (LHS->hasOneUse() && isSplatValue(RHS)) + return createBinOpVPReverse(V1, RHS, EVL); + } + // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2)) + else if (isSplatValue(LHS) && + match(RHS, m_Intrinsic( + m_Value(V2), m_AllOnes(), m_Value(EVL)))) + return createBinOpVPReverse(LHS, V2, EVL); + // It may not be safe to reorder shuffles and things like div, urem, etc. // because we may trap when executing those ops on unknown vector elements. // See PR20059. 
diff --git a/llvm/test/Transforms/InstCombine/vp-reverse.ll b/llvm/test/Transforms/InstCombine/vp-reverse.ll index 79e6c47bdf1b..540b57da3475 100644 --- a/llvm/test/Transforms/InstCombine/vp-reverse.ll +++ b/llvm/test/Transforms/InstCombine/vp-reverse.ll @@ -3,11 +3,8 @@ define @binop_reverse_elim( %a, %b, i32 %evl) { ; CHECK-LABEL: @binop_reverse_elim( -; CHECK-NEXT: [[A:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[A1:%.*]], splat (i1 true), i32 [[EVL:%.*]]) -; CHECK-NEXT: [[B:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[B1:%.*]], splat (i1 true), i32 [[EVL]]) -; CHECK-NEXT: [[ADD1:%.*]] = add nsw [[A]], [[B]] -; CHECK-NEXT: [[ADD_REV:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[ADD1]], splat (i1 true), i32 [[EVL]]) -; CHECK-NEXT: ret [[ADD_REV]] +; CHECK-NEXT: [[ADD1:%.*]] = add nsw [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret [[ADD1]] ; %a.rev = tail call @llvm.experimental.vp.reverse( %a, splat (i1 true), i32 %evl) %b.rev = tail call @llvm.experimental.vp.reverse( %b, splat (i1 true), i32 %evl) @@ -16,8 +13,10 @@ define @binop_reverse_elim( %a, %add.rev } -define @binop_reverse_elim2( %a, %b, %m, i32 %evl) { -; CHECK-LABEL: @binop_reverse_elim2( +; Negative test - the mask needs to be reversed between the inner and +; the outer to be correct. 
+define @binop_reverse_elim_samemask( %a, %b, %m, i32 %evl) { +; CHECK-LABEL: @binop_reverse_elim_samemask( ; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[A:%.*]], [[M:%.*]], i32 [[EVL:%.*]]) ; CHECK-NEXT: [[B_REV:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[B:%.*]], [[M]], i32 [[EVL]]) ; CHECK-NEXT: [[ADD:%.*]] = add nsw [[A_REV]], [[B_REV]] @@ -48,10 +47,9 @@ define @binop_reverse_elim_diffmask( %a, @binop_reverse_elim_diffevl( %a, %b, i32 %evl) { ; CHECK-LABEL: @binop_reverse_elim_diffevl( -; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[A:%.*]], splat (i1 true), i32 [[EVL:%.*]]) -; CHECK-NEXT: [[B_REV:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[B:%.*]], splat (i1 true), i32 [[EVL]]) -; CHECK-NEXT: [[ADD:%.*]] = add nsw [[A_REV]], [[B_REV]] -; CHECK-NEXT: [[ADD_REV:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[ADD]], splat (i1 true), i32 10) +; CHECK-NEXT: [[ADD:%.*]] = add nsw [[A_REV:%.*]], [[B_REV:%.*]] +; CHECK-NEXT: [[ADD1:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[ADD]], splat (i1 true), i32 [[EVL:%.*]]) +; CHECK-NEXT: [[ADD_REV:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[ADD1]], splat (i1 true), i32 10) ; CHECK-NEXT: ret [[ADD_REV]] ; %a.rev = tail call @llvm.experimental.vp.reverse( %a, splat (i1 true), i32 %evl) @@ -63,10 +61,8 @@ define @binop_reverse_elim_diffevl( %a, @binop_reverse_splat_elim( %a, i32 %evl) { ; CHECK-LABEL: @binop_reverse_splat_elim( -; CHECK-NEXT: [[A:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[A1:%.*]], splat (i1 true), i32 [[EVL:%.*]]) -; CHECK-NEXT: [[ADD1:%.*]] = add nsw [[A]], splat (i32 22) -; CHECK-NEXT: [[ADD_REV:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[ADD1]], splat (i1 true), i32 [[EVL]]) -; CHECK-NEXT: ret [[ADD_REV]] +; CHECK-NEXT: [[ADD1:%.*]] = add nsw [[A:%.*]], splat (i32 22) +; CHECK-NEXT: ret [[ADD1]] ; %a.rev = tail call 
@llvm.experimental.vp.reverse( %a, splat (i1 true), i32 %evl) %add = add nsw %a.rev, splat (i32 22) @@ -76,10 +72,8 @@ define @binop_reverse_splat_elim( %a, i32 % define @binop_reverse_splat_elim2( %a, i32 %evl) { ; CHECK-LABEL: @binop_reverse_splat_elim2( -; CHECK-NEXT: [[A:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[A1:%.*]], splat (i1 true), i32 [[EVL:%.*]]) -; CHECK-NEXT: [[ADD1:%.*]] = add nsw [[A]], splat (i32 22) -; CHECK-NEXT: [[ADD_REV:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4i32( [[ADD1]], splat (i1 true), i32 [[EVL]]) -; CHECK-NEXT: ret [[ADD_REV]] +; CHECK-NEXT: [[ADD1:%.*]] = add nsw [[A:%.*]], splat (i32 22) +; CHECK-NEXT: ret [[ADD1]] ; %a.rev = tail call @llvm.experimental.vp.reverse( %a, splat (i1 true), i32 %evl) %add = add nsw splat (i32 22), %a.rev @@ -87,12 +81,40 @@ define @binop_reverse_splat_elim2( %a, i32 ret %add.rev } +define @binop_reverse_splat_elim3( %a, i32 %b, i32 %evl) { +; CHECK-LABEL: @binop_reverse_splat_elim3( +; CHECK-NEXT: [[B_INS:%.*]] = insertelement poison, i32 [[B:%.*]], i64 0 +; CHECK-NEXT: [[B_VEC:%.*]] = shufflevector [[B_INS]], poison, zeroinitializer +; CHECK-NEXT: [[ADD:%.*]] = add nsw [[B_VEC]], [[A_REV:%.*]] +; CHECK-NEXT: ret [[ADD]] +; + %b.ins = insertelement poison, i32 %b, i32 0 + %b.vec = shufflevector %b.ins, poison, zeroinitializer + %a.rev = tail call @llvm.experimental.vp.reverse( %a, splat (i1 true), i32 %evl) + %add = add nsw %b.vec, %a.rev + %add.rev = tail call @llvm.experimental.vp.reverse( %add, splat (i1 true), i32 %evl) + ret %add.rev +} + +define @binop_reverse_splat_elim4( %a, i32 %b, i32 %evl) { +; CHECK-LABEL: @binop_reverse_splat_elim4( +; CHECK-NEXT: [[B_INS:%.*]] = insertelement poison, i32 [[B:%.*]], i64 0 +; CHECK-NEXT: [[B_VEC:%.*]] = shufflevector [[B_INS]], poison, zeroinitializer +; CHECK-NEXT: [[ADD1:%.*]] = add nsw [[A:%.*]], [[B_VEC]] +; CHECK-NEXT: ret [[ADD1]] +; + %b.ins = insertelement poison, i32 %b, i32 0 + %b.vec = shufflevector %b.ins, poison, 
zeroinitializer + %a.rev = tail call @llvm.experimental.vp.reverse( %a, splat (i1 true), i32 %evl) + %add = add nsw %a.rev, %b.vec + %add.rev = tail call @llvm.experimental.vp.reverse( %add, splat (i1 true), i32 %evl) + ret %add.rev +} + define @unop_reverse_splat_elim( %a, %b, i32 %evl) { ; CHECK-LABEL: @unop_reverse_splat_elim( -; CHECK-NEXT: [[A_REV:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4f32( [[A:%.*]], splat (i1 true), i32 [[EVL:%.*]]) -; CHECK-NEXT: [[OP:%.*]] = fneg [[A_REV]] -; CHECK-NEXT: [[OP_REV:%.*]] = tail call @llvm.experimental.vp.reverse.nxv4f32( [[OP]], splat (i1 true), i32 [[EVL]]) -; CHECK-NEXT: ret [[OP_REV]] +; CHECK-NEXT: [[OP:%.*]] = fneg [[A_REV:%.*]] +; CHECK-NEXT: ret [[OP]] ; %a.rev = tail call @llvm.experimental.vp.reverse.nxv4f32( %a, splat (i1 true), i32 %evl) %op = fneg %a.rev -- cgit v1.2.3 From 0fa373c77ded203eddb973c79244c75ee5957eaf Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Wed, 18 Jun 2025 12:00:48 -0400 Subject: [Matrix] Propagate shape information through PHI insts (#141681) ... and split them as we lower them, avoiding several shuffles in the process. 
--- .../Transforms/Scalar/LowerMatrixIntrinsics.cpp | 57 +- llvm/test/Transforms/LowerMatrixIntrinsics/phi.ll | 789 +++++++++++++++++++++ .../propagate-backwards-unsupported.ll | 261 ------- 3 files changed, 844 insertions(+), 263 deletions(-) create mode 100644 llvm/test/Transforms/LowerMatrixIntrinsics/phi.ll diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index 96b156494fd9..fa9e44617b7c 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -288,6 +288,7 @@ static bool isUniformShape(Value *V) { } switch (I->getOpcode()) { + case Instruction::PHI: case Instruction::FNeg: return true; default: @@ -1136,7 +1137,27 @@ public: Changed |= !FusedInsts.empty(); - // Fourth, lower remaining instructions with shape information. + // Fourth, pre-process all the PHINode's. The incoming values will be + // assigned later in VisitPHI. + for (Instruction *Inst : MatrixInsts) { + auto *PHI = dyn_cast(Inst); + if (!PHI) + continue; + + const ShapeInfo &SI = ShapeMap.at(Inst); + auto *EltTy = cast(PHI->getType())->getElementType(); + MatrixTy PhiM(SI.NumRows, SI.NumColumns, EltTy); + + IRBuilder<> Builder(Inst); + for (unsigned VI = 0, VE = PhiM.getNumVectors(); VI != VE; ++VI) + PhiM.setVector(VI, Builder.CreatePHI(PhiM.getVectorTy(), + PHI->getNumIncomingValues(), + PHI->getName())); + assert(!Inst2ColumnMatrix.contains(PHI) && "map already contains phi?"); + Inst2ColumnMatrix[PHI] = PhiM; + } + + // Fifth, lower remaining instructions with shape information. 
for (Instruction *Inst : MatrixInsts) { if (FusedInsts.count(Inst)) continue; @@ -1161,6 +1182,8 @@ public: Result = VisitLoad(cast(Inst), SI, Op1, Builder); else if (match(Inst, m_Store(m_Value(Op1), m_Value(Op2)))) Result = VisitStore(cast(Inst), SI, Op1, Op2, Builder); + else if (auto *PHI = dyn_cast(Inst)) + Result = VisitPHI(PHI, SI, Builder); else continue; @@ -1458,7 +1481,8 @@ public: IRBuilder<> &Builder) { auto inserted = Inst2ColumnMatrix.insert(std::make_pair(Inst, Matrix)); (void)inserted; - assert(inserted.second && "multiple matrix lowering mapping"); + assert((inserted.second || isa(Inst)) && + "multiple matrix lowering mapping"); ToRemove.push_back(Inst); Value *Flattened = nullptr; @@ -2239,6 +2263,35 @@ public: Builder); } + MatrixTy VisitPHI(PHINode *Inst, const ShapeInfo &SI, IRBuilder<> &Builder) { + auto BlockIP = Inst->getParent()->getFirstInsertionPt(); + Builder.SetInsertPoint(BlockIP); + MatrixTy PhiM = getMatrix(Inst, SI, Builder); + + for (auto [IncomingV, IncomingB] : + llvm::zip_equal(Inst->incoming_values(), Inst->blocks())) { + // getMatrix() may insert some instructions to help with reshaping. The + // safest place for those is at the top of the block after the rest of the + // PHI's. Even better, if we can put it in the incoming block. + Builder.SetInsertPoint(BlockIP); + if (auto *IncomingInst = dyn_cast(IncomingV)) + if (auto MaybeIP = IncomingInst->getInsertionPointAfterDef()) + Builder.SetInsertPoint(*MaybeIP); + + MatrixTy OpM = getMatrix(IncomingV, SI, Builder); + + for (unsigned VI = 0, VE = PhiM.getNumVectors(); VI != VE; ++VI) { + PHINode *NewPHI = cast(PhiM.getVector(VI)); + NewPHI->addIncoming(OpM.getVector(VI), IncomingB); + } + } + + // finalizeLowering() may also insert instructions in some cases. The safe + // place for those is at the end of the initial block of PHIs. + Builder.SetInsertPoint(BlockIP); + return PhiM; + } + /// Lower binary operators. 
MatrixTy VisitBinaryOperator(BinaryOperator *Inst, const ShapeInfo &SI, IRBuilder<> &Builder) { diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/phi.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/phi.ll new file mode 100644 index 000000000000..9fdb2ce4dfa7 --- /dev/null +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/phi.ll @@ -0,0 +1,789 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -matrix-allow-contract=false -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s + +define void @matrix_phi_loop(ptr %in1, ptr %in2, i32 %count, ptr %out) { +; CHECK-LABEL: @matrix_phi_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, ptr [[IN1:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN1]], i64 3 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN1]], i64 6 +; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI9:%.*]] = phi <3 x double> [ [[COL_LOAD]], [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PHI10:%.*]] = phi <3 x double> [ [[COL_LOAD1]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PHI11:%.*]] = phi <3 x double> [ [[COL_LOAD3]], [[ENTRY]] ], [ [[TMP2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[DEC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <3 x double>, ptr [[IN2:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr double, ptr [[IN2]], i64 3 +; CHECK-NEXT: [[COL_LOAD6:%.*]] = load <3 x double>, ptr [[VEC_GEP5]], align 8 +; CHECK-NEXT: [[VEC_GEP7:%.*]] = getelementptr double, ptr [[IN2]], i64 6 +; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <3 x double>, ptr [[VEC_GEP7]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fadd <3 x double> [[PHI9]], [[COL_LOAD4]] +; CHECK-NEXT: [[TMP7:%.*]] = 
fadd <3 x double> [[PHI10]], [[COL_LOAD6]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <3 x double> [[PHI11]], [[COL_LOAD8]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP7]], <6 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <3 x double> [[TMP8]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <6 x double> [[TMP3]], <6 x double> [[TMP4]], <9 x i32> +; CHECK-NEXT: [[TMP0]] = shufflevector <9 x double> [[TMP5]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP1]] = shufflevector <9 x double> [[TMP5]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP2]] = shufflevector <9 x double> [[TMP5]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[DEC]] = sub i32 [[CTR]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: store <3 x double> [[TMP6]], ptr [[OUT:%.*]], align 128 +; CHECK-NEXT: [[VEC_GEP12:%.*]] = getelementptr double, ptr [[OUT]], i64 3 +; CHECK-NEXT: store <3 x double> [[TMP7]], ptr [[VEC_GEP12]], align 8 +; CHECK-NEXT: [[VEC_GEP13:%.*]] = getelementptr double, ptr [[OUT]], i64 6 +; CHECK-NEXT: store <3 x double> [[TMP8]], ptr [[VEC_GEP13]], align 16 +; CHECK-NEXT: ret void +; +entry: + %in1v = call <9 x double> @llvm.matrix.column.major.load(ptr %in1, i64 3, i1 false, i32 3, i32 3) + br label %loop + +loop: + %phi = phi <9 x double> [%in1v, %entry], [%sum, %loop] + %ctr = phi i32 [%count, %entry], [%dec, %loop] + + %in2v = call <9 x double> @llvm.matrix.column.major.load(ptr %in2, i64 3, i1 false, i32 3, i32 3) + + %sum = fadd <9 x double> %phi, %in2v + + %dec = sub i32 %ctr, 1 + %cmp = icmp eq i32 %dec, 0 + br i1 %cmp, label %exit, label %loop + +exit: + store <9 x double> %sum, ptr %out + ret void +} + +define void @matrix_phi_loop_zeroinitializer(ptr %in1, ptr %in2, i32 %count, ptr %out) { +; CHECK-LABEL: @matrix_phi_loop_zeroinitializer( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label 
[[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI4:%.*]] = phi <3 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PHI5:%.*]] = phi <3 x double> [ zeroinitializer, [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PHI6:%.*]] = phi <3 x double> [ zeroinitializer, [[ENTRY]] ], [ [[TMP2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[DEC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, ptr [[IN2:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN2]], i64 3 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN2]], i64 6 +; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fadd <3 x double> [[PHI4]], [[COL_LOAD]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <3 x double> [[PHI5]], [[COL_LOAD1]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <3 x double> [[PHI6]], [[COL_LOAD3]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP7]], <6 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <3 x double> [[TMP8]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <6 x double> [[TMP3]], <6 x double> [[TMP4]], <9 x i32> +; CHECK-NEXT: [[TMP0]] = shufflevector <9 x double> [[TMP5]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP1]] = shufflevector <9 x double> [[TMP5]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP2]] = shufflevector <9 x double> [[TMP5]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[DEC]] = sub i32 [[CTR]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: store <3 x double> [[TMP6]], ptr [[OUT:%.*]], align 128 +; CHECK-NEXT: [[VEC_GEP7:%.*]] = getelementptr double, ptr [[OUT]], i64 3 +; CHECK-NEXT: store <3 x double> [[TMP7]], ptr 
[[VEC_GEP7]], align 8 +; CHECK-NEXT: [[VEC_GEP8:%.*]] = getelementptr double, ptr [[OUT]], i64 6 +; CHECK-NEXT: store <3 x double> [[TMP8]], ptr [[VEC_GEP8]], align 16 +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %phi = phi <9 x double> [zeroinitializer, %entry], [%sum, %loop] + %ctr = phi i32 [%count, %entry], [%dec, %loop] + + %inv = call <9 x double> @llvm.matrix.column.major.load(ptr %in2, i64 3, i1 false, i32 3, i32 3) + + %sum = fadd <9 x double> %phi, %inv + + %dec = sub i32 %ctr, 1 + %cmp = icmp eq i32 %dec, 0 + br i1 %cmp, label %exit, label %loop + +exit: + store <9 x double> %sum, ptr %out + ret void +} + +define void @matrix_phi_loop_undef(ptr %in, i32 %count, ptr %out) { +; CHECK-LABEL: @matrix_phi_loop_undef( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi <9 x double> [ undef, [[ENTRY:%.*]] ], [ [[SUM:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[DEC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INV:%.*]] = load <9 x double>, ptr [[IN:%.*]], align 128 +; CHECK-NEXT: [[SUM]] = fadd <9 x double> [[PHI]], [[INV]] +; CHECK-NEXT: [[DEC]] = sub i32 [[CTR]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: store <9 x double> [[SUM]], ptr [[OUT:%.*]], align 128 +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %phi = phi <9 x double> [undef, %entry], [%sum, %loop] + %ctr = phi i32 [%count, %entry], [%dec, %loop] + + %inv = load <9 x double>, ptr %in + + %sum = fadd <9 x double> %phi, %inv + + %dec = sub i32 %ctr, 1 + %cmp = icmp eq i32 %dec, 0 + br i1 %cmp, label %exit, label %loop + +exit: + store <9 x double> %sum, ptr %out + ret void +} + +define void @matrix_phi_loop_poison(ptr %in, i32 %count, ptr %out) { +; CHECK-LABEL: @matrix_phi_loop_poison( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: 
[[PHI4:%.*]] = phi <3 x double> [ poison, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PHI5:%.*]] = phi <3 x double> [ poison, [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PHI6:%.*]] = phi <3 x double> [ poison, [[ENTRY]] ], [ [[TMP2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[DEC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, ptr [[IN2:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN2]], i64 3 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN2]], i64 6 +; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fadd <3 x double> [[PHI4]], [[COL_LOAD]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <3 x double> [[PHI5]], [[COL_LOAD1]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <3 x double> [[PHI6]], [[COL_LOAD3]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP7]], <6 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <3 x double> [[TMP8]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <6 x double> [[TMP3]], <6 x double> [[TMP4]], <9 x i32> +; CHECK-NEXT: [[TMP0]] = shufflevector <9 x double> [[TMP5]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP1]] = shufflevector <9 x double> [[TMP5]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP2]] = shufflevector <9 x double> [[TMP5]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[DEC]] = sub i32 [[CTR]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: store <3 x double> [[TMP6]], ptr [[OUT:%.*]], align 128 +; CHECK-NEXT: [[VEC_GEP7:%.*]] = getelementptr double, ptr [[OUT]], i64 3 +; CHECK-NEXT: store <3 x double> [[TMP7]], ptr [[VEC_GEP7]], align 8 +; CHECK-NEXT: [[VEC_GEP8:%.*]] = getelementptr 
double, ptr [[OUT]], i64 6 +; CHECK-NEXT: store <3 x double> [[TMP8]], ptr [[VEC_GEP8]], align 16 +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %phi = phi <9 x double> [poison, %entry], [%sum, %loop] + %ctr = phi i32 [%count, %entry], [%dec, %loop] + + %inv = call <9 x double> @llvm.matrix.column.major.load(ptr %in, i64 3, i1 false, i32 3, i32 3) + + %sum = fadd <9 x double> %phi, %inv + + %dec = sub i32 %ctr, 1 + %cmp = icmp eq i32 %dec, 0 + br i1 %cmp, label %exit, label %loop + +exit: + store <9 x double> %sum, ptr %out + ret void +} + +define void @matrix_phi_loop_cdv(ptr %in, i32 %count, ptr %out) { +; CHECK-LABEL: @matrix_phi_loop_cdv( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI4:%.*]] = phi <3 x double> [ , [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PHI5:%.*]] = phi <3 x double> [ , [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PHI6:%.*]] = phi <3 x double> [ , [[ENTRY]] ], [ [[TMP2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[DEC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, ptr [[IN2:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN2]], i64 3 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN2]], i64 6 +; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fadd <3 x double> [[PHI4]], [[COL_LOAD]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <3 x double> [[PHI5]], [[COL_LOAD1]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <3 x double> [[PHI6]], [[COL_LOAD3]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP7]], <6 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <3 x double> [[TMP8]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <6 x double> [[TMP3]], <6 x double> 
[[TMP4]], <9 x i32> +; CHECK-NEXT: [[TMP0]] = shufflevector <9 x double> [[TMP5]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP1]] = shufflevector <9 x double> [[TMP5]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP2]] = shufflevector <9 x double> [[TMP5]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[DEC]] = sub i32 [[CTR]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: store <3 x double> [[TMP6]], ptr [[OUT:%.*]], align 128 +; CHECK-NEXT: [[VEC_GEP7:%.*]] = getelementptr double, ptr [[OUT]], i64 3 +; CHECK-NEXT: store <3 x double> [[TMP7]], ptr [[VEC_GEP7]], align 8 +; CHECK-NEXT: [[VEC_GEP8:%.*]] = getelementptr double, ptr [[OUT]], i64 6 +; CHECK-NEXT: store <3 x double> [[TMP8]], ptr [[VEC_GEP8]], align 16 +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %phi = phi <9 x double> [, %entry], [%sum, %loop] + %ctr = phi i32 [%count, %entry], [%dec, %loop] + + %inv = call <9 x double> @llvm.matrix.column.major.load(ptr %in, i64 3, i1 false, i32 3, i32 3) + + %sum = fadd <9 x double> %phi, %inv + + %dec = sub i32 %ctr, 1 + %cmp = icmp eq i32 %dec, 0 + br i1 %cmp, label %exit, label %loop + +exit: + store <9 x double> %sum, ptr %out + ret void +} + +define void @matrix_phi_loop_delay(ptr %in1, ptr %in2, i32 %count, ptr %out) { +; CHECK-LABEL: @matrix_phi_loop_delay( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, ptr [[IN1:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN1]], i64 3 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN1]], i64 6 +; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI14:%.*]] = phi <3 x double> [ [[COL_LOAD]], [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[LOOP]] ] +; 
CHECK-NEXT: [[PHI15:%.*]] = phi <3 x double> [ [[COL_LOAD1]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PHI16:%.*]] = phi <3 x double> [ [[COL_LOAD3]], [[ENTRY]] ], [ [[TMP2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0]] = phi <3 x double> [ [[COL_LOAD]], [[ENTRY]] ], [ [[SPLIT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP1]] = phi <3 x double> [ [[COL_LOAD1]], [[ENTRY]] ], [ [[SPLIT10:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP2]] = phi <3 x double> [ [[COL_LOAD3]], [[ENTRY]] ], [ [[SPLIT11:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[DEC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <3 x double>, ptr [[IN2:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr double, ptr [[IN2]], i64 3 +; CHECK-NEXT: [[COL_LOAD6:%.*]] = load <3 x double>, ptr [[VEC_GEP5]], align 8 +; CHECK-NEXT: [[VEC_GEP7:%.*]] = getelementptr double, ptr [[IN2]], i64 6 +; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <3 x double>, ptr [[VEC_GEP7]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = fadd <3 x double> [[PHI14]], [[COL_LOAD4]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <3 x double> [[PHI15]], [[COL_LOAD6]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <3 x double> [[PHI16]], [[COL_LOAD8]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP7]], <6 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <3 x double> [[TMP8]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <6 x double> [[TMP9]], <6 x double> [[TMP10]], <9 x i32> +; CHECK-NEXT: [[SPLIT]] = shufflevector <9 x double> [[TMP11]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[SPLIT10]] = shufflevector <9 x double> [[TMP11]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[SPLIT11]] = shufflevector <9 x double> [[TMP11]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = fadd <3 x double> [[TMP6]], [[TMP0]] +; CHECK-NEXT: [[TMP13:%.*]] = fadd <3 x double> [[TMP7]], [[TMP1]] +; CHECK-NEXT: [[TMP14:%.*]] = fadd <3 x double> 
[[TMP8]], [[TMP2]] +; CHECK-NEXT: [[DEC]] = sub i32 [[CTR]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: store <3 x double> [[TMP12]], ptr [[OUT:%.*]], align 128 +; CHECK-NEXT: [[VEC_GEP12:%.*]] = getelementptr double, ptr [[OUT]], i64 3 +; CHECK-NEXT: store <3 x double> [[TMP13]], ptr [[VEC_GEP12]], align 8 +; CHECK-NEXT: [[VEC_GEP13:%.*]] = getelementptr double, ptr [[OUT]], i64 6 +; CHECK-NEXT: store <3 x double> [[TMP14]], ptr [[VEC_GEP13]], align 16 +; CHECK-NEXT: ret void +; +entry: + %in1v = call <9 x double> @llvm.matrix.column.major.load(ptr %in1, i64 3, i1 false, i32 3, i32 3) + br label %loop + +loop: + %phi2 = phi <9 x double> [%in1v, %entry], [%phi, %loop] + %phi = phi <9 x double> [%in1v, %entry], [%sum, %loop] + %ctr = phi i32 [%count, %entry], [%dec, %loop] + + %in2v = call <9 x double> @llvm.matrix.column.major.load(ptr %in2, i64 3, i1 false, i32 3, i32 3) + + %sum = fadd <9 x double> %phi2, %in2v + %sum2 = fadd <9 x double> %sum, %phi + + %dec = sub i32 %ctr, 1 + %cmp = icmp eq i32 %dec, 0 + br i1 %cmp, label %exit, label %loop + +exit: + store <9 x double> %sum2, ptr %out + ret void +} + +define void @matrix_phi_loop_delay_reshape(ptr %in1, ptr %in2, ptr %in3, i32 %count, ptr %out) { +; CHECK-LABEL: @matrix_phi_loop_delay_reshape( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[IN3:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[IN3]], i64 2 +; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x double>, ptr [[VEC_GEP2]], align 8 +; CHECK-NEXT: [[VEC_GEP1:%.*]] = getelementptr double, ptr [[IN3]], i64 4 +; CHECK-NEXT: [[COL_LOAD12:%.*]] = load <2 x double>, ptr [[VEC_GEP1]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> [[COL_LOAD8]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[COL_LOAD12]], <2 x double> poison, <4 x 
i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> [[TMP1]], <6 x i32> +; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <6 x double> [[TMP2]], <6 x double> poison, <3 x i32> +; CHECK-NEXT: [[COL_LOAD10:%.*]] = shufflevector <6 x double> [[TMP2]], <6 x double> poison, <3 x i32> +; CHECK-NEXT: [[COL_LOAD11:%.*]] = load <6 x double>, ptr [[IN2:%.*]], align 8 +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, ptr [[IN1:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN1]], i64 3 +; CHECK-NEXT: [[COL_LOAD14:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI2:%.*]] = phi <3 x double> [ [[SPLIT]], [[ENTRY:%.*]] ], [ [[PHI1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PHI39:%.*]] = phi <3 x double> [ [[COL_LOAD10]], [[ENTRY]] ], [ [[PHI4:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PHI25:%.*]] = phi <6 x double> [ [[COL_LOAD11]], [[ENTRY]] ], [ [[PHI25]], [[LOOP]] ] +; CHECK-NEXT: [[PHI1]] = phi <3 x double> [ [[COL_LOAD]], [[ENTRY]] ], [ [[PHI2]], [[LOOP]] ] +; CHECK-NEXT: [[PHI4]] = phi <3 x double> [ [[COL_LOAD14]], [[ENTRY]] ], [ [[PHI39]], [[LOOP]] ] +; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[DEC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <6 x double> [[PHI25]], <6 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <6 x double> [[PHI25]], <6 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = fadd <3 x double> [[TMP3]], [[PHI1]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <3 x double> [[TMP4]], [[PHI4]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <3 x double> [[TMP7]], [[PHI2]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd <3 x double> [[TMP8]], [[PHI39]] +; CHECK-NEXT: [[DEC]] = sub i32 [[CTR]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: store <3 x double> [[TMP5]], ptr [[OUT:%.*]], align 64 +; 
CHECK-NEXT: [[VEC_GEP30:%.*]] = getelementptr double, ptr [[OUT]], i64 3 +; CHECK-NEXT: store <3 x double> [[TMP6]], ptr [[VEC_GEP30]], align 8 +; CHECK-NEXT: ret void +; +entry: + %in1v = call <6 x double> @llvm.matrix.column.major.load(ptr %in3, i64 2, i1 false, i32 2, i32 3) + %in2v = call <6 x double> @llvm.matrix.column.major.load(ptr %in2, i64 6, i1 false, i32 6, i32 1) + %in3v = call <6 x double> @llvm.matrix.column.major.load(ptr %in1, i64 3, i1 false, i32 3, i32 2) + br label %loop + +loop: + %phi = phi <6 x double> [%in1v, %entry], [%phi3, %loop] + %phi2 = phi <6 x double> [%in2v, %entry], [%phi2, %loop] + %phi3 = phi <6 x double> [%in3v, %entry], [%phi, %loop] + %ctr = phi i32 [%count, %entry], [%dec, %loop] + + %sum = fadd <6 x double> %phi2, %phi3 + %sum2 = fadd <6 x double> %sum, %phi + + %dec = sub i32 %ctr, 1 + %cmp = icmp eq i32 %dec, 0 + br i1 %cmp, label %exit, label %loop + +exit: + store <6 x double> %sum2, ptr %out + ret void +} + +define void @matrix_phi_three_preds(i1 %cond1, i1 %cond2, ptr %a, ptr %b, ptr %c, ptr %out) { +; CHECK-LABEL: @matrix_phi_three_preds( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BBA:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BBB:%.*]], label [[BBC:%.*]] +; CHECK: bba: +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[A]], i64 3 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[A]], i64 6 +; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bbb: +; CHECK-NEXT: [[COL_LOAD9:%.*]] = load <3 x double>, ptr [[B:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP10:%.*]] = getelementptr double, ptr [[B]], i64 3 +; CHECK-NEXT: [[COL_LOAD11:%.*]] = load <3 x double>, ptr [[VEC_GEP10]], align 8 +; CHECK-NEXT: [[VEC_GEP12:%.*]] 
= getelementptr double, ptr [[B]], i64 6 +; CHECK-NEXT: [[COL_LOAD13:%.*]] = load <3 x double>, ptr [[VEC_GEP12]], align 8 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: bbc: +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <3 x double>, ptr [[C:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr double, ptr [[C]], i64 3 +; CHECK-NEXT: [[COL_LOAD6:%.*]] = load <3 x double>, ptr [[VEC_GEP5]], align 8 +; CHECK-NEXT: [[VEC_GEP7:%.*]] = getelementptr double, ptr [[C]], i64 6 +; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <3 x double>, ptr [[VEC_GEP7]], align 8 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PHI14:%.*]] = phi <3 x double> [ [[COL_LOAD]], [[BBA]] ], [ [[COL_LOAD9]], [[BBB]] ], [ [[COL_LOAD4]], [[BBC]] ] +; CHECK-NEXT: [[PHI15:%.*]] = phi <3 x double> [ [[COL_LOAD1]], [[BBA]] ], [ [[COL_LOAD11]], [[BBB]] ], [ [[COL_LOAD6]], [[BBC]] ] +; CHECK-NEXT: [[PHI16:%.*]] = phi <3 x double> [ [[COL_LOAD3]], [[BBA]] ], [ [[COL_LOAD13]], [[BBB]] ], [ [[COL_LOAD8]], [[BBC]] ] +; CHECK-NEXT: store <3 x double> [[PHI14]], ptr [[OUT:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP17:%.*]] = getelementptr double, ptr [[OUT]], i64 3 +; CHECK-NEXT: store <3 x double> [[PHI15]], ptr [[VEC_GEP17]], align 8 +; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr double, ptr [[OUT]], i64 6 +; CHECK-NEXT: store <3 x double> [[PHI16]], ptr [[VEC_GEP18]], align 8 +; CHECK-NEXT: ret void +; +entry: + br i1 %cond1, label %bb1, label %bba + +bb1: + br i1 %cond2, label %bbb, label %bbc + +bba: + %va = call <9 x double> @llvm.matrix.column.major.load(ptr %a, i64 3, i1 false, i32 3, i32 3) + br label %exit + +bbb: + %vb = call <9 x double> @llvm.matrix.column.major.load(ptr %b, i64 3, i1 false, i32 3, i32 3) + br label %exit + +bbc: + %vc = call <9 x double> @llvm.matrix.column.major.load(ptr %c, i64 3, i1 false, i32 3, i32 3) + br label %exit + +exit: + %phi = phi <9 x double> [%va, %bba], [%vb, %bbb], [%vc, %bbc] + call void @llvm.matrix.column.major.store(<9 x double> %phi, ptr %out, i64 3, 
i1 false, i32 3, i32 3) + ret void +} + +define void @matrix_phi_two_preds_shape_mismatch1(i1 %cond1, ptr %a, ptr %b, ptr %out) { +; CHECK-LABEL: @matrix_phi_two_preds_shape_mismatch1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BBA:%.*]], label [[BBB:%.*]] +; CHECK: bba: +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <3 x double>, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[A]], i64 3 +; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr double, ptr [[A]], i64 6 +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <3 x double>, ptr [[VEC_GEP3]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x double> [[COL_LOAD1]], <3 x double> [[COL_LOAD2]], <6 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD4]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <6 x double> [[TMP0]], <6 x double> [[TMP1]], <9 x i32> +; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <9 x double> [[TMP2]], <9 x double> poison, <9 x i32> +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bbb: +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <9 x double>, ptr [[B:%.*]], align 8 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PHI5:%.*]] = phi <9 x double> [ [[SPLIT]], [[BBA]] ], [ [[COL_LOAD]], [[BBB]] ] +; CHECK-NEXT: store <9 x double> [[PHI5]], ptr [[OUT:%.*]], align 128 +; CHECK-NEXT: ret void +; +entry: + br i1 %cond1, label %bba, label %bbb + +bba: + %va = call <9 x double> @llvm.matrix.column.major.load(ptr %a, i64 3, i1 false, i32 3, i32 3) + br label %exit + +bbb: + %vb = call <9 x double> @llvm.matrix.column.major.load(ptr %b, i64 9, i1 false, i32 9, i32 1) + br label %exit + +exit: + %phi = phi <9 x double> [%va, %bba], [%vb, %bbb] + store <9 x double> %phi, ptr %out + ret void +} + +define void @matrix_phi_two_preds_shape_mismatch2(i1 %cond1, ptr %a, ptr %b, ptr %out) { +; CHECK-LABEL: 
@matrix_phi_two_preds_shape_mismatch2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BBA:%.*]], label [[BBB:%.*]] +; CHECK: bba: +; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <9 x double>, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <9 x double> [[COL_LOAD4]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <9 x double> [[COL_LOAD4]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[SPLIT9:%.*]] = shufflevector <9 x double> [[COL_LOAD4]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bbb: +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <3 x double>, ptr [[B:%.*]], align 8 +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr double, ptr [[B]], i64 3 +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <3 x double>, ptr [[VEC_GEP]], align 8 +; CHECK-NEXT: [[VEC_GEP2:%.*]] = getelementptr double, ptr [[B]], i64 6 +; CHECK-NEXT: [[COL_LOAD3:%.*]] = load <3 x double>, ptr [[VEC_GEP2]], align 8 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[PHI5:%.*]] = phi <3 x double> [ [[SPLIT]], [[BBA]] ], [ [[COL_LOAD]], [[BBB]] ] +; CHECK-NEXT: [[PHI6:%.*]] = phi <3 x double> [ [[SPLIT8]], [[BBA]] ], [ [[COL_LOAD1]], [[BBB]] ] +; CHECK-NEXT: [[PHI7:%.*]] = phi <3 x double> [ [[SPLIT9]], [[BBA]] ], [ [[COL_LOAD3]], [[BBB]] ] +; CHECK-NEXT: store <3 x double> [[PHI5]], ptr [[OUT:%.*]], align 128 +; CHECK-NEXT: [[VEC_GEP10:%.*]] = getelementptr double, ptr [[OUT]], i64 3 +; CHECK-NEXT: store <3 x double> [[PHI6]], ptr [[VEC_GEP10]], align 8 +; CHECK-NEXT: [[VEC_GEP11:%.*]] = getelementptr double, ptr [[OUT]], i64 6 +; CHECK-NEXT: store <3 x double> [[PHI7]], ptr [[VEC_GEP11]], align 16 +; CHECK-NEXT: ret void +; +entry: + br i1 %cond1, label %bba, label %bbb + +bba: + %va = call <9 x double> @llvm.matrix.column.major.load(ptr %a, i64 9, i1 false, i32 9, i32 1) + br label %exit + +bbb: + %vb = call <9 x double> @llvm.matrix.column.major.load(ptr %b, i64 3, i1 false, i32 3, i32 3) + br label %exit + 
+exit: + %phi = phi <9 x double> [%va, %bba], [%vb, %bbb] + store <9 x double> %phi, ptr %out + ret void +} + +define <9 x double> @matrix_phi_ifthenelse(i1 %cond, <9 x double> %A, <9 x double> %B, <9 x double> %C) { +; CHECK-LABEL: @matrix_phi_ifthenelse( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <9 x double> [[A:%.*]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <9 x double> [[A]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <9 x double> [[A]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> poison, double [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> [[TMP1]], double [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 2 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> poison, double [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> [[TMP7]], double [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 1 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 2 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 2 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> poison, double [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> [[TMP13]], double [[TMP14]], 
i64 1 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2 +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <9 x double> [[B:%.*]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <9 x double> [[B]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <9 x double> [[B]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <3 x double> [[SPLIT]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <3 x double> poison, double [[TMP21]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <3 x double> [[TMP22]], double [[TMP23]], i64 1 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <3 x double> [[TMP24]], double [[TMP25]], i64 2 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <3 x double> [[SPLIT]], i64 1 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <3 x double> poison, double [[TMP27]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 1 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <3 x double> [[TMP28]], double [[TMP29]], i64 1 +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 1 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <3 x double> [[TMP30]], double [[TMP31]], i64 2 +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <3 x double> [[SPLIT]], i64 2 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <3 x double> poison, double [[TMP33]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 2 +; CHECK-NEXT: [[TMP36:%.*]] = insertelement <3 x double> [[TMP34]], double [[TMP35]], i64 1 +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 2 +; CHECK-NEXT: [[TMP38:%.*]] = insertelement 
<3 x double> [[TMP36]], double [[TMP37]], i64 2 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[MERGE9:%.*]] = phi <3 x double> [ [[TMP5]], [[IF_THEN]] ], [ [[TMP26]], [[IF_ELSE]] ] +; CHECK-NEXT: [[MERGE10:%.*]] = phi <3 x double> [ [[TMP11]], [[IF_THEN]] ], [ [[TMP32]], [[IF_ELSE]] ] +; CHECK-NEXT: [[MERGE11:%.*]] = phi <3 x double> [ [[TMP17]], [[IF_THEN]] ], [ [[TMP38]], [[IF_ELSE]] ] +; CHECK-NEXT: [[SPLIT9:%.*]] = shufflevector <9 x double> [[MERGE:%.*]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[SPLIT10:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[SPLIT11:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> poison, <3 x i32> +; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT9]], <3 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x double> [[MERGE9]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP43:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] +; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <3 x double> [[SPLIT10]], <3 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <3 x double> [[MERGE9]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP45:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]] +; CHECK-NEXT: [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]] +; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <3 x double> [[SPLIT11]], <3 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <3 x double> 
[[MERGE9]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP47]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP48:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]] +; CHECK-NEXT: [[TMP49:%.*]] = fadd <1 x double> [[TMP46]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <1 x double> [[TMP49]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <3 x double> poison, <3 x double> [[TMP50]], <3 x i32> +; CHECK-NEXT: [[BLOCK18:%.*]] = shufflevector <3 x double> [[SPLIT9]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x double> [[MERGE9]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP53:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]] +; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <3 x double> [[SPLIT10]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = extractelement <3 x double> [[MERGE9]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP54]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP55:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]] +; CHECK-NEXT: [[TMP56:%.*]] = fadd <1 x double> [[TMP53]], [[TMP55]] +; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <3 x double> [[SPLIT11]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP57:%.*]] = extractelement <3 x double> [[MERGE9]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP57]], i64 0 +; CHECK-NEXT: 
[[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP58:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]] +; CHECK-NEXT: [[TMP59:%.*]] = fadd <1 x double> [[TMP56]], [[TMP58]] +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <1 x double> [[TMP59]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <3 x double> [[TMP51]], <3 x double> [[TMP60]], <3 x i32> +; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <3 x double> [[SPLIT9]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <3 x double> [[MERGE9]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP63:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]] +; CHECK-NEXT: [[BLOCK30:%.*]] = shufflevector <3 x double> [[SPLIT10]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <3 x double> [[MERGE9]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP64]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP65:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]] +; CHECK-NEXT: [[TMP66:%.*]] = fadd <1 x double> [[TMP63]], [[TMP65]] +; CHECK-NEXT: [[BLOCK33:%.*]] = shufflevector <3 x double> [[SPLIT11]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP67:%.*]] = extractelement <3 x double> [[MERGE9]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP67]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: 
[[TMP68:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]] +; CHECK-NEXT: [[TMP69:%.*]] = fadd <1 x double> [[TMP66]], [[TMP68]] +; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <1 x double> [[TMP69]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <3 x double> [[TMP61]], <3 x double> [[TMP70]], <3 x i32> +; CHECK-NEXT: [[BLOCK36:%.*]] = shufflevector <3 x double> [[SPLIT9]], <3 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP72:%.*]] = extractelement <3 x double> [[MERGE10]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP73:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]] +; CHECK-NEXT: [[BLOCK39:%.*]] = shufflevector <3 x double> [[SPLIT10]], <3 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP74:%.*]] = extractelement <3 x double> [[MERGE10]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> poison, double [[TMP74]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP75:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]] +; CHECK-NEXT: [[TMP76:%.*]] = fadd <1 x double> [[TMP73]], [[TMP75]] +; CHECK-NEXT: [[BLOCK42:%.*]] = shufflevector <3 x double> [[SPLIT11]], <3 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <3 x double> [[MERGE10]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> poison, double [[TMP77]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP78:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]] +; CHECK-NEXT: [[TMP79:%.*]] = 
fadd <1 x double> [[TMP76]], [[TMP78]] +; CHECK-NEXT: [[TMP80:%.*]] = shufflevector <1 x double> [[TMP79]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP81:%.*]] = shufflevector <3 x double> poison, <3 x double> [[TMP80]], <3 x i32> +; CHECK-NEXT: [[BLOCK45:%.*]] = shufflevector <3 x double> [[SPLIT9]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <3 x double> [[MERGE10]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP83:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]] +; CHECK-NEXT: [[BLOCK48:%.*]] = shufflevector <3 x double> [[SPLIT10]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP84:%.*]] = extractelement <3 x double> [[MERGE10]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> poison, double [[TMP84]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP85:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]] +; CHECK-NEXT: [[TMP86:%.*]] = fadd <1 x double> [[TMP83]], [[TMP85]] +; CHECK-NEXT: [[BLOCK51:%.*]] = shufflevector <3 x double> [[SPLIT11]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP87:%.*]] = extractelement <3 x double> [[MERGE10]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> poison, double [[TMP87]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP88:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]] +; CHECK-NEXT: [[TMP89:%.*]] = fadd <1 x double> [[TMP86]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <1 x double> [[TMP89]], <1 x double> poison, <3 x i32> +; 
CHECK-NEXT: [[TMP91:%.*]] = shufflevector <3 x double> [[TMP81]], <3 x double> [[TMP90]], <3 x i32> +; CHECK-NEXT: [[BLOCK54:%.*]] = shufflevector <3 x double> [[SPLIT9]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <3 x double> [[MERGE10]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP93:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]] +; CHECK-NEXT: [[BLOCK57:%.*]] = shufflevector <3 x double> [[SPLIT10]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP94:%.*]] = extractelement <3 x double> [[MERGE10]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> poison, double [[TMP94]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP95:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]] +; CHECK-NEXT: [[TMP96:%.*]] = fadd <1 x double> [[TMP93]], [[TMP95]] +; CHECK-NEXT: [[BLOCK60:%.*]] = shufflevector <3 x double> [[SPLIT11]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP97:%.*]] = extractelement <3 x double> [[MERGE10]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> poison, double [[TMP97]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP98:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]] +; CHECK-NEXT: [[TMP99:%.*]] = fadd <1 x double> [[TMP96]], [[TMP98]] +; CHECK-NEXT: [[TMP100:%.*]] = shufflevector <1 x double> [[TMP99]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP101:%.*]] = shufflevector <3 x double> [[TMP91]], <3 x double> [[TMP100]], <3 x i32> +; CHECK-NEXT: [[BLOCK63:%.*]] = 
shufflevector <3 x double> [[SPLIT9]], <3 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP102:%.*]] = extractelement <3 x double> [[MERGE11]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP103:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]] +; CHECK-NEXT: [[BLOCK66:%.*]] = shufflevector <3 x double> [[SPLIT10]], <3 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP104:%.*]] = extractelement <3 x double> [[MERGE11]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> poison, double [[TMP104]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP105:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]] +; CHECK-NEXT: [[TMP106:%.*]] = fadd <1 x double> [[TMP103]], [[TMP105]] +; CHECK-NEXT: [[BLOCK69:%.*]] = shufflevector <3 x double> [[SPLIT11]], <3 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP107:%.*]] = extractelement <3 x double> [[MERGE11]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> poison, double [[TMP107]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP108:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]] +; CHECK-NEXT: [[TMP109:%.*]] = fadd <1 x double> [[TMP106]], [[TMP108]] +; CHECK-NEXT: [[TMP110:%.*]] = shufflevector <1 x double> [[TMP109]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP111:%.*]] = shufflevector <3 x double> poison, <3 x double> [[TMP110]], <3 x i32> +; CHECK-NEXT: [[BLOCK72:%.*]] = shufflevector <3 x double> [[SPLIT9]], <3 x double> poison, <1 x i32> +; 
CHECK-NEXT: [[TMP112:%.*]] = extractelement <3 x double> [[MERGE11]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> poison, double [[TMP112]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP113:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]] +; CHECK-NEXT: [[BLOCK75:%.*]] = shufflevector <3 x double> [[SPLIT10]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP114:%.*]] = extractelement <3 x double> [[MERGE11]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> poison, double [[TMP114]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP115:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]] +; CHECK-NEXT: [[TMP116:%.*]] = fadd <1 x double> [[TMP113]], [[TMP115]] +; CHECK-NEXT: [[BLOCK78:%.*]] = shufflevector <3 x double> [[SPLIT11]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP117:%.*]] = extractelement <3 x double> [[MERGE11]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> poison, double [[TMP117]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP118:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]] +; CHECK-NEXT: [[TMP119:%.*]] = fadd <1 x double> [[TMP116]], [[TMP118]] +; CHECK-NEXT: [[TMP120:%.*]] = shufflevector <1 x double> [[TMP119]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP121:%.*]] = shufflevector <3 x double> [[TMP111]], <3 x double> [[TMP120]], <3 x i32> +; CHECK-NEXT: [[BLOCK81:%.*]] = shufflevector <3 x double> [[SPLIT9]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP122:%.*]] = extractelement <3 x double> [[MERGE11]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLATINSERT82:%.*]] = 
insertelement <1 x double> poison, double [[TMP122]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP123:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]] +; CHECK-NEXT: [[BLOCK84:%.*]] = shufflevector <3 x double> [[SPLIT10]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP124:%.*]] = extractelement <3 x double> [[MERGE11]], i64 1 +; CHECK-NEXT: [[SPLAT_SPLATINSERT85:%.*]] = insertelement <1 x double> poison, double [[TMP124]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT86:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT85]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP125:%.*]] = fmul <1 x double> [[BLOCK84]], [[SPLAT_SPLAT86]] +; CHECK-NEXT: [[TMP126:%.*]] = fadd <1 x double> [[TMP123]], [[TMP125]] +; CHECK-NEXT: [[BLOCK87:%.*]] = shufflevector <3 x double> [[SPLIT11]], <3 x double> poison, <1 x i32> +; CHECK-NEXT: [[TMP127:%.*]] = extractelement <3 x double> [[MERGE11]], i64 2 +; CHECK-NEXT: [[SPLAT_SPLATINSERT88:%.*]] = insertelement <1 x double> poison, double [[TMP127]], i64 0 +; CHECK-NEXT: [[SPLAT_SPLAT89:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT88]], <1 x double> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP128:%.*]] = fmul <1 x double> [[BLOCK87]], [[SPLAT_SPLAT89]] +; CHECK-NEXT: [[TMP129:%.*]] = fadd <1 x double> [[TMP126]], [[TMP128]] +; CHECK-NEXT: [[TMP130:%.*]] = shufflevector <1 x double> [[TMP129]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP131:%.*]] = shufflevector <3 x double> [[TMP121]], <3 x double> [[TMP130]], <3 x i32> +; CHECK-NEXT: [[TMP132:%.*]] = shufflevector <3 x double> [[TMP71]], <3 x double> [[TMP101]], <6 x i32> +; CHECK-NEXT: [[TMP133:%.*]] = shufflevector <3 x double> [[TMP131]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP134:%.*]] = shufflevector <6 x double> [[TMP132]], <6 x double> [[TMP133]], <9 x i32> +; CHECK-NEXT: ret <9 x double> 
[[TMP134]] +; +entry: + br i1 %cond, label %if.then, label %if.else + +if.then: ; preds = %entry + %A.trans = tail call <9 x double> @llvm.matrix.transpose.v9f64(<9 x double> %A, i32 3, i32 3) + br label %if.end + +if.else: ; preds = %entry + %B.trans = tail call <9 x double> @llvm.matrix.transpose.v9f64(<9 x double> %B, i32 3, i32 3) + br label %if.end + +if.end: ; preds = %if.then, %if.else + %merge = phi <9 x double> [ %A.trans, %if.then], [ %B.trans, %if.else ] + %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v9f64.v9f64(<9 x double> %C, <9 x double> %merge, i32 3, i32 3, i32 3) + ret <9 x double> %res +} diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll index 2af2c979f206..f07e1762d404 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll @@ -1,267 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s -; Check that we we use flattened vectors for PHI operands and extract the columns afterwards. 
-define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, <9 x double> %C) { -; CHECK-LABEL: @unsupported_phi( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <9 x double> [[A:%.*]], <9 x double> poison, <3 x i32> -; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <9 x double> [[A]], <9 x double> poison, <3 x i32> -; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <9 x double> [[A]], <9 x double> poison, <3 x i32> -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> poison, double [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x double> [[TMP1]], double [[TMP2]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x double> [[TMP3]], double [[TMP4]], i64 2 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 1 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x double> poison, double [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x double> [[TMP7]], double [[TMP8]], i64 1 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 1 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x double> [[TMP9]], double [[TMP10]], i64 2 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 2 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x double> poison, double [[TMP12]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x double> [[TMP13]], double [[TMP14]], i64 1 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2 -; CHECK-NEXT: [[TMP17:%.*]] = 
insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2 -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> [[TMP11]], <6 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <6 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <6 x double> [[TMP18]], <6 x double> [[TMP19]], <9 x i32> -; CHECK-NEXT: br label [[IF_END:%.*]] -; CHECK: if.else: -; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <9 x double> [[B:%.*]], <9 x double> poison, <3 x i32> -; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <9 x double> [[B]], <9 x double> poison, <3 x i32> -; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <9 x double> [[B]], <9 x double> poison, <3 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <3 x double> [[SPLIT]], i64 0 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <3 x double> poison, double [[TMP21]], i64 0 -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 0 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <3 x double> [[TMP22]], double [[TMP23]], i64 1 -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 0 -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <3 x double> [[TMP24]], double [[TMP25]], i64 2 -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <3 x double> [[SPLIT]], i64 1 -; CHECK-NEXT: [[TMP28:%.*]] = insertelement <3 x double> poison, double [[TMP27]], i64 0 -; CHECK-NEXT: [[TMP29:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 1 -; CHECK-NEXT: [[TMP30:%.*]] = insertelement <3 x double> [[TMP28]], double [[TMP29]], i64 1 -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 1 -; CHECK-NEXT: [[TMP32:%.*]] = insertelement <3 x double> [[TMP30]], double [[TMP31]], i64 2 -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <3 x double> [[SPLIT]], i64 2 -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <3 x double> poison, double [[TMP33]], i64 0 -; CHECK-NEXT: [[TMP35:%.*]] = extractelement <3 x double> [[SPLIT1]], i64 2 -; CHECK-NEXT: 
[[TMP36:%.*]] = insertelement <3 x double> [[TMP34]], double [[TMP35]], i64 1 -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 2 -; CHECK-NEXT: [[TMP38:%.*]] = insertelement <3 x double> [[TMP36]], double [[TMP37]], i64 2 -; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <3 x double> [[TMP26]], <3 x double> [[TMP32]], <6 x i32> -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <3 x double> [[TMP38]], <3 x double> poison, <6 x i32> -; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <6 x double> [[TMP39]], <6 x double> [[TMP40]], <9 x i32> -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: [[MERGE:%.*]] = phi <9 x double> [ [[TMP20]], [[IF_THEN]] ], [ [[TMP41]], [[IF_ELSE]] ] -; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <9 x double> [[C:%.*]], <9 x double> poison, <3 x i32> -; CHECK-NEXT: [[SPLIT7:%.*]] = shufflevector <9 x double> [[C]], <9 x double> poison, <3 x i32> -; CHECK-NEXT: [[SPLIT8:%.*]] = shufflevector <9 x double> [[C]], <9 x double> poison, <3 x i32> -; CHECK-NEXT: [[SPLIT9:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> poison, <3 x i32> -; CHECK-NEXT: [[SPLIT10:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> poison, <3 x i32> -; CHECK-NEXT: [[SPLIT11:%.*]] = shufflevector <9 x double> [[MERGE]], <9 x double> poison, <3 x i32> -; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP42]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP43:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <3 x double> 
[[SPLIT9]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x double> poison, double [[TMP44]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP45:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]] -; CHECK-NEXT: [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]] -; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT16:%.*]] = insertelement <1 x double> poison, double [[TMP47]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP48:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]] -; CHECK-NEXT: [[TMP49:%.*]] = fadd <1 x double> [[TMP46]], [[TMP48]] -; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <1 x double> [[TMP49]], <1 x double> poison, <3 x i32> -; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <3 x double> poison, <3 x double> [[TMP50]], <3 x i32> -; CHECK-NEXT: [[BLOCK18:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT19:%.*]] = insertelement <1 x double> poison, double [[TMP52]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP53:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]] -; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT22:%.*]] = insertelement <1 x double> poison, double [[TMP54]], i64 0 -; CHECK-NEXT: 
[[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP55:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]] -; CHECK-NEXT: [[TMP56:%.*]] = fadd <1 x double> [[TMP53]], [[TMP55]] -; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP57:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT25:%.*]] = insertelement <1 x double> poison, double [[TMP57]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP58:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]] -; CHECK-NEXT: [[TMP59:%.*]] = fadd <1 x double> [[TMP56]], [[TMP58]] -; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <1 x double> [[TMP59]], <1 x double> poison, <3 x i32> -; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <3 x double> [[TMP51]], <3 x double> [[TMP60]], <3 x i32> -; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP62:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT28:%.*]] = insertelement <1 x double> poison, double [[TMP62]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP63:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]] -; CHECK-NEXT: [[BLOCK30:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP64:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT31:%.*]] = insertelement <1 x double> poison, double [[TMP64]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: 
[[TMP65:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]] -; CHECK-NEXT: [[TMP66:%.*]] = fadd <1 x double> [[TMP63]], [[TMP65]] -; CHECK-NEXT: [[BLOCK33:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP67:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT34:%.*]] = insertelement <1 x double> poison, double [[TMP67]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP68:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]] -; CHECK-NEXT: [[TMP69:%.*]] = fadd <1 x double> [[TMP66]], [[TMP68]] -; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <1 x double> [[TMP69]], <1 x double> poison, <3 x i32> -; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <3 x double> [[TMP61]], <3 x double> [[TMP70]], <3 x i32> -; CHECK-NEXT: [[BLOCK36:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP72:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT37:%.*]] = insertelement <1 x double> poison, double [[TMP72]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP73:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]] -; CHECK-NEXT: [[BLOCK39:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP74:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT40:%.*]] = insertelement <1 x double> poison, double [[TMP74]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP75:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]] -; CHECK-NEXT: [[TMP76:%.*]] = fadd <1 x double> 
[[TMP73]], [[TMP75]] -; CHECK-NEXT: [[BLOCK42:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP77:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT43:%.*]] = insertelement <1 x double> poison, double [[TMP77]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP78:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]] -; CHECK-NEXT: [[TMP79:%.*]] = fadd <1 x double> [[TMP76]], [[TMP78]] -; CHECK-NEXT: [[TMP80:%.*]] = shufflevector <1 x double> [[TMP79]], <1 x double> poison, <3 x i32> -; CHECK-NEXT: [[TMP81:%.*]] = shufflevector <3 x double> poison, <3 x double> [[TMP80]], <3 x i32> -; CHECK-NEXT: [[BLOCK45:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP82:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT46:%.*]] = insertelement <1 x double> poison, double [[TMP82]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT47:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT46]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP83:%.*]] = fmul <1 x double> [[BLOCK45]], [[SPLAT_SPLAT47]] -; CHECK-NEXT: [[BLOCK48:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP84:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT49:%.*]] = insertelement <1 x double> poison, double [[TMP84]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP85:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]] -; CHECK-NEXT: [[TMP86:%.*]] = fadd <1 x double> [[TMP83]], [[TMP85]] -; CHECK-NEXT: [[BLOCK51:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> -; 
CHECK-NEXT: [[TMP87:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT52:%.*]] = insertelement <1 x double> poison, double [[TMP87]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP88:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]] -; CHECK-NEXT: [[TMP89:%.*]] = fadd <1 x double> [[TMP86]], [[TMP88]] -; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <1 x double> [[TMP89]], <1 x double> poison, <3 x i32> -; CHECK-NEXT: [[TMP91:%.*]] = shufflevector <3 x double> [[TMP81]], <3 x double> [[TMP90]], <3 x i32> -; CHECK-NEXT: [[BLOCK54:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP92:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT55:%.*]] = insertelement <1 x double> poison, double [[TMP92]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT56:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT55]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP93:%.*]] = fmul <1 x double> [[BLOCK54]], [[SPLAT_SPLAT56]] -; CHECK-NEXT: [[BLOCK57:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP94:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT58:%.*]] = insertelement <1 x double> poison, double [[TMP94]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP95:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]] -; CHECK-NEXT: [[TMP96:%.*]] = fadd <1 x double> [[TMP93]], [[TMP95]] -; CHECK-NEXT: [[BLOCK60:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP97:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT61:%.*]] = insertelement <1 x double> 
poison, double [[TMP97]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP98:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]] -; CHECK-NEXT: [[TMP99:%.*]] = fadd <1 x double> [[TMP96]], [[TMP98]] -; CHECK-NEXT: [[TMP100:%.*]] = shufflevector <1 x double> [[TMP99]], <1 x double> poison, <3 x i32> -; CHECK-NEXT: [[TMP101:%.*]] = shufflevector <3 x double> [[TMP91]], <3 x double> [[TMP100]], <3 x i32> -; CHECK-NEXT: [[BLOCK63:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP102:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT64:%.*]] = insertelement <1 x double> poison, double [[TMP102]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT65:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT64]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP103:%.*]] = fmul <1 x double> [[BLOCK63]], [[SPLAT_SPLAT65]] -; CHECK-NEXT: [[BLOCK66:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP104:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT67:%.*]] = insertelement <1 x double> poison, double [[TMP104]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP105:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]] -; CHECK-NEXT: [[TMP106:%.*]] = fadd <1 x double> [[TMP103]], [[TMP105]] -; CHECK-NEXT: [[BLOCK69:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP107:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT70:%.*]] = insertelement <1 x double> poison, double [[TMP107]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT71:%.*]] = shufflevector 
<1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP108:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]] -; CHECK-NEXT: [[TMP109:%.*]] = fadd <1 x double> [[TMP106]], [[TMP108]] -; CHECK-NEXT: [[TMP110:%.*]] = shufflevector <1 x double> [[TMP109]], <1 x double> poison, <3 x i32> -; CHECK-NEXT: [[TMP111:%.*]] = shufflevector <3 x double> poison, <3 x double> [[TMP110]], <3 x i32> -; CHECK-NEXT: [[BLOCK72:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP112:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT73:%.*]] = insertelement <1 x double> poison, double [[TMP112]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT74:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT73]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP113:%.*]] = fmul <1 x double> [[BLOCK72]], [[SPLAT_SPLAT74]] -; CHECK-NEXT: [[BLOCK75:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP114:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT76:%.*]] = insertelement <1 x double> poison, double [[TMP114]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP115:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]] -; CHECK-NEXT: [[TMP116:%.*]] = fadd <1 x double> [[TMP113]], [[TMP115]] -; CHECK-NEXT: [[BLOCK78:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP117:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT79:%.*]] = insertelement <1 x double> poison, double [[TMP117]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP118:%.*]] = fmul <1 x 
double> [[BLOCK78]], [[SPLAT_SPLAT80]] -; CHECK-NEXT: [[TMP119:%.*]] = fadd <1 x double> [[TMP116]], [[TMP118]] -; CHECK-NEXT: [[TMP120:%.*]] = shufflevector <1 x double> [[TMP119]], <1 x double> poison, <3 x i32> -; CHECK-NEXT: [[TMP121:%.*]] = shufflevector <3 x double> [[TMP111]], <3 x double> [[TMP120]], <3 x i32> -; CHECK-NEXT: [[BLOCK81:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP122:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT82:%.*]] = insertelement <1 x double> poison, double [[TMP122]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT83:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT82]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP123:%.*]] = fmul <1 x double> [[BLOCK81]], [[SPLAT_SPLAT83]] -; CHECK-NEXT: [[BLOCK84:%.*]] = shufflevector <3 x double> [[SPLIT7]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP124:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT85:%.*]] = insertelement <1 x double> poison, double [[TMP124]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT86:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT85]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP125:%.*]] = fmul <1 x double> [[BLOCK84]], [[SPLAT_SPLAT86]] -; CHECK-NEXT: [[TMP126:%.*]] = fadd <1 x double> [[TMP123]], [[TMP125]] -; CHECK-NEXT: [[BLOCK87:%.*]] = shufflevector <3 x double> [[SPLIT8]], <3 x double> poison, <1 x i32> -; CHECK-NEXT: [[TMP127:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT88:%.*]] = insertelement <1 x double> poison, double [[TMP127]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT89:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT88]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP128:%.*]] = fmul <1 x double> [[BLOCK87]], [[SPLAT_SPLAT89]] -; CHECK-NEXT: [[TMP129:%.*]] = fadd <1 x double> [[TMP126]], [[TMP128]] -; CHECK-NEXT: 
[[TMP130:%.*]] = shufflevector <1 x double> [[TMP129]], <1 x double> poison, <3 x i32> -; CHECK-NEXT: [[TMP131:%.*]] = shufflevector <3 x double> [[TMP121]], <3 x double> [[TMP130]], <3 x i32> -; CHECK-NEXT: [[TMP132:%.*]] = shufflevector <3 x double> [[TMP71]], <3 x double> [[TMP101]], <6 x i32> -; CHECK-NEXT: [[TMP133:%.*]] = shufflevector <3 x double> [[TMP131]], <3 x double> poison, <6 x i32> -; CHECK-NEXT: [[TMP134:%.*]] = shufflevector <6 x double> [[TMP132]], <6 x double> [[TMP133]], <9 x i32> -; CHECK-NEXT: ret <9 x double> [[TMP134]] -; - - - -entry: - br i1 %cond, label %if.then, label %if.else - -if.then: ; preds = %entry - %A.trans = tail call <9 x double> @llvm.matrix.transpose.v9f64(<9 x double> %A, i32 3, i32 3) - br label %if.end - -if.else: ; preds = %entry - %B.trans = tail call <9 x double> @llvm.matrix.transpose.v9f64(<9 x double> %B, i32 3, i32 3) - br label %if.end - -if.end: ; preds = %if.then, %if.else - %merge = phi <9 x double> [ %A.trans, %if.then], [ %B.trans, %if.else ] - %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v9f64.v9f64(<9 x double> %C, <9 x double> %merge, i32 3, i32 3, i32 3) - ret <9 x double> %res -} - ; Make sure we use a flattened vector when calling @foo and the use its flat vector result properly. define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B) { ; CHECK-LABEL: @unsupported_call( -- cgit v1.2.3 From a2cee05449636c8e0d630b2ccdc71f2d422227a9 Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Wed, 18 Jun 2025 09:12:53 -0700 Subject: [scudo] Make report pointers const. (#144624) Mark the pointer parameters of as many of the reportXX functions as possible const. This avoids the need to use const_cast when calling these functions with a pointer that is already const. Fix the append calls in reportHeaderCorruption that were passed an argument their format strings did not use. 
--- compiler-rt/lib/scudo/standalone/chunk.h | 2 +- compiler-rt/lib/scudo/standalone/combined.h | 2 +- compiler-rt/lib/scudo/standalone/report.cpp | 15 +++++++-------- compiler-rt/lib/scudo/standalone/report.h | 11 ++++++----- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/chunk.h b/compiler-rt/lib/scudo/standalone/chunk.h index a1b8e723d4cb..9da2dc57e71a 100644 --- a/compiler-rt/lib/scudo/standalone/chunk.h +++ b/compiler-rt/lib/scudo/standalone/chunk.h @@ -125,7 +125,7 @@ inline void loadHeader(u32 Cookie, const void *Ptr, *NewUnpackedHeader = bit_cast(NewPackedHeader); if (UNLIKELY(NewUnpackedHeader->Checksum != computeHeaderChecksum(Cookie, Ptr, NewUnpackedHeader))) - reportHeaderCorruption(NewUnpackedHeader, const_cast(Ptr)); + reportHeaderCorruption(NewUnpackedHeader, Ptr); } inline bool isValid(u32 Cookie, const void *Ptr, diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index 43655642843c..87acdec2a3ba 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -775,7 +775,7 @@ public: // Getting the alloc size of a chunk only makes sense if it's allocated. if (UNLIKELY(Header.State != Chunk::State::Allocated)) - reportInvalidChunkState(AllocatorAction::Sizing, const_cast(Ptr)); + reportInvalidChunkState(AllocatorAction::Sizing, Ptr); return getSize(Ptr, &Header); } diff --git a/compiler-rt/lib/scudo/standalone/report.cpp b/compiler-rt/lib/scudo/standalone/report.cpp index 14a4066d3720..b97a74b078c2 100644 --- a/compiler-rt/lib/scudo/standalone/report.cpp +++ b/compiler-rt/lib/scudo/standalone/report.cpp @@ -66,17 +66,16 @@ void NORETURN reportInvalidFlag(const char *FlagType, const char *Value) { // The checksum of a chunk header is invalid. This could be caused by an // {over,under}write of the header, a pointer that is not an actual chunk. 
-void NORETURN reportHeaderCorruption(void *Header, void *Ptr) { +void NORETURN reportHeaderCorruption(void *Header, const void *Ptr) { ScopedErrorReport Report; Report.append("corrupted chunk header at address %p", Ptr); if (*static_cast(Header) == 0U) { // Header all zero, which could indicate that this might be a pointer that // has been double freed but the memory has been released to the kernel. Report.append(": chunk header is zero and might indicate memory corruption " - "or a double free\n", - Ptr); + "or a double free\n"); } else { - Report.append(": most likely due to memory corruption\n", Ptr); + Report.append(": most likely due to memory corruption\n"); } } @@ -131,13 +130,13 @@ static const char *stringifyAction(AllocatorAction Action) { // The chunk is not in a state congruent with the operation we want to perform. // This is usually the case with a double-free, a realloc of a freed pointer. -void NORETURN reportInvalidChunkState(AllocatorAction Action, void *Ptr) { +void NORETURN reportInvalidChunkState(AllocatorAction Action, const void *Ptr) { ScopedErrorReport Report; Report.append("invalid chunk state when %s address %p\n", stringifyAction(Action), Ptr); } -void NORETURN reportMisalignedPointer(AllocatorAction Action, void *Ptr) { +void NORETURN reportMisalignedPointer(AllocatorAction Action, const void *Ptr) { ScopedErrorReport Report; Report.append("misaligned pointer when %s address %p\n", stringifyAction(Action), Ptr); @@ -145,7 +144,7 @@ void NORETURN reportMisalignedPointer(AllocatorAction Action, void *Ptr) { // The deallocation function used is at odds with the one used to allocate the // chunk (eg: new[]/delete or malloc/delete, and so on). 
-void NORETURN reportDeallocTypeMismatch(AllocatorAction Action, void *Ptr, +void NORETURN reportDeallocTypeMismatch(AllocatorAction Action, const void *Ptr, u8 TypeA, u8 TypeB) { ScopedErrorReport Report; Report.append("allocation type mismatch when %s address %p (%d vs %d)\n", @@ -154,7 +153,7 @@ void NORETURN reportDeallocTypeMismatch(AllocatorAction Action, void *Ptr, // The size specified to the delete operator does not match the one that was // passed to new when allocating the chunk. -void NORETURN reportDeleteSizeMismatch(void *Ptr, uptr Size, +void NORETURN reportDeleteSizeMismatch(const void *Ptr, uptr Size, uptr ExpectedSize) { ScopedErrorReport Report; Report.append( diff --git a/compiler-rt/lib/scudo/standalone/report.h b/compiler-rt/lib/scudo/standalone/report.h index c0214b51560e..c397dd3fc9c6 100644 --- a/compiler-rt/lib/scudo/standalone/report.h +++ b/compiler-rt/lib/scudo/standalone/report.h @@ -24,7 +24,7 @@ void NORETURN reportRawError(const char *Message); void NORETURN reportInvalidFlag(const char *FlagType, const char *Value); // Chunk header related errors. -void NORETURN reportHeaderCorruption(void *Header, void *Ptr); +void NORETURN reportHeaderCorruption(void *Header, const void *Ptr); // Sanity checks related error. 
void NORETURN reportSanityCheckError(const char *Field); @@ -41,11 +41,12 @@ enum class AllocatorAction : u8 { Reallocating, Sizing, }; -void NORETURN reportInvalidChunkState(AllocatorAction Action, void *Ptr); -void NORETURN reportMisalignedPointer(AllocatorAction Action, void *Ptr); -void NORETURN reportDeallocTypeMismatch(AllocatorAction Action, void *Ptr, +void NORETURN reportInvalidChunkState(AllocatorAction Action, const void *Ptr); +void NORETURN reportMisalignedPointer(AllocatorAction Action, const void *Ptr); +void NORETURN reportDeallocTypeMismatch(AllocatorAction Action, const void *Ptr, u8 TypeA, u8 TypeB); -void NORETURN reportDeleteSizeMismatch(void *Ptr, uptr Size, uptr ExpectedSize); +void NORETURN reportDeleteSizeMismatch(const void *Ptr, uptr Size, + uptr ExpectedSize); // C wrappers errors. void NORETURN reportAlignmentNotPowerOfTwo(uptr Alignment); -- cgit v1.2.3 From 13510c07364dc3ac30f34e73c98ac8dc75e7efc7 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Wed, 18 Jun 2025 12:13:50 -0400 Subject: [libc++] Make list constexpr as part of P3372R3 (#129799) This patch makes `std::list` constexpr as part of P3372R3. Fixes #128659. 
--- libcxx/docs/FeatureTestMacroTable.rst | 2 + libcxx/include/list | 552 ++++++++++-------- libcxx/include/version | 2 + .../std/containers/sequences/list/compare.pass.cpp | 24 +- .../sequences/list/compare.three_way.pass.cpp | 6 +- .../sequences/list/get_allocator.pass.cpp | 13 +- .../sequences/list/incomplete_type.pass.cpp | 13 +- .../containers/sequences/list/iterators.pass.cpp | 27 +- .../sequences/list/list.capacity/empty.pass.cpp | 13 +- .../sequences/list/list.capacity/max_size.pass.cpp | 13 +- .../list/list.capacity/resize_size.pass.cpp | 62 +- .../list/list.capacity/resize_size_value.pass.cpp | 13 +- .../sequences/list/list.capacity/size.pass.cpp | 13 +- .../sequences/list/list.cons/assign_copy.pass.cpp | 13 +- .../list.cons/assign_initializer_list.pass.cpp | 13 +- .../sequences/list/list.cons/assign_move.pass.cpp | 13 +- .../sequences/list/list.cons/copy.pass.cpp | 13 +- .../sequences/list/list.cons/copy_alloc.pass.cpp | 13 +- .../sequences/list/list.cons/default.pass.cpp | 13 +- .../list/list.cons/default_stack_alloc.pass.cpp | 13 +- .../sequences/list/list.cons/from_range.pass.cpp | 20 +- .../list/list.cons/initializer_list.pass.cpp | 13 +- .../list/list.cons/initializer_list_alloc.pass.cpp | 13 +- .../list/list.cons/input_iterator.pass.cpp | 27 +- .../sequences/list/list.cons/move.pass.cpp | 13 +- .../sequences/list/list.cons/move_alloc.pass.cpp | 13 +- .../list.cons/op_equal_initializer_list.pass.cpp | 13 +- .../sequences/list/list.cons/size_type.pass.cpp | 39 +- .../list/list.cons/size_value_alloc.pass.cpp | 13 +- .../sequences/list/list.erasure/erase.pass.cpp | 27 +- .../sequences/list/list.erasure/erase_if.pass.cpp | 27 +- .../list/list.modifiers/append_range.pass.cpp | 20 +- .../list/list.modifiers/assign_range.pass.cpp | 20 +- .../sequences/list/list.modifiers/clear.pass.cpp | 13 +- .../sequences/list/list.modifiers/emplace.pass.cpp | 19 +- .../list/list.modifiers/emplace_back.pass.cpp | 19 +- .../list/list.modifiers/emplace_front.pass.cpp | 19 
+- .../list/list.modifiers/erase_iter.pass.cpp | 13 +- .../list/list.modifiers/erase_iter_iter.pass.cpp | 13 +- .../insert_iter_initializer_list.pass.cpp | 13 +- .../list.modifiers/insert_iter_iter_iter.pass.cpp | 59 +- .../list.modifiers/insert_iter_rvalue.pass.cpp | 13 +- .../list.modifiers/insert_iter_size_value.pass.cpp | 33 +- .../list/list.modifiers/insert_iter_value.pass.cpp | 33 +- .../list/list.modifiers/insert_range.pass.cpp | 23 +- .../list/list.modifiers/pop_back.pass.cpp | 13 +- .../list/list.modifiers/pop_front.pass.cpp | 13 +- .../list/list.modifiers/prepend_range.pass.cpp | 20 +- .../list/list.modifiers/push_back.pass.cpp | 13 +- .../list/list.modifiers/push_back_rvalue.pass.cpp | 13 +- .../list/list.modifiers/push_front.pass.cpp | 13 +- .../list/list.modifiers/push_front_rvalue.pass.cpp | 13 +- .../sequences/list/list.ops/merge.pass.cpp | 13 +- .../sequences/list/list.ops/merge_comp.pass.cpp | 13 +- .../sequences/list/list.ops/remove.pass.cpp | 25 +- .../sequences/list/list.ops/remove_if.pass.cpp | 25 +- .../sequences/list/list.ops/reverse.pass.cpp | 13 +- .../sequences/list/list.ops/sort.pass.cpp | 19 +- .../sequences/list/list.ops/sort_comp.pass.cpp | 57 +- .../list/list.ops/splice_pos_list.pass.cpp | 13 +- .../list/list.ops/splice_pos_list_iter.pass.cpp | 13 +- .../list.ops/splice_pos_list_iter_iter.pass.cpp | 13 +- .../sequences/list/list.ops/unique.pass.cpp | 13 +- .../sequences/list/list.ops/unique_pred.pass.cpp | 23 +- .../sequences/list/list.special/swap.pass.cpp | 13 +- .../list/list.special/swap_noexcept.pass.cpp | 13 +- .../list.version.compile.pass.cpp | 27 + .../version.version.compile.pass.cpp | 27 + libcxx/test/support/min_allocator.h | 647 ++++++++++----------- .../generate_feature_test_macro_components.py | 5 + 70 files changed, 1600 insertions(+), 842 deletions(-) diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 3e6fd643f620..5ebc9bb7dcda 100644 --- 
a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -422,6 +422,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_constexpr_forward_list`` ``202502L`` ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_list`` ``202502L`` + ---------------------------------------------------------- ----------------- ``__cpp_lib_constexpr_new`` ``202406L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_constexpr_queue`` ``202502L`` diff --git a/libcxx/include/list b/libcxx/include/list index 98610f59ed74..2896231203d9 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -297,14 +297,20 @@ struct __list_node_base { __base_pointer __prev_; __base_pointer __next_; - _LIBCPP_HIDE_FROM_ABI __list_node_base() : __prev_(__self()), __next_(__self()) {} + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_node_base() : __prev_(__self()), __next_(__self()) {} + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit __list_node_base(__base_pointer __prev, __base_pointer __next) : __prev_(__prev), __next_(__next) {} - _LIBCPP_HIDE_FROM_ABI __base_pointer __self() { return pointer_traits<__base_pointer>::pointer_to(*this); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __base_pointer __self() { + return pointer_traits<__base_pointer>::pointer_to(*this); + } - _LIBCPP_HIDE_FROM_ABI __node_pointer __as_node() { return static_cast<__node_pointer>(__self()); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __node_pointer __as_node() { + return pointer_traits<__node_pointer>::pointer_to( + *static_cast::element_type*>(this)); + } }; template @@ -319,7 +325,7 @@ private: }; public: - _LIBCPP_HIDE_FROM_ABI _Tp& __get_value() { return __value_; } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI _Tp& __get_value() { return __value_; } # else private: @@ -332,10 +338,14 @@ public: typedef 
__list_node_base<_Tp, _VoidPtr> __base; typedef typename __base::__base_pointer __base_pointer; - _LIBCPP_HIDE_FROM_ABI explicit __list_node(__base_pointer __prev, __base_pointer __next) : __base(__prev, __next) {} - _LIBCPP_HIDE_FROM_ABI ~__list_node() {} + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit __list_node(__base_pointer __prev, __base_pointer __next) + : __base(__prev, __next) {} + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI ~__list_node() {} - _LIBCPP_HIDE_FROM_ABI __base_pointer __as_link() { return __base::__self(); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __base_pointer __as_link() { + return pointer_traits<__base_pointer>::pointer_to( + *static_cast::element_type*>(std::addressof(*this))); + } }; template > @@ -352,7 +362,8 @@ class __list_iterator { __base_pointer __ptr_; - _LIBCPP_HIDE_FROM_ABI explicit __list_iterator(__base_pointer __p) _NOEXCEPT : __ptr_(__p) {} + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit __list_iterator(__base_pointer __p) _NOEXCEPT + : __ptr_(__p) {} template friend class list; @@ -368,37 +379,41 @@ public: typedef __rebind_pointer_t<_VoidPtr, value_type> pointer; typedef typename pointer_traits::difference_type difference_type; - _LIBCPP_HIDE_FROM_ABI __list_iterator() _NOEXCEPT : __ptr_(nullptr) {} + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_iterator() _NOEXCEPT : __ptr_(nullptr) {} - _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __ptr_->__as_node()->__get_value(); } - _LIBCPP_HIDE_FROM_ABI pointer operator->() const { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference operator*() const { + return __ptr_->__as_node()->__get_value(); + } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return pointer_traits::pointer_to(__ptr_->__as_node()->__get_value()); } - _LIBCPP_HIDE_FROM_ABI __list_iterator& operator++() { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_iterator& operator++() 
{ __ptr_ = __ptr_->__next_; return *this; } - _LIBCPP_HIDE_FROM_ABI __list_iterator operator++(int) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_iterator operator++(int) { __list_iterator __t(*this); ++(*this); return __t; } - _LIBCPP_HIDE_FROM_ABI __list_iterator& operator--() { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_iterator& operator--() { __ptr_ = __ptr_->__prev_; return *this; } - _LIBCPP_HIDE_FROM_ABI __list_iterator operator--(int) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_iterator operator--(int) { __list_iterator __t(*this); --(*this); return __t; } - friend _LIBCPP_HIDE_FROM_ABI bool operator==(const __list_iterator& __x, const __list_iterator& __y) { + friend _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI bool + operator==(const __list_iterator& __x, const __list_iterator& __y) { return __x.__ptr_ == __y.__ptr_; } - friend _LIBCPP_HIDE_FROM_ABI bool operator!=(const __list_iterator& __x, const __list_iterator& __y) { + friend _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI bool + operator!=(const __list_iterator& __x, const __list_iterator& __y) { return !(__x == __y); } }; @@ -410,7 +425,8 @@ class __list_const_iterator { __base_pointer __ptr_; - _LIBCPP_HIDE_FROM_ABI explicit __list_const_iterator(__base_pointer __p) _NOEXCEPT : __ptr_(__p) {} + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit __list_const_iterator(__base_pointer __p) _NOEXCEPT + : __ptr_(__p) {} template friend class list; @@ -424,39 +440,43 @@ public: typedef __rebind_pointer_t<_VoidPtr, const value_type> pointer; typedef typename pointer_traits::difference_type difference_type; - _LIBCPP_HIDE_FROM_ABI __list_const_iterator() _NOEXCEPT : __ptr_(nullptr) {} - _LIBCPP_HIDE_FROM_ABI __list_const_iterator(const __list_iterator<_Tp, _VoidPtr>& __p) _NOEXCEPT - : __ptr_(__p.__ptr_) {} + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_const_iterator() _NOEXCEPT : __ptr_(nullptr) {} + 
_LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI + __list_const_iterator(const __list_iterator<_Tp, _VoidPtr>& __p) _NOEXCEPT : __ptr_(__p.__ptr_) {} - _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __ptr_->__as_node()->__get_value(); } - _LIBCPP_HIDE_FROM_ABI pointer operator->() const { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference operator*() const { + return __ptr_->__as_node()->__get_value(); + } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return pointer_traits::pointer_to(__ptr_->__as_node()->__get_value()); } - _LIBCPP_HIDE_FROM_ABI __list_const_iterator& operator++() { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_const_iterator& operator++() { __ptr_ = __ptr_->__next_; return *this; } - _LIBCPP_HIDE_FROM_ABI __list_const_iterator operator++(int) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_const_iterator operator++(int) { __list_const_iterator __t(*this); ++(*this); return __t; } - _LIBCPP_HIDE_FROM_ABI __list_const_iterator& operator--() { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_const_iterator& operator--() { __ptr_ = __ptr_->__prev_; return *this; } - _LIBCPP_HIDE_FROM_ABI __list_const_iterator operator--(int) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_const_iterator operator--(int) { __list_const_iterator __t(*this); --(*this); return __t; } - friend _LIBCPP_HIDE_FROM_ABI bool operator==(const __list_const_iterator& __x, const __list_const_iterator& __y) { + friend _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI bool + operator==(const __list_const_iterator& __x, const __list_const_iterator& __y) { return __x.__ptr_ == __y.__ptr_; } - friend _LIBCPP_HIDE_FROM_ABI bool operator!=(const __list_const_iterator& __x, const __list_const_iterator& __y) { + friend _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI bool + operator!=(const __list_const_iterator& __x, const __list_const_iterator& __y) { return !(__x 
== __y); } }; @@ -497,43 +517,49 @@ protected: __node_base __end_; _LIBCPP_COMPRESSED_PAIR(size_type, __size_, __node_allocator, __node_alloc_); - _LIBCPP_HIDE_FROM_ABI __base_pointer __end_as_link() const _NOEXCEPT { - return const_cast<__node_base&>(__end_).__self(); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __base_pointer __end_as_link() const _NOEXCEPT { + return pointer_traits<__base_pointer>::pointer_to(const_cast<__node_base&>(__end_)); } - _LIBCPP_HIDE_FROM_ABI size_type __node_alloc_max_size() const _NOEXCEPT { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type __node_alloc_max_size() const _NOEXCEPT { return __node_alloc_traits::max_size(__node_alloc_); } - _LIBCPP_HIDE_FROM_ABI static void __unlink_nodes(__base_pointer __f, __base_pointer __l) _NOEXCEPT; + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI static void + __unlink_nodes(__base_pointer __f, __base_pointer __l) _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI __list_imp() _NOEXCEPT_(is_nothrow_default_constructible<__node_allocator>::value); - _LIBCPP_HIDE_FROM_ABI __list_imp(const allocator_type& __a); - _LIBCPP_HIDE_FROM_ABI __list_imp(const __node_allocator& __a); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_imp() + _NOEXCEPT_(is_nothrow_default_constructible<__node_allocator>::value); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_imp(const allocator_type& __a); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_imp(const __node_allocator& __a); # ifndef _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI __list_imp(__node_allocator&& __a) _NOEXCEPT; + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __list_imp(__node_allocator&& __a) _NOEXCEPT; # endif - _LIBCPP_HIDE_FROM_ABI ~__list_imp(); - _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __size_ == 0; } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI ~__list_imp(); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void 
clear() _NOEXCEPT; + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __size_ == 0; } - _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return iterator(__end_.__next_); } - _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return const_iterator(__end_.__next_); } - _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return iterator(__end_as_link()); } - _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return const_iterator(__end_as_link()); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return iterator(__end_.__next_); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { + return const_iterator(__end_.__next_); + } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return iterator(__end_as_link()); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { + return const_iterator(__end_as_link()); + } - _LIBCPP_HIDE_FROM_ABI void swap(__list_imp& __c) + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void swap(__list_imp& __c) # if _LIBCPP_STD_VER >= 14 _NOEXCEPT; # else _NOEXCEPT_(!__alloc_traits::propagate_on_container_swap::value || __is_nothrow_swappable_v); # endif - _LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const __list_imp& __c) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const __list_imp& __c) { __copy_assign_alloc( __c, integral_constant()); } - _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__list_imp& __c) + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__list_imp& __c) _NOEXCEPT_(!__node_alloc_traits::propagate_on_container_move_assignment::value || is_nothrow_move_assignable<__node_allocator>::value) { __move_assign_alloc( @@ -541,7 +567,8 @@ protected: } template - _LIBCPP_HIDE_FROM_ABI __node_pointer __create_node(__base_pointer __prev, __base_pointer __next, 
_Args&&... __args) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __node_pointer + __create_node(__base_pointer __prev, __base_pointer __next, _Args&&... __args) { __allocation_guard<__node_allocator> __guard(__node_alloc_, 1); // Begin the lifetime of the node itself. Note that this doesn't begin the lifetime of the value // held inside the node, since we need to use the allocator's construct() method for that. @@ -557,7 +584,7 @@ protected: return __guard.__release_ptr(); } - _LIBCPP_HIDE_FROM_ABI void __delete_node(__node_pointer __node) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __delete_node(__node_pointer __node) { // For the same reason as above, we use the allocator's destroy() method for the value_type, // but not for the node itself. __node_alloc_traits::destroy(__node_alloc_, std::addressof(__node->__get_value())); @@ -566,54 +593,57 @@ protected: } private: - _LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const __list_imp& __c, true_type) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const __list_imp& __c, true_type) { if (__node_alloc_ != __c.__node_alloc_) clear(); __node_alloc_ = __c.__node_alloc_; } - _LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const __list_imp&, false_type) {} + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __copy_assign_alloc(const __list_imp&, false_type) {} - _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__list_imp& __c, true_type) + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__list_imp& __c, true_type) _NOEXCEPT_(is_nothrow_move_assignable<__node_allocator>::value) { __node_alloc_ = std::move(__c.__node_alloc_); } - _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__list_imp&, false_type) _NOEXCEPT {} + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__list_imp&, false_type) _NOEXCEPT {} }; // Unlink nodes [__f, __l] template -inline void __list_imp<_Tp, _Alloc>::__unlink_nodes(__base_pointer 
__f, __base_pointer __l) _NOEXCEPT { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline void +__list_imp<_Tp, _Alloc>::__unlink_nodes(__base_pointer __f, __base_pointer __l) _NOEXCEPT { __f->__prev_->__next_ = __l->__next_; __l->__next_->__prev_ = __f->__prev_; } template -inline __list_imp<_Tp, _Alloc>::__list_imp() _NOEXCEPT_(is_nothrow_default_constructible<__node_allocator>::value) +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline __list_imp<_Tp, _Alloc>::__list_imp() + _NOEXCEPT_(is_nothrow_default_constructible<__node_allocator>::value) : __size_(0) {} template -inline __list_imp<_Tp, _Alloc>::__list_imp(const allocator_type& __a) +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline __list_imp<_Tp, _Alloc>::__list_imp(const allocator_type& __a) : __size_(0), __node_alloc_(__node_allocator(__a)) {} template -inline __list_imp<_Tp, _Alloc>::__list_imp(const __node_allocator& __a) : __size_(0), __node_alloc_(__a) {} +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline __list_imp<_Tp, _Alloc>::__list_imp(const __node_allocator& __a) + : __size_(0), __node_alloc_(__a) {} # ifndef _LIBCPP_CXX03_LANG template -inline __list_imp<_Tp, _Alloc>::__list_imp(__node_allocator&& __a) _NOEXCEPT +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline __list_imp<_Tp, _Alloc>::__list_imp(__node_allocator&& __a) _NOEXCEPT : __size_(0), __node_alloc_(std::move(__a)) {} # endif template -__list_imp<_Tp, _Alloc>::~__list_imp() { +_LIBCPP_CONSTEXPR_SINCE_CXX26 __list_imp<_Tp, _Alloc>::~__list_imp() { clear(); } template -void __list_imp<_Tp, _Alloc>::clear() _NOEXCEPT { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void __list_imp<_Tp, _Alloc>::clear() _NOEXCEPT { if (!empty()) { __base_pointer __f = __end_.__next_; __base_pointer __l = __end_as_link(); @@ -628,7 +658,7 @@ void __list_imp<_Tp, _Alloc>::clear() _NOEXCEPT { } template -void __list_imp<_Tp, _Alloc>::swap(__list_imp& __c) +_LIBCPP_CONSTEXPR_SINCE_CXX26 void __list_imp<_Tp, _Alloc>::swap(__list_imp& __c) # if _LIBCPP_STD_VER >= 14 _NOEXCEPT # else @@ -686,170 +716,204 @@ public: typedef void 
__remove_return_type; # endif - _LIBCPP_HIDE_FROM_ABI list() _NOEXCEPT_(is_nothrow_default_constructible<__node_allocator>::value) {} - _LIBCPP_HIDE_FROM_ABI explicit list(const allocator_type& __a) : __base(__a) {} - _LIBCPP_HIDE_FROM_ABI explicit list(size_type __n); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI list() + _NOEXCEPT_(is_nothrow_default_constructible<__node_allocator>::value) {} + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit list(const allocator_type& __a) : __base(__a) {} + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit list(size_type __n); # if _LIBCPP_STD_VER >= 14 - _LIBCPP_HIDE_FROM_ABI explicit list(size_type __n, const allocator_type& __a); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit list(size_type __n, const allocator_type& __a); # endif - _LIBCPP_HIDE_FROM_ABI list(size_type __n, const value_type& __x); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI list(size_type __n, const value_type& __x); template <__enable_if_t<__is_allocator<_Alloc>::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI list(size_type __n, const value_type& __x, const allocator_type& __a) : __base(__a) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI + list(size_type __n, const value_type& __x, const allocator_type& __a) + : __base(__a) { for (; __n > 0; --__n) push_back(__x); } template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI list(_InpIter __f, _InpIter __l); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI list(_InpIter __f, _InpIter __l); template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI list(_InpIter __f, _InpIter __l, const allocator_type& __a); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI list(_InpIter __f, _InpIter __l, const allocator_type& __a); # if _LIBCPP_STD_VER >= 23 template <_ContainerCompatibleRange<_Tp> _Range> - _LIBCPP_HIDE_FROM_ABI list(from_range_t, _Range&& __range, const allocator_type& __a = allocator_type()) + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI + 
list(from_range_t, _Range&& __range, const allocator_type& __a = allocator_type()) : __base(__a) { prepend_range(std::forward<_Range>(__range)); } # endif - _LIBCPP_HIDE_FROM_ABI list(const list& __c); - _LIBCPP_HIDE_FROM_ABI list(const list& __c, const __type_identity_t& __a); - _LIBCPP_HIDE_FROM_ABI list& operator=(const list& __c); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI list(const list& __c); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI + list(const list& __c, const __type_identity_t& __a); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI list& operator=(const list& __c); # ifndef _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI list(initializer_list __il); - _LIBCPP_HIDE_FROM_ABI list(initializer_list __il, const allocator_type& __a); - - _LIBCPP_HIDE_FROM_ABI list(list&& __c) _NOEXCEPT_(is_nothrow_move_constructible<__node_allocator>::value); - _LIBCPP_HIDE_FROM_ABI list(list&& __c, const __type_identity_t& __a); - _LIBCPP_HIDE_FROM_ABI list& operator=(list&& __c) noexcept( + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI list(initializer_list __il); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI + list(initializer_list __il, const allocator_type& __a); + + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI list(list&& __c) + _NOEXCEPT_(is_nothrow_move_constructible<__node_allocator>::value); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI list(list&& __c, const __type_identity_t& __a); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI list& operator=(list&& __c) noexcept( (__node_alloc_traits::propagate_on_container_move_assignment::value && is_nothrow_move_assignable<__node_allocator>::value) || allocator_traits::is_always_equal::value); - _LIBCPP_HIDE_FROM_ABI list& operator=(initializer_list __il) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI list& operator=(initializer_list __il) { assign(__il.begin(), __il.end()); return *this; } - _LIBCPP_HIDE_FROM_ABI void assign(initializer_list __il) { 
assign(__il.begin(), __il.end()); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void assign(initializer_list __il) { + assign(__il.begin(), __il.end()); + } # endif // _LIBCPP_CXX03_LANG template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI void assign(_InpIter __f, _InpIter __l); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void assign(_InpIter __f, _InpIter __l); # if _LIBCPP_STD_VER >= 23 template <_ContainerCompatibleRange<_Tp> _Range> - _LIBCPP_HIDE_FROM_ABI void assign_range(_Range&& __range) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void assign_range(_Range&& __range) { __assign_with_sentinel(ranges::begin(__range), ranges::end(__range)); } # endif - _LIBCPP_HIDE_FROM_ABI void assign(size_type __n, const value_type& __x); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void assign(size_type __n, const value_type& __x); - _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT; + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return this->__size_; } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __base::empty(); } - _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return this->__size_; } + [[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { + return __base::empty(); + } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return std::min(this->__node_alloc_max_size(), numeric_limits::max()); } - _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __base::begin(); } - _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __base::begin(); } - _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __base::end(); } - _LIBCPP_HIDE_FROM_ABI const_iterator end() 
const _NOEXCEPT { return __base::end(); } - _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return __base::begin(); } - _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return __base::end(); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __base::begin(); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __base::begin(); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __base::end(); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __base::end(); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { + return __base::begin(); + } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return __base::end(); } - _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { return reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT { return const_reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { return reverse_iterator(begin()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT { return const_reverse_iterator(begin()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return const_reverse_iterator(end()); } - _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return const_reverse_iterator(begin()); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { + return reverse_iterator(end()); + } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT { + return const_reverse_iterator(end()); + } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { + return reverse_iterator(begin()); + } + 
_LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT { + return const_reverse_iterator(begin()); + } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { + return const_reverse_iterator(end()); + } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { + return const_reverse_iterator(begin()); + } - _LIBCPP_HIDE_FROM_ABI reference front() { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference front() { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::front called on empty list"); return __base::__end_.__next_->__as_node()->__get_value(); } - _LIBCPP_HIDE_FROM_ABI const_reference front() const { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reference front() const { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::front called on empty list"); return __base::__end_.__next_->__as_node()->__get_value(); } - _LIBCPP_HIDE_FROM_ABI reference back() { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference back() { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::back called on empty list"); return __base::__end_.__prev_->__as_node()->__get_value(); } - _LIBCPP_HIDE_FROM_ABI const_reference back() const { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI const_reference back() const { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::back called on empty list"); return __base::__end_.__prev_->__as_node()->__get_value(); } # ifndef _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI void push_front(value_type&& __x); - _LIBCPP_HIDE_FROM_ABI void push_back(value_type&& __x); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void push_front(value_type&& __x); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void push_back(value_type&& __x); # if _LIBCPP_STD_VER >= 23 template <_ContainerCompatibleRange<_Tp> _Range> - _LIBCPP_HIDE_FROM_ABI void prepend_range(_Range&& __range) { + 
_LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void prepend_range(_Range&& __range) { insert_range(begin(), std::forward<_Range>(__range)); } template <_ContainerCompatibleRange<_Tp> _Range> - _LIBCPP_HIDE_FROM_ABI void append_range(_Range&& __range) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void append_range(_Range&& __range) { insert_range(end(), std::forward<_Range>(__range)); } # endif template + _LIBCPP_CONSTEXPR_SINCE_CXX26 # if _LIBCPP_STD_VER >= 17 - _LIBCPP_HIDE_FROM_ABI reference emplace_front(_Args&&... __args); + _LIBCPP_HIDE_FROM_ABI reference + emplace_front(_Args&&... __args); # else - _LIBCPP_HIDE_FROM_ABI void emplace_front(_Args&&... __args); + _LIBCPP_HIDE_FROM_ABI void + emplace_front(_Args&&... __args); # endif template + _LIBCPP_CONSTEXPR_SINCE_CXX26 # if _LIBCPP_STD_VER >= 17 - _LIBCPP_HIDE_FROM_ABI reference emplace_back(_Args&&... __args); + _LIBCPP_HIDE_FROM_ABI reference + emplace_back(_Args&&... __args); # else - _LIBCPP_HIDE_FROM_ABI void emplace_back(_Args&&... __args); + _LIBCPP_HIDE_FROM_ABI void + emplace_back(_Args&&... __args); # endif template - _LIBCPP_HIDE_FROM_ABI iterator emplace(const_iterator __p, _Args&&... __args); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator emplace(const_iterator __p, _Args&&... 
__args); - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, value_type&& __x); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, value_type&& __x); - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, initializer_list __il) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator + insert(const_iterator __p, initializer_list __il) { return insert(__p, __il.begin(), __il.end()); } # endif // _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI void push_front(const value_type& __x); - _LIBCPP_HIDE_FROM_ABI void push_back(const value_type& __x); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void push_front(const value_type& __x); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void push_back(const value_type& __x); # ifndef _LIBCPP_CXX03_LANG template - _LIBCPP_HIDE_FROM_ABI void __emplace_back(_Arg&& __arg) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __emplace_back(_Arg&& __arg) { emplace_back(std::forward<_Arg>(__arg)); } # else _LIBCPP_HIDE_FROM_ABI void __emplace_back(value_type const& __arg) { push_back(__arg); } # endif - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, const value_type& __x); - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, size_type __n, const value_type& __x); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, const value_type& __x); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator + insert(const_iterator __p, size_type __n, const value_type& __x); template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, _InpIter __f, _InpIter __l); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, _InpIter __f, _InpIter __l); # if _LIBCPP_STD_VER >= 23 template <_ContainerCompatibleRange<_Tp> _Range> - _LIBCPP_HIDE_FROM_ABI iterator insert_range(const_iterator __position, _Range&& __range) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 
_LIBCPP_HIDE_FROM_ABI iterator + insert_range(const_iterator __position, _Range&& __range) { return __insert_with_sentinel(__position, ranges::begin(__range), ranges::end(__range)); } # endif - _LIBCPP_HIDE_FROM_ABI void swap(list& __c) + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void swap(list& __c) # if _LIBCPP_STD_VER >= 14 _NOEXCEPT # else @@ -858,72 +922,80 @@ public: { __base::swap(__c); } - _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT { __base::clear(); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT { __base::clear(); } - _LIBCPP_HIDE_FROM_ABI void pop_front(); - _LIBCPP_HIDE_FROM_ABI void pop_back(); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void pop_front(); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void pop_back(); - _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __p); - _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __f, const_iterator __l); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __p); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __f, const_iterator __l); - _LIBCPP_HIDE_FROM_ABI void resize(size_type __n); - _LIBCPP_HIDE_FROM_ABI void resize(size_type __n, const value_type& __x); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void resize(size_type __n); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void resize(size_type __n, const value_type& __x); - _LIBCPP_HIDE_FROM_ABI void splice(const_iterator __p, list& __c); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void splice(const_iterator __p, list& __c); # ifndef _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI void splice(const_iterator __p, list&& __c) { splice(__p, __c); } - _LIBCPP_HIDE_FROM_ABI void splice(const_iterator __p, list&& __c, const_iterator __i) { splice(__p, __c, __i); } - _LIBCPP_HIDE_FROM_ABI void splice(const_iterator __p, list&& __c, const_iterator __f, const_iterator __l) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 
_LIBCPP_HIDE_FROM_ABI void splice(const_iterator __p, list&& __c) { splice(__p, __c); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void splice(const_iterator __p, list&& __c, const_iterator __i) { + splice(__p, __c, __i); + } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void + splice(const_iterator __p, list&& __c, const_iterator __f, const_iterator __l) { splice(__p, __c, __f, __l); } # endif - _LIBCPP_HIDE_FROM_ABI void splice(const_iterator __p, list& __c, const_iterator __i); - _LIBCPP_HIDE_FROM_ABI void splice(const_iterator __p, list& __c, const_iterator __f, const_iterator __l); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void splice(const_iterator __p, list& __c, const_iterator __i); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void + splice(const_iterator __p, list& __c, const_iterator __f, const_iterator __l); - _LIBCPP_HIDE_FROM_ABI __remove_return_type remove(const value_type& __x); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __remove_return_type remove(const value_type& __x); template - _LIBCPP_HIDE_FROM_ABI __remove_return_type remove_if(_Pred __pred); - _LIBCPP_HIDE_FROM_ABI __remove_return_type unique() { return unique(__equal_to()); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __remove_return_type remove_if(_Pred __pred); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __remove_return_type unique() { return unique(__equal_to()); } template - _LIBCPP_HIDE_FROM_ABI __remove_return_type unique(_BinaryPred __binary_pred); - _LIBCPP_HIDE_FROM_ABI void merge(list& __c); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __remove_return_type unique(_BinaryPred __binary_pred); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void merge(list& __c); # ifndef _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI void merge(list&& __c) { merge(__c); } + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void merge(list&& __c) { merge(__c); } template - _LIBCPP_HIDE_FROM_ABI void merge(list&& __c, 
_Comp __comp) { + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void merge(list&& __c, _Comp __comp) { merge(__c, __comp); } # endif template - _LIBCPP_HIDE_FROM_ABI void merge(list& __c, _Comp __comp); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void merge(list& __c, _Comp __comp); - _LIBCPP_HIDE_FROM_ABI void sort(); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void sort(); template - _LIBCPP_HIDE_FROM_ABI void sort(_Comp __comp); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void sort(_Comp __comp); - _LIBCPP_HIDE_FROM_ABI void reverse() _NOEXCEPT; + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void reverse() _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI bool __invariants() const; + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI bool __invariants() const; private: template - _LIBCPP_HIDE_FROM_ABI void __assign_with_sentinel(_Iterator __f, _Sentinel __l); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __assign_with_sentinel(_Iterator __f, _Sentinel __l); template - _LIBCPP_HIDE_FROM_ABI iterator __insert_with_sentinel(const_iterator __p, _Iterator __f, _Sentinel __l); - - _LIBCPP_HIDE_FROM_ABI static void __link_nodes(__base_pointer __p, __base_pointer __f, __base_pointer __l); - _LIBCPP_HIDE_FROM_ABI void __link_nodes_at_front(__base_pointer __f, __base_pointer __l); - _LIBCPP_HIDE_FROM_ABI void __link_nodes_at_back(__base_pointer __f, __base_pointer __l); - _LIBCPP_HIDE_FROM_ABI iterator __iterator(size_type __n); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator + __insert_with_sentinel(const_iterator __p, _Iterator __f, _Sentinel __l); + + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI static void + __link_nodes(__base_pointer __p, __base_pointer __f, __base_pointer __l); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void + __link_nodes_at_front(__base_pointer __f, __base_pointer __l); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __link_nodes_at_back(__base_pointer __f, 
__base_pointer __l); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI iterator __iterator(size_type __n); // TODO: Make this _LIBCPP_HIDE_FROM_ABI template - _LIBCPP_HIDDEN static iterator __sort(iterator __f1, iterator __e2, size_type __n, _Comp& __comp); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDDEN static iterator + __sort(iterator __f1, iterator __e2, size_type __n, _Comp& __comp); - _LIBCPP_HIDE_FROM_ABI void __move_assign(list& __c, true_type) + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __move_assign(list& __c, true_type) _NOEXCEPT_(is_nothrow_move_assignable<__node_allocator>::value); - _LIBCPP_HIDE_FROM_ABI void __move_assign(list& __c, false_type); + _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void __move_assign(list& __c, false_type); }; # if _LIBCPP_STD_VER >= 17 @@ -949,7 +1021,8 @@ list(from_range_t, _Range&&, _Alloc = _Alloc()) -> list -inline void list<_Tp, _Alloc>::__link_nodes(__base_pointer __p, __base_pointer __f, __base_pointer __l) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline void +list<_Tp, _Alloc>::__link_nodes(__base_pointer __p, __base_pointer __f, __base_pointer __l) { __p->__prev_->__next_ = __f; __f->__prev_ = __p->__prev_; __p->__prev_ = __l; @@ -958,7 +1031,8 @@ inline void list<_Tp, _Alloc>::__link_nodes(__base_pointer __p, __base_pointer _ // Link in nodes [__f, __l] at the front of the list template -inline void list<_Tp, _Alloc>::__link_nodes_at_front(__base_pointer __f, __base_pointer __l) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline void +list<_Tp, _Alloc>::__link_nodes_at_front(__base_pointer __f, __base_pointer __l) { __f->__prev_ = __base::__end_as_link(); __l->__next_ = __base::__end_.__next_; __l->__next_->__prev_ = __l; @@ -967,7 +1041,8 @@ inline void list<_Tp, _Alloc>::__link_nodes_at_front(__base_pointer __f, __base_ // Link in nodes [__f, __l] at the back of the list template -inline void list<_Tp, _Alloc>::__link_nodes_at_back(__base_pointer __f, __base_pointer __l) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 
inline void +list<_Tp, _Alloc>::__link_nodes_at_back(__base_pointer __f, __base_pointer __l) { __l->__next_ = __base::__end_as_link(); __f->__prev_ = __base::__end_.__prev_; __f->__prev_->__next_ = __f; @@ -975,12 +1050,12 @@ inline void list<_Tp, _Alloc>::__link_nodes_at_back(__base_pointer __f, __base_p } template -inline typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::__iterator(size_type __n) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::__iterator(size_type __n) { return __n <= this->__size_ / 2 ? std::next(begin(), __n) : std::prev(end(), this->__size_ - __n); } template -list<_Tp, _Alloc>::list(size_type __n) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 list<_Tp, _Alloc>::list(size_type __n) { for (; __n > 0; --__n) # ifndef _LIBCPP_CXX03_LANG emplace_back(); @@ -991,41 +1066,43 @@ list<_Tp, _Alloc>::list(size_type __n) { # if _LIBCPP_STD_VER >= 14 template -list<_Tp, _Alloc>::list(size_type __n, const allocator_type& __a) : __base(__a) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 list<_Tp, _Alloc>::list(size_type __n, const allocator_type& __a) : __base(__a) { for (; __n > 0; --__n) emplace_back(); } # endif template -list<_Tp, _Alloc>::list(size_type __n, const value_type& __x) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 list<_Tp, _Alloc>::list(size_type __n, const value_type& __x) { for (; __n > 0; --__n) push_back(__x); } template template ::value, int> > -list<_Tp, _Alloc>::list(_InpIter __f, _InpIter __l) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 list<_Tp, _Alloc>::list(_InpIter __f, _InpIter __l) { for (; __f != __l; ++__f) __emplace_back(*__f); } template template ::value, int> > -list<_Tp, _Alloc>::list(_InpIter __f, _InpIter __l, const allocator_type& __a) : __base(__a) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 list<_Tp, _Alloc>::list(_InpIter __f, _InpIter __l, const allocator_type& __a) + : __base(__a) { for (; __f != __l; ++__f) __emplace_back(*__f); } template -list<_Tp, _Alloc>::list(const list& __c) +_LIBCPP_CONSTEXPR_SINCE_CXX26 
list<_Tp, _Alloc>::list(const list& __c) : __base(__node_alloc_traits::select_on_container_copy_construction(__c.__node_alloc_)) { for (const_iterator __i = __c.begin(), __e = __c.end(); __i != __e; ++__i) push_back(*__i); } template -list<_Tp, _Alloc>::list(const list& __c, const __type_identity_t& __a) : __base(__a) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 list<_Tp, _Alloc>::list(const list& __c, const __type_identity_t& __a) + : __base(__a) { for (const_iterator __i = __c.begin(), __e = __c.end(); __i != __e; ++__i) push_back(*__i); } @@ -1033,25 +1110,28 @@ list<_Tp, _Alloc>::list(const list& __c, const __type_identity_t # ifndef _LIBCPP_CXX03_LANG template -list<_Tp, _Alloc>::list(initializer_list __il, const allocator_type& __a) : __base(__a) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 list<_Tp, _Alloc>::list(initializer_list __il, const allocator_type& __a) + : __base(__a) { for (typename initializer_list::const_iterator __i = __il.begin(), __e = __il.end(); __i != __e; ++__i) push_back(*__i); } template -list<_Tp, _Alloc>::list(initializer_list __il) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 list<_Tp, _Alloc>::list(initializer_list __il) { for (typename initializer_list::const_iterator __i = __il.begin(), __e = __il.end(); __i != __e; ++__i) push_back(*__i); } template -inline list<_Tp, _Alloc>::list(list&& __c) noexcept(is_nothrow_move_constructible<__node_allocator>::value) +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline list<_Tp, _Alloc>::list(list&& __c) noexcept( + is_nothrow_move_constructible<__node_allocator>::value) : __base(std::move(__c.__node_alloc_)) { splice(end(), __c); } template -inline list<_Tp, _Alloc>::list(list&& __c, const __type_identity_t& __a) : __base(__a) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline list<_Tp, _Alloc>::list(list&& __c, const __type_identity_t& __a) + : __base(__a) { if (__a == __c.get_allocator()) splice(end(), __c); else { @@ -1061,7 +1141,7 @@ inline list<_Tp, _Alloc>::list(list&& __c, const __type_identity_t -inline list<_Tp, _Alloc>& list<_Tp, 
_Alloc>::operator=(list&& __c) noexcept( +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline list<_Tp, _Alloc>& list<_Tp, _Alloc>::operator=(list&& __c) noexcept( (__node_alloc_traits::propagate_on_container_move_assignment::value && is_nothrow_move_assignable<__node_allocator>::value) || allocator_traits::is_always_equal::value) { @@ -1070,7 +1150,7 @@ inline list<_Tp, _Alloc>& list<_Tp, _Alloc>::operator=(list&& __c) noexcept( } template -void list<_Tp, _Alloc>::__move_assign(list& __c, false_type) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::__move_assign(list& __c, false_type) { if (this->__node_alloc_ != __c.__node_alloc_) { typedef move_iterator _Ip; assign(_Ip(__c.begin()), _Ip(__c.end())); @@ -1079,8 +1159,8 @@ void list<_Tp, _Alloc>::__move_assign(list& __c, false_type) { } template -void list<_Tp, _Alloc>::__move_assign(list& __c, - true_type) noexcept(is_nothrow_move_assignable<__node_allocator>::value) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void +list<_Tp, _Alloc>::__move_assign(list& __c, true_type) noexcept(is_nothrow_move_assignable<__node_allocator>::value) { clear(); __base::__move_assign_alloc(__c); splice(end(), __c); @@ -1089,7 +1169,7 @@ void list<_Tp, _Alloc>::__move_assign(list& __c, # endif // _LIBCPP_CXX03_LANG template -inline list<_Tp, _Alloc>& list<_Tp, _Alloc>::operator=(const list& __c) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline list<_Tp, _Alloc>& list<_Tp, _Alloc>::operator=(const list& __c) { if (this != std::addressof(__c)) { __base::__copy_assign_alloc(__c); assign(__c.begin(), __c.end()); @@ -1099,13 +1179,14 @@ inline list<_Tp, _Alloc>& list<_Tp, _Alloc>::operator=(const list& __c) { template template ::value, int> > -void list<_Tp, _Alloc>::assign(_InpIter __f, _InpIter __l) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::assign(_InpIter __f, _InpIter __l) { __assign_with_sentinel(__f, __l); } template template -_LIBCPP_HIDE_FROM_ABI void list<_Tp, _Alloc>::__assign_with_sentinel(_Iterator __f, _Sentinel __l) { 
+_LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI void +list<_Tp, _Alloc>::__assign_with_sentinel(_Iterator __f, _Sentinel __l) { iterator __i = begin(); iterator __e = end(); for (; __f != __l && __i != __e; ++__f, (void)++__i) @@ -1117,7 +1198,7 @@ _LIBCPP_HIDE_FROM_ABI void list<_Tp, _Alloc>::__assign_with_sentinel(_Iterator _ } template -void list<_Tp, _Alloc>::assign(size_type __n, const value_type& __x) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::assign(size_type __n, const value_type& __x) { iterator __i = begin(); iterator __e = end(); for (; __n > 0 && __i != __e; --__n, (void)++__i) @@ -1129,12 +1210,13 @@ void list<_Tp, _Alloc>::assign(size_type __n, const value_type& __x) { } template -inline _Alloc list<_Tp, _Alloc>::get_allocator() const _NOEXCEPT { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline _Alloc list<_Tp, _Alloc>::get_allocator() const _NOEXCEPT { return allocator_type(this->__node_alloc_); } template -typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::insert(const_iterator __p, const value_type& __x) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 typename list<_Tp, _Alloc>::iterator +list<_Tp, _Alloc>::insert(const_iterator __p, const value_type& __x) { __node_pointer __node = this->__create_node(/* prev = */ nullptr, /* next = */ nullptr, __x); __link_nodes(__p.__ptr_, __node->__as_link(), __node->__as_link()); ++this->__size_; @@ -1142,7 +1224,7 @@ typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::insert(const_iterator __ } template -typename list<_Tp, _Alloc>::iterator +_LIBCPP_CONSTEXPR_SINCE_CXX26 typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::insert(const_iterator __p, size_type __n, const value_type& __x) { iterator __r(__p.__ptr_); if (__n > 0) { @@ -1178,13 +1260,14 @@ list<_Tp, _Alloc>::insert(const_iterator __p, size_type __n, const value_type& _ template template ::value, int> > -typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::insert(const_iterator __p, _InpIter __f, _InpIter __l) { 
+_LIBCPP_CONSTEXPR_SINCE_CXX26 typename list<_Tp, _Alloc>::iterator +list<_Tp, _Alloc>::insert(const_iterator __p, _InpIter __f, _InpIter __l) { return __insert_with_sentinel(__p, __f, __l); } template template -_LIBCPP_HIDE_FROM_ABI typename list<_Tp, _Alloc>::iterator +_LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::__insert_with_sentinel(const_iterator __p, _Iterator __f, _Sentinel __l) { iterator __r(__p.__ptr_); if (__f != __l) { @@ -1219,7 +1302,7 @@ list<_Tp, _Alloc>::__insert_with_sentinel(const_iterator __p, _Iterator __f, _Se } template -void list<_Tp, _Alloc>::push_front(const value_type& __x) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::push_front(const value_type& __x) { __node_pointer __node = this->__create_node(/* prev = */ nullptr, /* next = */ nullptr, __x); __base_pointer __nl = __node->__as_link(); __link_nodes_at_front(__nl, __nl); @@ -1227,7 +1310,7 @@ void list<_Tp, _Alloc>::push_front(const value_type& __x) { } template -void list<_Tp, _Alloc>::push_back(const value_type& __x) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::push_back(const value_type& __x) { __node_pointer __node = this->__create_node(/* prev = */ nullptr, /* next = */ nullptr, __x); __base_pointer __nl = __node->__as_link(); __link_nodes_at_back(__nl, __nl); @@ -1237,7 +1320,7 @@ void list<_Tp, _Alloc>::push_back(const value_type& __x) { # ifndef _LIBCPP_CXX03_LANG template -void list<_Tp, _Alloc>::push_front(value_type&& __x) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::push_front(value_type&& __x) { __node_pointer __node = this->__create_node(/* prev = */ nullptr, /* next = */ nullptr, std::move(__x)); __base_pointer __nl = __node->__as_link(); __link_nodes_at_front(__nl, __nl); @@ -1245,7 +1328,7 @@ void list<_Tp, _Alloc>::push_front(value_type&& __x) { } template -void list<_Tp, _Alloc>::push_back(value_type&& __x) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, 
_Alloc>::push_back(value_type&& __x) { __node_pointer __node = this->__create_node(/* prev = */ nullptr, /* next = */ nullptr, std::move(__x)); __base_pointer __nl = __node->__as_link(); __link_nodes_at_back(__nl, __nl); @@ -1254,12 +1337,13 @@ void list<_Tp, _Alloc>::push_back(value_type&& __x) { template template +_LIBCPP_CONSTEXPR_SINCE_CXX26 # if _LIBCPP_STD_VER >= 17 -typename list<_Tp, _Alloc>::reference + typename list<_Tp, _Alloc>::reference # else -void + void # endif -list<_Tp, _Alloc>::emplace_front(_Args&&... __args) { + list<_Tp, _Alloc>::emplace_front(_Args&&... __args) { __node_pointer __node = this->__create_node(/* prev = */ nullptr, /* next = */ nullptr, std::forward<_Args>(__args)...); __base_pointer __nl = __node->__as_link(); @@ -1272,12 +1356,13 @@ list<_Tp, _Alloc>::emplace_front(_Args&&... __args) { template template +_LIBCPP_CONSTEXPR_SINCE_CXX26 # if _LIBCPP_STD_VER >= 17 -typename list<_Tp, _Alloc>::reference + typename list<_Tp, _Alloc>::reference # else -void + void # endif -list<_Tp, _Alloc>::emplace_back(_Args&&... __args) { + list<_Tp, _Alloc>::emplace_back(_Args&&... __args) { __node_pointer __node = this->__create_node(/* prev = */ nullptr, /* next = */ nullptr, std::forward<_Args>(__args)...); __base_pointer __nl = __node->__as_link(); @@ -1290,7 +1375,8 @@ list<_Tp, _Alloc>::emplace_back(_Args&&... __args) { template template -typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::emplace(const_iterator __p, _Args&&... __args) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 typename list<_Tp, _Alloc>::iterator +list<_Tp, _Alloc>::emplace(const_iterator __p, _Args&&... 
__args) { __node_pointer __node = this->__create_node(/* prev = */ nullptr, /* next = */ nullptr, std::forward<_Args>(__args)...); __base_pointer __nl = __node->__as_link(); @@ -1300,7 +1386,8 @@ typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::emplace(const_iterator _ } template -typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::insert(const_iterator __p, value_type&& __x) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 typename list<_Tp, _Alloc>::iterator +list<_Tp, _Alloc>::insert(const_iterator __p, value_type&& __x) { __node_pointer __node = this->__create_node(/* prev = */ nullptr, /* next = */ nullptr, std::move(__x)); __base_pointer __nl = __node->__as_link(); __link_nodes(__p.__ptr_, __nl, __nl); @@ -1311,7 +1398,7 @@ typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::insert(const_iterator __ # endif // _LIBCPP_CXX03_LANG template -void list<_Tp, _Alloc>::pop_front() { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::pop_front() { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::pop_front() called with empty list"); __base_pointer __n = __base::__end_.__next_; __base::__unlink_nodes(__n, __n); @@ -1320,7 +1407,7 @@ void list<_Tp, _Alloc>::pop_front() { } template -void list<_Tp, _Alloc>::pop_back() { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::pop_back() { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(!empty(), "list::pop_back() called on an empty list"); __base_pointer __n = __base::__end_.__prev_; __base::__unlink_nodes(__n, __n); @@ -1329,7 +1416,7 @@ void list<_Tp, _Alloc>::pop_back() { } template -typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::erase(const_iterator __p) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::erase(const_iterator __p) { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__p != end(), "list::erase(iterator) called with a non-dereferenceable iterator"); __base_pointer __n = __p.__ptr_; __base_pointer __r = __n->__next_; @@ -1340,7 +1427,8 @@ typename list<_Tp, _Alloc>::iterator 
list<_Tp, _Alloc>::erase(const_iterator __p } template -typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::erase(const_iterator __f, const_iterator __l) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 typename list<_Tp, _Alloc>::iterator +list<_Tp, _Alloc>::erase(const_iterator __f, const_iterator __l) { if (__f != __l) { __base::__unlink_nodes(__f.__ptr_, __l.__ptr_->__prev_); while (__f != __l) { @@ -1354,7 +1442,7 @@ typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::erase(const_iterator __f } template -void list<_Tp, _Alloc>::resize(size_type __n) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::resize(size_type __n) { if (__n < this->__size_) erase(__iterator(__n), end()); else if (__n > this->__size_) { @@ -1389,7 +1477,7 @@ void list<_Tp, _Alloc>::resize(size_type __n) { } template -void list<_Tp, _Alloc>::resize(size_type __n, const value_type& __x) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::resize(size_type __n, const value_type& __x) { if (__n < this->__size_) erase(__iterator(__n), end()); else if (__n > this->__size_) { @@ -1425,7 +1513,7 @@ void list<_Tp, _Alloc>::resize(size_type __n, const value_type& __x) { } template -void list<_Tp, _Alloc>::splice(const_iterator __p, list& __c) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::splice(const_iterator __p, list& __c) { _LIBCPP_ASSERT_VALID_INPUT_RANGE( this != std::addressof(__c), "list::splice(iterator, list) called with this == &list"); if (!__c.empty()) { @@ -1439,7 +1527,7 @@ void list<_Tp, _Alloc>::splice(const_iterator __p, list& __c) { } template -void list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __i) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __i) { if (__p.__ptr_ != __i.__ptr_ && __p.__ptr_ != __i.__ptr_->__next_) { __base_pointer __f = __i.__ptr_; __base::__unlink_nodes(__f, __f); @@ -1450,7 +1538,8 @@ void list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, 
const_iterator __i } template -void list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __f, const_iterator __l) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void +list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __f, const_iterator __l) { if (__f != __l) { __base_pointer __first = __f.__ptr_; --__l; @@ -1466,7 +1555,8 @@ void list<_Tp, _Alloc>::splice(const_iterator __p, list& __c, const_iterator __f } template -typename list<_Tp, _Alloc>::__remove_return_type list<_Tp, _Alloc>::remove(const value_type& __x) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 typename list<_Tp, _Alloc>::__remove_return_type +list<_Tp, _Alloc>::remove(const value_type& __x) { list<_Tp, _Alloc> __deleted_nodes(get_allocator()); // collect the nodes we're removing for (const_iterator __i = begin(), __e = end(); __i != __e;) { if (*__i == __x) { @@ -1486,7 +1576,8 @@ typename list<_Tp, _Alloc>::__remove_return_type list<_Tp, _Alloc>::remove(const template template -typename list<_Tp, _Alloc>::__remove_return_type list<_Tp, _Alloc>::remove_if(_Pred __pred) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 typename list<_Tp, _Alloc>::__remove_return_type +list<_Tp, _Alloc>::remove_if(_Pred __pred) { list<_Tp, _Alloc> __deleted_nodes(get_allocator()); // collect the nodes we're removing for (iterator __i = begin(), __e = end(); __i != __e;) { if (__pred(*__i)) { @@ -1506,7 +1597,8 @@ typename list<_Tp, _Alloc>::__remove_return_type list<_Tp, _Alloc>::remove_if(_P template template -typename list<_Tp, _Alloc>::__remove_return_type list<_Tp, _Alloc>::unique(_BinaryPred __binary_pred) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 typename list<_Tp, _Alloc>::__remove_return_type +list<_Tp, _Alloc>::unique(_BinaryPred __binary_pred) { list<_Tp, _Alloc> __deleted_nodes(get_allocator()); // collect the nodes we're removing for (iterator __i = begin(), __e = end(); __i != __e;) { iterator __j = std::next(__i); @@ -1522,13 +1614,13 @@ typename list<_Tp, _Alloc>::__remove_return_type list<_Tp, _Alloc>::unique(_Bina 
} template -inline void list<_Tp, _Alloc>::merge(list& __c) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline void list<_Tp, _Alloc>::merge(list& __c) { merge(__c, __less<>()); } template template -void list<_Tp, _Alloc>::merge(list& __c, _Comp __comp) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::merge(list& __c, _Comp __comp) { if (this != std::addressof(__c)) { iterator __f1 = begin(); iterator __e1 = end(); @@ -1557,19 +1649,19 @@ void list<_Tp, _Alloc>::merge(list& __c, _Comp __comp) { } template -inline void list<_Tp, _Alloc>::sort() { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline void list<_Tp, _Alloc>::sort() { sort(__less<>()); } template template -inline void list<_Tp, _Alloc>::sort(_Comp __comp) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline void list<_Tp, _Alloc>::sort(_Comp __comp) { __sort(begin(), end(), this->__size_, __comp); } template template -typename list<_Tp, _Alloc>::iterator +_LIBCPP_CONSTEXPR_SINCE_CXX26 typename list<_Tp, _Alloc>::iterator list<_Tp, _Alloc>::__sort(iterator __f1, iterator __e2, size_type __n, _Comp& __comp) { switch (__n) { case 0: @@ -1623,7 +1715,7 @@ list<_Tp, _Alloc>::__sort(iterator __f1, iterator __e2, size_type __n, _Comp& __ } template -void list<_Tp, _Alloc>::reverse() _NOEXCEPT { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void list<_Tp, _Alloc>::reverse() _NOEXCEPT { if (this->__size_ > 1) { iterator __e = end(); for (iterator __i = begin(); __i.__ptr_ != __e.__ptr_;) { @@ -1635,46 +1727,52 @@ void list<_Tp, _Alloc>::reverse() _NOEXCEPT { } template -bool list<_Tp, _Alloc>::__invariants() const { +_LIBCPP_CONSTEXPR_SINCE_CXX26 bool list<_Tp, _Alloc>::__invariants() const { return size() == std::distance(begin(), end()); } template -inline _LIBCPP_HIDE_FROM_ABI bool operator==(const list<_Tp, _Alloc>& __x, const list<_Tp, _Alloc>& __y) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline _LIBCPP_HIDE_FROM_ABI bool +operator==(const list<_Tp, _Alloc>& __x, const list<_Tp, _Alloc>& __y) { return __x.size() == __y.size() && std::equal(__x.begin(), 
__x.end(), __y.begin()); } # if _LIBCPP_STD_VER <= 17 template -inline _LIBCPP_HIDE_FROM_ABI bool operator<(const list<_Tp, _Alloc>& __x, const list<_Tp, _Alloc>& __y) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline _LIBCPP_HIDE_FROM_ABI bool +operator<(const list<_Tp, _Alloc>& __x, const list<_Tp, _Alloc>& __y) { return std::lexicographical_compare(__x.begin(), __x.end(), __y.begin(), __y.end()); } template -inline _LIBCPP_HIDE_FROM_ABI bool operator!=(const list<_Tp, _Alloc>& __x, const list<_Tp, _Alloc>& __y) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline _LIBCPP_HIDE_FROM_ABI bool +operator!=(const list<_Tp, _Alloc>& __x, const list<_Tp, _Alloc>& __y) { return !(__x == __y); } template -inline _LIBCPP_HIDE_FROM_ABI bool operator>(const list<_Tp, _Alloc>& __x, const list<_Tp, _Alloc>& __y) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline _LIBCPP_HIDE_FROM_ABI bool +operator>(const list<_Tp, _Alloc>& __x, const list<_Tp, _Alloc>& __y) { return __y < __x; } template -inline _LIBCPP_HIDE_FROM_ABI bool operator>=(const list<_Tp, _Alloc>& __x, const list<_Tp, _Alloc>& __y) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline _LIBCPP_HIDE_FROM_ABI bool +operator>=(const list<_Tp, _Alloc>& __x, const list<_Tp, _Alloc>& __y) { return !(__x < __y); } template -inline _LIBCPP_HIDE_FROM_ABI bool operator<=(const list<_Tp, _Alloc>& __x, const list<_Tp, _Alloc>& __y) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline _LIBCPP_HIDE_FROM_ABI bool +operator<=(const list<_Tp, _Alloc>& __x, const list<_Tp, _Alloc>& __y) { return !(__y < __x); } # else // _LIBCPP_STD_VER <= 17 template -_LIBCPP_HIDE_FROM_ABI __synth_three_way_result<_Tp> +_LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __synth_three_way_result<_Tp> operator<=>(const list<_Tp, _Allocator>& __x, const list<_Tp, _Allocator>& __y) { return std::lexicographical_compare_three_way(__x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } @@ -1682,20 +1780,20 @@ operator<=>(const list<_Tp, _Allocator>& __x, const list<_Tp, _Allocator>& __y) # endif 
// _LIBCPP_STD_VER <= 17 template -inline _LIBCPP_HIDE_FROM_ABI void swap(list<_Tp, _Alloc>& __x, list<_Tp, _Alloc>& __y) +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline _LIBCPP_HIDE_FROM_ABI void swap(list<_Tp, _Alloc>& __x, list<_Tp, _Alloc>& __y) _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) { __x.swap(__y); } # if _LIBCPP_STD_VER >= 20 template -inline _LIBCPP_HIDE_FROM_ABI typename list<_Tp, _Allocator>::size_type +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline _LIBCPP_HIDE_FROM_ABI typename list<_Tp, _Allocator>::size_type erase_if(list<_Tp, _Allocator>& __c, _Predicate __pred) { return __c.remove_if(__pred); } template -inline _LIBCPP_HIDE_FROM_ABI typename list<_Tp, _Allocator>::size_type +_LIBCPP_CONSTEXPR_SINCE_CXX26 inline _LIBCPP_HIDE_FROM_ABI typename list<_Tp, _Allocator>::size_type erase(list<_Tp, _Allocator>& __c, const _Up& __v) { return std::erase_if(__c, [&](const auto& __elem) -> bool { return __elem == __v; }); } diff --git a/libcxx/include/version b/libcxx/include/version index 87c4ede9a7e5..7154cab92335 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -71,6 +71,7 @@ __cpp_lib_constexpr_dynamic_alloc 201907L __cpp_lib_constexpr_forward_list 202502L __cpp_lib_constexpr_functional 201907L __cpp_lib_constexpr_iterator 201811L +__cpp_lib_constexpr_list 202502L __cpp_lib_constexpr_memory 202202L 201811L // C++20 __cpp_lib_constexpr_new 202406L @@ -545,6 +546,7 @@ __cpp_lib_void_t 201411L # undef __cpp_lib_constexpr_algorithms # define __cpp_lib_constexpr_algorithms 202306L # define __cpp_lib_constexpr_forward_list 202502L +# define __cpp_lib_constexpr_list 202502L # if !defined(_LIBCPP_ABI_VCRUNTIME) # define __cpp_lib_constexpr_new 202406L # endif diff --git a/libcxx/test/std/containers/sequences/list/compare.pass.cpp b/libcxx/test/std/containers/sequences/list/compare.pass.cpp index ce00f57733bf..9705fd916198 100644 --- a/libcxx/test/std/containers/sequences/list/compare.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/compare.pass.cpp @@ -10,34 
+10,34 @@ // template< class T, class Alloc > // bool operator==( const std::list& lhs, -// const std::list& rhs ); +// const std::list& rhs ); // constexpr since C++26 // template< class T, class Alloc > // bool operator!=( const std::list& lhs, -// const std::list& rhs ); +// const std::list& rhs ); // constexpr since C++26 // template< class T, class Alloc > // bool operator<( const std::list& lhs, -// const std::list& rhs ); +// const std::list& rhs ); // constexpr since C++26 // template< class T, class Alloc > // bool operator<=( const std::list& lhs, -// const std::list& rhs ); +// const std::list& rhs ); // constexpr since C++26 // template< class T, class Alloc > // bool operator>( const std::list& lhs, -// const std::list& rhs ); +// const std::list& rhs ); // constexpr since C++26 // template< class T, class Alloc > // bool operator>=( const std::list& lhs, -// const std::list& rhs ); +// const std::list& rhs ); // constexpr since C++26 #include #include #include "test_comparisons.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { const std::list l1, l2; assert(testComparisons(l1, l2, true, false)); @@ -113,5 +113,15 @@ int main(int, char**) { const std::list l2(items2, items2 + 2); assert(testComparisons(l1, l2, false, false)); } + + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/compare.three_way.pass.cpp b/libcxx/test/std/containers/sequences/list/compare.three_way.pass.cpp index 059fba3c2626..7a23a653c0aa 100644 --- a/libcxx/test/std/containers/sequences/list/compare.three_way.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/compare.three_way.pass.cpp @@ -11,7 +11,7 @@ // template constexpr // synth-three-way-result -// operator<=>(const list& x, const list& y); +// operator<=>(const list& x, const list& y); // constexpr since C++26 #include #include @@ -20,6 +20,8 @@ int main(int, char**) 
{ assert(test_sequence_container_spaceship()); - // `std::list` is not constexpr, so no `static_assert` test here. +#if TEST_STD_VER >= 26 + static_assert(test_sequence_container_spaceship()); +#endif return 0; } diff --git a/libcxx/test/std/containers/sequences/list/get_allocator.pass.cpp b/libcxx/test/std/containers/sequences/list/get_allocator.pass.cpp index f1002f2ca811..9d724673d31e 100644 --- a/libcxx/test/std/containers/sequences/list/get_allocator.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/get_allocator.pass.cpp @@ -10,7 +10,7 @@ // class list -// allocator_type get_allocator() const +// allocator_type get_allocator() const // constexpr since C++26 #include #include @@ -18,7 +18,7 @@ #include "test_allocator.h" #include "test_macros.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::allocator alloc; const std::list l(alloc); @@ -30,5 +30,14 @@ int main(int, char**) { assert(l.get_allocator() == alloc); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/incomplete_type.pass.cpp b/libcxx/test/std/containers/sequences/list/incomplete_type.pass.cpp index 1802e53ecf38..ac8a76097d0a 100644 --- a/libcxx/test/std/containers/sequences/list/incomplete_type.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/incomplete_type.pass.cpp @@ -12,6 +12,7 @@ // type. 
#include +#include #include "test_macros.h" @@ -23,8 +24,18 @@ struct A { std::list::const_reverse_iterator crit; }; -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { A a; + (void)a; + + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/sequences/list/iterators.pass.cpp b/libcxx/test/std/containers/sequences/list/iterators.pass.cpp index deaae31f2d27..b41a1899f2ff 100644 --- a/libcxx/test/std/containers/sequences/list/iterators.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/iterators.pass.cpp @@ -8,12 +8,12 @@ // -// iterator begin(); -// iterator end(); -// const_iterator begin() const; -// const_iterator end() const; -// const_iterator cbegin() const; -// const_iterator cend() const; +// iterator begin(); // constexpr since C++26 +// iterator end(); // constexpr since C++26 +// const_iterator begin() const; // constexpr since C++26 +// const_iterator end() const; // constexpr since C++26 +// const_iterator cbegin() const; // constexpr since C++26 +// const_iterator cend() const; // constexpr since C++26 #include #include @@ -27,7 +27,7 @@ struct A { int second; }; -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { typedef int T; typedef std::list C; @@ -74,6 +74,8 @@ int main(int, char**) { typedef std::list C; C::iterator i; C::const_iterator j; + (void)i; + (void)j; } #if TEST_STD_VER >= 11 { @@ -122,6 +124,8 @@ int main(int, char**) { typedef std::list> C; C::iterator i; C::const_iterator j; + (void)i; + (void)j; } { typedef A T; @@ -150,5 +154,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.capacity/empty.pass.cpp b/libcxx/test/std/containers/sequences/list/list.capacity/empty.pass.cpp index 50ca23ff9c56..f368d8e700bb 100644 
--- a/libcxx/test/std/containers/sequences/list/list.capacity/empty.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.capacity/empty.pass.cpp @@ -10,7 +10,7 @@ // class list -// bool empty() const noexcept; +// bool empty() const noexcept; // constexpr since C++26 #include #include @@ -18,7 +18,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { typedef std::list C; C c; @@ -42,5 +42,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.capacity/max_size.pass.cpp b/libcxx/test/std/containers/sequences/list/list.capacity/max_size.pass.cpp index 74c2ccfb1442..1f956b33e482 100644 --- a/libcxx/test/std/containers/sequences/list/list.capacity/max_size.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.capacity/max_size.pass.cpp @@ -8,7 +8,7 @@ // -// size_type max_size() const noexcept +// size_type max_size() const noexcept // constexpr since C++26 #include #include @@ -18,7 +18,7 @@ #include "test_allocator.h" #include "test_macros.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { typedef limited_allocator A; typedef std::list C; @@ -42,5 +42,14 @@ int main(int, char**) { assert(c.max_size() <= alloc_max_size(c.get_allocator())); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.capacity/resize_size.pass.cpp b/libcxx/test/std/containers/sequences/list/list.capacity/resize_size.pass.cpp index 754d931646cc..f694d9ab2650 100644 --- a/libcxx/test/std/containers/sequences/list/list.capacity/resize_size.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.capacity/resize_size.pass.cpp @@ -8,15 +8,16 @@ // -// void resize(size_type sz); +// 
void resize(size_type sz); // constexpr since C++26 #include #include + #include "test_macros.h" #include "DefaultOnly.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list l(5, 2); l.resize(2); @@ -33,17 +34,31 @@ int main(int, char**) { assert(l.back() == 0); } #if TEST_STD_VER >= 11 - { - std::list l(10); - l.resize(5); - assert(l.size() == 5); - assert(std::distance(l.begin(), l.end()) == 5); - } - { - std::list l(10); - l.resize(20); - assert(l.size() == 20); - assert(std::distance(l.begin(), l.end()) == 20); + if (!TEST_IS_CONSTANT_EVALUATED) { + { + std::list l(10); + l.resize(5); + assert(l.size() == 5); + assert(std::distance(l.begin(), l.end()) == 5); + } + { + std::list l(10); + l.resize(20); + assert(l.size() == 20); + assert(std::distance(l.begin(), l.end()) == 20); + } + { + std::list> l(10); + l.resize(5); + assert(l.size() == 5); + assert(std::distance(l.begin(), l.end()) == 5); + } + { + std::list> l(10); + l.resize(20); + assert(l.size() == 20); + assert(std::distance(l.begin(), l.end()) == 20); + } } { std::list> l(5, 2); @@ -60,18 +75,15 @@ int main(int, char**) { assert(l.front() == 2); assert(l.back() == 0); } - { - std::list> l(10); - l.resize(5); - assert(l.size() == 5); - assert(std::distance(l.begin(), l.end()) == 5); - } - { - std::list> l(10); - l.resize(20); - assert(l.size() == 20); - assert(std::distance(l.begin(), l.end()) == 20); - } +#endif + + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); #endif return 0; diff --git a/libcxx/test/std/containers/sequences/list/list.capacity/resize_size_value.pass.cpp b/libcxx/test/std/containers/sequences/list/list.capacity/resize_size_value.pass.cpp index 95fccddeca54..a93ec224bd6d 100644 --- a/libcxx/test/std/containers/sequences/list/list.capacity/resize_size_value.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.capacity/resize_size_value.pass.cpp @@ -8,7 +8,7 @@ // -// 
void resize(size_type sz, const value_type& x); +// void resize(size_type sz, const value_type& x); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "DefaultOnly.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list l(5, 2); l.resize(2, 3.5); @@ -50,5 +50,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.capacity/size.pass.cpp b/libcxx/test/std/containers/sequences/list/list.capacity/size.pass.cpp index 930331205a9a..8aecfcaea027 100644 --- a/libcxx/test/std/containers/sequences/list/list.capacity/size.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.capacity/size.pass.cpp @@ -10,7 +10,7 @@ // class list -// size_type size() const noexcept; +// size_type size() const noexcept; // constexpr since C++26 #include #include @@ -18,7 +18,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { typedef std::list C; C c; @@ -58,5 +58,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/assign_copy.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/assign_copy.pass.cpp index ca468d870998..912975d55e1d 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/assign_copy.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/assign_copy.pass.cpp @@ -8,7 +8,7 @@ // -// list& operator=(const list& c); +// list& operator=(const list& c); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_allocator.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list > l(3, 2, 
test_allocator(5)); std::list > l2(l, test_allocator(3)); @@ -41,5 +41,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/assign_initializer_list.pass.cpp index d4c1120df622..07b25f189a11 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/assign_initializer_list.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/assign_initializer_list.pass.cpp @@ -10,7 +10,7 @@ // -// void assign(initializer_list il); +// void assign(initializer_list il); // constexpr since C++26 #include #include @@ -18,7 +18,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list d; d.assign({3, 4, 5, 6}); @@ -40,5 +40,14 @@ int main(int, char**) { assert(*i++ == 6); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/assign_move.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/assign_move.pass.cpp index 87faaaac2b21..aa199b05ed45 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/assign_move.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/assign_move.pass.cpp @@ -10,7 +10,7 @@ // -// list& operator=(list&& c); +// list& operator=(list&& c); // constexpr since C++26 #include #include @@ -19,7 +19,7 @@ #include "test_allocator.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list > l(test_allocator(5)); std::list > lo(test_allocator(5)); @@ -79,5 +79,14 @@ int main(int, char**) { assert(it == l2.begin()); // Iterators remain valid } + return true; +} + +int main(int, char**) { + 
assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/copy.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/copy.pass.cpp index de52da0fefab..a3e510d4d6eb 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/copy.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/copy.pass.cpp @@ -8,7 +8,7 @@ // -// list(const list& c); +// list(const list& c); // constexpr since C++26 #include #include @@ -18,7 +18,7 @@ #include "test_allocator.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list l(3, 2); std::list l2 = l; @@ -50,5 +50,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/copy_alloc.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/copy_alloc.pass.cpp index 0d6c6f431f09..5da17a9c9b59 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/copy_alloc.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/copy_alloc.pass.cpp @@ -8,7 +8,7 @@ // -// list(const list& c, const allocator_type& a); +// list(const list& c, const allocator_type& a); // constexpr since C++26 #include #include @@ -17,7 +17,7 @@ #include "test_allocator.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list > l(3, 2, test_allocator(5)); std::list > l2(l, test_allocator(3)); @@ -39,5 +39,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/default.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/default.pass.cpp index 0cfd8f1e9c59..1256433659c6 100644 --- 
a/libcxx/test/std/containers/sequences/list/list.cons/default.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/default.pass.cpp @@ -8,7 +8,7 @@ // -// explicit list(const Alloc& = Alloc()); +// explicit list(const Alloc& = Alloc()); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "DefaultOnly.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list l; assert(l.size() == 0); @@ -65,5 +65,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/default_stack_alloc.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/default_stack_alloc.pass.cpp index e31a58d7b9a5..3a78d0e0e0d5 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/default_stack_alloc.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/default_stack_alloc.pass.cpp @@ -8,7 +8,7 @@ // -// explicit list(const Alloc& = Alloc()); +// explicit list(const Alloc& = Alloc()); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_allocator.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list l; assert(l.size() == 0); @@ -45,5 +45,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/from_range.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/from_range.pass.cpp index cc5ed5729b57..311c72d815d1 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/from_range.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/from_range.pass.cpp @@ -9,14 +9,15 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // template R> -// 
list(from_range_t, R&& rg, const Allocator& = Allocator()); // C++23 +// list(from_range_t, R&& rg, const Allocator& = Allocator()); // C++23; constexpr since C++26 #include +#include #include "../../from_range_sequence_containers.h" #include "test_macros.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { for_all_iterators_and_allocators([]() { test_sequence_container([](const auto&) { // No additional validation to do. @@ -26,8 +27,19 @@ int main(int, char**) { static_assert(test_constraints()); - test_exception_safety_throwing_copy(); - test_exception_safety_throwing_allocator(); + if (!TEST_IS_CONSTANT_EVALUATED) { + test_exception_safety_throwing_copy(); + test_exception_safety_throwing_allocator(); + } + + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/initializer_list.pass.cpp index 3ba90d1337e9..9e3a71ed3bd1 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/initializer_list.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/initializer_list.pass.cpp @@ -10,7 +10,7 @@ // -// list(initializer_list il); +// list(initializer_list il); // constexpr since C++26 #include #include @@ -18,7 +18,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list d = {3, 4, 5, 6}; assert(d.size() == 4); @@ -38,5 +38,14 @@ int main(int, char**) { assert(*i++ == 6); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/initializer_list_alloc.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/initializer_list_alloc.pass.cpp index e4779eb5a640..1b6b1e19c6eb 100644 --- 
a/libcxx/test/std/containers/sequences/list/list.cons/initializer_list_alloc.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/initializer_list_alloc.pass.cpp @@ -10,7 +10,7 @@ // -// list(initializer_list il, const Allocator& a = allocator_type()); +// list(initializer_list il, const Allocator& a = allocator_type()); // constexpr since C++26 #include #include @@ -19,7 +19,7 @@ #include "test_allocator.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list> d({3, 4, 5, 6}, test_allocator(3)); assert(d.get_allocator() == test_allocator(3)); @@ -41,5 +41,14 @@ int main(int, char**) { assert(*i++ == 6); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/input_iterator.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/input_iterator.pass.cpp index c99069f92f51..d92307283098 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/input_iterator.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/input_iterator.pass.cpp @@ -9,10 +9,11 @@ // // template -// list(InputIterator first, InputIterator last, const Allocator& = Allocator()); +// list(InputIterator first, InputIterator last, const Allocator& = Allocator()); // constexpr since C++26 #include #include + #include "test_macros.h" #include "test_iterators.h" #include "test_allocator.h" @@ -22,7 +23,7 @@ # include "container_test_types.h" #endif -void basic_test() { +TEST_CONSTEXPR_CXX26 void basic_test() { { int a[] = {0, 1, 2, 3}; std::list l( @@ -81,7 +82,7 @@ void basic_test() { #endif } -void test_emplacable_concept() { +TEST_CONSTEXPR_CXX26 void test_emplacable_concept() { #if TEST_STD_VER >= 11 int arr1[] = {42}; int arr2[] = {1, 101, 42}; @@ -126,7 +127,7 @@ void test_emplacable_concept() { #endif } -void test_emplacable_concept_with_alloc() { 
+TEST_CONSTEXPR_CXX26 void test_emplacable_concept_with_alloc() { #if TEST_STD_VER >= 11 int arr1[] = {42}; int arr2[] = {1, 101, 42}; @@ -239,12 +240,24 @@ void test_ctor_under_alloc_with_alloc() { #endif } -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { basic_test(); test_emplacable_concept(); test_emplacable_concept_with_alloc(); - test_ctor_under_alloc(); - test_ctor_under_alloc_with_alloc(); + + if (!TEST_IS_CONSTANT_EVALUATED) { + test_ctor_under_alloc(); + test_ctor_under_alloc_with_alloc(); + } + + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/move.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/move.pass.cpp index 6703390f10b9..cae2886cf08b 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/move.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/move.pass.cpp @@ -10,7 +10,7 @@ // -// list(list&& c); +// list(list&& c); // constexpr since C++26 #include #include @@ -19,7 +19,7 @@ #include "test_allocator.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list > l(test_allocator(5)); std::list > lo(test_allocator(5)); @@ -63,5 +63,14 @@ int main(int, char**) { assert(it == l2.begin()); // Iterators remain valid } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/move_alloc.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/move_alloc.pass.cpp index f6a1f2c33a63..dee0282c9978 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/move_alloc.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/move_alloc.pass.cpp @@ -10,7 +10,7 @@ // -// list(list&& c, const allocator_type& a); +// list(list&& c, const allocator_type& a); // 
constexpr since C++26 #include #include @@ -19,7 +19,7 @@ #include "test_allocator.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list > l(test_allocator(5)); std::list > lo(test_allocator(5)); @@ -69,5 +69,14 @@ int main(int, char**) { assert(l2.get_allocator() == min_allocator()); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/op_equal_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/op_equal_initializer_list.pass.cpp index a9ab30b82640..d7679931ee71 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/op_equal_initializer_list.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/op_equal_initializer_list.pass.cpp @@ -10,14 +10,14 @@ // -// list& operator=(initializer_list il); +// list& operator=(initializer_list il); // constexpr since C++26 #include #include #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list d; d = {3, 4, 5, 6}; @@ -39,5 +39,14 @@ int main(int, char**) { assert(*i++ == 6); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/size_type.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/size_type.pass.cpp index 937a86a27e05..55371e8354a9 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/size_type.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/size_type.pass.cpp @@ -8,18 +8,19 @@ // -// explicit list(size_type n); +// explicit list(size_type n); // constexpr since C++26 #include #include #include + #include "test_macros.h" #include "DefaultOnly.h" #include "test_allocator.h" #include "min_allocator.h" template -void 
test3(unsigned n, Allocator const& alloc = Allocator()) { +TEST_CONSTEXPR_CXX26 void test1(unsigned n, Allocator const& alloc = Allocator()) { #if TEST_STD_VER > 11 typedef std::list C; { @@ -34,7 +35,7 @@ void test3(unsigned n, Allocator const& alloc = Allocator()) { #endif } -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list l(3); assert(l.size() == 3); @@ -70,15 +71,10 @@ int main(int, char**) { assert(*i == 0); ++i; assert(*i == 0); - test3>(3); + test1>(3); } #endif #if TEST_STD_VER >= 11 - { - std::list l(3); - assert(l.size() == 3); - assert(std::distance(l.begin(), l.end()) == 3); - } { std::list> l(3); assert(l.size() == 3); @@ -90,12 +86,29 @@ int main(int, char**) { ++i; assert(*i == 0); } - { - std::list> l(3); - assert(l.size() == 3); - assert(std::distance(l.begin(), l.end()) == 3); + + if (!TEST_IS_CONSTANT_EVALUATED) { + { + std::list l(3); + assert(l.size() == 3); + assert(std::distance(l.begin(), l.end()) == 3); + } + { + std::list> l(3); + assert(l.size() == 3); + assert(std::distance(l.begin(), l.end()) == 3); + } } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.cons/size_value_alloc.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/size_value_alloc.pass.cpp index ff7982ce147d..42700c3ed658 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/size_value_alloc.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/size_value_alloc.pass.cpp @@ -8,7 +8,7 @@ // -// list(size_type n, const T& value, const Allocator& = Allocator()); +// list(size_type n, const T& value, const Allocator& = Allocator()); // constexpr since C++26 #include #include @@ -17,7 +17,7 @@ #include "test_allocator.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list l(3, 2); assert(l.size() == 3); @@ -77,5 +77,14 @@ int 
main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.erasure/erase.pass.cpp b/libcxx/test/std/containers/sequences/list/list.erasure/erase.pass.cpp index 77f9f8956037..babd4b2758e6 100644 --- a/libcxx/test/std/containers/sequences/list/list.erasure/erase.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.erasure/erase.pass.cpp @@ -11,7 +11,7 @@ // template // typename list::size_type -// erase(list& c, const U& value); +// erase(list& c, const U& value); // constexpr since C++26 #include #include @@ -21,14 +21,14 @@ #include "min_allocator.h" template -void test0(S s, U val, S expected, std::size_t expected_erased_count) { +TEST_CONSTEXPR_CXX26 void test0(S s, U val, S expected, std::size_t expected_erased_count) { ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase(s, val))); assert(expected_erased_count == std::erase(s, val)); assert(s == expected); } template -void test() { +TEST_CONSTEXPR_CXX26 void test1() { test0(S(), 1, S(), 0); test0(S({1}), 1, S(), 1); @@ -62,13 +62,22 @@ void test() { test0(S({1, 2, 1}), opt(3), S({1, 2, 1}), 0); } -int main(int, char**) { - test>(); - test>>(); - test>>(); +TEST_CONSTEXPR_CXX26 bool test() { + test1>(); + test1>>(); + test1>>(); + + test1>(); + test1>(); - test>(); - test>(); + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/sequences/list/list.erasure/erase_if.pass.cpp index 5352a2f454f8..e396330bc68c 100644 --- a/libcxx/test/std/containers/sequences/list/list.erasure/erase_if.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.erasure/erase_if.pass.cpp @@ -11,7 +11,7 @@ // template // typename list::size_type -// erase_if(list& c, 
Predicate pred); +// erase_if(list& c, Predicate pred); // constexpr since C++26 #include @@ -20,14 +20,14 @@ #include "min_allocator.h" template -void test0(S s, Pred p, S expected, std::size_t expected_erased_count) { +TEST_CONSTEXPR_CXX26 void test0(S s, Pred p, S expected, std::size_t expected_erased_count) { ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p))); assert(expected_erased_count == std::erase_if(s, p)); assert(s == expected); } template -void test() { +TEST_CONSTEXPR_CXX26 void test1() { auto is1 = [](auto v) { return v == 1; }; auto is2 = [](auto v) { return v == 2; }; auto is3 = [](auto v) { return v == 3; }; @@ -64,13 +64,22 @@ void test() { test0(S({1, 2, 3}), False, S({1, 2, 3}), 0); } -int main(int, char**) { - test>(); - test>>(); - test>>(); +TEST_CONSTEXPR_CXX26 bool test() { + test1>(); + test1>>(); + test1>>(); + + test1>(); + test1>(); - test>(); - test>(); + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/append_range.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/append_range.pass.cpp index 46a99cb54844..4b47a8738e52 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/append_range.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/append_range.pass.cpp @@ -9,9 +9,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // template R> -// constexpr void append_range(R&& rg); // C++23 +// constexpr void append_range(R&& rg); // C++23; constexpr since C++26 #include +#include #include "../../insert_range_sequence_containers.h" #include "test_macros.h" @@ -21,7 +22,7 @@ // {empty/one-element/full} container); // - appending move-only elements; // - an exception is thrown when copying the elements or when allocating new elements. 
-int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { static_assert(test_constraints_append_range()); for_all_iterators_and_allocators([]() { @@ -31,8 +32,19 @@ int main(int, char**) { }); test_sequence_append_range_move_only(); - test_append_range_exception_safety_throwing_copy(); - test_append_range_exception_safety_throwing_allocator(); + if (!TEST_IS_CONSTANT_EVALUATED) { + test_append_range_exception_safety_throwing_copy(); + test_append_range_exception_safety_throwing_allocator(); + } + + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/assign_range.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/assign_range.pass.cpp index d745786b6815..83a12879a041 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/assign_range.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/assign_range.pass.cpp @@ -9,9 +9,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // template R> -// constexpr void assign_range(R&& rg); // C++23 +// constexpr void assign_range(R&& rg); // C++23; constexpr since C++26 #include +#include #include "../../insert_range_sequence_containers.h" #include "test_macros.h" @@ -21,7 +22,7 @@ // {empty/one-element/full} container); // - assigning move-only elements; // - an exception is thrown when copying the elements or when allocating new elements. 
-int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { static_assert(test_constraints_assign_range()); for_all_iterators_and_allocators([]() { @@ -31,8 +32,19 @@ int main(int, char**) { }); test_sequence_assign_range_move_only(); - test_assign_range_exception_safety_throwing_copy(); - test_assign_range_exception_safety_throwing_allocator(); + if (!TEST_IS_CONSTANT_EVALUATED) { + test_assign_range_exception_safety_throwing_copy(); + test_assign_range_exception_safety_throwing_allocator(); + } + + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/clear.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/clear.pass.cpp index 5931fd62d037..0b38ae05bd68 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/clear.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/clear.pass.cpp @@ -8,7 +8,7 @@ // -// void clear() noexcept; +// void clear() noexcept; // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a[] = {1, 2, 3}; std::list c(a, a + 3); @@ -34,5 +34,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/emplace.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/emplace.pass.cpp index 2f83aa0d317b..9bd7a151d20e 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/emplace.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/emplace.pass.cpp @@ -10,7 +10,7 @@ // -// template void emplace(const_iterator p, Args&&... args); +// template void emplace(const_iterator p, Args&&... 
args); // constexpr since C++26 #include #include @@ -26,13 +26,13 @@ class A { A& operator=(const A&); public: - A(int i, double d) : i_(i), d_(d) {} + TEST_CONSTEXPR_CXX20 A(int i, double d) : i_(i), d_(d) {} - int geti() const { return i_; } - double getd() const { return d_; } + TEST_CONSTEXPR int geti() const { return i_; } + TEST_CONSTEXPR double getd() const { return d_; } }; -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list c; c.emplace(c.cbegin(), 2, 3.5); @@ -60,5 +60,14 @@ int main(int, char**) { assert(c.back().getd() == 4.5); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/emplace_back.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/emplace_back.pass.cpp index 900f8b83d3e6..5f84c4c7c05a 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/emplace_back.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/emplace_back.pass.cpp @@ -10,7 +10,7 @@ // -// template reference emplace_back(Args&&... args); +// template reference emplace_back(Args&&... 
args); // constexpr since C++26 // return type is 'reference' in C++17; 'void' before #include @@ -27,13 +27,13 @@ class A { A& operator=(const A&); public: - A(int i, double d) : i_(i), d_(d) {} + TEST_CONSTEXPR_CXX20 A(int i, double d) : i_(i), d_(d) {} - int geti() const { return i_; } - double getd() const { return d_; } + TEST_CONSTEXPR int geti() const { return i_; } + TEST_CONSTEXPR double getd() const { return d_; } }; -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list c; #if TEST_STD_VER > 14 @@ -83,5 +83,14 @@ int main(int, char**) { assert(c.back().getd() == 4.5); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/emplace_front.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/emplace_front.pass.cpp index 665f5077bd42..95474b52dbd0 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/emplace_front.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/emplace_front.pass.cpp @@ -10,7 +10,7 @@ // -// template reference emplace_front(Args&&... args); +// template reference emplace_front(Args&&... 
args); // constexpr since C++26 // return type is 'reference' in C++17; 'void' before #include @@ -27,13 +27,13 @@ class A { A& operator=(const A&); public: - A(int i, double d) : i_(i), d_(d) {} + TEST_CONSTEXPR_CXX20 A(int i, double d) : i_(i), d_(d) {} - int geti() const { return i_; } - double getd() const { return d_; } + TEST_CONSTEXPR int geti() const { return i_; } + TEST_CONSTEXPR double getd() const { return d_; } }; -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list c; #if TEST_STD_VER > 14 @@ -84,5 +84,14 @@ int main(int, char**) { assert(c.back().getd() == 3.5); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter.pass.cpp index ba139b4367d7..79dae11a8263 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter.pass.cpp @@ -8,7 +8,7 @@ // -// iterator erase(const_iterator position); +// iterator erase(const_iterator position); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a1[] = {1, 2, 3}; std::list l1(a1, a1 + 3); @@ -62,5 +62,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter_iter.pass.cpp index cc8d537032d0..fa6f8139ff75 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter_iter.pass.cpp +++ 
b/libcxx/test/std/containers/sequences/list/list.modifiers/erase_iter_iter.pass.cpp @@ -8,7 +8,7 @@ // -// iterator erase(const_iterator first, const_iterator last); +// iterator erase(const_iterator first, const_iterator last); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { int a1[] = {1, 2, 3}; { std::list l1(a1, a1 + 3); @@ -81,5 +81,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_initializer_list.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_initializer_list.pass.cpp index 8bd01c940d95..4475d27a7e73 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_initializer_list.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_initializer_list.pass.cpp @@ -10,7 +10,7 @@ // -// iterator insert(const_iterator p, initializer_list il); +// iterator insert(const_iterator p, initializer_list il); // constexpr since C++26 #include #include @@ -18,7 +18,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list d(10, 1); std::list::iterator i = d.insert(std::next(d.cbegin(), 2), {3, 4, 5, 6}); @@ -62,5 +62,14 @@ int main(int, char**) { assert(*i++ == 1); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_iter_iter.pass.cpp index bab125ca6209..27db218511aa 100644 --- 
a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_iter_iter.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_iter_iter.pass.cpp @@ -9,7 +9,7 @@ // // template -// iterator insert(const_iterator position, Iter first, Iter last); +// iterator insert(const_iterator position, Iter first, Iter last); // constexpr since C++26 #include #include @@ -21,7 +21,7 @@ #include "count_new.h" template -void test() { +TEST_CONSTEXPR_CXX26 void test() { int a1[] = {1, 2, 3}; List l1; typename List::iterator i = l1.insert(l1.begin(), a1, a1 + 3); @@ -53,36 +53,47 @@ void test() { assert(*i == 3); #if !defined(TEST_HAS_NO_EXCEPTIONS) && !defined(DISABLE_NEW_COUNT) - globalMemCounter.throw_after = 2; - int save_count = globalMemCounter.outstanding_new; - try { - i = l1.insert(i, a2, a2 + 3); - assert(false); - } catch (...) { + if (!TEST_IS_CONSTANT_EVALUATED) { + globalMemCounter.throw_after = 2; + int save_count = globalMemCounter.outstanding_new; + try { + i = l1.insert(i, a2, a2 + 3); + assert(false); + } catch (...) 
{ + } + assert(globalMemCounter.checkOutstandingNewEq(save_count)); + assert(l1.size() == 6); + assert(std::distance(l1.begin(), l1.end()) == 6); + i = l1.begin(); + assert(*i == 1); + ++i; + assert(*i == 2); + ++i; + assert(*i == 4); + ++i; + assert(*i == 5); + ++i; + assert(*i == 6); + ++i; + assert(*i == 3); } - assert(globalMemCounter.checkOutstandingNewEq(save_count)); - assert(l1.size() == 6); - assert(std::distance(l1.begin(), l1.end()) == 6); - i = l1.begin(); - assert(*i == 1); - ++i; - assert(*i == 2); - ++i; - assert(*i == 4); - ++i; - assert(*i == 5); - ++i; - assert(*i == 6); - ++i; - assert(*i == 3); #endif } -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { test >(); #if TEST_STD_VER >= 11 test>>(); #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_rvalue.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_rvalue.pass.cpp index 8bb513208eb7..7d7b2f158a60 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_rvalue.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_rvalue.pass.cpp @@ -10,7 +10,7 @@ // -// iterator insert(const_iterator position, value_type&& x); +// iterator insert(const_iterator position, value_type&& x); // constexpr since C++26 #include #include @@ -19,7 +19,7 @@ #include "MoveOnly.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list l1; l1.insert(l1.cend(), MoveOnly(1)); @@ -41,5 +41,14 @@ int main(int, char**) { assert(l1.back() == MoveOnly(1)); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_size_value.pass.cpp 
b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_size_value.pass.cpp index 32ee7a73406d..1056d997f9d8 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_size_value.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_size_value.pass.cpp @@ -8,7 +8,7 @@ // -// iterator insert(const_iterator position, size_type n, const value_type& x); +// iterator insert(const_iterator position, size_type n, const value_type& x); // constexpr since C++26 // UNSUPPORTED: sanitizer-new-delete @@ -21,7 +21,7 @@ #include "test_macros.h" template -void test() { +TEST_CONSTEXPR_CXX26 void test() { int a1[] = {1, 2, 3}; int a2[] = {1, 4, 4, 4, 4, 4, 2, 3}; List l1(a1, a1 + 3); @@ -29,23 +29,34 @@ void test() { assert(i == std::next(l1.begin())); assert(l1 == List(a2, a2 + 8)); #ifndef TEST_HAS_NO_EXCEPTIONS - globalMemCounter.throw_after = 4; - int save_count = globalMemCounter.outstanding_new; - try { - i = l1.insert(i, 5, 5); - assert(false); - } catch (...) { + if (!TEST_IS_CONSTANT_EVALUATED) { + globalMemCounter.throw_after = 4; + int save_count = globalMemCounter.outstanding_new; + try { + i = l1.insert(i, 5, 5); + assert(false); + } catch (...) 
{ + } + assert(globalMemCounter.checkOutstandingNewEq(save_count)); + assert(l1 == List(a2, a2 + 8)); } - assert(globalMemCounter.checkOutstandingNewEq(save_count)); - assert(l1 == List(a2, a2 + 8)); #endif } -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { test >(); #if TEST_STD_VER >= 11 test>>(); #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_value.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_value.pass.cpp index 129fe05cb39d..615bb5bb2b42 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_value.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_iter_value.pass.cpp @@ -8,7 +8,7 @@ // -// iterator insert(const_iterator position, const value_type& x); +// iterator insert(const_iterator position, const value_type& x); // constexpr since C++26 #include #include @@ -19,7 +19,7 @@ #include "count_new.h" template -void test() { +TEST_CONSTEXPR_CXX26 void test() { int a1[] = {1, 2, 3}; int a2[] = {1, 4, 2, 3}; List l1(a1, a1 + 3); @@ -30,23 +30,34 @@ void test() { assert(l1 == List(a2, a2 + 4)); #if !defined(TEST_HAS_NO_EXCEPTIONS) && !defined(DISABLE_NEW_COUNT) - globalMemCounter.throw_after = 0; - int save_count = globalMemCounter.outstanding_new; - try { - i = l1.insert(i, 5); - assert(false); - } catch (...) { + if (!TEST_IS_CONSTANT_EVALUATED) { + globalMemCounter.throw_after = 0; + int save_count = globalMemCounter.outstanding_new; + try { + i = l1.insert(i, 5); + assert(false); + } catch (...) 
{ + } + assert(globalMemCounter.checkOutstandingNewEq(save_count)); + assert(l1 == List(a2, a2 + 4)); } - assert(globalMemCounter.checkOutstandingNewEq(save_count)); - assert(l1 == List(a2, a2 + 4)); #endif } -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { test >(); #if TEST_STD_VER >= 11 test>>(); #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_range.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_range.pass.cpp index eb3937eb8f9e..5908d40d0cc9 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/insert_range.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/insert_range.pass.cpp @@ -6,12 +6,16 @@ // //===----------------------------------------------------------------------===// +// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=20000000 +// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=80000000 + // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // template R> -// constexpr iterator insert_range(const_iterator position, R&& rg); // C++23 +// constexpr iterator insert_range(const_iterator position, R&& rg); // C++23; constexpr since C++26 #include +#include #include "../../insert_range_sequence_containers.h" #include "test_macros.h" @@ -21,7 +25,7 @@ // {empty/one-element/full} container at the {beginning/middle/end}); // - inserting move-only elements; // - an exception is thrown when copying the elements or when allocating new elements. 
-int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { static_assert(test_constraints_insert_range()); for_all_iterators_and_allocators([]() { @@ -31,8 +35,19 @@ int main(int, char**) { }); test_sequence_insert_range_move_only(); - test_insert_range_exception_safety_throwing_copy(); - test_insert_range_exception_safety_throwing_allocator(); + if (!TEST_IS_CONSTANT_EVALUATED) { + test_insert_range_exception_safety_throwing_copy(); + test_insert_range_exception_safety_throwing_allocator(); + } + + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/pop_back.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/pop_back.pass.cpp index aaa225b14776..5bbac428d8d5 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/pop_back.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/pop_back.pass.cpp @@ -8,7 +8,7 @@ // -// void pop_back(); +// void pop_back(); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a[] = {1, 2, 3}; std::list c(a, a + 3); @@ -40,5 +40,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/pop_front.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/pop_front.pass.cpp index 33b8ff35c524..74b6a1cc319b 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/pop_front.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/pop_front.pass.cpp @@ -8,7 +8,7 @@ // -// void pop_front(); +// void pop_front(); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_macros.h" #include 
"min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a[] = {1, 2, 3}; std::list c(a, a + 3); @@ -40,5 +40,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/prepend_range.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/prepend_range.pass.cpp index d5e4d4fabb76..41f7061c09d2 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/prepend_range.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/prepend_range.pass.cpp @@ -9,9 +9,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // template R> -// constexpr void prepend_range(R&& rg); // C++23 +// constexpr void prepend_range(R&& rg); // C++23; constexpr since C++26 #include +#include #include "../../insert_range_sequence_containers.h" #include "test_macros.h" @@ -21,7 +22,7 @@ // {empty/one-element/full} container); // - prepending move-only elements; // - an exception is thrown when copying the elements or when allocating new elements. 
-int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { static_assert(test_constraints_prepend_range()); for_all_iterators_and_allocators([]() { @@ -31,8 +32,19 @@ int main(int, char**) { }); test_sequence_prepend_range_move_only(); - test_prepend_range_exception_safety_throwing_copy(); - test_prepend_range_exception_safety_throwing_allocator(); + if (!TEST_IS_CONSTANT_EVALUATED) { + test_prepend_range_exception_safety_throwing_copy(); + test_prepend_range_exception_safety_throwing_allocator(); + } + + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/push_back.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/push_back.pass.cpp index 582f4a200ac2..3ac9a60e7901 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/push_back.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/push_back.pass.cpp @@ -8,7 +8,7 @@ // -// void push_back(const value_type& x); +// void push_back(const value_type& x); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list c; for (int i = 0; i < 5; ++i) @@ -34,5 +34,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/push_back_rvalue.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/push_back_rvalue.pass.cpp index 6a31d81d694f..764dd7da1832 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/push_back_rvalue.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/push_back_rvalue.pass.cpp @@ -10,7 +10,7 @@ // -// void push_back(value_type&& x); +// void 
push_back(value_type&& x); // constexpr since C++26 #include #include @@ -19,7 +19,7 @@ #include "MoveOnly.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list l1; l1.push_back(MoveOnly(1)); @@ -41,5 +41,14 @@ int main(int, char**) { assert(l1.back() == MoveOnly(2)); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/push_front.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/push_front.pass.cpp index 3b5f74a217a2..7ec18e841822 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/push_front.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/push_front.pass.cpp @@ -8,7 +8,7 @@ // -// void push_front(const value_type& x); +// void push_front(const value_type& x); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { std::list c; for (int i = 0; i < 5; ++i) @@ -34,5 +34,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.modifiers/push_front_rvalue.pass.cpp b/libcxx/test/std/containers/sequences/list/list.modifiers/push_front_rvalue.pass.cpp index 0d41b8fd8553..930b6af5f243 100644 --- a/libcxx/test/std/containers/sequences/list/list.modifiers/push_front_rvalue.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.modifiers/push_front_rvalue.pass.cpp @@ -10,7 +10,7 @@ // -// void push_front(value_type&& x); +// void push_front(value_type&& x); // constexpr since C++26 #include #include @@ -19,7 +19,7 @@ #include "MoveOnly.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool 
test() { { std::list l1; l1.push_front(MoveOnly(1)); @@ -41,5 +41,14 @@ int main(int, char**) { assert(l1.back() == MoveOnly(1)); } + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.ops/merge.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/merge.pass.cpp index 7f82f65fd493..19ea940cb2a0 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/merge.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/merge.pass.cpp @@ -8,7 +8,7 @@ // -// void merge(list& x); +// void merge(list& x); // constexpr since C++26 // If (addressof(x) == this) does nothing; otherwise ... #include @@ -17,7 +17,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a1[] = {1, 3, 7, 9, 10}; int a2[] = {0, 2, 4, 5, 6, 8, 11}; @@ -49,5 +49,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.ops/merge_comp.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/merge_comp.pass.cpp index 13241909c6e5..974700926db6 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/merge_comp.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/merge_comp.pass.cpp @@ -8,7 +8,7 @@ // -// template void merge(list& x, Compare comp); +// template void merge(list& x, Compare comp); // constexpr since C++26 // If (addressof(x) == this) does nothing; otherwise ... 
#include @@ -18,7 +18,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a1[] = {10, 9, 7, 3, 1}; int a2[] = {11, 8, 6, 5, 4, 2, 0}; @@ -49,5 +49,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.ops/remove.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/remove.pass.cpp index 238ea9b69ea2..9bf677b8745c 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/remove.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/remove.pass.cpp @@ -9,7 +9,7 @@ // // void remove(const value_type& value); // pre-c++20 -// size_type remove(const value_type& value); // c++20 and later +// size_type remove(const value_type& value); // c++20 and later; constexpr since C++26 #include #include @@ -18,22 +18,22 @@ #include "min_allocator.h" struct S { - S(int i) : i_(new int(i)) {} - S(const S& rhs) : i_(new int(*rhs.i_)) {} - S& operator=(const S& rhs) { + TEST_CONSTEXPR_CXX20 S(int i) : i_(new int(i)) {} + TEST_CONSTEXPR_CXX20 S(const S& rhs) : i_(new int(*rhs.i_)) {} + TEST_CONSTEXPR_CXX14 S& operator=(const S& rhs) { *i_ = *rhs.i_; return *this; } - ~S() { + TEST_CONSTEXPR_CXX20 ~S() { delete i_; i_ = NULL; } - bool operator==(const S& rhs) const { return *i_ == *rhs.i_; } - int get() const { return *i_; } + TEST_CONSTEXPR bool operator==(const S& rhs) const { return *i_ == *rhs.i_; } + TEST_CONSTEXPR int get() const { return *i_; } int* i_; }; -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a1[] = {1, 2, 3, 4}; int a2[] = {1, 2, 4}; @@ -101,5 +101,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git 
a/libcxx/test/std/containers/sequences/list/list.ops/remove_if.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/remove_if.pass.cpp index 510cb361142b..c7ee09530ed9 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/remove_if.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/remove_if.pass.cpp @@ -9,7 +9,7 @@ // // template void remove_if(Pred pred); // before C++20 -// template size_type remove_if(Pred pred); // c++20 and later +// template size_type remove_if(Pred pred); // c++20 and later; constexpr since C++26 #include #include @@ -19,22 +19,22 @@ #include "min_allocator.h" #include "counting_predicates.h" -bool even(int i) { return i % 2 == 0; } +TEST_CONSTEXPR bool even(int i) { return i % 2 == 0; } -bool g(int i) { return i < 3; } +TEST_CONSTEXPR bool g(int i) { return i < 3; } struct PredLWG526 { - PredLWG526(int i) : i_(i) {} - ~PredLWG526() { i_ = -32767; } - bool operator()(const PredLWG526& p) const { return p.i_ == i_; } + TEST_CONSTEXPR_CXX20 PredLWG526(int i) : i_(i) {} + TEST_CONSTEXPR_CXX20 ~PredLWG526() { i_ = -32767; } + TEST_CONSTEXPR bool operator()(const PredLWG526& p) const { return p.i_ == i_; } - bool operator==(int i) const { return i == i_; } + TEST_CONSTEXPR bool operator==(int i) const { return i == i_; } int i_; }; typedef unary_counting_predicate Predicate; -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a1[] = {1, 2, 3, 4}; int a2[] = {3, 4}; @@ -92,5 +92,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.ops/reverse.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/reverse.pass.cpp index 5b91ad0224be..43e894f5e83a 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/reverse.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/reverse.pass.cpp @@ -8,7 +8,7 @@ 
// -// void reverse(); +// void reverse(); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a1[] = {11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; int a2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; @@ -34,5 +34,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.ops/sort.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/sort.pass.cpp index 892419f6ac96..34ead09110a3 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/sort.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/sort.pass.cpp @@ -8,7 +8,7 @@ // -// void sort(); +// void sort(); // constexpr since C++26 #include #include @@ -58,7 +58,7 @@ void test_stable(int N) { } } -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a1[] = {4, 8, 1, 0, 5, 7, 2, 3, 6, 11, 10, 9}; int a2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; @@ -76,8 +76,19 @@ int main(int, char**) { } #endif - for (int i = 0; i < 40; ++i) - test_stable(i); + if (!TEST_IS_CONSTANT_EVALUATED) { + for (int i = 0; i < 40; ++i) + test_stable(i); + } + + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.ops/sort_comp.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/sort_comp.pass.cpp index 499702281991..a24f187f4b4e 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/sort_comp.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/sort_comp.pass.cpp @@ -8,7 +8,7 @@ // -// template sort(Compare comp); +// template sort(Compare comp); // constexpr since C++26 #include #include @@ -76,7 +76,7 @@ void test_stable(int N) { } } -int 
main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a1[] = {4, 8, 1, 0, 5, 7, 2, 3, 6, 11, 10, 9}; int a2[] = {11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; @@ -85,37 +85,48 @@ int main(int, char**) { assert(c1 == std::list(a2, a2 + sizeof(a2) / sizeof(a2[0]))); } + if (!TEST_IS_CONSTANT_EVALUATED) { // Test with throwing comparison; make sure that nothing is lost. // This is (sort of) LWG #2824 #ifndef TEST_HAS_NO_EXCEPTIONS - { - int a1[] = {4, 8, 1, 0, 5, 7, 2, 3, 6, 11, 10, 9}; - const int sz = sizeof(a1) / sizeof(a1[0]); - for (int i = 0; i < 10; ++i) { - std::list c1(a1, a1 + sz); - try { - throwingLess comp(i); - c1.sort(std::cref(comp)); - } catch (int) { + { + int a1[] = {4, 8, 1, 0, 5, 7, 2, 3, 6, 11, 10, 9}; + const int sz = sizeof(a1) / sizeof(a1[0]); + for (int i = 0; i < 10; ++i) { + std::list c1(a1, a1 + sz); + try { + throwingLess comp(i); + c1.sort(std::cref(comp)); + } catch (int) { + } + assert((c1.size() == sz)); + assert((std::is_permutation(c1.begin(), c1.end(), a1))); } - assert((c1.size() == sz)); - assert((std::is_permutation(c1.begin(), c1.end(), a1))); } - } #endif #if TEST_STD_VER >= 11 - { - int a1[] = {4, 8, 1, 0, 5, 7, 2, 3, 6, 11, 10, 9}; - int a2[] = {11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; - std::list> c1(a1, a1 + sizeof(a1) / sizeof(a1[0])); - c1.sort(std::greater()); - assert((c1 == std::list>(a2, a2 + sizeof(a2) / sizeof(a2[0])))); - } + { + int a1[] = {4, 8, 1, 0, 5, 7, 2, 3, 6, 11, 10, 9}; + int a2[] = {11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; + std::list> c1(a1, a1 + sizeof(a1) / sizeof(a1[0])); + c1.sort(std::greater()); + assert((c1 == std::list>(a2, a2 + sizeof(a2) / sizeof(a2[0])))); + } #endif - for (int i = 0; i < 40; ++i) - test_stable(i); + for (int i = 0; i < 40; ++i) + test_stable(i); + } + + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.ops/splice_pos_list.pass.cpp 
b/libcxx/test/std/containers/sequences/list/list.ops/splice_pos_list.pass.cpp index 4b40876e3bb7..0f1cfefab34e 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/splice_pos_list.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/splice_pos_list.pass.cpp @@ -8,7 +8,7 @@ // -// void splice(const_iterator position, list& x); +// void splice(const_iterator position, list& x); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { int a1[] = {1, 2, 3}; int a2[] = {4, 5, 6}; { @@ -780,5 +780,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.ops/splice_pos_list_iter.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/splice_pos_list_iter.pass.cpp index db71fe17a06e..38dce58dc390 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/splice_pos_list_iter.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/splice_pos_list_iter.pass.cpp @@ -8,7 +8,7 @@ // -// void splice(const_iterator position, list& x, iterator i); +// void splice(const_iterator position, list& x, iterator i); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { int a1[] = {1, 2, 3}; int a2[] = {4, 5, 6}; { @@ -334,5 +334,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.ops/splice_pos_list_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/splice_pos_list_iter_iter.pass.cpp index b77b6a26440d..8fca21c81c66 100644 --- 
a/libcxx/test/std/containers/sequences/list/list.ops/splice_pos_list_iter_iter.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/splice_pos_list_iter_iter.pass.cpp @@ -8,7 +8,7 @@ // -// void splice(const_iterator position, list& x, iterator first, iterator last); +// void splice(const_iterator position, list& x, iterator first, iterator last); // constexpr since C++26 #include #include @@ -16,7 +16,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { int a1[] = {1, 2, 3}; int a2[] = {4, 5, 6}; { @@ -214,5 +214,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.ops/unique.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/unique.pass.cpp index c08e348218f9..c2fa54f42553 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/unique.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/unique.pass.cpp @@ -9,7 +9,7 @@ // // void unique(); // before C++20 -// size_type unique(); // C++20 and later +// size_type unique(); // C++20 and later; constexpr since C++26 #include #include @@ -17,7 +17,7 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a1[] = {2, 1, 1, 4, 4, 4, 4, 3, 3}; int a2[] = {2, 1, 4, 3}; @@ -46,5 +46,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp index 1d3a8e0c426a..830e54a3288d 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp +++ 
b/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp @@ -9,7 +9,7 @@ // // template void unique(BinaryPred pred); // before C++20 -// template size_type unique(BinaryPred pred); // C++20 and later +// template size_type unique(BinaryPred pred); // C++20 and later; constexpr since C++26 #include #include @@ -18,18 +18,18 @@ #include "test_macros.h" #include "min_allocator.h" -bool g(int x, int y) { return x == y; } +TEST_CONSTEXPR bool g(int x, int y) { return x == y; } struct PredLWG526 { - PredLWG526(int i) : i_(i) {} - ~PredLWG526() { i_ = -32767; } - bool operator()(const PredLWG526& lhs, const PredLWG526& rhs) const { return lhs.i_ == rhs.i_; } + TEST_CONSTEXPR_CXX20 PredLWG526(int i) : i_(i) {} + TEST_CONSTEXPR_CXX20 ~PredLWG526() { i_ = -32767; } + TEST_CONSTEXPR bool operator()(const PredLWG526& lhs, const PredLWG526& rhs) const { return lhs.i_ == rhs.i_; } - bool operator==(int i) const { return i == i_; } + TEST_CONSTEXPR bool operator==(int i) const { return i == i_; } int i_; }; -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a1[] = {2, 1, 1, 4, 4, 4, 4, 3, 3}; int a2[] = {2, 1, 4, 3}; @@ -75,5 +75,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.special/swap.pass.cpp b/libcxx/test/std/containers/sequences/list/list.special/swap.pass.cpp index 1e9c71131d80..32efddb06920 100644 --- a/libcxx/test/std/containers/sequences/list/list.special/swap.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.special/swap.pass.cpp @@ -9,7 +9,7 @@ // // template -// void swap(list& x, list& y); +// void swap(list& x, list& y); // constexpr since C++26 #include #include @@ -17,7 +17,7 @@ #include "test_allocator.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { int a1[] = {1, 3, 7, 9, 10}; int a2[] = 
{0, 2, 4, 5, 6, 8, 11}; @@ -133,5 +133,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/containers/sequences/list/list.special/swap_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/list/list.special/swap_noexcept.pass.cpp index a4b1622a04be..037c7d07c4cb 100644 --- a/libcxx/test/std/containers/sequences/list/list.special/swap_noexcept.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.special/swap_noexcept.pass.cpp @@ -12,7 +12,7 @@ // void swap(list& c) // noexcept(!allocator_type::propagate_on_container_swap::value || -// __is_nothrow_swappable::value); +// __is_nothrow_swappable::value); // constexpr since C++26 // // In C++17, the standard says that swap shall have: // noexcept(allocator_traits::is_always_equal::value); @@ -52,7 +52,7 @@ struct some_alloc2 { typedef std::true_type is_always_equal; }; -int main(int, char**) { +TEST_CONSTEXPR_CXX26 bool test() { { typedef std::list C; static_assert(noexcept(swap(std::declval(), std::declval())), ""); @@ -84,5 +84,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + assert(test()); +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif + return 0; } diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/list.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/list.version.compile.pass.cpp index 9fd638087fce..d10c61c0e9cf 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/list.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/list.version.compile.pass.cpp @@ -24,6 +24,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_constexpr_list +# error "__cpp_lib_constexpr_list should not be 
defined before c++26" +# endif + # ifdef __cpp_lib_containers_ranges # error "__cpp_lib_containers_ranges should not be defined before c++23" # endif @@ -54,6 +58,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_constexpr_list +# error "__cpp_lib_constexpr_list should not be defined before c++26" +# endif + # ifdef __cpp_lib_containers_ranges # error "__cpp_lib_containers_ranges should not be defined before c++23" # endif @@ -87,6 +95,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++17" # endif +# ifdef __cpp_lib_constexpr_list +# error "__cpp_lib_constexpr_list should not be defined before c++26" +# endif + # ifdef __cpp_lib_containers_ranges # error "__cpp_lib_containers_ranges should not be defined before c++23" # endif @@ -126,6 +138,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++20" # endif +# ifdef __cpp_lib_constexpr_list +# error "__cpp_lib_constexpr_list should not be defined before c++26" +# endif + # ifdef __cpp_lib_containers_ranges # error "__cpp_lib_containers_ranges should not be defined before c++23" # endif @@ -171,6 +187,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++23" # endif +# ifdef __cpp_lib_constexpr_list +# error "__cpp_lib_constexpr_list should not be defined before c++26" +# endif + # ifndef __cpp_lib_containers_ranges # error "__cpp_lib_containers_ranges should be defined in c++23" # endif @@ -219,6 +239,13 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++26" # endif +# ifndef __cpp_lib_constexpr_list +# error "__cpp_lib_constexpr_list should be defined in c++26" +# endif +# if __cpp_lib_constexpr_list != 202502L +# error "__cpp_lib_constexpr_list should have the value 202502L in c++26" +# endif + # ifndef __cpp_lib_containers_ranges # error "__cpp_lib_containers_ranges should be defined 
in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index a13edacd1e46..e4fe9f994e2e 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -208,6 +208,10 @@ # error "__cpp_lib_constexpr_iterator should not be defined before c++20" # endif +# ifdef __cpp_lib_constexpr_list +# error "__cpp_lib_constexpr_list should not be defined before c++26" +# endif + # ifdef __cpp_lib_constexpr_memory # error "__cpp_lib_constexpr_memory should not be defined before c++20" # endif @@ -1100,6 +1104,10 @@ # error "__cpp_lib_constexpr_iterator should not be defined before c++20" # endif +# ifdef __cpp_lib_constexpr_list +# error "__cpp_lib_constexpr_list should not be defined before c++26" +# endif + # ifdef __cpp_lib_constexpr_memory # error "__cpp_lib_constexpr_memory should not be defined before c++20" # endif @@ -2094,6 +2102,10 @@ # error "__cpp_lib_constexpr_iterator should not be defined before c++20" # endif +# ifdef __cpp_lib_constexpr_list +# error "__cpp_lib_constexpr_list should not be defined before c++26" +# endif + # ifdef __cpp_lib_constexpr_memory # error "__cpp_lib_constexpr_memory should not be defined before c++20" # endif @@ -3334,6 +3346,10 @@ # error "__cpp_lib_constexpr_iterator should have the value 201811L in c++20" # endif +# ifdef __cpp_lib_constexpr_list +# error "__cpp_lib_constexpr_list should not be defined before c++26" +# endif + # ifndef __cpp_lib_constexpr_memory # error "__cpp_lib_constexpr_memory should be defined in c++20" # endif @@ -4790,6 +4806,10 @@ # error "__cpp_lib_constexpr_iterator should have the value 201811L in c++23" # endif +# ifdef __cpp_lib_constexpr_list +# error 
"__cpp_lib_constexpr_list should not be defined before c++26" +# endif + # ifndef __cpp_lib_constexpr_memory # error "__cpp_lib_constexpr_memory should be defined in c++23" # endif @@ -6468,6 +6488,13 @@ # error "__cpp_lib_constexpr_iterator should have the value 201811L in c++26" # endif +# ifndef __cpp_lib_constexpr_list +# error "__cpp_lib_constexpr_list should be defined in c++26" +# endif +# if __cpp_lib_constexpr_list != 202502L +# error "__cpp_lib_constexpr_list should have the value 202502L in c++26" +# endif + # ifndef __cpp_lib_constexpr_memory # error "__cpp_lib_constexpr_memory should be defined in c++26" # endif diff --git a/libcxx/test/support/min_allocator.h b/libcxx/test/support/min_allocator.h index 3b7d12af24ce..16775649f55c 100644 --- a/libcxx/test/support/min_allocator.h +++ b/libcxx/test/support/min_allocator.h @@ -22,384 +22,381 @@ #include "test_macros.h" template -class bare_allocator -{ +class bare_allocator { public: - typedef T value_type; + typedef T value_type; - bare_allocator() TEST_NOEXCEPT {} + bare_allocator() TEST_NOEXCEPT {} - template - bare_allocator(bare_allocator) TEST_NOEXCEPT {} + template + bare_allocator(bare_allocator) TEST_NOEXCEPT {} - T* allocate(std::size_t n) - { - return static_cast(::operator new(n*sizeof(T))); - } + T* allocate(std::size_t n) { return static_cast(::operator new(n * sizeof(T))); } - void deallocate(T* p, std::size_t) - { - return ::operator delete(static_cast(p)); - } + void deallocate(T* p, std::size_t) { return ::operator delete(static_cast(p)); } - friend bool operator==(bare_allocator, bare_allocator) {return true;} - friend bool operator!=(bare_allocator x, bare_allocator y) {return !(x == y);} + friend bool operator==(bare_allocator, bare_allocator) { return true; } + friend bool operator!=(bare_allocator x, bare_allocator y) { return !(x == y); } }; - template -class no_default_allocator -{ +class no_default_allocator { #if TEST_STD_VER >= 11 - no_default_allocator() = delete; + 
no_default_allocator() = delete; #else - no_default_allocator(); + no_default_allocator(); #endif - struct construct_tag {}; - explicit no_default_allocator(construct_tag) {} + struct construct_tag {}; + TEST_CONSTEXPR_CXX20 explicit no_default_allocator(construct_tag) {} public: - static no_default_allocator create() { - construct_tag tag; - return no_default_allocator(tag); - } + TEST_CONSTEXPR_CXX20 static no_default_allocator create() { + construct_tag tag; + return no_default_allocator(tag); + } public: - typedef T value_type; + typedef T value_type; - template - no_default_allocator(no_default_allocator) TEST_NOEXCEPT {} + template + TEST_CONSTEXPR_CXX20 no_default_allocator(no_default_allocator) TEST_NOEXCEPT {} - T* allocate(std::size_t n) - { - return static_cast(::operator new(n*sizeof(T))); - } + TEST_CONSTEXPR_CXX20 T* allocate(std::size_t n) { return static_cast(std::allocator().allocate(n)); } - void deallocate(T* p, std::size_t) - { - return ::operator delete(static_cast(p)); - } + TEST_CONSTEXPR_CXX20 void deallocate(T* p, std::size_t n) { std::allocator().deallocate(p, n); } - friend bool operator==(no_default_allocator, no_default_allocator) {return true;} - friend bool operator!=(no_default_allocator x, no_default_allocator y) {return !(x == y);} + friend TEST_CONSTEXPR bool operator==(no_default_allocator, no_default_allocator) { return true; } + friend TEST_CONSTEXPR bool operator!=(no_default_allocator x, no_default_allocator y) { return !(x == y); } }; struct malloc_allocator_base { - static std::size_t outstanding_bytes; - static std::size_t alloc_count; - static std::size_t dealloc_count; - static bool disable_default_constructor; - - static std::size_t outstanding_alloc() { - assert(alloc_count >= dealloc_count); - return (alloc_count - dealloc_count); - } - - static void reset() { - assert(outstanding_alloc() == 0); - disable_default_constructor = false; - outstanding_bytes = 0; - alloc_count = 0; - dealloc_count = 0; - } + static 
std::size_t outstanding_bytes; + static std::size_t alloc_count; + static std::size_t dealloc_count; + static bool disable_default_constructor; + + static std::size_t outstanding_alloc() { + assert(alloc_count >= dealloc_count); + return (alloc_count - dealloc_count); + } + + static void reset() { + assert(outstanding_alloc() == 0); + disable_default_constructor = false; + outstanding_bytes = 0; + alloc_count = 0; + dealloc_count = 0; + } }; -size_t malloc_allocator_base::outstanding_bytes = 0; -size_t malloc_allocator_base::alloc_count = 0; -size_t malloc_allocator_base::dealloc_count = 0; +size_t malloc_allocator_base::outstanding_bytes = 0; +size_t malloc_allocator_base::alloc_count = 0; +size_t malloc_allocator_base::dealloc_count = 0; bool malloc_allocator_base::disable_default_constructor = false; - template -class malloc_allocator : public malloc_allocator_base -{ +class malloc_allocator : public malloc_allocator_base { public: - typedef T value_type; - - malloc_allocator() TEST_NOEXCEPT { assert(!disable_default_constructor); } - - template - malloc_allocator(malloc_allocator) TEST_NOEXCEPT {} - - T* allocate(std::size_t n) - { - const std::size_t nbytes = n*sizeof(T); - ++alloc_count; - outstanding_bytes += nbytes; - return static_cast(std::malloc(nbytes)); - } - - void deallocate(T* p, std::size_t n) - { - const std::size_t nbytes = n*sizeof(T); - ++dealloc_count; - outstanding_bytes -= nbytes; - std::free(static_cast(p)); - } - - friend bool operator==(malloc_allocator, malloc_allocator) {return true;} - friend bool operator!=(malloc_allocator x, malloc_allocator y) {return !(x == y);} + typedef T value_type; + + malloc_allocator() TEST_NOEXCEPT { assert(!disable_default_constructor); } + + template + malloc_allocator(malloc_allocator) TEST_NOEXCEPT {} + + T* allocate(std::size_t n) { + const std::size_t nbytes = n * sizeof(T); + ++alloc_count; + outstanding_bytes += nbytes; + return static_cast(std::malloc(nbytes)); + } + + void deallocate(T* p, 
std::size_t n) { + const std::size_t nbytes = n * sizeof(T); + ++dealloc_count; + outstanding_bytes -= nbytes; + std::free(static_cast(p)); + } + + friend bool operator==(malloc_allocator, malloc_allocator) { return true; } + friend bool operator!=(malloc_allocator x, malloc_allocator y) { return !(x == y); } }; template -struct cpp03_allocator : bare_allocator -{ - typedef T value_type; - typedef value_type* pointer; - - static bool construct_called; - - // Returned value is not used but it's not prohibited. - pointer construct(pointer p, const value_type& val) - { - ::new(p) value_type(val); - construct_called = true; - return p; - } - - std::size_t max_size() const - { - return UINT_MAX / sizeof(T); - } +struct cpp03_allocator : bare_allocator { + typedef T value_type; + typedef value_type* pointer; + + static bool construct_called; + + // Returned value is not used but it's not prohibited. + pointer construct(pointer p, const value_type& val) { + ::new (p) value_type(val); + construct_called = true; + return p; + } + + std::size_t max_size() const { return UINT_MAX / sizeof(T); } }; -template bool cpp03_allocator::construct_called = false; +template +bool cpp03_allocator::construct_called = false; template -struct cpp03_overload_allocator : bare_allocator -{ - typedef T value_type; - typedef value_type* pointer; - - static bool construct_called; - - void construct(pointer p, const value_type& val) - { - construct(p, val, std::is_class()); - } - void construct(pointer p, const value_type& val, std::true_type) - { - ::new(p) value_type(val); - construct_called = true; - } - void construct(pointer p, const value_type& val, std::false_type) - { - ::new(p) value_type(val); - construct_called = true; - } - - std::size_t max_size() const - { - return UINT_MAX / sizeof(T); - } +struct cpp03_overload_allocator : bare_allocator { + typedef T value_type; + typedef value_type* pointer; + + static bool construct_called; + + void construct(pointer p, const value_type& val) { 
construct(p, val, std::is_class()); } + void construct(pointer p, const value_type& val, std::true_type) { + ::new (p) value_type(val); + construct_called = true; + } + void construct(pointer p, const value_type& val, std::false_type) { + ::new (p) value_type(val); + construct_called = true; + } + + std::size_t max_size() const { return UINT_MAX / sizeof(T); } }; -template bool cpp03_overload_allocator::construct_called = false; +template +bool cpp03_overload_allocator::construct_called = false; -template > class min_pointer; -template class min_pointer; -template class min_pointer; -template class min_pointer; -template class min_allocator; +template > +class min_pointer; +template +class min_pointer; +template +class min_pointer; +template +class min_pointer; +template +class min_allocator; template -class min_pointer -{ - const void* ptr_; +class min_pointer { + const void* ptr_; + public: - min_pointer() TEST_NOEXCEPT = default; - min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} - template - min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(p.ptr_) {} + min_pointer() TEST_NOEXCEPT = default; + min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} + template + min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(p.ptr_) {} - explicit operator bool() const {return ptr_ != nullptr;} + explicit operator bool() const { return ptr_ != nullptr; } - friend bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} - friend bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} - template friend class min_pointer; + friend bool operator==(min_pointer x, min_pointer y) { return x.ptr_ == y.ptr_; } + friend bool operator!=(min_pointer x, min_pointer y) { return !(x == y); } + template + friend class min_pointer; }; template -class min_pointer -{ - void* ptr_; +class min_pointer { + void* ptr_; + public: - min_pointer() TEST_NOEXCEPT = default; - TEST_CONSTEXPR_CXX14 min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} - 
template ::value - >::type - > - TEST_CONSTEXPR_CXX14 min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(p.ptr_) {} - - TEST_CONSTEXPR_CXX14 explicit operator bool() const {return ptr_ != nullptr;} - - TEST_CONSTEXPR_CXX14 friend bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} - TEST_CONSTEXPR_CXX14 friend bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} - template friend class min_pointer; + min_pointer() TEST_NOEXCEPT = default; + TEST_CONSTEXPR_CXX14 min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} + template ::value >::type > + TEST_CONSTEXPR_CXX14 min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(p.ptr_) {} + + TEST_CONSTEXPR_CXX14 explicit operator bool() const { return ptr_ != nullptr; } + + TEST_CONSTEXPR_CXX14 friend bool operator==(min_pointer x, min_pointer y) { return x.ptr_ == y.ptr_; } + TEST_CONSTEXPR_CXX14 friend bool operator!=(min_pointer x, min_pointer y) { return !(x == y); } + template + friend class min_pointer; }; template -class min_pointer -{ - T* ptr_; +class min_pointer { + T* ptr_; + + TEST_CONSTEXPR_CXX14 explicit min_pointer(T* p) TEST_NOEXCEPT : ptr_(p) {} - TEST_CONSTEXPR_CXX14 explicit min_pointer(T* p) TEST_NOEXCEPT : ptr_(p) {} public: - min_pointer() TEST_NOEXCEPT = default; - TEST_CONSTEXPR_CXX14 min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} - TEST_CONSTEXPR_CXX14 explicit min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(static_cast(p.ptr_)) {} - - TEST_CONSTEXPR_CXX14 explicit operator bool() const {return ptr_ != nullptr;} - - typedef std::ptrdiff_t difference_type; - typedef T& reference; - typedef T* pointer; - typedef T value_type; - typedef std::random_access_iterator_tag iterator_category; - - TEST_CONSTEXPR_CXX14 reference operator*() const {return *ptr_;} - TEST_CONSTEXPR_CXX14 pointer operator->() const {return ptr_;} - - TEST_CONSTEXPR_CXX14 min_pointer& operator++() {++ptr_; return *this;} - TEST_CONSTEXPR_CXX14 min_pointer operator++(int) {min_pointer 
tmp(*this); ++ptr_; return tmp;} - - TEST_CONSTEXPR_CXX14 min_pointer& operator--() {--ptr_; return *this;} - TEST_CONSTEXPR_CXX14 min_pointer operator--(int) {min_pointer tmp(*this); --ptr_; return tmp;} - - TEST_CONSTEXPR_CXX14 min_pointer& operator+=(difference_type n) {ptr_ += n; return *this;} - TEST_CONSTEXPR_CXX14 min_pointer& operator-=(difference_type n) {ptr_ -= n; return *this;} - - TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n) const - { - min_pointer tmp(*this); - tmp += n; - return tmp; - } - - friend TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n, min_pointer x) - { - return x + n; - } - - TEST_CONSTEXPR_CXX14 min_pointer operator-(difference_type n) const - { - min_pointer tmp(*this); - tmp -= n; - return tmp; - } - - friend TEST_CONSTEXPR_CXX14 difference_type operator-(min_pointer x, min_pointer y) - { - return x.ptr_ - y.ptr_; - } - - TEST_CONSTEXPR_CXX14 reference operator[](difference_type n) const {return ptr_[n];} - - friend TEST_CONSTEXPR_CXX14 bool operator< (min_pointer x, min_pointer y) {return x.ptr_ < y.ptr_;} - friend TEST_CONSTEXPR_CXX14 bool operator> (min_pointer x, min_pointer y) {return y < x;} - friend TEST_CONSTEXPR_CXX14 bool operator<=(min_pointer x, min_pointer y) {return !(y < x);} - friend TEST_CONSTEXPR_CXX14 bool operator>=(min_pointer x, min_pointer y) {return !(x < y);} - - static TEST_CONSTEXPR_CXX14 min_pointer pointer_to(T& t) {return min_pointer(std::addressof(t));} - - friend TEST_CONSTEXPR_CXX14 bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} - friend TEST_CONSTEXPR_CXX14 bool operator!=(min_pointer x, min_pointer y) {return !(x == y);} - template friend class min_pointer; - template friend class min_allocator; + min_pointer() TEST_NOEXCEPT = default; + TEST_CONSTEXPR_CXX14 min_pointer(std::nullptr_t) TEST_NOEXCEPT : ptr_(nullptr) {} + TEST_CONSTEXPR_CXX14 explicit min_pointer(min_pointer p) TEST_NOEXCEPT : ptr_(static_cast(p.ptr_)) {} + + TEST_CONSTEXPR_CXX14 
explicit operator bool() const { return ptr_ != nullptr; } + + typedef std::ptrdiff_t difference_type; + typedef T& reference; + typedef T* pointer; + typedef T value_type; + typedef std::random_access_iterator_tag iterator_category; + + TEST_CONSTEXPR_CXX14 reference operator*() const { return *ptr_; } + TEST_CONSTEXPR_CXX14 pointer operator->() const { return ptr_; } + + TEST_CONSTEXPR_CXX14 min_pointer& operator++() { + ++ptr_; + return *this; + } + TEST_CONSTEXPR_CXX14 min_pointer operator++(int) { + min_pointer tmp(*this); + ++ptr_; + return tmp; + } + + TEST_CONSTEXPR_CXX14 min_pointer& operator--() { + --ptr_; + return *this; + } + TEST_CONSTEXPR_CXX14 min_pointer operator--(int) { + min_pointer tmp(*this); + --ptr_; + return tmp; + } + + TEST_CONSTEXPR_CXX14 min_pointer& operator+=(difference_type n) { + ptr_ += n; + return *this; + } + TEST_CONSTEXPR_CXX14 min_pointer& operator-=(difference_type n) { + ptr_ -= n; + return *this; + } + + TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n) const { + min_pointer tmp(*this); + tmp += n; + return tmp; + } + + friend TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n, min_pointer x) { return x + n; } + + TEST_CONSTEXPR_CXX14 min_pointer operator-(difference_type n) const { + min_pointer tmp(*this); + tmp -= n; + return tmp; + } + + friend TEST_CONSTEXPR_CXX14 difference_type operator-(min_pointer x, min_pointer y) { return x.ptr_ - y.ptr_; } + + TEST_CONSTEXPR_CXX14 reference operator[](difference_type n) const { return ptr_[n]; } + + friend TEST_CONSTEXPR_CXX14 bool operator<(min_pointer x, min_pointer y) { return x.ptr_ < y.ptr_; } + friend TEST_CONSTEXPR_CXX14 bool operator>(min_pointer x, min_pointer y) { return y < x; } + friend TEST_CONSTEXPR_CXX14 bool operator<=(min_pointer x, min_pointer y) { return !(y < x); } + friend TEST_CONSTEXPR_CXX14 bool operator>=(min_pointer x, min_pointer y) { return !(x < y); } + + static TEST_CONSTEXPR_CXX14 min_pointer pointer_to(T& t) { return 
min_pointer(std::addressof(t)); } + + friend TEST_CONSTEXPR_CXX14 bool operator==(min_pointer x, min_pointer y) { return x.ptr_ == y.ptr_; } + friend TEST_CONSTEXPR_CXX14 bool operator!=(min_pointer x, min_pointer y) { return !(x == y); } + template + friend class min_pointer; + template + friend class min_allocator; }; template -class min_pointer -{ - const T* ptr_; +class min_pointer { + const T* ptr_; + + TEST_CONSTEXPR_CXX14 explicit min_pointer(const T* p) : ptr_(p) {} - TEST_CONSTEXPR_CXX14 explicit min_pointer(const T* p) : ptr_(p) {} public: - min_pointer() TEST_NOEXCEPT = default; - TEST_CONSTEXPR_CXX14 min_pointer(std::nullptr_t) : ptr_(nullptr) {} - TEST_CONSTEXPR_CXX14 min_pointer(min_pointer p) : ptr_(p.ptr_) {} - TEST_CONSTEXPR_CXX14 explicit min_pointer(min_pointer p) : ptr_(static_cast(p.ptr_)) {} - - TEST_CONSTEXPR_CXX14 explicit operator bool() const {return ptr_ != nullptr;} - - typedef std::ptrdiff_t difference_type; - typedef const T& reference; - typedef const T* pointer; - typedef const T value_type; - typedef std::random_access_iterator_tag iterator_category; - - TEST_CONSTEXPR_CXX14 reference operator*() const {return *ptr_;} - TEST_CONSTEXPR_CXX14 pointer operator->() const {return ptr_;} - - TEST_CONSTEXPR_CXX14 min_pointer& operator++() {++ptr_; return *this;} - TEST_CONSTEXPR_CXX14 min_pointer operator++(int) {min_pointer tmp(*this); ++ptr_; return tmp;} - - TEST_CONSTEXPR_CXX14 min_pointer& operator--() {--ptr_; return *this;} - TEST_CONSTEXPR_CXX14 min_pointer operator--(int) {min_pointer tmp(*this); --ptr_; return tmp;} - - TEST_CONSTEXPR_CXX14 min_pointer& operator+=(difference_type n) {ptr_ += n; return *this;} - TEST_CONSTEXPR_CXX14 min_pointer& operator-=(difference_type n) {ptr_ -= n; return *this;} - - TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n) const - { - min_pointer tmp(*this); - tmp += n; - return tmp; - } - - friend TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n, min_pointer x) - { - return 
x + n; - } - - TEST_CONSTEXPR_CXX14 min_pointer operator-(difference_type n) const - { - min_pointer tmp(*this); - tmp -= n; - return tmp; - } - - friend TEST_CONSTEXPR_CXX14 difference_type operator-(min_pointer x, min_pointer y) - { - return x.ptr_ - y.ptr_; - } - - TEST_CONSTEXPR_CXX14 reference operator[](difference_type n) const {return ptr_[n];} - - friend TEST_CONSTEXPR_CXX14 bool operator< (min_pointer x, min_pointer y) {return x.ptr_ < y.ptr_;} - friend TEST_CONSTEXPR_CXX14 bool operator> (min_pointer x, min_pointer y) {return y < x;} - friend TEST_CONSTEXPR_CXX14 bool operator<=(min_pointer x, min_pointer y) {return !(y < x);} - friend TEST_CONSTEXPR_CXX14 bool operator>=(min_pointer x, min_pointer y) {return !(x < y);} - - static TEST_CONSTEXPR_CXX14 min_pointer pointer_to(const T& t) {return min_pointer(std::addressof(t));} - - friend TEST_CONSTEXPR_CXX14 bool operator==(min_pointer x, min_pointer y) {return x.ptr_ == y.ptr_;} - friend TEST_CONSTEXPR_CXX14 bool operator!=(min_pointer x, min_pointer y) {return x.ptr_ != y.ptr_;} - friend TEST_CONSTEXPR_CXX14 bool operator==(min_pointer x, std::nullptr_t) {return x.ptr_ == nullptr;} - friend TEST_CONSTEXPR_CXX14 bool operator!=(min_pointer x, std::nullptr_t) {return x.ptr_ != nullptr;} - friend TEST_CONSTEXPR_CXX14 bool operator==(std::nullptr_t, min_pointer x) {return x.ptr_ == nullptr;} - friend TEST_CONSTEXPR_CXX14 bool operator!=(std::nullptr_t, min_pointer x) {return x.ptr_ != nullptr;} - template friend class min_pointer; + min_pointer() TEST_NOEXCEPT = default; + TEST_CONSTEXPR_CXX14 min_pointer(std::nullptr_t) : ptr_(nullptr) {} + TEST_CONSTEXPR_CXX14 min_pointer(min_pointer p) : ptr_(p.ptr_) {} + TEST_CONSTEXPR_CXX14 explicit min_pointer(min_pointer p) : ptr_(static_cast(p.ptr_)) {} + + TEST_CONSTEXPR_CXX14 explicit operator bool() const { return ptr_ != nullptr; } + + typedef std::ptrdiff_t difference_type; + typedef const T& reference; + typedef const T* pointer; + typedef const T value_type; + 
typedef std::random_access_iterator_tag iterator_category; + + TEST_CONSTEXPR_CXX14 reference operator*() const { return *ptr_; } + TEST_CONSTEXPR_CXX14 pointer operator->() const { return ptr_; } + + TEST_CONSTEXPR_CXX14 min_pointer& operator++() { + ++ptr_; + return *this; + } + TEST_CONSTEXPR_CXX14 min_pointer operator++(int) { + min_pointer tmp(*this); + ++ptr_; + return tmp; + } + + TEST_CONSTEXPR_CXX14 min_pointer& operator--() { + --ptr_; + return *this; + } + TEST_CONSTEXPR_CXX14 min_pointer operator--(int) { + min_pointer tmp(*this); + --ptr_; + return tmp; + } + + TEST_CONSTEXPR_CXX14 min_pointer& operator+=(difference_type n) { + ptr_ += n; + return *this; + } + TEST_CONSTEXPR_CXX14 min_pointer& operator-=(difference_type n) { + ptr_ -= n; + return *this; + } + + TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n) const { + min_pointer tmp(*this); + tmp += n; + return tmp; + } + + friend TEST_CONSTEXPR_CXX14 min_pointer operator+(difference_type n, min_pointer x) { return x + n; } + + TEST_CONSTEXPR_CXX14 min_pointer operator-(difference_type n) const { + min_pointer tmp(*this); + tmp -= n; + return tmp; + } + + friend TEST_CONSTEXPR_CXX14 difference_type operator-(min_pointer x, min_pointer y) { return x.ptr_ - y.ptr_; } + + TEST_CONSTEXPR_CXX14 reference operator[](difference_type n) const { return ptr_[n]; } + + friend TEST_CONSTEXPR_CXX14 bool operator<(min_pointer x, min_pointer y) { return x.ptr_ < y.ptr_; } + friend TEST_CONSTEXPR_CXX14 bool operator>(min_pointer x, min_pointer y) { return y < x; } + friend TEST_CONSTEXPR_CXX14 bool operator<=(min_pointer x, min_pointer y) { return !(y < x); } + friend TEST_CONSTEXPR_CXX14 bool operator>=(min_pointer x, min_pointer y) { return !(x < y); } + + static TEST_CONSTEXPR_CXX14 min_pointer pointer_to(const T& t) { return min_pointer(std::addressof(t)); } + + friend TEST_CONSTEXPR_CXX14 bool operator==(min_pointer x, min_pointer y) { return x.ptr_ == y.ptr_; } + friend TEST_CONSTEXPR_CXX14 bool 
operator!=(min_pointer x, min_pointer y) { return x.ptr_ != y.ptr_; } + friend TEST_CONSTEXPR_CXX14 bool operator==(min_pointer x, std::nullptr_t) { return x.ptr_ == nullptr; } + friend TEST_CONSTEXPR_CXX14 bool operator!=(min_pointer x, std::nullptr_t) { return x.ptr_ != nullptr; } + friend TEST_CONSTEXPR_CXX14 bool operator==(std::nullptr_t, min_pointer x) { return x.ptr_ == nullptr; } + friend TEST_CONSTEXPR_CXX14 bool operator!=(std::nullptr_t, min_pointer x) { return x.ptr_ != nullptr; } + template + friend class min_pointer; }; template -class min_allocator -{ +class min_allocator { public: - typedef T value_type; - typedef min_pointer pointer; + typedef T value_type; + typedef min_pointer pointer; - min_allocator() = default; - template - TEST_CONSTEXPR_CXX20 min_allocator(min_allocator) {} + min_allocator() = default; + template + TEST_CONSTEXPR_CXX20 min_allocator(min_allocator) {} - TEST_CONSTEXPR_CXX20 pointer allocate(std::size_t n) { return pointer(std::allocator().allocate(n)); } + TEST_CONSTEXPR_CXX20 pointer allocate(std::size_t n) { return pointer(std::allocator().allocate(n)); } - TEST_CONSTEXPR_CXX20 void deallocate(pointer p, std::size_t n) { std::allocator().deallocate(p.ptr_, n); } + TEST_CONSTEXPR_CXX20 void deallocate(pointer p, std::size_t n) { std::allocator().deallocate(p.ptr_, n); } - TEST_CONSTEXPR_CXX20 friend bool operator==(min_allocator, min_allocator) {return true;} - TEST_CONSTEXPR_CXX20 friend bool operator!=(min_allocator x, min_allocator y) {return !(x == y);} + TEST_CONSTEXPR_CXX20 friend bool operator==(min_allocator, min_allocator) { return true; } + TEST_CONSTEXPR_CXX20 friend bool operator!=(min_allocator x, min_allocator y) { return !(x == y); } }; template @@ -427,25 +424,19 @@ template class explicit_allocator { public: - typedef T value_type; + typedef T value_type; - TEST_CONSTEXPR_CXX20 explicit_allocator() TEST_NOEXCEPT {} + TEST_CONSTEXPR_CXX20 explicit_allocator() TEST_NOEXCEPT {} - template - TEST_CONSTEXPR_CXX20 
explicit explicit_allocator(explicit_allocator) TEST_NOEXCEPT {} + template + TEST_CONSTEXPR_CXX20 explicit explicit_allocator(explicit_allocator) TEST_NOEXCEPT {} - TEST_CONSTEXPR_CXX20 T* allocate(std::size_t n) - { - return static_cast(std::allocator().allocate(n)); - } + TEST_CONSTEXPR_CXX20 T* allocate(std::size_t n) { return static_cast(std::allocator().allocate(n)); } - TEST_CONSTEXPR_CXX20 void deallocate(T* p, std::size_t n) - { - std::allocator().deallocate(p, n); - } + TEST_CONSTEXPR_CXX20 void deallocate(T* p, std::size_t n) { std::allocator().deallocate(p, n); } - TEST_CONSTEXPR_CXX20 friend bool operator==(explicit_allocator, explicit_allocator) {return true;} - TEST_CONSTEXPR_CXX20 friend bool operator!=(explicit_allocator x, explicit_allocator y) {return !(x == y);} + TEST_CONSTEXPR_CXX20 friend bool operator==(explicit_allocator, explicit_allocator) { return true; } + TEST_CONSTEXPR_CXX20 friend bool operator!=(explicit_allocator x, explicit_allocator y) { return !(x == y); } }; template diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index b59c7fdaf0a3..de06b9dd1bee 100644 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -372,6 +372,11 @@ feature_test_macros = [ "values": {"c++20": 201811}, "headers": ["iterator"], }, + { + "name": "__cpp_lib_constexpr_list", + "values": {"c++26": 202502}, + "headers": ["list"], + }, { "name": "__cpp_lib_constexpr_memory", "values": {"c++20": 201811, "c++23": 202202}, -- cgit v1.2.3 From 6d785ca4218b18e77e39320bea7f8973c3ea2764 Mon Sep 17 00:00:00 2001 From: Ying Yi Date: Wed, 18 Jun 2025 17:14:33 +0100 Subject: [Clang] Fix the clang/test/PCH/ignored-pch.c test. (#144737) Change the test to check the exit status of the 'ls' command line (instead of error message) since the error message is different when running 'ls' command on the different Host machine. 
--- clang/test/PCH/ignored-pch.c | 58 +++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/clang/test/PCH/ignored-pch.c b/clang/test/PCH/ignored-pch.c index 5b64582cba61..c6ef3fe74cee 100644 --- a/clang/test/PCH/ignored-pch.c +++ b/clang/test/PCH/ignored-pch.c @@ -1,96 +1,96 @@ // RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -o %t.pch // RUN: %clang -S -emit-llvm %s -include-pch %t.pch -o %t.ll -// RUN: ls %t.pch | FileCheck --check-prefix=CHECK-PCH %s -// RUN: ls %t.ll | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: ls %t.pch +// RUN: ls %t.ll // RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -o %t.pch // RUN: %clang %s -emit-ast -include-pch %t.pch -o %t.ll -// RUN: ls %t.pch | FileCheck --check-prefix=CHECK-PCH %s -// RUN: ls %t.ll | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: ls %t.pch +// RUN: ls %t.ll // Check that -ignore-pch causes -emit-pch and -include-pch options to be ignored. // RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -ignore-pch -o %t.pch // RUN: %clang -S -emit-llvm %s -include-pch %t.pch -ignore-pch -o %t.ll -// RUN: not ls %t.pch 2>&1 | FileCheck --check-prefix=CHECK-PCH-ERROR %s -// RUN: ls %t.ll 2>&1 | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: not ls %t.pch +// RUN: ls %t.ll // RUN: rm -rf %t.pch %t.ll // RUN: %clang -emit-ast %s -include-pch %t.pch -ignore-pch -o %t.ll -// RUN: not ls %t.ll 2>&1 | FileCheck --check-prefix=CHECK-OBJ-ERROR %s +// RUN: not ls %t.ll // Check that -ignore-pch works for multiple PCH related options. // Test with -building-pch-with-obj. 
// RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -ignore-pch -Xclang -building-pch-with-obj -o %t.pch // RUN: %clang -S -emit-llvm %s -include-pch %t.pch -ignore-pch -Xclang -building-pch-with-obj -o %t.ll -// RUN: not ls %t.pch 2>&1 | FileCheck --check-prefix=CHECK-PCH-ERROR %s -// RUN: ls %t.ll | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: not ls %t.pch +// RUN: ls %t.ll // Test with -fallow-pch-with-compiler-errors. // RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -ignore-pch -Xclang -fallow-pch-with-compiler-errors -o %t.pch // RUN: %clang -S -emit-llvm %s -include-pch %t.pch -ignore-pch -Xclang -fallow-pch-with-compiler-errors -o %t.ll -// RUN: not ls %t.pch 2>&1 | FileCheck --check-prefix=CHECK-PCH-ERROR %s -// RUN: ls %t.ll | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: not ls %t.pch +// RUN: ls %t.ll // Test with -fallow-pch-with-different-modules-cache-path. // RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -ignore-pch -Xclang -fallow-pch-with-different-modules-cache-path -o %t.pch // RUN: %clang -S -emit-llvm %s -ignore-pch -include-pch %t.pch -Xclang -fallow-pch-with-different-modules-cache-path -o %t.ll -// RUN: not ls %t.pch 2>&1 | FileCheck --check-prefix=CHECK-PCH-ERROR %s -// RUN: ls %t.ll | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: not ls %t.pch +// RUN: ls %t.ll // Test with -fpch-codegen. // RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -ignore-pch -fpch-codegen -o %t.pch // RUN: %clang -S -emit-llvm %s -include-pch %t.pch -ignore-pch -fpch-codegen -o %t.ll -// RUN: not ls %t.pch 2>&1 | FileCheck --check-prefix=CHECK-PCH-ERROR %s -// RUN: ls %t.ll | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: not ls %t.pch +// RUN: ls %t.ll // Test with -fpch-debuginfo. 
// RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -ignore-pch -fpch-debuginfo -o %t.pch // RUN: %clang -S -emit-llvm %s -include-pch %t.pch -ignore-pch -fpch-debuginfo -o %t.ll -// RUN: not ls %t.pch 2>&1 | FileCheck --check-prefix=CHECK-PCH %s -// RUN: ls %t.ll | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: not ls %t.pch +// RUN: ls %t.ll // Test with -fpch-instantiate-templates. // RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -ignore-pch -fpch-instantiate-templates -o %t.pch // RUN: %clang -S -emit-llvm %s -include-pch %t.pch -ignore-pch -fpch-instantiate-templates -o %t.ll -// RUN: not ls %t.pch 2>&1 | FileCheck --check-prefix=CHECK-PCH %s -// RUN: ls %t.ll | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: not ls %t.pch +// RUN: ls %t.ll // Test with -fno-pch-timestamp. // RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -ignore-pch -Xclang -fno-pch-timestamp -o %t.pch // RUN: %clang -S -emit-llvm %s -include-pch %t.pch -ignore-pch -Xclang -fno-pch-timestamp -o %t.ll -// RUN: not ls %t.pch 2>&1 | FileCheck --check-prefix=CHECK-PCH %s -// RUN: ls %t.ll | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: not ls %t.pch +// RUN: ls %t.ll // Test with -fno-validate-pch. // RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -ignore-pch -Xclang -fno-validate-pch -o %t.pch // RUN: %clang -S -emit-llvm %s -include-pch %t.pch -ignore-pch -Xclang -fno-validate-pch -o %t.ll -// RUN: not ls %t.pch 2>&1 | FileCheck --check-prefix=CHECK-PCH %s -// RUN: ls %t.ll | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: not ls %t.pch +// RUN: ls %t.ll // Test with -relocatable-pch. 
// RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -ignore-pch -relocatable-pch -o %t.pch // RUN: %clang -S -emit-llvm %s -include-pch %t.pch -ignore-pch -relocatable-pch -o %t.ll -// RUN: not ls %t.pch 2>&1 | FileCheck --check-prefix=CHECK-PCH %s -// RUN: ls %t.ll | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: not ls %t.pch +// RUN: ls %t.ll // Test with -pch-through-hdrstop-create/-pch-through-hdrstop-use // RUN: rm -rf %t.pch %t.ll // RUN: %clang -x c-header %S/Inputs/ignored-pch.h -ignore-pch -Xclang -pch-through-hdrstop-create -o %t.pch // RUN: %clang -S -emit-llvm %s -include-pch %t.pch -ignore-pch -Xclang -pch-through-hdrstop-use -o %t.ll -// RUN: not ls %t.pch 2>&1 | FileCheck --check-prefix=CHECK-PCH %s -// RUN: ls %t.ll | FileCheck --check-prefix=CHECK-OBJ %s +// RUN: not ls %t.pch +// RUN: ls %t.ll // Test with AST dump output: @@ -99,10 +99,6 @@ // RUN: %clang %s -include-pch %t.pch -Xclang -ast-dump-all -c | FileCheck --check-prefix=CHECK-AST-PCH %s // RUN: %clang %s -include-pch %t.pch -ignore-pch -Xclang -ast-dump-all -c | FileCheck --check-prefix=CHECK-AST %s -// CHECK-PCH: ignored-pch.c.{{.*}}.pch -// CHECK-OBJ: ignored-pch.c.{{.*}}.ll -// CHECK-PCH-ERROR: ignored-pch.c.{{.*}}.pch{{'?}}: No such file or directory -// CHECK-OBJ-ERROR: ignored-pch.c.{{.*}}.ll{{'?}}: No such file or directory // CHECK-AST-PCH: // CHECK-AST-NOT: -- cgit v1.2.3 From 2a41350aabd8b7d3e406141a55ce0bb6f5e70a76 Mon Sep 17 00:00:00 2001 From: Karlo Basioli Date: Wed, 18 Jun 2025 17:15:12 +0100 Subject: =?UTF-8?q?Fix=20bazel=20build=20issue=20caused=20by=20#142986=20s?= =?UTF-8?q?econd=20attempt=20(#144721=20didnt=E2=80=A6=20(#144743)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … cover everything) --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 1 + 2 files changed, 2 insertions(+) diff --git 
a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 48f2d0900d3e..761a93ea7dfa 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -12796,6 +12796,7 @@ cc_library( "lib/Dialect/Bufferization/IR/BufferViewFlowOpInterface.cpp", "lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp", "lib/Dialect/Bufferization/IR/BufferizationDialect.cpp", + "lib/Dialect/Bufferization/IR/BufferizationTypeInterfaces.cpp", "lib/Dialect/Bufferization/IR/BufferizationOps.cpp", "lib/Dialect/Bufferization/IR/UnstructuredControlFlow.cpp", ], diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index 0eaf86da7f27..a439fdd50d21 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -375,6 +375,7 @@ cc_library( "//llvm:IRReader", "//llvm:Support", "//mlir:ArithDialect", + "//mlir:BufferizationDialect", "//mlir:BufferizationInterfaces", "//mlir:BytecodeOpInterface", "//mlir:CallOpInterfaces", -- cgit v1.2.3 From dd40c460c42d075c47f0d1a6d83f129655eafe10 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Wed, 18 Jun 2025 12:16:01 -0400 Subject: [libc++] Clean up casts in std::forward_list (#130310) The patch removes unnecessary casts to `void*` pointers, inline some casts, and eliminates an identity cast. 
--- libcxx/include/forward_list | 97 ++++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 58 deletions(-) diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index e9b2c860b89c..bad0c11b7c7e 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -282,7 +282,6 @@ struct __forward_node_traits { typedef _NodePtr __node_pointer; typedef __forward_begin_node<_NodePtr> __begin_node; typedef __rebind_pointer_t<_NodePtr, __begin_node> __begin_node_pointer; - typedef __rebind_pointer_t<_NodePtr, void> __void_pointer; // TODO(LLVM 22): Remove this check # ifndef _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB @@ -294,10 +293,6 @@ struct __forward_node_traits { "is being broken between LLVM 19 and LLVM 20. If you don't care about your ABI being broken, define " "the _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB macro to silence this diagnostic."); # endif - - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI static __begin_node_pointer __as_iter_node(__node_pointer __p) { - return std::__static_fancy_pointer_cast<__begin_node_pointer>(__p); - } }; template @@ -309,10 +304,6 @@ struct __forward_begin_node { _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __forward_begin_node() : __next_(nullptr) {} _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit __forward_begin_node(pointer __n) : __next_(__n) {} - - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __begin_node_pointer __next_as_begin() const { - return std::__static_fancy_pointer_cast<__begin_node_pointer>(__next_); - } }; template @@ -361,15 +352,9 @@ class __forward_list_iterator { typedef typename __traits::__begin_node __begin_node_type; typedef typename __traits::__node_pointer __node_pointer; typedef typename __traits::__begin_node_pointer __begin_node_pointer; - typedef typename __traits::__void_pointer __void_pointer; __begin_node_pointer __ptr_; - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __begin_node_pointer 
__get_begin() const { return __ptr_; } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __node_pointer __get_unsafe_node_pointer() const { - return std::__static_fancy_pointer_cast<__node_pointer>(__ptr_); - } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit __forward_list_iterator(nullptr_t) _NOEXCEPT : __ptr_(nullptr) {} @@ -377,7 +362,7 @@ class __forward_list_iterator { _LIBCPP_HIDE_FROM_ABI explicit __forward_list_iterator(__begin_node_pointer __p) _NOEXCEPT : __ptr_(__p) {} _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit __forward_list_iterator(__node_pointer __p) _NOEXCEPT - : __ptr_(__traits::__as_iter_node(__p)) {} + : __ptr_(std::__static_fancy_pointer_cast<__begin_node_pointer>(__p)) {} template friend class forward_list; @@ -394,14 +379,14 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __forward_list_iterator() _NOEXCEPT : __ptr_(nullptr) {} _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference operator*() const { - return __get_unsafe_node_pointer()->__get_value(); + return std::__static_fancy_pointer_cast<__node_pointer>(__ptr_)->__get_value(); } _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI pointer operator->() const { - return pointer_traits::pointer_to(__get_unsafe_node_pointer()->__get_value()); + return pointer_traits::pointer_to(std::__static_fancy_pointer_cast<__node_pointer>(__ptr_)->__get_value()); } _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __forward_list_iterator& operator++() { - __ptr_ = __traits::__as_iter_node(__ptr_->__next_); + __ptr_ = std::__static_fancy_pointer_cast<__begin_node_pointer>(__ptr_->__next_); return *this; } _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __forward_list_iterator operator++(int) { @@ -430,15 +415,9 @@ class __forward_list_const_iterator { typedef typename __traits::__begin_node __begin_node_type; typedef typename __traits::__node_pointer __node_pointer; typedef typename __traits::__begin_node_pointer __begin_node_pointer; - 
typedef typename __traits::__void_pointer __void_pointer; __begin_node_pointer __ptr_; - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __begin_node_pointer __get_begin() const { return __ptr_; } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __node_pointer __get_unsafe_node_pointer() const { - return std::__static_fancy_pointer_cast<__node_pointer>(__ptr_); - } - _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit __forward_list_const_iterator(nullptr_t) _NOEXCEPT : __ptr_(nullptr) {} @@ -447,7 +426,7 @@ class __forward_list_const_iterator { _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI explicit __forward_list_const_iterator(__node_pointer __p) _NOEXCEPT - : __ptr_(__traits::__as_iter_node(__p)) {} + : __ptr_(std::__static_fancy_pointer_cast<__begin_node_pointer>(__p)) {} template friend class forward_list; @@ -464,14 +443,14 @@ public: __forward_list_const_iterator(__forward_list_iterator<__node_pointer> __p) _NOEXCEPT : __ptr_(__p.__ptr_) {} _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI reference operator*() const { - return __get_unsafe_node_pointer()->__get_value(); + return std::__static_fancy_pointer_cast<__node_pointer>(__ptr_)->__get_value(); } _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI pointer operator->() const { - return pointer_traits::pointer_to(__get_unsafe_node_pointer()->__get_value()); + return pointer_traits::pointer_to(std::__static_fancy_pointer_cast<__node_pointer>(__ptr_)->__get_value()); } _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __forward_list_const_iterator& operator++() { - __ptr_ = __traits::__as_iter_node(__ptr_->__next_); + __ptr_ = std::__static_fancy_pointer_cast<__begin_node_pointer>(__ptr_->__next_); return *this; } _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI __forward_list_const_iterator operator++(int) { @@ -963,7 +942,8 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 inline forward_list<_Tp, _Alloc>::forward_list(con template _LIBCPP_CONSTEXPR_SINCE_CXX26 forward_list<_Tp, 
_Alloc>::forward_list(size_type __n) { if (__n > 0) { - for (__begin_node_pointer __p = __base::__before_begin(); __n > 0; --__n, __p = __p->__next_as_begin()) { + for (__begin_node_pointer __p = __base::__before_begin(); __n > 0; + --__n, __p = std::__static_fancy_pointer_cast<__begin_node_pointer>(__p->__next_)) { __p->__next_ = this->__create_node(/* next = */ nullptr); } } @@ -974,7 +954,8 @@ template _LIBCPP_CONSTEXPR_SINCE_CXX26 forward_list<_Tp, _Alloc>::forward_list(size_type __n, const allocator_type& __base_alloc) : __base(__base_alloc) { if (__n > 0) { - for (__begin_node_pointer __p = __base::__before_begin(); __n > 0; --__n, __p = __p->__next_as_begin()) { + for (__begin_node_pointer __p = __base::__before_begin(); __n > 0; + --__n, __p = std::__static_fancy_pointer_cast<__begin_node_pointer>(__p->__next_)) { __p->__next_ = this->__create_node(/* next = */ nullptr); } } @@ -1167,7 +1148,7 @@ template template _LIBCPP_CONSTEXPR_SINCE_CXX26 typename forward_list<_Tp, _Alloc>::iterator forward_list<_Tp, _Alloc>::emplace_after(const_iterator __p, _Args&&... __args) { - __begin_node_pointer const __r = __p.__get_begin(); + __begin_node_pointer const __r = __p.__ptr_; __r->__next_ = this->__create_node(/* next = */ __r->__next_, std::forward<_Args>(__args)...); return iterator(__r->__next_); } @@ -1175,7 +1156,7 @@ forward_list<_Tp, _Alloc>::emplace_after(const_iterator __p, _Args&&... 
__args) template _LIBCPP_CONSTEXPR_SINCE_CXX26 typename forward_list<_Tp, _Alloc>::iterator forward_list<_Tp, _Alloc>::insert_after(const_iterator __p, value_type&& __v) { - __begin_node_pointer const __r = __p.__get_begin(); + __begin_node_pointer const __r = __p.__ptr_; __r->__next_ = this->__create_node(/* next = */ __r->__next_, std::move(__v)); return iterator(__r->__next_); } @@ -1185,7 +1166,7 @@ forward_list<_Tp, _Alloc>::insert_after(const_iterator __p, value_type&& __v) { template _LIBCPP_CONSTEXPR_SINCE_CXX26 typename forward_list<_Tp, _Alloc>::iterator forward_list<_Tp, _Alloc>::insert_after(const_iterator __p, const value_type& __v) { - __begin_node_pointer const __r = __p.__get_begin(); + __begin_node_pointer const __r = __p.__ptr_; __r->__next_ = this->__create_node(/* next = */ __r->__next_, __v); return iterator(__r->__next_); } @@ -1194,7 +1175,7 @@ template template _LIBCPP_CONSTEXPR_SINCE_CXX26 typename forward_list<_Tp, _Alloc>::iterator forward_list<_Tp, _Alloc>::__insert_after(const_iterator __p, size_type __n, _Args&&... 
__args) { - __begin_node_pointer __r = __p.__get_begin(); + __begin_node_pointer __r = __p.__ptr_; if (__n > 0) { __node_pointer __first = this->__create_node(/* next = */ nullptr, std::forward<_Args>(__args)...); __node_pointer __last = __first; @@ -1216,7 +1197,7 @@ forward_list<_Tp, _Alloc>::__insert_after(const_iterator __p, size_type __n, _Ar # endif // _LIBCPP_HAS_EXCEPTIONS __last->__next_ = __r->__next_; __r->__next_ = __first; - __r = __forward_node_traits<__node_pointer>::__as_iter_node(__last); + __r = std::__static_fancy_pointer_cast<__begin_node_pointer>(__last); } return iterator(__r); } @@ -1232,7 +1213,7 @@ template template _LIBCPP_CONSTEXPR_SINCE_CXX26 _LIBCPP_HIDE_FROM_ABI typename forward_list<_Tp, _Alloc>::iterator forward_list<_Tp, _Alloc>::__insert_after_with_sentinel(const_iterator __p, _InputIterator __f, _Sentinel __l) { - __begin_node_pointer __r = __p.__get_begin(); + __begin_node_pointer __r = __p.__ptr_; if (__f != __l) { __node_pointer __first = this->__create_node(/* next = */ nullptr, *__f); @@ -1257,7 +1238,7 @@ forward_list<_Tp, _Alloc>::__insert_after_with_sentinel(const_iterator __p, _Inp __last->__next_ = __r->__next_; __r->__next_ = __first; - __r = __forward_node_traits<__node_pointer>::__as_iter_node(__last); + __r = std::__static_fancy_pointer_cast<__begin_node_pointer>(__last); } return iterator(__r); @@ -1266,7 +1247,7 @@ forward_list<_Tp, _Alloc>::__insert_after_with_sentinel(const_iterator __p, _Inp template _LIBCPP_CONSTEXPR_SINCE_CXX26 typename forward_list<_Tp, _Alloc>::iterator forward_list<_Tp, _Alloc>::erase_after(const_iterator __f) { - __begin_node_pointer __p = __f.__get_begin(); + __begin_node_pointer __p = __f.__ptr_; __node_pointer __n = __p->__next_; __p->__next_ = __n->__next_; this->__delete_node(__n); @@ -1276,9 +1257,9 @@ forward_list<_Tp, _Alloc>::erase_after(const_iterator __f) { template _LIBCPP_CONSTEXPR_SINCE_CXX26 typename forward_list<_Tp, _Alloc>::iterator forward_list<_Tp, 
_Alloc>::erase_after(const_iterator __f, const_iterator __l) { - __node_pointer __e = __l.__get_unsafe_node_pointer(); + __node_pointer __e = std::__static_fancy_pointer_cast<__node_pointer>(__l.__ptr_); if (__f != __l) { - __begin_node_pointer __bp = __f.__get_begin(); + __begin_node_pointer __bp = __f.__ptr_; __node_pointer __n = __bp->__next_; if (__n != __e) { @@ -1324,13 +1305,13 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void forward_list<_Tp, _Alloc>::resize(size_type _ template _LIBCPP_CONSTEXPR_SINCE_CXX26 void forward_list<_Tp, _Alloc>::splice_after(const_iterator __p, forward_list& __x) { if (!__x.empty()) { - if (__p.__get_begin()->__next_ != nullptr) { + if (__p.__ptr_->__next_ != nullptr) { const_iterator __lm1 = __x.before_begin(); - while (__lm1.__get_begin()->__next_ != nullptr) + while (__lm1.__ptr_->__next_ != nullptr) ++__lm1; - __lm1.__get_begin()->__next_ = __p.__get_begin()->__next_; + __lm1.__ptr_->__next_ = __p.__ptr_->__next_; } - __p.__get_begin()->__next_ = __x.__before_begin()->__next_; + __p.__ptr_->__next_ = __x.__before_begin()->__next_; __x.__before_begin()->__next_ = nullptr; } } @@ -1340,9 +1321,9 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void forward_list<_Tp, _Alloc>::splice_after(const_iterator __p, forward_list& /*__other*/, const_iterator __i) { const_iterator __lm1 = std::next(__i); if (__p != __i && __p != __lm1) { - __i.__get_begin()->__next_ = __lm1.__get_begin()->__next_; - __lm1.__get_begin()->__next_ = __p.__get_begin()->__next_; - __p.__get_begin()->__next_ = __lm1.__get_unsafe_node_pointer(); + __i.__ptr_->__next_ = __lm1.__ptr_->__next_; + __lm1.__ptr_->__next_ = __p.__ptr_->__next_; + __p.__ptr_->__next_ = std::__static_fancy_pointer_cast<__node_pointer>(__lm1.__ptr_); } } @@ -1351,12 +1332,12 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 void forward_list<_Tp, _Alloc>::splice_after( const_iterator __p, forward_list& /*__other*/, const_iterator __f, const_iterator __l) { if (__f != __l && __p != __f) { const_iterator __lm1 = __f; - while 
(__lm1.__get_begin()->__next_ != __l.__get_begin()) + while (__lm1.__ptr_->__next_ != __l.__ptr_) ++__lm1; if (__f != __lm1) { - __lm1.__get_begin()->__next_ = __p.__get_begin()->__next_; - __p.__get_begin()->__next_ = __f.__get_begin()->__next_; - __f.__get_begin()->__next_ = __l.__get_unsafe_node_pointer(); + __lm1.__ptr_->__next_ = __p.__ptr_->__next_; + __p.__ptr_->__next_ = __f.__ptr_->__next_; + __f.__ptr_->__next_ = std::__static_fancy_pointer_cast<__node_pointer>(__l.__ptr_); } } } @@ -1385,8 +1366,8 @@ forward_list<_Tp, _Alloc>::remove(const value_type& __v) { forward_list<_Tp, _Alloc> __deleted_nodes(get_allocator()); // collect the nodes we're removing typename forward_list<_Tp, _Alloc>::size_type __count_removed = 0; const iterator __e = end(); - for (iterator __i = before_begin(); __i.__get_begin()->__next_ != nullptr;) { - if (__i.__get_begin()->__next_->__get_value() == __v) { + for (iterator __i = before_begin(); __i.__ptr_->__next_ != nullptr;) { + if (__i.__ptr_->__next_->__get_value() == __v) { ++__count_removed; iterator __j = std::next(__i, 2); for (; __j != __e && *__j == __v; ++__j) @@ -1409,8 +1390,8 @@ forward_list<_Tp, _Alloc>::remove_if(_Predicate __pred) { forward_list<_Tp, _Alloc> __deleted_nodes(get_allocator()); // collect the nodes we're removing typename forward_list<_Tp, _Alloc>::size_type __count_removed = 0; const iterator __e = end(); - for (iterator __i = before_begin(); __i.__get_begin()->__next_ != nullptr;) { - if (__pred(__i.__get_begin()->__next_->__get_value())) { + for (iterator __i = before_begin(); __i.__ptr_->__next_ != nullptr;) { + if (__pred(__i.__ptr_->__next_->__get_value())) { ++__count_removed; iterator __j = std::next(__i, 2); for (; __j != __e && __pred(*__j); ++__j) @@ -1436,7 +1417,7 @@ forward_list<_Tp, _Alloc>::unique(_BinaryPredicate __binary_pred) { iterator __j = std::next(__i); for (; __j != __e && __binary_pred(*__i, *__j); ++__j) ++__count_removed; - if (__i.__get_begin()->__next_ != 
__j.__get_unsafe_node_pointer()) + if (__i.__ptr_->__next_ != std::__static_fancy_pointer_cast<__node_pointer>(__j.__ptr_)) __deleted_nodes.splice_after(__deleted_nodes.before_begin(), *this, __i, __j); __i = __j; } @@ -1516,7 +1497,7 @@ forward_list<_Tp, _Alloc>::__sort(__node_pointer __f1, difference_type __sz, _Co } difference_type __sz1 = __sz / 2; difference_type __sz2 = __sz - __sz1; - __node_pointer __t = std::next(iterator(__f1), __sz1 - 1).__get_unsafe_node_pointer(); + __node_pointer __t = std::__static_fancy_pointer_cast<__node_pointer>(std::next(iterator(__f1), __sz1 - 1).__ptr_); __node_pointer __f2 = __t->__next_; __t->__next_ = nullptr; return __merge(__sort(__f1, __sz1, __comp), __sort(__f2, __sz2, __comp), __comp); -- cgit v1.2.3 From 9827440f1e723423baf4c235e844eb8ac48a8f97 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Wed, 18 Jun 2025 12:22:47 -0400 Subject: [libc++] Optimize ranges::{for_each, for_each_n} for segmented iterators (#132896) Previously, the segmented iterator optimization was limited to `std::{for_each, for_each_n}`. This patch extends the optimization to `std::ranges::for_each` and `std::ranges::for_each_n`, ensuring consistent optimizations across these algorithms. This patch first generalizes the `std` algorithms by introducing a `Projection` parameter, which is set to `__identity` for the `std` algorithms. Then we let the `ranges` algorithms directly call their `std` counterparts with a general `__proj` argument. Benchmarks demonstrate performance improvements of up to 21.4x for ``std::deque::iterator`` and 22.3x for ``join_view`` of ``vector<vector<int>>``. Addresses a subtask of #102817. 
--- libcxx/docs/ReleaseNotes/21.rst | 5 ++- libcxx/include/__algorithm/for_each.h | 35 ++++++++++------ libcxx/include/__algorithm/for_each_n.h | 26 +++++++----- libcxx/include/__algorithm/ranges_for_each.h | 18 +++++++-- libcxx/include/__algorithm/ranges_for_each_n.h | 9 ++--- libcxx/include/experimental/iterator | 1 + libcxx/include/mutex | 1 + libcxx/include/shared_mutex | 1 + .../algorithms/nonmodifying/for_each.bench.cpp | 43 ++++++++++++++++++-- .../algorithms/nonmodifying/for_each_n.bench.cpp | 23 +++++------ .../alg.foreach/ranges.for_each.pass.cpp | 46 +++++++++++++++++++--- .../alg.foreach/ranges.for_each_n.pass.cpp | 46 +++++++++++++++++++++- 12 files changed, 197 insertions(+), 57 deletions(-) diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst index 2a5b90750eaf..8661e5898fbc 100644 --- a/libcxx/docs/ReleaseNotes/21.rst +++ b/libcxx/docs/ReleaseNotes/21.rst @@ -70,8 +70,9 @@ Improvements and New Features - The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available in C++23 and later. -- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of - up to 17.7x for ``std::deque`` iterators, and up to 13.9x for ``std::join_view>>`` iterators. +- The ``std::for_each_n``, ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for + segmented iterators, resulting in a performance improvement of up to 17.7x for ``std::deque`` iterators, and up + to 13.9x for ``std::join_view>>`` iterators. - The ``bitset::to_string`` function has been optimized, resulting in a performance improvement of up to 8.3x for bitsets with uniformly distributed zeros and ones, and up to 13.5x and 16.1x for sparse and dense bitsets, respectively. 
diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h index b6c2c7c056ed..4167eec3506e 100644 --- a/libcxx/include/__algorithm/for_each.h +++ b/libcxx/include/__algorithm/for_each.h @@ -12,41 +12,54 @@ #include <__algorithm/for_each_segment.h> #include <__config> +#include <__functional/identity.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __for_each(_InputIterator __first, _Sent __last, _Func& __f) { +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator +__for_each(_InputIterator __first, _Sent __last, _Func& __f, _Proj& __proj) { for (; __first != __last; ++__first) - __f(*__first); + std::__invoke(__f, std::__invoke(__proj, *__first)); + return __first; } #ifndef _LIBCPP_CXX03_LANG template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __func) { +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator +__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Func& __func, _Proj& __proj) { using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator; std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) { - std::__for_each(__lfirst, __llast, __func); + std::__for_each(__lfirst, __llast, __func, __proj); }); + return __last; } #endif // !_LIBCPP_CXX03_LANG -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function -for_each(_InputIterator __first, _InputIterator __last, _Function __f) { - std::__for_each(__first, __last, __f); +template 
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Func +for_each(_InputIterator __first, _InputIterator __last, _Func __f) { + __identity __proj; + std::__for_each(__first, __last, __f, __proj); return __f; } _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_FOR_EACH_H diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h index 29351ec39f4e..9a6c6bb5175d 100644 --- a/libcxx/include/__algorithm/for_each_n.h +++ b/libcxx/include/__algorithm/for_each_n.h @@ -13,10 +13,12 @@ #include <__algorithm/for_each.h> #include <__algorithm/for_each_n_segment.h> #include <__config> +#include <__functional/identity.h> #include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/disjunction.h> #include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> #include <__type_traits/negation.h> #include <__utility/convert_to_integral.h> #include <__utility/move.h> @@ -33,16 +35,17 @@ _LIBCPP_BEGIN_NAMESPACE_STD template ::value && _Or< _Not<__is_segmented_iterator<_InputIterator> >, _Not<__has_random_access_local_iterator<_InputIterator> > >::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator -__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) { +__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) { typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize; _IntegralSize __n = __orig_n; while (__n > 0) { - __f(*__first); + std::__invoke(__f, std::__invoke(__proj, *__first)); ++__first; --__n; } @@ -52,39 +55,42 @@ __for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) { template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter -__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) { +__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f, _Proj& __proj) { typename std::iterator_traits<_RandIter>::difference_type __n = 
__orig_n; auto __last = __first + __n; - std::__for_each(__first, __last, __f); - return std::move(__last); + std::__for_each(__first, __last, __f, __proj); + return __last; } #ifndef _LIBCPP_CXX03_LANG template ::value && __is_segmented_iterator<_SegmentedIterator>::value && __has_random_access_iterator_category< typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator -__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f) { +__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) { using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator; return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) { - std::__for_each(__lfirst, __llast, __f); + std::__for_each(__lfirst, __llast, __f, __proj); }); } #endif // !_LIBCPP_CXX03_LANG #if _LIBCPP_STD_VER >= 17 -template +template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator -for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) { - return std::__for_each_n(__first, __orig_n, __f); +for_each_n(_InputIterator __first, _Size __orig_n, _Func __f) { + __identity __proj; + return std::__for_each_n(__first, __orig_n, __f, __proj); } #endif // _LIBCPP_STD_VER >= 17 diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h index de39bc552275..e9c84e8583f8 100644 --- a/libcxx/include/__algorithm/ranges_for_each.h +++ b/libcxx/include/__algorithm/ranges_for_each.h @@ -9,10 +9,12 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H #define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H +#include <__algorithm/for_each.h> +#include <__algorithm/for_each_n.h> #include <__algorithm/in_fun_result.h> +#include <__concepts/assignable.h> #include <__config> #include <__functional/identity.h> -#include 
<__functional/invoke.h> #include <__iterator/concepts.h> #include <__iterator/projected.h> #include <__ranges/access.h> @@ -41,9 +43,17 @@ private: template _LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func> __for_each_impl(_Iter __first, _Sent __last, _Func& __func, _Proj& __proj) { - for (; __first != __last; ++__first) - std::invoke(__func, std::invoke(__proj, *__first)); - return {std::move(__first), std::move(__func)}; + // In the case where we have different iterator and sentinel types, the segmented iterator optimization + // in std::for_each will not kick in. Therefore, we prefer std::for_each_n in that case (whenever we can + // obtain the `n`). + if constexpr (!std::assignable_from<_Iter&, _Sent> && std::sized_sentinel_for<_Sent, _Iter>) { + auto __n = __last - __first; + auto __end = std::__for_each_n(std::move(__first), __n, __func, __proj); + return {std::move(__end), std::move(__func)}; + } else { + auto __end = std::__for_each(std::move(__first), std::move(__last), __func, __proj); + return {std::move(__end), std::move(__func)}; + } } public: diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h index 603cb723233c..3aab1b79c10a 100644 --- a/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/libcxx/include/__algorithm/ranges_for_each_n.h @@ -9,10 +9,10 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H #define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H +#include <__algorithm/for_each_n.h> #include <__algorithm/in_fun_result.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> @@ -40,11 +40,8 @@ struct __for_each_n { template > _Func> _LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func> operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const { - while (__count-- > 0) { - 
std::invoke(__func, std::invoke(__proj, *__first)); - ++__first; - } - return {std::move(__first), std::move(__func)}; + auto __last = std::__for_each_n(std::move(__first), __count, __func, __proj); + return {std::move(__last), std::move(__func)}; } }; diff --git a/libcxx/include/experimental/iterator b/libcxx/include/experimental/iterator index d92613845a66..565bb83903ac 100644 --- a/libcxx/include/experimental/iterator +++ b/libcxx/include/experimental/iterator @@ -127,6 +127,7 @@ _LIBCPP_POP_MACROS # if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include # include +# include # include # endif #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) diff --git a/libcxx/include/mutex b/libcxx/include/mutex index e058b3113073..f616bad3ac17 100644 --- a/libcxx/include/mutex +++ b/libcxx/include/mutex @@ -504,6 +504,7 @@ _LIBCPP_POP_MACROS # include # include # include +# include # include # include # include diff --git a/libcxx/include/shared_mutex b/libcxx/include/shared_mutex index e6759e413dfe..6469c02ca587 100644 --- a/libcxx/include/shared_mutex +++ b/libcxx/include/shared_mutex @@ -457,6 +457,7 @@ _LIBCPP_POP_MACROS # endif // _LIBCPP_HAS_THREADS # if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # endif #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp index 760accbe4d92..f58f336f8b89 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,7 @@ int main(int argc, char** argv) { // {std,ranges}::for_each { auto bm = [](std::string name, auto for_each) { + using ElemType = typename Container::value_type; 
benchmark::RegisterBenchmark( name, [for_each](auto& st) { @@ -33,15 +35,14 @@ int main(int argc, char** argv) { for ([[maybe_unused]] auto _ : st) { benchmark::DoNotOptimize(c); - auto result = for_each(first, last, [](int& x) { x = std::clamp(x, 10, 100); }); + auto result = for_each(first, last, [](ElemType& x) { x = std::clamp(x, 10, 100); }); benchmark::DoNotOptimize(result); } }) ->Arg(8) ->Arg(32) ->Arg(50) // non power-of-two - ->Arg(8192) - ->Arg(1 << 20); + ->Arg(8192); }; bm.operator()>("std::for_each(vector)", std_for_each); bm.operator()>("std::for_each(deque)", std_for_each); @@ -51,6 +52,42 @@ int main(int argc, char** argv) { bm.operator()>("rng::for_each(list)", std::ranges::for_each); } + // {std,ranges}::for_each for join_view + { + auto bm = [](std::string name, auto for_each) { + using C1 = typename Container::value_type; + using ElemType = typename C1::value_type; + + benchmark::RegisterBenchmark( + name, + [for_each](auto& st) { + std::size_t const size = st.range(0); + std::size_t const seg_size = 256; + std::size_t const segments = (size + seg_size - 1) / seg_size; + Container c(segments); + for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) { + c[i].resize(std::min(seg_size, n), ElemType(1)); + } + + auto view = c | std::views::join; + auto first = view.begin(); + auto last = view.end(); + + for ([[maybe_unused]] auto _ : st) { + benchmark::DoNotOptimize(c); + auto result = for_each(first, last, [](ElemType& x) { x = std::clamp(x, 10, 100); }); + benchmark::DoNotOptimize(result); + } + }) + ->Arg(8) + ->Arg(32) + ->Arg(50) // non power-of-two + ->Arg(8192); + }; + bm.operator()>>("std::for_each(join_view(vector>))", std_for_each); + bm.operator()>>("rng::for_each(join_view(vector>)", std::ranges::for_each); + } + benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); benchmark::Shutdown(); diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp 
b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp index 784708c7e01e..e643e647722c 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp @@ -21,7 +21,7 @@ int main(int argc, char** argv) { auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); }; - // std::for_each_n + // {std,ranges}::for_each_n { auto bm = [](std::string name, auto for_each_n) { using ElemType = typename Container::value_type; @@ -41,19 +41,17 @@ int main(int argc, char** argv) { ->Arg(8) ->Arg(32) ->Arg(50) // non power-of-two - ->Arg(1024) - ->Arg(4096) - ->Arg(8192) - ->Arg(1 << 14) - ->Arg(1 << 16) - ->Arg(1 << 18); + ->Arg(8192); }; bm.operator()>("std::for_each_n(vector)", std_for_each_n); bm.operator()>("std::for_each_n(deque)", std_for_each_n); bm.operator()>("std::for_each_n(list)", std_for_each_n); + bm.operator()>("rng::for_each_n(vector)", std::ranges::for_each_n); + bm.operator()>("rng::for_each_n(deque)", std::ranges::for_each_n); + bm.operator()>("rng::for_each_n(list)", std::ranges::for_each_n); } - // std::for_each_n for join_view + // {std,ranges}::for_each_n for join_view { auto bm = [](std::string name, auto for_each_n) { using C1 = typename Container::value_type; @@ -81,14 +79,11 @@ int main(int argc, char** argv) { ->Arg(8) ->Arg(32) ->Arg(50) // non power-of-two - ->Arg(1024) - ->Arg(4096) - ->Arg(8192) - ->Arg(1 << 14) - ->Arg(1 << 16) - ->Arg(1 << 18); + ->Arg(8192); }; bm.operator()>>("std::for_each_n(join_view(vector>))", std_for_each_n); + bm.operator()>>( + "rng::for_each_n(join_view(vector>)", std::ranges::for_each_n); } benchmark::Initialize(&argc, argv); diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp index 8b9b6e82cbcb..a6d0afde3186 100644 --- 
a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp @@ -20,7 +20,10 @@ #include #include +#include +#include #include +#include #include "almost_satisfies_types.h" #include "test_iterators.h" @@ -30,7 +33,7 @@ struct Callable { }; template -concept HasForEachIt = requires (Iter iter, Sent sent) { std::ranges::for_each(iter, sent, Callable{}); }; +concept HasForEachIt = requires(Iter iter, Sent sent) { std::ranges::for_each(iter, sent, Callable{}); }; static_assert(HasForEachIt); static_assert(!HasForEachIt); @@ -47,7 +50,7 @@ static_assert(!HasForEachItFunc); static_assert(!HasForEachItFunc); template -concept HasForEachR = requires (Range range) { std::ranges::for_each(range, Callable{}); }; +concept HasForEachR = requires(Range range) { std::ranges::for_each(range, Callable{}); }; static_assert(HasForEachR>); static_assert(!HasForEachR); @@ -68,7 +71,7 @@ constexpr void test_iterator() { { // simple test { auto func = [i = 0](int& a) mutable { a += i++; }; - int a[] = {1, 6, 3, 4}; + int a[] = {1, 6, 3, 4}; std::same_as> decltype(auto) ret = std::ranges::for_each(Iter(a), Sent(Iter(a + 4)), func); assert(a[0] == 1); @@ -81,8 +84,8 @@ constexpr void test_iterator() { assert(i == 4); } { - auto func = [i = 0](int& a) mutable { a += i++; }; - int a[] = {1, 6, 3, 4}; + auto func = [i = 0](int& a) mutable { a += i++; }; + int a[] = {1, 6, 3, 4}; auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 4))); std::same_as> decltype(auto) ret = std::ranges::for_each(range, func); @@ -110,6 +113,30 @@ constexpr void test_iterator() { } } +struct deque_test { + std::deque* d_; + int* i_; + + deque_test(std::deque& d, int& i) : d_(&d), i_(&i) {} + + void operator()(int& v) { + assert(&(*d_)[*i_] == &v); + ++*i_; + } +}; + +/*TEST_CONSTEXPR_CXX26*/ +void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr + // 
check that segmented deque iterators work properly + int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049}; + for (const int size : sizes) { + std::deque d(size); + int index = 0; + + std::ranges::for_each(d, deque_test(d, index)); + } +} + constexpr bool test() { test_iterator, sentinel_wrapper>>(); test_iterator, sentinel_wrapper>>(); @@ -146,6 +173,15 @@ constexpr bool test() { } } + if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr + test_segmented_deque_iterator(); + + { + std::vector> vec = {{0}, {1, 2}, {3, 4, 5}, {6, 7, 8, 9}, {10}, {11, 12, 13}}; + auto v = vec | std::views::join; + std::ranges::for_each(v, [i = 0](int x) mutable { assert(x == 2 * i++); }, [](int x) { return 2 * x; }); + } + return true; } diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp index d4b2d053d08c..157876369423 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp @@ -17,7 +17,12 @@ #include #include +#include +#include +#include #include +#include +#include #include "almost_satisfies_types.h" #include "test_iterators.h" @@ -27,7 +32,7 @@ struct Callable { }; template -concept HasForEachN = requires (Iter iter) { std::ranges::for_each_n(iter, 0, Callable{}); }; +concept HasForEachN = requires(Iter iter) { std::ranges::for_each_n(iter, 0, Callable{}); }; static_assert(HasForEachN); static_assert(!HasForEachN); @@ -45,7 +50,7 @@ template constexpr void test_iterator() { { // simple test auto func = [i = 0](int& a) mutable { a += i++; }; - int a[] = {1, 6, 3, 4}; + int a[] = {1, 6, 3, 4}; std::same_as> auto ret = std::ranges::for_each_n(Iter(a), 4, func); assert(a[0] == 1); @@ -64,6 +69,30 @@ constexpr void test_iterator() { } } +struct deque_test { 
+ std::deque* d_; + int* i_; + + deque_test(std::deque& d, int& i) : d_(&d), i_(&i) {} + + void operator()(int& v) { + assert(&(*d_)[*i_] == &v); + ++*i_; + } +}; + +/*TEST_CONSTEXPR_CXX26*/ +void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr + // check that segmented deque iterators work properly + int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049}; + for (const int size : sizes) { + std::deque d(size); + int index = 0; + + std::ranges::for_each_n(d.begin(), d.size(), deque_test(d, index)); + } +} + constexpr bool test() { test_iterator>(); test_iterator>(); @@ -89,6 +118,19 @@ constexpr bool test() { assert(a[2].other == 6); } + if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr + test_segmented_deque_iterator(); + + { + std::vector> vec = {{0}, {1, 2}, {3, 4, 5}, {6, 7, 8, 9}, {10}, {11, 12, 13}}; + auto v = vec | std::views::join; + std::ranges::for_each_n( + v.begin(), + std::ranges::distance(v), + [i = 0](int x) mutable { assert(x == 2 * i++); }, + [](int x) { return 2 * x; }); + } + return true; } -- cgit v1.2.3 From 00189211486d052b25429f11790ef5486cf9d3ce Mon Sep 17 00:00:00 2001 From: woruyu <99597449+woruyu@users.noreply.github.com> Date: Thu, 19 Jun 2025 00:22:53 +0800 Subject: [DAG] add (~a | x) & (a | y) -> (a & (x ^ y)) ^y for foldMaskedMerge (#144342) ### Summary This PR resolves https://github.com/llvm/llvm-project/issues/143864 Add (~a | x) & (a | y) -> (a & (x ^ y)) ^y for foldMaskedMerge func using SDPatternMatch. After adding this pattern, run ```ninja check-llvm-codegen```; all other cases remain unchanged, so I add a testcase (fold-masked-merge-demorgan.ll) for it --------- Co-authored-by: Simon Pilgrim --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 24 +- .../test/CodeGen/X86/fold-masked-merge-demorgan.ll | 267 +++++++++++++++++++++ 2 files changed, 284 insertions(+), 7 deletions(-) create mode 100644 
llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 934199e414c7..0e078f9dd88b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7206,24 +7206,30 @@ static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W); } -/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the -/// equivalent `((x ^ y) & m) ^ y)` pattern. -/// This is typically a better representation for targets without a fused -/// "and-not" operation. +/// Fold "masked merge" expressions like `(m & x) | (~m & y)` and its DeMorgan +/// variant `(~m | x) & (m | y)` into the equivalent `((x ^ y) & m) ^ y)` +/// pattern. This is typically a better representation for targets without a +/// fused "and-not" operation. static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &DL) { // Note that masked-merge variants using XOR or ADD expressions are - // normalized to OR by InstCombine so we only check for OR. - assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node"); + // normalized to OR by InstCombine so we only check for OR or AND. + assert(Node->getOpcode() == ISD::OR || + Node->getOpcode() == ISD::AND && + "Must be called with ISD::OR or ISD::AND node"); // If the target supports and-not, don't fold this. 
if (TLI.hasAndNot(SDValue(Node, 0))) return SDValue(); SDValue M, X, Y; + if (sd_match(Node, m_Or(m_OneUse(m_And(m_OneUse(m_Not(m_Value(M))), m_Value(Y))), - m_OneUse(m_And(m_Deferred(M), m_Value(X)))))) { + m_OneUse(m_And(m_Deferred(M), m_Value(X))))) || + sd_match(Node, + m_And(m_OneUse(m_Or(m_OneUse(m_Not(m_Value(M))), m_Value(X))), + m_OneUse(m_Or(m_Deferred(M), m_Value(Y)))))) { EVT VT = M.getValueType(); SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, Y); SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor, M); @@ -7678,6 +7684,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG)) return R; + if (VT.isScalarInteger() && VT != MVT::i1) + if (SDValue R = foldMaskedMerge(N, DAG, TLI, DL)) + return R; + return SDValue(); } diff --git a/llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll b/llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll new file mode 100644 index 000000000000..fe27b3c73be0 --- /dev/null +++ b/llvm/test/CodeGen/X86/fold-masked-merge-demorgan.ll @@ -0,0 +1,267 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -o - %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=CHECK,NOBMI +; RUN: llc -o - %s -mtriple=x86_64-- -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BMI +; +; test that masked-merge code is generated as "xor;and;xor" sequence or +; "andn ; and; or" if and-not is available. 
+ +define i32 @masked_merge0_demorgan(i32 %a0, i32 %a1, i32 %a2) { +; NOBMI-LABEL: masked_merge0_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movl %esi, %eax +; NOBMI-NEXT: xorl %edx, %eax +; NOBMI-NEXT: andl %edi, %eax +; NOBMI-NEXT: xorl %edx, %eax +; NOBMI-NEXT: retq +; +; BMI-LABEL: masked_merge0_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: orl %edi, %edx +; BMI-NEXT: andnl %edi, %esi, %eax +; BMI-NEXT: andnl %edx, %eax, %eax +; BMI-NEXT: retq + %not = xor i32 %a0, -1 + %or0 = or i32 %not, %a1 + %or1 = or i32 %a0, %a2 + %and = and i32 %or0, %or1 + ret i32 %and +} + +define i16 @masked_merge1_demorgan(i16 %a0, i16 %a1, i16 %a2) { +; NOBMI-LABEL: masked_merge1_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movl %esi, %eax +; NOBMI-NEXT: xorl %edx, %eax +; NOBMI-NEXT: andl %edi, %eax +; NOBMI-NEXT: xorl %edx, %eax +; NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; NOBMI-NEXT: retq +; +; BMI-LABEL: masked_merge1_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: andnl %edx, %edi, %eax +; BMI-NEXT: andl %edi, %esi +; BMI-NEXT: orl %esi, %eax +; BMI-NEXT: # kill: def $ax killed $ax killed $eax +; BMI-NEXT: retq + %not = xor i16 %a0, -1 + %or0 = or i16 %not, %a1 + %or1 = or i16 %a0, %a2 + %and = and i16 %or0, %or1 + ret i16 %and +} + +define i8 @masked_merge2_demorgan(i8 %a0, i8 %a1, i8 %a2) { +; CHECK-LABEL: masked_merge2_demorgan: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retq + %not = xor i8 %a0, -1 + %or0 = or i8 %not, %a1 + %or1 = or i8 %a0, %a1 + %and = and i8 %or0, %or1 + ret i8 %and +} + +define i64 @masked_merge3_demorgan(i64 %a0, i64 %a1, i64 %a2) { +; NOBMI-LABEL: masked_merge3_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movq %rsi, %rax +; NOBMI-NEXT: notq %rdx +; NOBMI-NEXT: xorq %rdx, %rax +; NOBMI-NEXT: notq %rax +; NOBMI-NEXT: andq %rdi, %rax +; NOBMI-NEXT: xorq %rdx, %rax +; NOBMI-NEXT: retq +; +; BMI-LABEL: masked_merge3_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: andnq %rdx, %rdi, %rax +; 
BMI-NEXT: andq %rdi, %rsi +; BMI-NEXT: notq %rsi +; BMI-NEXT: andnq %rsi, %rax, %rax +; BMI-NEXT: retq + %not_a0 = xor i64 %a0, -1 + %not_a1 = xor i64 %a1, -1 + %not_a2 = xor i64 %a2, -1 + %or0 = or i64 %not_a0, %not_a1 + %or1 = or i64 %a0, %not_a2 + %and = and i64 %or0, %or1 + ret i64 %and +} + +define i32 @not_a_masked_merge0_demorgan(i32 %a0, i32 %a1, i32 %a2) { +; CHECK-LABEL: not_a_masked_merge0_demorgan: +; CHECK: # %bb.0: +; CHECK-NEXT: orl %edi, %edx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %eax +; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: andl %edx, %eax +; CHECK-NEXT: retq + %not_a_not = sub i32 0, %a0 + %or0 = or i32 %not_a_not, %a1 + %or1 = or i32 %a0, %a2 + %and = and i32 %or0, %or1 + ret i32 %and +} + +; not a masked merge: `not` operand does not match another `and`-operand. +define i32 @not_a_masked_merge1_demorgan(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { +; NOBMI-LABEL: not_a_masked_merge1_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movl %ecx, %eax +; NOBMI-NEXT: orl %edx, %edi +; NOBMI-NEXT: notl %eax +; NOBMI-NEXT: orl %esi, %eax +; NOBMI-NEXT: andl %edi, %eax +; NOBMI-NEXT: retq +; +; BMI-LABEL: not_a_masked_merge1_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: orl %edx, %edi +; BMI-NEXT: andnl %ecx, %esi, %eax +; BMI-NEXT: andnl %edi, %eax, %eax +; BMI-NEXT: retq + %or1 = or i32 %a0, %a2 + %not = xor i32 %a3, -1 + %or0 = or i32 %not, %a1 + %and = and i32 %or0, %or1 + ret i32 %and +} + +; not a masked merge: one of the operands of `and` is not an `or`. 
+define i32 @not_a_masked_merge2_demorgan(i32 %a0, i32 %a1, i32 %a2) { +; NOBMI-LABEL: not_a_masked_merge2_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movl %edi, %eax +; NOBMI-NEXT: andl %edi, %edx +; NOBMI-NEXT: notl %eax +; NOBMI-NEXT: orl %esi, %eax +; NOBMI-NEXT: andl %edx, %eax +; NOBMI-NEXT: retq +; +; BMI-LABEL: not_a_masked_merge2_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: andl %edi, %edx +; BMI-NEXT: andnl %edi, %esi, %eax +; BMI-NEXT: andnl %edx, %eax, %eax +; BMI-NEXT: retq + %not_an_or1 = and i32 %a0, %a2 + %not = xor i32 %a0, -1 + %or0 = or i32 %not, %a1 + %and = and i32 %or0, %not_an_or1 + ret i32 %and +} + +define i32 @not_a_masked_merge3_demorgan(i32 %a0, i32 %a1, i32 %a2) { +; NOBMI-LABEL: not_a_masked_merge3_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movl %esi, %eax +; NOBMI-NEXT: orl %edi, %edx +; NOBMI-NEXT: xorl %edi, %eax +; NOBMI-NEXT: notl %eax +; NOBMI-NEXT: andl %edx, %eax +; NOBMI-NEXT: retq +; +; BMI-LABEL: not_a_masked_merge3_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: orl %edi, %edx +; BMI-NEXT: xorl %edi, %esi +; BMI-NEXT: andnl %edx, %esi, %eax +; BMI-NEXT: retq + %or1 = or i32 %a0, %a2 + %not = xor i32 %a0, -1 + %not_an_or0 = xor i32 %not, %a1 + %and = and i32 %not_an_or0, %or1 + ret i32 %and +} + +; not a masked merge: `not` operand must not be on same `or`. +define i32 @not_a_masked_merge4_demorgan(i32 %a0, i32 %a1, i32 %a2) { +; CHECK-LABEL: not_a_masked_merge4_demorgan: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orl %edx, %eax +; CHECK-NEXT: retq + %or1 = or i32 %a0, %a2 + %not = xor i32 %a1, -1 + %or0 = or i32 %not, %a1 + %and = and i32 %or0, %or1 + ret i32 %and +} + +; should not transform when operands have multiple users. 
+define i32 @masked_merge_no_transform0_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) { +; NOBMI-LABEL: masked_merge_no_transform0_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: orl %edi, %edx +; NOBMI-NEXT: movl %edi, %eax +; NOBMI-NEXT: notl %eax +; NOBMI-NEXT: orl %esi, %eax +; NOBMI-NEXT: andl %edx, %eax +; NOBMI-NEXT: movl %edx, (%rcx) +; NOBMI-NEXT: retq +; +; BMI-LABEL: masked_merge_no_transform0_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: orl %edi, %edx +; BMI-NEXT: andnl %edi, %esi, %eax +; BMI-NEXT: andnl %edx, %eax, %eax +; BMI-NEXT: movl %edx, (%rcx) +; BMI-NEXT: retq + %not = xor i32 %a0, -1 + %or0 = or i32 %not, %a1 + %or1 = or i32 %a0, %a2 + %and = and i32 %or0, %or1 + store i32 %or1, ptr %p1 + ret i32 %and +} + +; should not transform when operands have multiple users. +define i32 @masked_merge_no_transform1_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) { +; NOBMI-LABEL: masked_merge_no_transform1_demorgan: +; NOBMI: # %bb.0: +; NOBMI-NEXT: movl %edx, %eax +; NOBMI-NEXT: orl %edi, %eax +; NOBMI-NEXT: notl %edi +; NOBMI-NEXT: orl %edi, %esi +; NOBMI-NEXT: andl %esi, %eax +; NOBMI-NEXT: movl %edi, (%rcx) +; NOBMI-NEXT: retq +; +; BMI-LABEL: masked_merge_no_transform1_demorgan: +; BMI: # %bb.0: +; BMI-NEXT: orl %edi, %edx +; BMI-NEXT: andnl %edi, %esi, %eax +; BMI-NEXT: notl %edi +; BMI-NEXT: andnl %edx, %eax, %eax +; BMI-NEXT: movl %edi, (%rcx) +; BMI-NEXT: retq + %not = xor i32 %a0, -1 + %or0 = or i32 %not, %a1 + %or1 = or i32 %a0, %a2 + %and = and i32 %or0, %or1 + store i32 %not, ptr %p1 + ret i32 %and +} + +; should not transform when operands have multiple users. 
+define i32 @masked_merge_no_transform2_demorgan(i32 %a0, i32 %a1, i32 %a2, ptr %p1) { +; CHECK-LABEL: masked_merge_no_transform2_demorgan: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: notl %edi +; CHECK-NEXT: orl %esi, %edi +; CHECK-NEXT: andl %edi, %eax +; CHECK-NEXT: movl %edi, (%rcx) +; CHECK-NEXT: retq + %not = xor i32 %a0, -1 + %or0 = or i32 %not, %a1 + %or1 = or i32 %a0, %a2 + %and = and i32 %or0, %or1 + store i32 %or0, ptr %p1 + ret i32 %and +} -- cgit v1.2.3 From fe3933da15b5bc635bce156f1f8d11a784316a07 Mon Sep 17 00:00:00 2001 From: Yang Bai Date: Thu, 19 Jun 2025 00:26:04 +0800 Subject: [mlir][vector] Support complete folding in single pass for vector.insert/vector.extract (#142124) ### Description This patch improves the folding efficiency of `vector.insert` and `vector.extract` operations by not returning early after successfully converting dynamic indices to static indices. This PR also renames the test pass `TestConstantFold` to `TestSingleFold` and adds comprehensive documentation explaining the single-pass folding behavior. ### Motivation Since the `OpBuilder::createOrFold` function only calls `fold` **once**, the current `fold` methods of `vector.insert` and `vector.extract` may leave the op in a state that can be folded further. For example, consider the following un-folded IR: ``` %v1 = vector.insert %e1, %v0 [0] : f32 into vector<128xf32> %c0 = arith.constant 0 : index %e2 = vector.extract %v1[%c0] : f32 from vector<128xf32> ``` If we use `createOrFold` to create the `vector.extract` op, then the result will be: ``` %v1 = vector.insert %e1, %v0 [0] : f32 into vector<128xf32> %e2 = vector.extract %v1[0] : f32 from vector<128xf32> ``` But this is not the optimal result. `createOrFold` should have returned `%e1`. The reason is that the execution of fold returns immediately after `extractInsertFoldConstantOp`, causing subsequent folding logic to be skipped.
--------- Co-authored-by: Yang Bai --- mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 24 ++++--- mlir/test/Dialect/Affine/constant-fold.mlir | 2 +- mlir/test/Dialect/Linalg/mesh-spmdization.mlir | 2 +- mlir/test/Dialect/Mesh/spmdization.mlir | 2 +- mlir/test/Dialect/Tensor/mesh-spmdization.mlir | 2 +- mlir/test/Dialect/Tosa/constant_folding.mlir | 2 +- mlir/test/Dialect/Vector/constant-fold.mlir | 4 +- mlir/test/Dialect/Vector/single-fold.mlir | 38 ++++++++++ mlir/test/Transforms/constant-fold-debuginfo.mlir | 2 +- mlir/test/Transforms/constant-fold.mlir | 2 +- mlir/test/lib/Transforms/CMakeLists.txt | 2 +- mlir/test/lib/Transforms/TestConstantFold.cpp | 75 -------------------- mlir/test/lib/Transforms/TestSingleFold.cpp | 85 +++++++++++++++++++++++ mlir/tools/mlir-opt/mlir-opt.cpp | 4 +- 14 files changed, 150 insertions(+), 96 deletions(-) create mode 100644 mlir/test/Dialect/Vector/single-fold.mlir delete mode 100644 mlir/test/lib/Transforms/TestConstantFold.cpp create mode 100644 mlir/test/lib/Transforms/TestSingleFold.cpp diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 2a2357319bd2..e576eeac2365 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -2063,6 +2063,7 @@ static Value extractInsertFoldConstantOp(OpType op, AdaptorType adaptor, if (opChange) { op.setStaticPosition(staticPosition); op.getOperation()->setOperands(operands); + // Return the original result to indicate an in-place folding happened. return op.getResult(); } return {}; @@ -2146,11 +2147,12 @@ OpFoldResult ExtractOp::fold(FoldAdaptor adaptor) { return getVector(); if (auto res = foldPoisonSrcExtractOp(adaptor.getVector())) return res; - // Fold `arith.constant` indices into the `vector.extract` operation. Make - // sure that patterns requiring constant indices are added after this fold. + // Fold `arith.constant` indices into the `vector.extract` operation. 
+ // Do not stop here as this fold may enable subsequent folds that require + // constant indices. SmallVector operands = {getVector()}; - if (auto val = extractInsertFoldConstantOp(*this, adaptor, operands)) - return val; + auto inplaceFolded = extractInsertFoldConstantOp(*this, adaptor, operands); + if (auto res = foldPoisonIndexInsertExtractOp( getContext(), adaptor.getStaticPosition(), kPoisonIndex)) return res; @@ -2172,7 +2174,8 @@ OpFoldResult ExtractOp::fold(FoldAdaptor adaptor) { return val; if (auto val = foldScalarExtractFromFromElements(*this)) return val; - return OpFoldResult(); + + return inplaceFolded; } namespace { @@ -3272,11 +3275,12 @@ OpFoldResult vector::InsertOp::fold(FoldAdaptor adaptor) { // (type mismatch). if (getNumIndices() == 0 && getValueToStoreType() == getType()) return getValueToStore(); - // Fold `arith.constant` indices into the `vector.insert` operation. Make - // sure that patterns requiring constant indices are added after this fold. + // Fold `arith.constant` indices into the `vector.insert` operation. + // Do not stop here as this fold may enable subsequent folds that require + // constant indices. 
SmallVector operands = {getValueToStore(), getDest()}; - if (auto val = extractInsertFoldConstantOp(*this, adaptor, operands)) - return val; + auto inplaceFolded = extractInsertFoldConstantOp(*this, adaptor, operands); + if (auto res = foldPoisonIndexInsertExtractOp( getContext(), adaptor.getStaticPosition(), kPoisonIndex)) return res; @@ -3286,7 +3290,7 @@ OpFoldResult vector::InsertOp::fold(FoldAdaptor adaptor) { return res; } - return {}; + return inplaceFolded; } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Affine/constant-fold.mlir b/mlir/test/Dialect/Affine/constant-fold.mlir index ffc3946db08d..8bddacc02475 100644 --- a/mlir/test/Dialect/Affine/constant-fold.mlir +++ b/mlir/test/Dialect/Affine/constant-fold.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-constant-fold -split-input-file %s | FileCheck %s +// RUN: mlir-opt -test-single-fold -split-input-file %s | FileCheck %s // CHECK-LABEL: func @affine_apply func.func @affine_apply(%variable : index) -> (index, index, index) { diff --git a/mlir/test/Dialect/Linalg/mesh-spmdization.mlir b/mlir/test/Dialect/Linalg/mesh-spmdization.mlir index 487cec00de16..9805ee4ea552 100644 --- a/mlir/test/Dialect/Linalg/mesh-spmdization.mlir +++ b/mlir/test/Dialect/Linalg/mesh-spmdization.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt \ -// RUN: --pass-pipeline="builtin.module(func.func(mesh-spmdization,test-constant-fold))" \ +// RUN: --pass-pipeline="builtin.module(func.func(mesh-spmdization,test-single-fold))" \ // RUN: --split-input-file \ // RUN: %s | FileCheck %s diff --git a/mlir/test/Dialect/Mesh/spmdization.mlir b/mlir/test/Dialect/Mesh/spmdization.mlir index 5c9fd29444f0..af4ab58ea50a 100644 --- a/mlir/test/Dialect/Mesh/spmdization.mlir +++ b/mlir/test/Dialect/Mesh/spmdization.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt \ -// RUN: --pass-pipeline="builtin.module(func.func(mesh-spmdization,test-constant-fold))" \ +// RUN: 
--pass-pipeline="builtin.module(func.func(mesh-spmdization,test-single-fold))" \ // RUN: %s | FileCheck %s mesh.mesh @mesh_1d(shape = 2) diff --git a/mlir/test/Dialect/Tensor/mesh-spmdization.mlir b/mlir/test/Dialect/Tensor/mesh-spmdization.mlir index 3fb842474550..8598d81ff6cf 100644 --- a/mlir/test/Dialect/Tensor/mesh-spmdization.mlir +++ b/mlir/test/Dialect/Tensor/mesh-spmdization.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt \ -// RUN: --pass-pipeline="builtin.module(func.func(mesh-spmdization,test-constant-fold))" \ +// RUN: --pass-pipeline="builtin.module(func.func(mesh-spmdization,test-single-fold))" \ // RUN: %s | FileCheck %s mesh.mesh @mesh_1d_4(shape = 4) diff --git a/mlir/test/Dialect/Tosa/constant_folding.mlir b/mlir/test/Dialect/Tosa/constant_folding.mlir index 9b6ccdb54c10..d477a2479e91 100644 --- a/mlir/test/Dialect/Tosa/constant_folding.mlir +++ b/mlir/test/Dialect/Tosa/constant_folding.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --test-constant-fold %s | FileCheck %s +// RUN: mlir-opt --test-single-fold %s | FileCheck %s // CHECK-LABEL: func @test_const func.func @test_const(%arg0 : index) -> tensor<4xi32> { diff --git a/mlir/test/Dialect/Vector/constant-fold.mlir b/mlir/test/Dialect/Vector/constant-fold.mlir index 66c91d6b2041..cbb159fd59ff 100644 --- a/mlir/test/Dialect/Vector/constant-fold.mlir +++ b/mlir/test/Dialect/Vector/constant-fold.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -test-constant-fold | FileCheck %s +// RUN: mlir-opt %s -split-input-file -test-single-fold | FileCheck %s // CHECK-LABEL: fold_extract_transpose_negative func.func @fold_extract_transpose_negative(%arg0: vector<4x4xf16>) -> vector<4x4xf16> { @@ -11,3 +11,5 @@ func.func @fold_extract_transpose_negative(%arg0: vector<4x4xf16>) -> vector<4x4 %2 = vector.extract %1[0] : vector<4x4xf16> from vector<1x4x4xf16> return %2 : vector<4x4xf16> } + + diff --git a/mlir/test/Dialect/Vector/single-fold.mlir b/mlir/test/Dialect/Vector/single-fold.mlir new file mode 100644 index 
000000000000..baccdc3f51c0 --- /dev/null +++ b/mlir/test/Dialect/Vector/single-fold.mlir @@ -0,0 +1,38 @@ +// RUN: mlir-opt %s -split-input-file -test-single-fold | FileCheck %s + +// The tests in this file verify that fold() methods can handle complex +// optimization scenarios without requiring multiple folding iterations. +// This is important because: +// +// 1. OpBuilder::createOrFold() only calls fold() once, so operations must +// be fully optimized in that single call +// 2. Multiple rounds of folding would incur higher performance costs, +// so it's more efficient to complete all optimizations in one pass +// +// These tests ensure that folding implementations are robust and complete, +// avoiding situations where operations are left in intermediate states +// that could be further optimized. + +// CHECK-LABEL: fold_extract_in_single_pass +// CHECK-SAME: (%{{.*}}: vector<4xf16>, %[[ARG1:.+]]: f16) +func.func @fold_extract_in_single_pass(%arg0: vector<4xf16>, %arg1: f16) -> f16 { + %0 = vector.insert %arg1, %arg0 [1] : f16 into vector<4xf16> + %c1 = arith.constant 1 : index + // Verify that the fold is finished in a single pass even if the index is dynamic. + %1 = vector.extract %0[%c1] : f16 from vector<4xf16> + // CHECK: return %[[ARG1]] : f16 + return %1 : f16 +} + +// ----- + +// CHECK-LABEL: fold_insert_in_single_pass +func.func @fold_insert_in_single_pass() -> vector<2xf16> { + %cst = arith.constant dense<0.000000e+00> : vector<2xf16> + %c1 = arith.constant 1 : index + %c2 = arith.constant 2.5 : f16 + // Verify that the fold is finished in a single pass even if the index is dynamic. 
+ // CHECK: arith.constant dense<[0.000000e+00, 2.500000e+00]> : vector<2xf16> + %0 = vector.insert %c2, %cst [%c1] : f16 into vector<2xf16> + return %0 : vector<2xf16> +} \ No newline at end of file diff --git a/mlir/test/Transforms/constant-fold-debuginfo.mlir b/mlir/test/Transforms/constant-fold-debuginfo.mlir index c308bc477bee..4fa7fb6698a2 100644 --- a/mlir/test/Transforms/constant-fold-debuginfo.mlir +++ b/mlir/test/Transforms/constant-fold-debuginfo.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -test-constant-fold -mlir-print-debuginfo | FileCheck %s +// RUN: mlir-opt %s -split-input-file -test-single-fold -mlir-print-debuginfo | FileCheck %s // CHECK-LABEL: func @fold_and_merge func.func @fold_and_merge() -> (i32, i32) { diff --git a/mlir/test/Transforms/constant-fold.mlir b/mlir/test/Transforms/constant-fold.mlir index 981757aed9b1..0b393bf0556b 100644 --- a/mlir/test/Transforms/constant-fold.mlir +++ b/mlir/test/Transforms/constant-fold.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -test-constant-fold | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -test-single-fold | FileCheck %s // ----- diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt index 76041cd6cd79..ddc0a779e8f6 100644 --- a/mlir/test/lib/Transforms/CMakeLists.txt +++ b/mlir/test/lib/Transforms/CMakeLists.txt @@ -26,11 +26,11 @@ endif() add_mlir_library(MLIRTestTransforms TestCommutativityUtils.cpp TestCompositePass.cpp - TestConstantFold.cpp TestControlFlowSink.cpp TestInlining.cpp TestInliningCallback.cpp TestMakeIsolatedFromAbove.cpp + TestSingleFold.cpp TestTransformsOps.cpp ${MLIRTestTransformsPDLSrc} diff --git a/mlir/test/lib/Transforms/TestConstantFold.cpp b/mlir/test/lib/Transforms/TestConstantFold.cpp deleted file mode 100644 index c97ab9091cb6..000000000000 --- a/mlir/test/lib/Transforms/TestConstantFold.cpp +++ /dev/null @@ -1,75 +0,0 @@ -//===- 
TestConstantFold.cpp - Pass to test constant folding ---------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Pass/Pass.h" -#include "mlir/Transforms/FoldUtils.h" - -using namespace mlir; - -namespace { -/// Simple constant folding pass. -struct TestConstantFold : public PassWrapper>, - public RewriterBase::Listener { - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestConstantFold) - - StringRef getArgument() const final { return "test-constant-fold"; } - StringRef getDescription() const final { - return "Test operation constant folding"; - } - // All constants in the operation post folding. - SmallVector existingConstants; - - void foldOperation(Operation *op, OperationFolder &helper); - void runOnOperation() override; - - void notifyOperationInserted(Operation *op, - OpBuilder::InsertPoint previous) override { - existingConstants.push_back(op); - } - void notifyOperationErased(Operation *op) override { - auto *it = llvm::find(existingConstants, op); - if (it != existingConstants.end()) - existingConstants.erase(it); - } -}; -} // namespace - -void TestConstantFold::foldOperation(Operation *op, OperationFolder &helper) { - // Attempt to fold the specified operation, including handling unused or - // duplicated constants. - (void)helper.tryToFold(op); -} - -void TestConstantFold::runOnOperation() { - existingConstants.clear(); - - // Collect and fold the operations within the operation. - SmallVector ops; - getOperation()->walk([&](Operation *op) { ops.push_back(op); }); - - // Fold the constants in reverse so that the last generated constants from - // folding are at the beginning. 
This creates somewhat of a linear ordering to - // the newly generated constants that matches the operation order and improves - // the readability of test cases. - OperationFolder helper(&getContext(), /*listener=*/this); - for (Operation *op : llvm::reverse(ops)) - foldOperation(op, helper); - - // By the time we are done, we may have simplified a bunch of code, leaving - // around dead constants. Check for them now and remove them. - for (auto *cst : existingConstants) { - if (cst->use_empty()) - cst->erase(); - } -} - -namespace mlir { -namespace test { -void registerTestConstantFold() { PassRegistration(); } -} // namespace test -} // namespace mlir diff --git a/mlir/test/lib/Transforms/TestSingleFold.cpp b/mlir/test/lib/Transforms/TestSingleFold.cpp new file mode 100644 index 000000000000..5bd9dd2a1f07 --- /dev/null +++ b/mlir/test/lib/Transforms/TestSingleFold.cpp @@ -0,0 +1,85 @@ +//===- TestSingleFold.cpp - Pass to test single-pass folding --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/FoldUtils.h" + +using namespace mlir; + +namespace { +/// Test pass for single-pass constant folding. +/// +/// This pass tests the behavior of operations when folded exactly once. Unlike +/// canonicalization passes that may apply multiple rounds of folding, this pass +/// ensures that each operation is folded at most once, which is useful for +/// testing scenarios where the fold implementation should handle complex cases +/// without requiring multiple iterations. +/// +/// The pass also removes dead constants after folding to clean up unused +/// intermediate results. 
+struct TestSingleFold : public PassWrapper>, + public RewriterBase::Listener { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestSingleFold) + + StringRef getArgument() const final { return "test-single-fold"; } + StringRef getDescription() const final { + return "Test single-pass operation folding and dead constant elimination"; + } + // All constants in the operation post folding. + SmallVector existingConstants; + + void foldOperation(Operation *op, OperationFolder &helper); + void runOnOperation() override; + + void notifyOperationInserted(Operation *op, + OpBuilder::InsertPoint previous) override { + existingConstants.push_back(op); + } + void notifyOperationErased(Operation *op) override { + auto *it = llvm::find(existingConstants, op); + if (it != existingConstants.end()) + existingConstants.erase(it); + } +}; +} // namespace + +void TestSingleFold::foldOperation(Operation *op, OperationFolder &helper) { + // Attempt to fold the specified operation, including handling unused or + // duplicated constants. + (void)helper.tryToFold(op); +} + +void TestSingleFold::runOnOperation() { + existingConstants.clear(); + + // Collect and fold the operations within the operation. + SmallVector ops; + getOperation()->walk( + [&](Operation *op) { ops.push_back(op); }); + + // Fold the constants in reverse so that the last generated constants from + // folding are at the beginning. This creates somewhat of a linear ordering to + // the newly generated constants that matches the operation order and improves + // the readability of test cases. + OperationFolder helper(&getContext(), /*listener=*/this); + for (Operation *op : llvm::reverse(ops)) + foldOperation(op, helper); + + // By the time we are done, we may have simplified a bunch of code, leaving + // around dead constants. Check for them now and remove them. 
+ for (auto *cst : existingConstants) { + if (cst->use_empty()) + cst->erase(); + } +} + +namespace mlir { +namespace test { +void registerTestSingleFold() { PassRegistration(); } +} // namespace test +} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 6ef9ff8e8454..143a5e8e8f8d 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -87,7 +87,6 @@ void registerTestCfAssertPass(); void registerTestCFGLoopInfoPass(); void registerTestComposeSubView(); void registerTestCompositePass(); -void registerTestConstantFold(); void registerTestControlFlowSink(); void registerTestConvertToSPIRVPass(); void registerTestDataLayoutPropagation(); @@ -145,6 +144,7 @@ void registerTestSCFUtilsPass(); void registerTestSCFWhileOpBuilderPass(); void registerTestSCFWrapInZeroTripCheckPasses(); void registerTestShapeMappingPass(); +void registerTestSingleFold(); void registerTestSliceAnalysisPass(); void registerTestSPIRVCPURunnerPipeline(); void registerTestSPIRVFuncSignatureConversion(); @@ -233,7 +233,6 @@ void registerTestPasses() { mlir::test::registerTestCFGLoopInfoPass(); mlir::test::registerTestComposeSubView(); mlir::test::registerTestCompositePass(); - mlir::test::registerTestConstantFold(); mlir::test::registerTestControlFlowSink(); mlir::test::registerTestConvertToSPIRVPass(); mlir::test::registerTestDataLayoutPropagation(); @@ -291,6 +290,7 @@ void registerTestPasses() { mlir::test::registerTestSCFWhileOpBuilderPass(); mlir::test::registerTestSCFWrapInZeroTripCheckPasses(); mlir::test::registerTestShapeMappingPass(); + mlir::test::registerTestSingleFold(); mlir::test::registerTestSliceAnalysisPass(); mlir::test::registerTestSPIRVCPURunnerPipeline(); mlir::test::registerTestSPIRVFuncSignatureConversion(); -- cgit v1.2.3 From 4084ffcf1e69b962e864aa138bb54dabbcec912f Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 18 Jun 2025 11:31:03 -0500 Subject: [flang] Show types in 
DumpEvExpr (#143743) When dumping evaluate::Expr, show type names which contain a lot of useful information. For example show ``` expr { expr > { expr > { ... ``` instead of ``` expr T { expr T { expr T { ... ``` --- flang/include/flang/Semantics/dump-expr.h | 56 ++++++++++++++++++++++++++----- flang/lib/Semantics/dump-expr.cpp | 2 +- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/flang/include/flang/Semantics/dump-expr.h b/flang/include/flang/Semantics/dump-expr.h index 2f445429a10b..9cc52b4da487 100644 --- a/flang/include/flang/Semantics/dump-expr.h +++ b/flang/include/flang/Semantics/dump-expr.h @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -38,6 +39,43 @@ public: } private: + template struct TypeOf { + static constexpr std::string_view get() { +#if defined(__GNUC__) +#define DUMP_EXPR_SHOW_TYPE + std::string_view v(__PRETTY_FUNCTION__); + // Extract the "xyz" from the "pretty function" string: + // "... [with T = xyz; std::string_view = ...]" + std::string_view front("with T = "); + std::string_view back("; std::string_view ="); + +#elif defined(_MSC_VER) +#define DUMP_EXPR_SHOW_TYPE + std::string_view v(__FUNCSIG__); + // Extract the "xyz" from the "pretty function" string: + // "...TypeOf::get(void)" + std::string_view front("TypeOf<"); + std::string_view back(">::get(void)"); + +#endif + +#if defined(DUMP_EXPR_SHOW_TYPE) +#undef DUMP_EXPR_SHOW_TYPE + if (auto fpos{v.find(front)}; fpos != v.npos) { + v.remove_prefix(fpos + front.size()); + if (auto bpos{v.find(back)}; bpos != v.npos) { + v.remove_suffix(v.size() - bpos); + return v; + } + } +#endif + + return ""; + } + + static constexpr std::string_view name{TypeOf::get()}; + }; + template void Show(const common::Indirection &x) { Show(x.value()); } @@ -76,7 +114,7 @@ private: void Show(const evaluate::NullPointer &); template void Show(const evaluate::Constant &x) { if constexpr (T::category == common::TypeCategory::Derived) { - Indent("derived constant"); + 
Indent("derived constant "s + std::string(TypeOf::name)); for (const auto &map : x.values()) { for (const auto &pair : map) { Show(pair.second.value()); @@ -84,7 +122,7 @@ private: } Outdent(); } else { - Print("constant"); + Print("constant "s + std::string(TypeOf::name)); } } void Show(const Symbol &symbol); @@ -102,7 +140,7 @@ private: void Show(const evaluate::Substring &x); void Show(const evaluate::ComplexPart &x); template void Show(const evaluate::Designator &x) { - Indent("designator"); + Indent("designator "s + std::string(TypeOf::name)); Show(x.u); Outdent(); } @@ -117,7 +155,7 @@ private: Outdent(); } template void Show(const evaluate::FunctionRef &x) { - Indent("function ref"); + Indent("function ref "s + std::string(TypeOf::name)); Show(x.proc()); Show(x.arguments()); Outdent(); @@ -127,14 +165,14 @@ private: } template void Show(const evaluate::ArrayConstructorValues &x) { - Indent("array constructor value"); + Indent("array constructor value "s + std::string(TypeOf::name)); for (auto &v : x) { Show(v); } Outdent(); } template void Show(const evaluate::ImpliedDo &x) { - Indent("implied do"); + Indent("implied do "s + std::string(TypeOf::name)); Show(x.lower()); Show(x.upper()); Show(x.stride()); @@ -148,20 +186,20 @@ private: void Show(const evaluate::StructureConstructor &x); template void Show(const evaluate::Operation &op) { - Indent("unary op"); + Indent("unary op "s + std::string(TypeOf::name)); Show(op.left()); Outdent(); } template void Show(const evaluate::Operation &op) { - Indent("binary op"); + Indent("binary op "s + std::string(TypeOf::name)); Show(op.left()); Show(op.right()); Outdent(); } void Show(const evaluate::Relational &x); template void Show(const evaluate::Expr &x) { - Indent("expr T"); + Indent("expr <" + std::string(TypeOf::name) + ">"); Show(x.u); Outdent(); } diff --git a/flang/lib/Semantics/dump-expr.cpp b/flang/lib/Semantics/dump-expr.cpp index aa0b4e0f0339..66cedab94bfb 100644 --- a/flang/lib/Semantics/dump-expr.cpp +++ 
b/flang/lib/Semantics/dump-expr.cpp @@ -151,7 +151,7 @@ void DumpEvaluateExpr::Show(const evaluate::StructureConstructor &x) { } void DumpEvaluateExpr::Show(const evaluate::Relational &x) { - Indent("expr some type"); + Indent("relational some type"); Show(x.u); Outdent(); } -- cgit v1.2.3 From 2a8c65e983b3f4e1c83d8028d354f7bacc149015 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Wed, 18 Jun 2025 18:56:30 +0200 Subject: [CodeGen][NFC] Fix quadratic c-t for large jump tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deleting a basic block removes all references from jump tables, which is O(n). When freeing a MachineFunction, all basic blocks are deleted before the jump tables, causing O(n^2) runtime. Fix this by deallocating the jump table first. Test case generator: import sys n = int(sys.argv[1]) print("define void @f(i64 %c, ptr %p) {") print(" switch i64 %c, label %d [") for i in range(n): print(f" i64 {i}, label %h{i}") print(f" ]") for i in range(n): print(f'h{i}:') print(f' store i64 {i*i}, ptr %p') print(f' ret void') print('d:') print(' ret void') print('}') Improvement at 5000 entries: Benchmark 1: ./llc.pre -filetype=obj -O0 ~MachineJumpTableInfo(); + Allocator.Deallocate(JumpTableInfo); + JumpTableInfo = nullptr; + } + // Don't call destructors on MachineInstr and MachineOperand. All of their // memory comes from the BumpPtrAllocator which is about to be purged. 
// @@ -287,11 +296,6 @@ void MachineFunction::clear() { ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool); - if (JumpTableInfo) { - JumpTableInfo->~MachineJumpTableInfo(); - Allocator.Deallocate(JumpTableInfo); - } - if (WinEHInfo) { WinEHInfo->~WinEHFuncInfo(); Allocator.Deallocate(WinEHInfo); -- cgit v1.2.3 From 77bc25485135b8a8cb2427910a8850fbc4e4be09 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Wed, 18 Jun 2025 18:05:02 +0100 Subject: [AArch64] Fix build failure with -Werror (#144749) PR#144387 caused buildbot failures with -Werror due to a comparison between signed and unsigned types. Fix this with an explicit cast. --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0e28ccd0f655..d8b574719dae 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -27081,7 +27081,7 @@ bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op, // only allow an offset that's equal to the store size. EVT MemType = cast(N)->getMemoryVT(); if (!Subtarget->isLittleEndian() && MemType.isVector() && - RHSC != MemType.getStoreSize()) + (uint64_t)RHSC != MemType.getStoreSize()) return false; // Always emit pre-inc/post-inc addressing mode. Use negated constant offset // when dealing with subtraction. -- cgit v1.2.3 From 298f1c276f4f9c18b25a79ffe6e619e89c5fbf7e Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 18 Jun 2025 10:08:27 -0700 Subject: Revert "Add missing intrinsics to cuda headers" (#144755) Reverts llvm/llvm-project#143664 as it breaks CUDA compilation. 
--- clang/lib/Headers/__clang_cuda_intrinsics.h | 284 ---------------------------- 1 file changed, 284 deletions(-) diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h index 5e13f3f78df7..8b230af6f664 100644 --- a/clang/lib/Headers/__clang_cuda_intrinsics.h +++ b/clang/lib/Headers/__clang_cuda_intrinsics.h @@ -479,290 +479,6 @@ inline __device__ unsigned __funnelshift_rc(unsigned low32, unsigned high32, return ret; } -#pragma push_macro("__INTRINSIC_LOAD") -#define __INTRINSIC_LOAD(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \ - __Clobber) \ - inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \ - __TmpType __ret; \ - asm(__AsmOp " %0, [%1];" : __AsmType(__ret) : "l"(__ptr)__Clobber); \ - return (__DeclType)__ret; \ - } - -#pragma push_macro("__INTRINSIC_LOAD2") -#define __INTRINSIC_LOAD2(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \ - __Clobber) \ - inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \ - __DeclType __ret; \ - __TmpType __tmp; \ - asm(__AsmOp " {%0,%1}, [%2];" \ - : __AsmType(__tmp.x), __AsmType(__tmp.y) \ - : "l"(__ptr)__Clobber); \ - using __ElementType = decltype(__ret.x); \ - __ret.x = (__ElementType)(__tmp.x); \ - __ret.y = (__ElementType)__tmp.y; \ - return __ret; \ - } - -#pragma push_macro("__INTRINSIC_LOAD4") -#define __INTRINSIC_LOAD4(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType, \ - __Clobber) \ - inline __device__ __DeclType __FnName(const __DeclType *__ptr) { \ - __DeclType __ret; \ - __TmpType __tmp; \ - asm(__AsmOp " {%0,%1,%2,%3}, [%4];" \ - : __AsmType(__tmp.x), __AsmType(__tmp.y), __AsmType(__tmp.z), \ - __AsmType(__tmp.w) \ - : "l"(__ptr)__Clobber); \ - using __ElementType = decltype(__ret.x); \ - __ret.x = (__ElementType)__tmp.x; \ - __ret.y = (__ElementType)__tmp.y; \ - __ret.z = (__ElementType)__tmp.z; \ - __ret.w = (__ElementType)__tmp.w; \ - return __ret; \ - } - -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", char, unsigned 
int, "=r", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s8", signed char, unsigned int, "=r", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s16", short, unsigned short, "=h", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s32", int, unsigned int, "=r", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.s64", long long, unsigned long long, - "=l", ); - -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s8", char2, int2, "=r", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s8", char4, int4, "=r", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s16", short2, short2, "=h", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s16", short4, short4, "=h", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s32", int2, int2, "=r", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.s32", int4, int4, "=r", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.s64 ", longlong2, longlong2, "=l", ); - -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u8", unsigned char, unsigned int, - "=r", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u16", unsigned short, unsigned short, - "=h", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u32", unsigned int, unsigned int, - "=r", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.u64", unsigned long long, - unsigned long long, "=l", ); - -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u8", uchar2, int2, "=r", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u8", uchar4, int4, "=r", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u16", ushort2, ushort2, "=h", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u16", ushort4, ushort4, "=h", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u32", uint2, uint2, "=r", ); -__INTRINSIC_LOAD4(__ldcg, "ld.global.cg.v4.u32", uint4, uint4, "=r", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.u64", ulonglong2, ulonglong2, - "=l", ); - -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.f32", float, float, "=f", ); -__INTRINSIC_LOAD(__ldcg, "ld.global.cg.f64", double, double, "=d", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.f32", float2, float2, "=f", ); -__INTRINSIC_LOAD4(__ldcg, 
"ld.global.cg.v4.f32", float4, float4, "=f", ); -__INTRINSIC_LOAD2(__ldcg, "ld.global.cg.v2.f64", double2, double2, "=d", ); - -inline __device__ long __ldcg(const long *__ptr) { - unsigned long __ret; - if (sizeof(long) == 8) { - asm("ld.global.cg.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr)); - } else { - asm("ld.global.cg.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr)); - } - return (long)__ret; -} - -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u8", unsigned char, unsigned int, - "=r", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u16", unsigned short, unsigned short, - "=h", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u32", unsigned int, unsigned int, - "=r", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.u64", unsigned long long, - unsigned long long, "=l", : "memory"); - -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s8", char, unsigned int, - "=r", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s8", signed char, unsigned int, - "=r", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s16", short, unsigned short, - "=h", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s32", int, unsigned int, - "=r", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.s64", long long, unsigned long long, - "=l", : "memory"); - -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u8", uchar2, uint2, - "=r", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u8", uchar4, uint4, - "=r", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u16", ushort2, ushort2, - "=h", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u16", ushort4, ushort4, - "=h", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u32", uint2, uint2, - "=r", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.u32", uint4, uint4, - "=r", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.u64", ulonglong2, ulonglong2, - "=l", : "memory"); - -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s8", char2, int2, "=r", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s8", 
char4, int4, "=r", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s16", short2, short2, - "=h", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s16", short4, short4, - "=h", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s32", int2, int2, "=r", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.s32", int4, int4, "=r", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.s64", longlong2, longlong2, - "=l", : "memory"); - -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.f32", float, float, "=f", : "memory"); -__INTRINSIC_LOAD(__ldcv, "ld.global.cv.f64", double, double, "=d", : "memory"); - -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.f32", float2, float2, - "=f", : "memory"); -__INTRINSIC_LOAD4(__ldcv, "ld.global.cv.v4.f32", float4, float4, - "=f", : "memory"); -__INTRINSIC_LOAD2(__ldcv, "ld.global.cv.v2.f64", double2, double2, - "=d", : "memory"); - -inline __device__ long __ldcv(const long *__ptr) { - unsigned long __ret; - if (sizeof(long) == 8) { - asm("ld.global.cv.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr)); - } else { - asm("ld.global.cv.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr)); - } - return (long)__ret; -} - -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s8", char, unsigned int, "=r", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s8", signed char, signed int, "=r", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s16", short, unsigned short, "=h", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s32", int, unsigned int, "=r", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.s64", long long, unsigned long long, - "=l", ); - -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s8", char2, int2, "=r", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s8", char4, int4, "=r", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s16", short2, short2, "=h", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s16", short4, short4, "=h", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s32", int2, int2, "=r", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.s32", int4, int4, "=r", ); 
-__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.s64", longlong2, longlong2, "=l", ); - -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u8", unsigned char, unsigned int, - "=r", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u16", unsigned short, unsigned short, - "=h", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u32", unsigned int, unsigned int, - "=r", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.u64", unsigned long long, - unsigned long long, "=l", ); - -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u8", uchar2, uint2, "=r", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u8", uchar4, uint4, "=r", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u16", ushort2, ushort2, "=h", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u16", ushort4, ushort4, "=h", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u32", uint2, uint2, "=r", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.u32", uint4, uint4, "=r", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.u64", ulonglong2, ulonglong2, - "=l", ); - -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.f32", float, float, "=f", ); -__INTRINSIC_LOAD(__ldcs, "ld.global.cs.f64", double, double, "=d", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.f32", float2, float2, "=f", ); -__INTRINSIC_LOAD4(__ldcs, "ld.global.cs.v4.f32", float4, float4, "=f", ); -__INTRINSIC_LOAD2(__ldcs, "ld.global.cs.v2.f64", double2, double2, "=d", ); - -#pragma pop_macro("__INTRINSIC_LOAD") -#pragma pop_macro("__INTRINSIC_LOAD2") -#pragma pop_macro("__INTRINSIC_LOAD4") - -inline __device__ long __ldcs(const long *__ptr) { - unsigned long __ret; - if (sizeof(long) == 8) { - asm("ld.global.cs.s64 %0, [%1];" : "=l"(__ret) : "l"(__ptr)); - } else { - asm("ld.global.cs.s32 %0, [%1];" : "=r"(__ret) : "l"(__ptr)); - } - return (long)__ret; -} - -#pragma push_macro("__INTRINSIC_STORE") -#define __INTRINSIC_STORE(__FnName, __AsmOp, __DeclType, __TmpType, __AsmType) \ - inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \ - __TmpType __tmp = (__TmpType)__value; \ - asm(__AsmOp " [%0], 
%1;" ::"l"(__ptr), __AsmType(__tmp) : "memory"); \ - } - -#pragma push_macro("__INTRINSIC_STORE2") -#define __INTRINSIC_STORE2(__FnName, __AsmOp, __DeclType, __TmpType, \ - __AsmType) \ - inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \ - __TmpType __tmp; \ - using __ElementType = decltype(__tmp.x); \ - __tmp.x = (__ElementType)(__value.x); \ - __tmp.y = (__ElementType)(__value.y); \ - asm(__AsmOp " [%0], {%1,%2};" ::"l"(__ptr), __AsmType(__tmp.x), \ - __AsmType(__tmp.y) \ - : "memory"); \ - } - -#pragma push_macro("__INTRINSIC_STORE4") -#define __INTRINSIC_STORE4(__FnName, __AsmOp, __DeclType, __TmpType, \ - __AsmType) \ - inline __device__ void __FnName(__DeclType *__ptr, __DeclType __value) { \ - __TmpType __tmp; \ - using __ElementType = decltype(__tmp.x); \ - __tmp.x = (__ElementType)(__value.x); \ - __tmp.y = (__ElementType)(__value.y); \ - __tmp.z = (__ElementType)(__value.z); \ - __tmp.w = (__ElementType)(__value.w); \ - asm(__AsmOp " [%0], {%1,%2,%3,%4};" ::"l"(__ptr), __AsmType(__tmp.x), \ - __AsmType(__tmp.y), __AsmType(__tmp.z), __AsmType(__tmp.w) \ - : "memory"); \ - } - -__INTRINSIC_STORE(__stwt, "st.global.wt.s8", char, int, "r"); -__INTRINSIC_STORE(__stwt, "st.global.wt.s8", signed char, int, "r"); -__INTRINSIC_STORE(__stwt, "st.global.wt.s16", short, short, "h"); -__INTRINSIC_STORE(__stwt, "st.global.wt.s32", int, int, "r"); -__INTRINSIC_STORE(__stwt, "st.global.wt.s64", long long, long long, "l"); - -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s8", char2, int2, "r"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s8", char4, int4, "r"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s16", short2, short2, "h"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s16", short4, short4, "h"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s32", int2, int2, "r"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.s32", int4, int4, "r"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.s64", longlong2, longlong2, "l"); - -__INTRINSIC_STORE(__stwt, 
"st.global.wt.u8", unsigned char, int, "r"); -__INTRINSIC_STORE(__stwt, "st.global.wt.u16", unsigned short, unsigned short, - "h"); -__INTRINSIC_STORE(__stwt, "st.global.wt.u32", unsigned int, unsigned int, "r"); -__INTRINSIC_STORE(__stwt, "st.global.wt.u64", unsigned long long, - unsigned long long, "l"); - -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u8", uchar2, uchar2, "r"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u8", uchar4, uint4, "r"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u16", ushort2, ushort2, "h"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u16", ushort4, ushort4, "h"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u32", uint2, uint2, "r"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.u32", uint4, uint4, "r"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.u64", ulonglong2, ulonglong2, "l"); - -__INTRINSIC_STORE(__stwt, "st.global.wt.f32", float, float, "f"); -__INTRINSIC_STORE(__stwt, "st.global.wt.f64", double, double, "d"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.f32", float2, float2, "f"); -__INTRINSIC_STORE4(__stwt, "st.global.wt.v4.f32", float4, float4, "f"); -__INTRINSIC_STORE2(__stwt, "st.global.wt.v2.f64", double2, double2, "d"); - -#pragma pop_macro("__INTRINSIC_STORE") -#pragma pop_macro("__INTRINSIC_STORE2") -#pragma pop_macro("__INTRINSIC_STORE4") - #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320 #if CUDA_VERSION >= 11000 -- cgit v1.2.3 From d9f7979a63ceac88727632ecfd522c073288b6c1 Mon Sep 17 00:00:00 2001 From: Justin King Date: Wed, 18 Jun 2025 10:24:38 -0700 Subject: sanitizer_common: add unsupported test for free_sized and free_aligned_sized from C23 (#144727) Signed-off-by: Justin King --- .../sanitizer_common/TestCases/Linux/free_aligned_sized.c | 13 +++++++++++++ .../test/sanitizer_common/TestCases/Linux/free_sized.c | 15 +++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 compiler-rt/test/sanitizer_common/TestCases/Linux/free_aligned_sized.c create mode 100644 
compiler-rt/test/sanitizer_common/TestCases/Linux/free_sized.c diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/free_aligned_sized.c b/compiler-rt/test/sanitizer_common/TestCases/Linux/free_aligned_sized.c new file mode 100644 index 000000000000..f4c6c0f973bd --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/free_aligned_sized.c @@ -0,0 +1,13 @@ +// RUN: %clang -std=c23 -O0 %s -o %t && %run %t +// UNSUPPORTED: asan, hwasan, rtsan, tsan, msan, lsan, ubsan + +#include +#include + +extern void free_aligned_sized(void *p, size_t alignment, size_t size); + +int main() { + volatile void *p = aligned_alloc(128, 1024); + free_aligned_sized((void *)p, 128, 1024); + return 0; +} diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/free_sized.c b/compiler-rt/test/sanitizer_common/TestCases/Linux/free_sized.c new file mode 100644 index 000000000000..0ee2289684d0 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/free_sized.c @@ -0,0 +1,15 @@ +// RUN: %clang -std=c23 -O0 %s -o %t && %run %t +// UNSUPPORTED: asan, hwasan, rtsan, tsan, msan, lsan, ubsan + +#include +#include + +extern void *aligned_alloc(size_t alignment, size_t size); + +extern void free_sized(void *p, size_t size); + +int main() { + volatile void *p = malloc(64); + free_sized((void *)p, 64); + return 0; +} -- cgit v1.2.3 From 82acd8c377e9ed267195afdbde16eedebabc648c Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Wed, 18 Jun 2025 13:50:57 -0400 Subject: [PowerPC] Add code to spill and restore DMRp registers (#142443) --- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 2 +- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 9 ++ llvm/lib/Target/PowerPC/PPCInstrMMA.td | 4 + llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 107 +++++++------- llvm/test/CodeGen/PowerPC/dmr-spill.ll | 36 ++--- llvm/test/CodeGen/PowerPC/dmrp-spill.ll | 213 ++++++++++++++++++++++++++++ 6 files changed, 300 insertions(+), 71 deletions(-) create mode 100644 
llvm/test/CodeGen/PowerPC/dmrp-spill.ll diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 57c86d9e5de6..7c1550e99bae 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1926,7 +1926,7 @@ unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const { } else if (PPC::DMRROWpRCRegClass.hasSubClassEq(RC)) { llvm_unreachable("TODO: Implement spill DMRROWp regclass!"); } else if (PPC::DMRpRCRegClass.hasSubClassEq(RC)) { - llvm_unreachable("TODO: Implement spill DMRp regclass!"); + OpcodeIndex = SOK_DMRpSpill; } else if (PPC::DMRRCRegClass.hasSubClassEq(RC)) { OpcodeIndex = SOK_DMRSpill; } else { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index a27b5718ec89..7931a9e3ae13 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -81,6 +81,7 @@ enum SpillOpcodeKey { SOK_AccumulatorSpill, SOK_UAccumulatorSpill, SOK_WAccumulatorSpill, + SOK_DMRpSpill, SOK_DMRSpill, SOK_SPESpill, SOK_PairedG8Spill, @@ -119,6 +120,7 @@ enum PPCMachineCombinerPattern : unsigned { NoInstr, \ NoInstr, \ NoInstr, \ + NoInstr, \ PPC::EVLDD, \ PPC::RESTORE_QUADWORD} @@ -140,6 +142,7 @@ enum PPCMachineCombinerPattern : unsigned { NoInstr, \ NoInstr, \ NoInstr, \ + NoInstr, \ PPC::RESTORE_QUADWORD} #define Pwr10LoadOpcodes \ @@ -160,6 +163,7 @@ enum PPCMachineCombinerPattern : unsigned { NoInstr, \ NoInstr, \ NoInstr, \ + NoInstr, \ PPC::RESTORE_QUADWORD} #define FutureLoadOpcodes \ @@ -178,6 +182,7 @@ enum PPCMachineCombinerPattern : unsigned { PPC::RESTORE_ACC, \ PPC::RESTORE_UACC, \ PPC::RESTORE_WACC, \ + PPC::RESTORE_DMRP, \ PPC::RESTORE_DMR, \ NoInstr, \ PPC::RESTORE_QUADWORD} @@ -199,6 +204,7 @@ enum PPCMachineCombinerPattern : unsigned { NoInstr, \ NoInstr, \ NoInstr, \ + NoInstr, \ PPC::EVSTDD, \ PPC::SPILL_QUADWORD} @@ -220,6 +226,7 @@ enum PPCMachineCombinerPattern : unsigned { 
NoInstr, \ NoInstr, \ NoInstr, \ + NoInstr, \ PPC::SPILL_QUADWORD} #define Pwr10StoreOpcodes \ @@ -240,6 +247,7 @@ enum PPCMachineCombinerPattern : unsigned { NoInstr, \ NoInstr, \ NoInstr, \ + NoInstr, \ PPC::SPILL_QUADWORD} #define FutureStoreOpcodes \ @@ -258,6 +266,7 @@ enum PPCMachineCombinerPattern : unsigned { PPC::SPILL_ACC, \ PPC::SPILL_UACC, \ PPC::SPILL_WACC, \ + PPC::SPILL_DMRP, \ PPC::SPILL_DMR, \ NoInstr, \ PPC::SPILL_QUADWORD} diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td index 82e4a60e0a72..436715a0e4ab 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td @@ -565,12 +565,16 @@ let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in { let mayStore = 1 in { def SPILL_WACC: PPCEmitTimePseudo<(outs), (ins wacc:$AT, memrix16:$dst), "#SPILL_WACC", []>; + def SPILL_DMRP: PPCEmitTimePseudo<(outs), (ins dmrp:$AT, memrix16:$dst), + "#SPILL_DMRP", []>; def SPILL_DMR: PPCEmitTimePseudo<(outs), (ins dmr:$AT, memrix16:$dst), "#SPILL_DMR", []>; } let mayLoad = 1, hasSideEffects = 0 in { def RESTORE_WACC: PPCEmitTimePseudo<(outs wacc:$AT), (ins memrix16:$src), "#RESTORE_WACC", []>; + def RESTORE_DMRP: PPCEmitTimePseudo<(outs dmrp:$AT), (ins memrix16:$src), + "#RESTORE_DMRP", []>; def RESTORE_DMR: PPCEmitTimePseudo<(outs dmr:$AT), (ins memrix16:$src), "#RESTORE_DMR", []>; } diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index ea34c1aba82e..76dca4794e05 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1519,33 +1519,32 @@ void PPCRegisterInfo::lowerDMRSpilling(MachineBasicBlock::iterator II, // DMR is made up of WACC and WACC_HI, so DMXXEXTFDMR512 to spill // the corresponding 512 bits. 
const TargetRegisterClass *RC = &PPC::VSRpRCRegClass; - Register SrcReg = MI.getOperand(0).getReg(); - - Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC); - Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC); - Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC); - Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC); + auto spillDMR = [&](Register SrcReg, int BEIdx, int LEIdx) { + auto spillWACC = [&](unsigned Opc, unsigned RegIdx, int IdxBE, int IdxLE) { + Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC); + Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC); + + BuildMI(MBB, II, DL, TII.get(Opc), VSRpReg0) + .addDef(VSRpReg1) + .addReg(TargetRegisterInfo::getSubReg(SrcReg, RegIdx)); + + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) + .addReg(VSRpReg0, RegState::Kill), + FrameIndex, IsLittleEndian ? IdxLE : IdxBE); + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) + .addReg(VSRpReg1, RegState::Kill), + FrameIndex, IsLittleEndian ? IdxLE - 32 : IdxBE + 32); + }; + spillWACC(PPC::DMXXEXTFDMR512, PPC::sub_wacc_lo, BEIdx, LEIdx); + spillWACC(PPC::DMXXEXTFDMR512_HI, PPC::sub_wacc_hi, BEIdx + 64, LEIdx - 64); + }; - BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512_HI), VSRpReg2) - .addDef(VSRpReg3) - .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_hi)); - - BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512), VSRpReg0) - .addDef(VSRpReg1) - .addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_lo)); - - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) - .addReg(VSRpReg0, RegState::Kill), - FrameIndex, IsLittleEndian ? 96 : 0); - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) - .addReg(VSRpReg1, RegState::Kill), - FrameIndex, IsLittleEndian ? 64 : 32); - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) - .addReg(VSRpReg2, RegState::Kill), - FrameIndex, IsLittleEndian ? 
32 : 64); - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP)) - .addReg(VSRpReg3, RegState::Kill), - FrameIndex, IsLittleEndian ? 0 : 96); + Register SrcReg = MI.getOperand(0).getReg(); + if (MI.getOpcode() == PPC::SPILL_DMRP) { + spillDMR(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_dmr1), 0, 96); + spillDMR(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_dmr0), 128, 224); + } else + spillDMR(SrcReg, 0, 96); // Discard the pseudo instruction. MBB.erase(II); @@ -1554,7 +1553,7 @@ void PPCRegisterInfo::lowerDMRSpilling(MachineBasicBlock::iterator II, /// lowerDMRRestore - Generate the code to restore the DMR register. void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex) const { - MachineInstr &MI = *II; // = RESTORE_WACC + MachineInstr &MI = *II; // = RESTORE_DMR[P] MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const PPCSubtarget &Subtarget = MF.getSubtarget(); @@ -1563,32 +1562,34 @@ void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II, bool IsLittleEndian = Subtarget.isLittleEndian(); const TargetRegisterClass *RC = &PPC::VSRpRCRegClass; - Register DestReg = MI.getOperand(0).getReg(); - - Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC); - Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC); - Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC); - Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC); + auto restoreDMR = [&](Register DestReg, int BEIdx, int LEIdx) { + auto restoreWACC = [&](unsigned Opc, unsigned RegIdx, int IdxBE, + int IdxLE) { + Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC); + Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC); + + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0), + FrameIndex, IsLittleEndian ? IdxLE : IdxBE); + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1), + FrameIndex, IsLittleEndian ? 
IdxLE - 32 : IdxBE + 32); + + // Kill virtual registers (killedRegState::Killed). + BuildMI(MBB, II, DL, TII.get(Opc), + TargetRegisterInfo::getSubReg(DestReg, RegIdx)) + .addReg(VSRpReg0, RegState::Kill) + .addReg(VSRpReg1, RegState::Kill); + }; + restoreWACC(PPC::DMXXINSTDMR512, PPC::sub_wacc_lo, BEIdx, LEIdx); + restoreWACC(PPC::DMXXINSTDMR512_HI, PPC::sub_wacc_hi, BEIdx + 64, + LEIdx - 64); + }; - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0), - FrameIndex, IsLittleEndian ? 96 : 0); - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1), - FrameIndex, IsLittleEndian ? 64 : 32); - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg2), - FrameIndex, IsLittleEndian ? 32 : 64); - addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg3), - FrameIndex, IsLittleEndian ? 0 : 96); - - // Kill virtual registers (killedRegState::Killed). - BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512_HI), - TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_hi)) - .addReg(VSRpReg2, RegState::Kill) - .addReg(VSRpReg3, RegState::Kill); - - BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512), - TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_lo)) - .addReg(VSRpReg0, RegState::Kill) - .addReg(VSRpReg1, RegState::Kill); + Register DestReg = MI.getOperand(0).getReg(); + if (MI.getOpcode() == PPC::RESTORE_DMRP) { + restoreDMR(TargetRegisterInfo::getSubReg(DestReg, PPC::sub_dmr1), 0, 96); + restoreDMR(TargetRegisterInfo::getSubReg(DestReg, PPC::sub_dmr0), 128, 224); + } else + restoreDMR(DestReg, 0, 96); // Discard the pseudo instruction. 
MBB.erase(II); @@ -1756,9 +1757,11 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, case PPC::RESTORE_WACC: lowerWACCRestore(II, FrameIndex); return true; + case PPC::SPILL_DMRP: case PPC::SPILL_DMR: lowerDMRSpilling(II, FrameIndex); return true; + case PPC::RESTORE_DMRP: case PPC::RESTORE_DMR: lowerDMRRestore(II, FrameIndex); return true; diff --git a/llvm/test/CodeGen/PowerPC/dmr-spill.ll b/llvm/test/CodeGen/PowerPC/dmr-spill.ll index c1b01cd2d3fd..983fce4127af 100644 --- a/llvm/test/CodeGen/PowerPC/dmr-spill.ll +++ b/llvm/test/CodeGen/PowerPC/dmr-spill.ll @@ -30,19 +30,19 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind { ; CHECK-NEXT: lxv v3, 0(r4) ; CHECK-NEXT: lxv vs0, 0(r5) ; CHECK-NEXT: dmxvbf16gerx2pp dmr0, vsp34, vs0 +; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-NEXT: stxvp vsp36, 128(r1) +; CHECK-NEXT: stxvp vsp34, 96(r1) ; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1 -; CHECK-NEXT: dmxxextfdmr512 vsp38, vsp32, wacc0, 0 -; CHECK-NEXT: stxvp vsp38, 128(r1) -; CHECK-NEXT: stxvp vsp32, 96(r1) ; CHECK-NEXT: stxvp vsp36, 64(r1) ; CHECK-NEXT: stxvp vsp34, 32(r1) ; CHECK-NEXT: bl dummy_func@notoc ; CHECK-NEXT: lxvp vsp34, 128(r1) ; CHECK-NEXT: lxvp vsp36, 96(r1) -; CHECK-NEXT: lxvp vsp32, 64(r1) -; CHECK-NEXT: lxvp vsp38, 32(r1) -; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1 ; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: lxvp vsp34, 64(r1) +; CHECK-NEXT: lxvp vsp36, 32(r1) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1 ; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 ; CHECK-NEXT: stxvp vsp34, 96(r30) ; CHECK-NEXT: stxvp vsp36, 64(r30) @@ -72,20 +72,20 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind { ; AIX-NEXT: lxv v3, 16(r4) ; AIX-NEXT: lxv vs0, 0(r5) ; AIX-NEXT: dmxvbf16gerx2pp dmr0, vsp34, vs0 +; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; AIX-NEXT: stxvp vsp36, 112(r1) +; AIX-NEXT: stxvp vsp34, 144(r1) ; 
AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1 -; AIX-NEXT: dmxxextfdmr512 vsp38, vsp32, wacc0, 0 -; AIX-NEXT: stxvp vsp38, 112(r1) -; AIX-NEXT: stxvp vsp32, 144(r1) ; AIX-NEXT: stxvp vsp36, 176(r1) ; AIX-NEXT: stxvp vsp34, 208(r1) ; AIX-NEXT: bl .dummy_func[PR] ; AIX-NEXT: nop ; AIX-NEXT: lxvp vsp34, 112(r1) ; AIX-NEXT: lxvp vsp36, 144(r1) -; AIX-NEXT: lxvp vsp32, 176(r1) -; AIX-NEXT: lxvp vsp38, 208(r1) -; AIX-NEXT: dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1 ; AIX-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; AIX-NEXT: lxvp vsp34, 176(r1) +; AIX-NEXT: lxvp vsp36, 208(r1) +; AIX-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1 ; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 ; AIX-NEXT: stxvp vsp36, 96(r31) ; AIX-NEXT: stxvp vsp34, 64(r31) @@ -115,20 +115,20 @@ define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind { ; AIX32-NEXT: lxv v3, 16(r4) ; AIX32-NEXT: lxv vs0, 0(r5) ; AIX32-NEXT: dmxvbf16gerx2pp dmr0, vsp34, vs0 +; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; AIX32-NEXT: stxvp vsp36, 64(r1) +; AIX32-NEXT: stxvp vsp34, 96(r1) ; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1 -; AIX32-NEXT: dmxxextfdmr512 vsp38, vsp32, wacc0, 0 -; AIX32-NEXT: stxvp vsp38, 64(r1) -; AIX32-NEXT: stxvp vsp32, 96(r1) ; AIX32-NEXT: stxvp vsp36, 128(r1) ; AIX32-NEXT: stxvp vsp34, 160(r1) ; AIX32-NEXT: bl .dummy_func[PR] ; AIX32-NEXT: nop ; AIX32-NEXT: lxvp vsp34, 64(r1) ; AIX32-NEXT: lxvp vsp36, 96(r1) -; AIX32-NEXT: lxvp vsp32, 128(r1) -; AIX32-NEXT: lxvp vsp38, 160(r1) -; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1 ; AIX32-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; AIX32-NEXT: lxvp vsp34, 128(r1) +; AIX32-NEXT: lxvp vsp36, 160(r1) +; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1 ; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 ; AIX32-NEXT: stxvp vsp36, 96(r31) ; AIX32-NEXT: stxvp vsp34, 64(r31) diff --git a/llvm/test/CodeGen/PowerPC/dmrp-spill.ll b/llvm/test/CodeGen/PowerPC/dmrp-spill.ll new file mode 100644 index 
000000000000..62d42d4a26d5 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/dmrp-spill.ll @@ -0,0 +1,213 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \ +; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \ +; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX32 + +declare void @dummy_func() +declare <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1>, i32) + +define dso_local void @test_dmsha3hash(ptr %vopp, ptr %resp) nounwind { +; CHECK-LABEL: test_dmsha3hash: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -304(r1) +; CHECK-NEXT: std r30, 288(r1) # 8-byte Folded Spill +; CHECK-NEXT: lxvp vsp34, 0(r3) +; CHECK-NEXT: lxvp vsp36, 32(r3) +; CHECK-NEXT: mr r30, r4 +; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 64(r3) +; CHECK-NEXT: lxvp vsp36, 96(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0 +; CHECK-NEXT: lxvp vsp34, 128(r3) +; CHECK-NEXT: lxvp vsp36, 160(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; CHECK-NEXT: lxvp vsp34, 192(r3) +; CHECK-NEXT: lxvp vsp36, 224(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-NEXT: dmsha3hash dmrp0, 5 +; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc1, 0 +; CHECK-NEXT: stxvp vsp36, 128(r1) +; CHECK-NEXT: stxvp vsp34, 96(r1) +; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi1, 1 +; CHECK-NEXT: stxvp vsp36, 64(r1) +; CHECK-NEXT: stxvp 
vsp34, 32(r1) +; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-NEXT: stxvp vsp36, 256(r1) +; CHECK-NEXT: stxvp vsp34, 224(r1) +; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp36, 192(r1) +; CHECK-NEXT: stxvp vsp34, 160(r1) +; CHECK-NEXT: bl dummy_func@notoc +; CHECK-NEXT: lxvp vsp34, 128(r1) +; CHECK-NEXT: lxvp vsp36, 96(r1) +; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp34, vsp36, 0 +; CHECK-NEXT: lxvp vsp34, 64(r1) +; CHECK-NEXT: lxvp vsp36, 32(r1) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp34, vsp36, 1 +; CHECK-NEXT: lxvp vsp34, 256(r1) +; CHECK-NEXT: lxvp vsp36, 224(r1) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: lxvp vsp34, 192(r1) +; CHECK-NEXT: lxvp vsp36, 160(r1) +; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxvp vsp34, 224(r30) +; CHECK-NEXT: stxvp vsp36, 192(r30) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; CHECK-NEXT: stxvp vsp34, 160(r30) +; CHECK-NEXT: stxvp vsp36, 128(r30) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-NEXT: stxvp vsp34, 96(r30) +; CHECK-NEXT: stxvp vsp36, 64(r30) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1 +; CHECK-NEXT: stxvp vsp34, 32(r30) +; CHECK-NEXT: stxvp vsp36, 0(r30) +; CHECK-NEXT: ld r30, 288(r1) # 8-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 304 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; AIX-LABEL: test_dmsha3hash: +; AIX: # %bb.0: # %entry +; AIX-NEXT: mflr r0 +; AIX-NEXT: std r0, 16(r1) +; AIX-NEXT: stdu r1, -384(r1) +; AIX-NEXT: std r31, 376(r1) # 8-byte Folded Spill +; AIX-NEXT: lxvp vsp34, 224(r3) +; AIX-NEXT: lxvp vsp36, 192(r3) +; AIX-NEXT: mr r31, r4 +; AIX-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1 +; AIX-NEXT: lxvp vsp34, 160(r3) +; AIX-NEXT: lxvp vsp36, 128(r3) +; AIX-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0 +; AIX-NEXT: lxvp vsp34, 96(r3) +; AIX-NEXT: lxvp vsp36, 64(r3) +; AIX-NEXT: 
dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; AIX-NEXT: lxvp vsp34, 32(r3) +; AIX-NEXT: lxvp vsp36, 0(r3) +; AIX-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; AIX-NEXT: dmsha3hash dmrp0, 5 +; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc1, 0 +; AIX-NEXT: stxvp vsp36, 112(r1) +; AIX-NEXT: stxvp vsp34, 144(r1) +; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi1, 1 +; AIX-NEXT: stxvp vsp36, 176(r1) +; AIX-NEXT: stxvp vsp34, 208(r1) +; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; AIX-NEXT: stxvp vsp36, 240(r1) +; AIX-NEXT: stxvp vsp34, 272(r1) +; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1 +; AIX-NEXT: stxvp vsp36, 304(r1) +; AIX-NEXT: stxvp vsp34, 336(r1) +; AIX-NEXT: bl .dummy_func[PR] +; AIX-NEXT: nop +; AIX-NEXT: lxvp vsp34, 112(r1) +; AIX-NEXT: lxvp vsp36, 144(r1) +; AIX-NEXT: dmxxinstdmr512 wacc1, vsp34, vsp36, 0 +; AIX-NEXT: lxvp vsp34, 176(r1) +; AIX-NEXT: lxvp vsp36, 208(r1) +; AIX-NEXT: dmxxinstdmr512 wacc_hi1, vsp34, vsp36, 1 +; AIX-NEXT: lxvp vsp34, 240(r1) +; AIX-NEXT: lxvp vsp36, 272(r1) +; AIX-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; AIX-NEXT: lxvp vsp34, 304(r1) +; AIX-NEXT: lxvp vsp36, 336(r1) +; AIX-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1 +; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1 +; AIX-NEXT: stxvp vsp36, 224(r31) +; AIX-NEXT: stxvp vsp34, 192(r31) +; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; AIX-NEXT: stxvp vsp36, 160(r31) +; AIX-NEXT: stxvp vsp34, 128(r31) +; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; AIX-NEXT: stxvp vsp36, 96(r31) +; AIX-NEXT: stxvp vsp34, 64(r31) +; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; AIX-NEXT: stxvp vsp36, 32(r31) +; AIX-NEXT: stxvp vsp34, 0(r31) +; AIX-NEXT: ld r31, 376(r1) # 8-byte Folded Reload +; AIX-NEXT: addi r1, r1, 384 +; AIX-NEXT: ld r0, 16(r1) +; AIX-NEXT: mtlr r0 +; AIX-NEXT: blr +; +; AIX32-LABEL: test_dmsha3hash: +; AIX32: # %bb.0: # %entry +; AIX32-NEXT: mflr r0 +; AIX32-NEXT: stw r0, 8(r1) +; AIX32-NEXT: stwu r1, -336(r1) +; AIX32-NEXT: stw r31, 
332(r1) # 4-byte Folded Spill +; AIX32-NEXT: lxvp vsp34, 224(r3) +; AIX32-NEXT: lxvp vsp36, 192(r3) +; AIX32-NEXT: mr r31, r4 +; AIX32-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1 +; AIX32-NEXT: lxvp vsp34, 160(r3) +; AIX32-NEXT: lxvp vsp36, 128(r3) +; AIX32-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0 +; AIX32-NEXT: lxvp vsp34, 96(r3) +; AIX32-NEXT: lxvp vsp36, 64(r3) +; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1 +; AIX32-NEXT: lxvp vsp34, 32(r3) +; AIX32-NEXT: lxvp vsp36, 0(r3) +; AIX32-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; AIX32-NEXT: dmsha3hash dmrp0, 5 +; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc1, 0 +; AIX32-NEXT: stxvp vsp36, 64(r1) +; AIX32-NEXT: stxvp vsp34, 96(r1) +; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi1, 1 +; AIX32-NEXT: stxvp vsp36, 128(r1) +; AIX32-NEXT: stxvp vsp34, 160(r1) +; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; AIX32-NEXT: stxvp vsp36, 192(r1) +; AIX32-NEXT: stxvp vsp34, 224(r1) +; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1 +; AIX32-NEXT: stxvp vsp36, 256(r1) +; AIX32-NEXT: stxvp vsp34, 288(r1) +; AIX32-NEXT: bl .dummy_func[PR] +; AIX32-NEXT: nop +; AIX32-NEXT: lxvp vsp34, 64(r1) +; AIX32-NEXT: lxvp vsp36, 96(r1) +; AIX32-NEXT: dmxxinstdmr512 wacc1, vsp34, vsp36, 0 +; AIX32-NEXT: lxvp vsp34, 128(r1) +; AIX32-NEXT: lxvp vsp36, 160(r1) +; AIX32-NEXT: dmxxinstdmr512 wacc_hi1, vsp34, vsp36, 1 +; AIX32-NEXT: lxvp vsp34, 192(r1) +; AIX32-NEXT: lxvp vsp36, 224(r1) +; AIX32-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; AIX32-NEXT: lxvp vsp34, 256(r1) +; AIX32-NEXT: lxvp vsp36, 288(r1) +; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1 +; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1 +; AIX32-NEXT: stxvp vsp36, 224(r31) +; AIX32-NEXT: stxvp vsp34, 192(r31) +; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; AIX32-NEXT: stxvp vsp36, 160(r31) +; AIX32-NEXT: stxvp vsp34, 128(r31) +; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1 +; AIX32-NEXT: stxvp vsp36, 96(r31) +; 
AIX32-NEXT: stxvp vsp34, 64(r31) +; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; AIX32-NEXT: stxvp vsp36, 32(r31) +; AIX32-NEXT: stxvp vsp34, 0(r31) +; AIX32-NEXT: lwz r31, 332(r1) # 4-byte Folded Reload +; AIX32-NEXT: addi r1, r1, 336 +; AIX32-NEXT: lwz r0, 8(r1) +; AIX32-NEXT: mtlr r0 +; AIX32-NEXT: blr + entry: + %0 = load <2048 x i1>, ptr %vopp, align 64 + %2 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5) + tail call void @dummy_func() + %3 = tail call <2048 x i1> @llvm.ppc.mma.dmsha3hash(<2048 x i1> %0, i32 5) + store <2048 x i1> %2, ptr %resp, align 64 + ret void +} -- cgit v1.2.3 From 835d3034fe96931cf907537b51b9cdd87b59d3ad Mon Sep 17 00:00:00 2001 From: Tomer Shafir Date: Wed, 18 Jun 2025 20:56:33 +0300 Subject: [AArch64] improve zero-cycle regmov test (#143680) - Add a `gpr32` suffix to test name to denote the specific register class being checked - Expand `-mtriple=arm64-apple-ios` to `-march=arm64` to broaden the test context to the generic architecture, as the specific triple is not required - Port `bl` match to Linux too via the regex: `{{_?foo}}` - Advance `-mcpu=cyclone` to the newer M series major `-mcpu=apple-m1` - Use `-mcpu` so that `-mattr=-zcm` has a real effect - Add a test that generic arm64 doesn't optimize for ZCM - Distinguish 4 different assembly layouts: NOTCPU, CPU, NOTATTR, ATTR - Fix broken test logic, for example: `; NOT: mov [[REG2:w[0-9]+]], w3` matched `mov w1, w3` then `REG2` captured `w1` but then `; NOT: mov w1, [[REG2]]` matched by prefix `mov, w1, w19` even though it should have matched `mov w1, w1`. This change adds explicit matches for all of the generated copies. 
--- .../AArch64/arm64-zero-cycle-regmov-gpr32.ll | 45 ++++++++++++++++++++++ .../CodeGen/AArch64/arm64-zero-cycle-regmov.ll | 23 ----------- 2 files changed, 45 insertions(+), 23 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll delete mode 100644 llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll new file mode 100644 index 000000000000..5ef6d3e84805 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s -check-prefixes=NOTCPU --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=CPU --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm | FileCheck %s -check-prefixes=NOTATTR --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm | FileCheck %s -check-prefixes=ATTR --match-full-lines + +define void @t(i32 %a, i32 %b, i32 %c, i32 %d) { +entry: +; CHECK-LABEL: t: +; NOTCPU: mov w0, w2 +; NOTCPU: mov w1, w3 +; NOTCPU: mov [[REG2:w[0-9]+]], w3 +; NOTCPU: mov [[REG1:w[0-9]+]], w2 +; NOTCPU-NEXT: bl {{_?foo}} +; NOTCPU: mov w0, [[REG1]] +; NOTCPU: mov w1, [[REG2]] + +; CPU: mov [[REG2:x[0-9]+]], x3 +; CPU: mov [[REG1:x[0-9]+]], x2 +; CPU: mov x0, x2 +; CPU: mov x1, x3 +; CPU-NEXT: bl {{_?foo}} +; CPU: mov x0, [[REG1]] +; CPU: mov x1, [[REG2]] + +; NOTATTR: mov [[REG2:w[0-9]+]], w3 +; NOTATTR: mov [[REG1:w[0-9]+]], w2 +; NOTATTR: mov w0, w2 +; NOTATTR: mov w1, w3 +; NOTATTR-NEXT: bl {{_?foo}} +; NOTATTR: mov w0, [[REG1]] +; NOTATTR: mov w1, [[REG2]] + +; ATTR: mov x0, x2 +; ATTR: mov x1, x3 +; ATTR: mov [[REG2:x[0-9]+]], x3 +; ATTR: mov [[REG1:x[0-9]+]], x2 +; ATTR-NEXT: bl {{_?foo}} +; ATTR: mov x0, [[REG1]] +; ATTR: mov x1, [[REG2]] + %call = call i32 @foo(i32 %c, i32 %d) + %call1 = call i32 @foo(i32 
%c, i32 %d) + unreachable +} + +declare i32 @foo(i32, i32) diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll deleted file mode 100644 index b390853d44bf..000000000000 --- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios -mattr=-zcm | FileCheck %s -check-prefixes=CHECK,NOT -; RUN: llc < %s -mtriple=arm64-apple-ios -mattr=+zcm | FileCheck %s -check-prefixes=CHECK,YES -; RUN: llc < %s -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s -check-prefixes=CHECK,YES - -; rdar://12254953 -define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind ssp { -entry: -; CHECK-LABEL: t: -; NOT: mov [[REG2:w[0-9]+]], w3 -; NOT: mov [[REG1:w[0-9]+]], w2 -; YES: mov [[REG2:x[0-9]+]], x3 -; YES: mov [[REG1:x[0-9]+]], x2 -; CHECK: bl _foo -; NOT: mov w0, [[REG1]] -; NOT: mov w1, [[REG2]] -; YES: mov x0, [[REG1]] -; YES: mov x1, [[REG2]] - %call = call i32 @foo(i32 %c, i32 %d) nounwind - %call1 = call i32 @foo(i32 %c, i32 %d) nounwind - unreachable -} - -declare i32 @foo(i32, i32) -- cgit v1.2.3 From 6f4e4ea17745d1414519651eb4067ce14031ea93 Mon Sep 17 00:00:00 2001 From: sribee8 Date: Wed, 18 Jun 2025 10:56:57 -0700 Subject: [libc] Internal getrandom implementation (#144427) Implemented an internal getrandom to avoid calls to the public one in table.h --------- Co-authored-by: Sriya Pratipati --- libc/src/__support/HashTable/CMakeLists.txt | 3 ++- libc/src/__support/HashTable/randomness.h | 16 ++++++------ libc/src/__support/OSUtil/linux/CMakeLists.txt | 13 ++++++++++ libc/src/__support/OSUtil/linux/getrandom.h | 35 ++++++++++++++++++++++++++ libc/src/sys/random/linux/getrandom.cpp | 13 +++++----- 5 files changed, 63 insertions(+), 17 deletions(-) create mode 100644 libc/src/__support/OSUtil/linux/getrandom.h diff --git a/libc/src/__support/HashTable/CMakeLists.txt b/libc/src/__support/HashTable/CMakeLists.txt index a1de0680cc7d..698b8d0dfa68 
100644 --- a/libc/src/__support/HashTable/CMakeLists.txt +++ b/libc/src/__support/HashTable/CMakeLists.txt @@ -15,7 +15,8 @@ if (NOT ${getrandom_index} EQUAL -1) message(STATUS "Using getrandom for hashtable randomness") set(randomness_compile_flags -DLIBC_HASHTABLE_USE_GETRANDOM) set(randomness_extra_depends - libc.src.sys.random.getrandom libc.src.errno.errno) + libc.src.__support.OSUtil.linux.getrandom + libc.hdr.errno_macros) endif() diff --git a/libc/src/__support/HashTable/randomness.h b/libc/src/__support/HashTable/randomness.h index 6b58a4125f78..7e54c9aa6ad1 100644 --- a/libc/src/__support/HashTable/randomness.h +++ b/libc/src/__support/HashTable/randomness.h @@ -14,8 +14,8 @@ #include "src/__support/macros/attributes.h" #include "src/__support/macros/config.h" #if defined(LIBC_HASHTABLE_USE_GETRANDOM) -#include "src/__support/libc_errno.h" -#include "src/sys/random/getrandom.h" +#include "hdr/errno_macros.h" +#include "src/__support/OSUtil/linux/getrandom.h" #endif namespace LIBC_NAMESPACE_DECL { @@ -35,20 +35,18 @@ LIBC_INLINE uint64_t next_random_seed() { entropy[0] = reinterpret_cast(&entropy); entropy[1] = reinterpret_cast(&state); #if defined(LIBC_HASHTABLE_USE_GETRANDOM) - int errno_backup = libc_errno; size_t count = sizeof(entropy); uint8_t *buffer = reinterpret_cast(entropy); while (count > 0) { - ssize_t len = getrandom(buffer, count, 0); - if (len == -1) { - if (libc_errno == ENOSYS) + auto len = internal::getrandom(buffer, count, 0); + if (!len.has_value()) { + if (len.error() == ENOSYS) break; continue; } - count -= len; - buffer += len; + count -= len.value(); + buffer += len.value(); } - libc_errno = errno_backup; #endif state.update(&entropy, sizeof(entropy)); } diff --git a/libc/src/__support/OSUtil/linux/CMakeLists.txt b/libc/src/__support/OSUtil/linux/CMakeLists.txt index 4681d8c2bb73..f303e54ce7b3 100644 --- a/libc/src/__support/OSUtil/linux/CMakeLists.txt +++ b/libc/src/__support/OSUtil/linux/CMakeLists.txt @@ -24,6 +24,19 @@ 
add_object_library( libc.include.sys_syscall ) +add_header_library( + getrandom + HDRS + getrandom.h + DEPENDS + libc.src.__support.OSUtil.osutil + libc.src.__support.common + libc.src.__support.error_or + libc.src.__support.macros.config + libc.hdr.types.ssize_t + libc.include.sys_syscall +) + add_header_library( vdso_sym HDRS diff --git a/libc/src/__support/OSUtil/linux/getrandom.h b/libc/src/__support/OSUtil/linux/getrandom.h new file mode 100644 index 000000000000..793639472fee --- /dev/null +++ b/libc/src/__support/OSUtil/linux/getrandom.h @@ -0,0 +1,35 @@ +//===------------ Implementation of getrandom function ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_GETRANDOM_H +#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_GETRANDOM_H + +#include "hdr/types/ssize_t.h" +#include "src/__support/OSUtil/linux/syscall.h" // syscall_impl +#include "src/__support/common.h" +#include "src/__support/error_or.h" +#include "src/__support/macros/config.h" +#include // For syscall numbers + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +LIBC_INLINE static ErrorOr getrandom(void *buf, size_t buflen, + unsigned int flags) { + ssize_t ret = + LIBC_NAMESPACE::syscall_impl(SYS_getrandom, buf, buflen, flags); + if (ret < 0) { + return Error(-static_cast(ret)); + } + return ret; +} + +} // namespace internal +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_GETRANDOM_H diff --git a/libc/src/sys/random/linux/getrandom.cpp b/libc/src/sys/random/linux/getrandom.cpp index 0b8471ed8b37..4a95bddfa428 100644 --- a/libc/src/sys/random/linux/getrandom.cpp +++ b/libc/src/sys/random/linux/getrandom.cpp @@ -8,24 +8,23 @@ #include 
"src/sys/random/getrandom.h" +#include "src/__support/OSUtil/linux/getrandom.h" #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" - +#include "src/__support/error_or.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(ssize_t, getrandom, (void *buf, size_t buflen, unsigned int flags)) { - ssize_t ret = - LIBC_NAMESPACE::syscall_impl(SYS_getrandom, buf, buflen, flags); - if (ret < 0) { - libc_errno = static_cast(-ret); + auto rand = internal::getrandom(buf, buflen, flags); + if (!rand.has_value()) { + libc_errno = static_cast(rand.error()); return -1; } - return ret; + return rand.value(); } } // namespace LIBC_NAMESPACE_DECL -- cgit v1.2.3 From dfe4d44d8de645d151d3483272c1c1f80c27ab31 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 18 Jun 2025 11:00:13 -0700 Subject: Revert "[VPlan] Remove unnecessary DomTreeUpdater flush (NFC)." (#144758) This reverts commit 2e337349f436d75af112c081df5ec683871cbcc8. Causes breakages internally, will post reproducer later. --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 4332332ef5cc..773a5a4a829c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1015,6 +1015,8 @@ void VPlan::execute(VPTransformState *State) { for (VPBlockBase *Block : RPOT) Block->execute(State); + State->CFG.DTU.flush(); + VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT); if (!Header) return; -- cgit v1.2.3 From 071a6feabd7aeec2c1239719f50f6912cf94d00a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 18 Jun 2025 19:02:17 +0100 Subject: [TTI] Remove PPC hasActiveVectorLength impl, simplify interface (NFC). 
(#142310) PPCTTIImpl defines hasActiveVectorLength and also getVPMemoryOpCost, but they appear unused (i.e. no changes to tests). Remove them, as they complicate the interface for hasActiveVectorLength. This simplifies the only use in LV as now no placeholder values need to be passed. PR: https://github.com/llvm/llvm-project/pull/142310 --- llvm/include/llvm/Analysis/TargetTransformInfo.h | 8 +-- .../llvm/Analysis/TargetTransformInfoImpl.h | 5 +- llvm/lib/Analysis/TargetTransformInfo.cpp | 5 +- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 82 ---------------------- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 6 -- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 2 +- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 13 +--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +- 8 files changed, 11 insertions(+), 114 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 8f4ce80ada5e..9dc4eca82492 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1850,11 +1850,9 @@ public: /// \name Vector Predication Information /// @{ /// Whether the target supports the %evl parameter of VP intrinsic efficiently - /// in hardware, for the given opcode and type/alignment. (see LLVM Language - /// Reference - "Vector Predication Intrinsics"). - /// Use of %evl is discouraged when that is not the case. - LLVM_ABI bool hasActiveVectorLength(unsigned Opcode, Type *DataType, - Align Alignment) const; + /// in hardware. (see LLVM Language Reference - "Vector Predication + /// Intrinsics"). Use of %evl is discouraged when that is not the case. + LLVM_ABI bool hasActiveVectorLength() const; /// Return true if sinking I's operands to the same basic block as I is /// profitable, e.g. 
because the operands can be folded into a target diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index a80b4c5179ba..d93375218394 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1108,10 +1108,7 @@ public: virtual bool enableScalableVectorization() const { return false; } - virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, - Align Alignment) const { - return false; - } + virtual bool hasActiveVectorLength() const { return false; } virtual bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl &Ops) const { diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 2d053e55bdfa..d9cb11de9c09 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1454,9 +1454,8 @@ bool TargetTransformInfo::enableScalableVectorization() const { return TTIImpl->enableScalableVectorization(); } -bool TargetTransformInfo::hasActiveVectorLength(unsigned Opcode, Type *DataType, - Align Alignment) const { - return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment); +bool TargetTransformInfo::hasActiveVectorLength() const { + return TTIImpl->hasActiveVectorLength(); } bool TargetTransformInfo::isProfitableToSinkOperands( diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index f9e77f2abdca..cd9b226ca82d 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1027,88 +1027,6 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, return false; } -bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType, - Align Alignment) const { - // Only load and stores instructions can have variable vector length on Power. 
- if (Opcode != Instruction::Load && Opcode != Instruction::Store) - return false; - // Loads/stores with length instructions use bits 0-7 of the GPR operand and - // therefore cannot be used in 32-bit mode. - if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64()) - return false; - if (isa(DataType)) { - unsigned VecWidth = DataType->getPrimitiveSizeInBits(); - return VecWidth == 128; - } - Type *ScalarTy = DataType->getScalarType(); - - if (ScalarTy->isPointerTy()) - return true; - - if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) - return true; - - if (!ScalarTy->isIntegerTy()) - return false; - - unsigned IntWidth = ScalarTy->getIntegerBitWidth(); - return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64; -} - -InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src, - Align Alignment, - unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I) const { - InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment, - AddressSpace, CostKind, I); - if (TLI->getValueType(DL, Src, true) == MVT::Other) - return Cost; - // TODO: Handle other cost kinds. - if (CostKind != TTI::TCK_RecipThroughput) - return Cost; - - assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && - "Invalid Opcode"); - - auto *SrcVTy = dyn_cast(Src); - assert(SrcVTy && "Expected a vector type for VP memory operations"); - - if (hasActiveVectorLength(Opcode, Src, Alignment)) { - std::pair LT = getTypeLegalizationCost(SrcVTy); - - InstructionCost CostFactor = - vectorCostAdjustmentFactor(Opcode, Src, nullptr); - if (!CostFactor.isValid()) - return InstructionCost::getMax(); - - InstructionCost Cost = LT.first * CostFactor; - assert(Cost.isValid() && "Expected valid cost"); - - // On P9 but not on P10, if the op is misaligned then it will cause a - // pipeline flush. Otherwise the VSX masked memops cost the same as unmasked - // ones. 
- const Align DesiredAlignment(16); - if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9) - return Cost; - - // Since alignment may be under estimated, we try to compute the probability - // that the actual address is aligned to the desired boundary. For example - // an 8-byte aligned load is assumed to be actually 16-byte aligned half the - // time, while a 4-byte aligned load has a 25% chance of being 16-byte - // aligned. - float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value(); - float MisalignmentProb = 1.0 - AlignmentProb; - return (MisalignmentProb * P9PipelineFlushEstimate) + - (AlignmentProb * Cost.getValue()); - } - - // Usually we should not get to this point, but the following is an attempt to - // model the cost of legalization. Currently we can only lower intrinsics with - // evl but no mask, on Power 9/10. Otherwise, we must scalarize. - return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); -} - bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const { return TLI->supportsTailCallFor(CB); } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index 8618f3064c18..bc5f7a4d06de 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -148,12 +148,6 @@ public: const Function *Callee) const override; bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef &Types) const override; - bool hasActiveVectorLength(unsigned Opcode, Type *DataType, - Align Alignment) const override; - InstructionCost - getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, - unsigned AddressSpace, TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) const override; bool supportsTailCallFor(const CallBase *CB) const override; private: diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp 
b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 46e30ce4c18a..63c5f17a8487 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -282,7 +282,7 @@ RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, return TTI::TCC_Free; } -bool RISCVTTIImpl::hasActiveVectorLength(unsigned, Type *DataTy, Align) const { +bool RISCVTTIImpl::hasActiveVectorLength() const { return ST->hasVInstructions(); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index dd7e9f7709f8..75d377abb0e7 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -90,19 +90,12 @@ public: /// \name EVL Support for predicated vectorization. /// Whether the target supports the %evl parameter of VP intrinsic efficiently - /// in hardware, for the given opcode and type/alignment. (see LLVM Language - /// Reference - "Vector Predication Intrinsics", + /// in hardware. (see LLVM Language Reference - "Vector Predication + /// Intrinsics", /// https://llvm.org/docs/LangRef.html#vector-predication-intrinsics and /// "IR-level VP intrinsics", /// https://llvm.org/docs/Proposals/VectorPredication.html#ir-level-vp-intrinsics). - /// \param Opcode the opcode of the instruction checked for predicated version - /// support. - /// \param DataType the type of the instruction with the \p Opcode checked for - /// prediction support. - /// \param Alignment the alignment for memory access operation checked for - /// predicated version support. 
- bool hasActiveVectorLength(unsigned Opcode, Type *DataType, - Align Alignment) const override; + bool hasActiveVectorLength() const override; TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2f4416d2782e..3b16248f962b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1377,11 +1377,9 @@ public: if (ForceTailFoldingStyle != TailFoldingStyle::DataWithEVL) return; // Override forced styles if needed. - // FIXME: use actual opcode/data type for analysis here. // FIXME: Investigate opportunity for fixed vector factor. bool EVLIsLegal = UserIC <= 1 && IsScalableVF && - TTI.hasActiveVectorLength(0, nullptr, Align()) && - !EnableVPlanNativePath; + TTI.hasActiveVectorLength() && !EnableVPlanNativePath; if (!EVLIsLegal) { // If for some reason EVL mode is unsupported, fallback to // DataWithoutLaneMask to try to vectorize the loop with folded tail -- cgit v1.2.3 From 3f3526f36d23eac8d099e8e887a924c94000bbfa Mon Sep 17 00:00:00 2001 From: zhijian lin Date: Wed, 18 Jun 2025 14:15:30 -0400 Subject: [NFC][PowerPC] pre-commit running the update_llc_test_checks.py for all-atomics.ll,loop-comment.ll etc (#144411) Run the update_llc_test_checks.py for all-atomics.ll,loop-comment.ll ,PR35812-neg-cmpxchg.ll (Pre-commit patch for the https://github.com/llvm/llvm-project/pull/144089) --- llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll | 94 +- llvm/test/CodeGen/PowerPC/all-atomics.ll | 1929 +++++++++++----------- llvm/test/CodeGen/PowerPC/loop-comment.ll | 14 +- 3 files changed, 1020 insertions(+), 1017 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll index 1a8dabc5ad71..dac17dc3225e 100644 --- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll +++ 
b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll @@ -18,54 +18,54 @@ define signext i32 @main() nounwind { ; CHECK-NEXT: sth 3, 46(1) ; CHECK-NEXT: addi 3, 1, 46 ; CHECK-NEXT: lharx 4, 0, 3 -; CHECK-NEXT: clrlwi 4, 4, 16 -; CHECK-NEXT: cmplwi 4, 33059 -; CHECK-NEXT: bne 0, .LBB0_4 -; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore +; CHECK-NEXT: clrlwi 4, 4, 16 +; CHECK-NEXT: cmplwi 4, 33059 +; CHECK-NEXT: bne 0, .LBB0_4 +; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore ; CHECK-NEXT: sync ; CHECK-NEXT: li 4, 234 -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB0_2: # %cmpxchg.trystore -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_2: # %cmpxchg.trystore +; CHECK-NEXT: # ; CHECK-NEXT: sthcx. 4, 0, 3 -; CHECK-NEXT: beq 0, .LBB0_7 -; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload -; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: beq 0, .LBB0_7 +; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload +; CHECK-NEXT: # ; CHECK-NEXT: lharx 5, 0, 3 -; CHECK-NEXT: clrlwi 5, 5, 16 -; CHECK-NEXT: cmplwi 5, 33059 -; CHECK-NEXT: beq 0, .LBB0_2 -; CHECK-NEXT: .LBB0_4: # %cmpxchg.nostore +; CHECK-NEXT: clrlwi 5, 5, 16 +; CHECK-NEXT: cmplwi 5, 33059 +; CHECK-NEXT: beq 0, .LBB0_2 +; CHECK-NEXT: .LBB0_4: # %cmpxchg.nostore ; CHECK-NEXT: lwsync ; CHECK-NEXT: b .LBB0_8 -; CHECK-NEXT: .LBB0_5: # %L.B0000 +; CHECK-NEXT: .LBB0_5: # %L.B0000 ; CHECK-NEXT: lhz 3, 46(1) -; CHECK-NEXT: cmplwi 3, 234 -; CHECK-NEXT: bne 0, .LBB0_9 -; CHECK-NEXT: # %bb.6: # %L.B0001 +; CHECK-NEXT: cmplwi 3, 234 +; CHECK-NEXT: bne 0, .LBB0_9 +; CHECK-NEXT: # %bb.6: # %L.B0001 ; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha ; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l ; CHECK-NEXT: bl puts ; CHECK-NEXT: nop ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: b .LBB0_11 -; CHECK-NEXT: .LBB0_7: # %cmpxchg.success +; CHECK-NEXT: .LBB0_7: # %cmpxchg.success ; CHECK-NEXT: lwsync ; CHECK-NEXT: b .LBB0_5 -; CHECK-NEXT: .LBB0_8: # %L.B0003 +; CHECK-NEXT: .LBB0_8: # %L.B0003 ; CHECK-NEXT: 
addis 3, 2, .L_MergedGlobals@toc@ha ; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l ; CHECK-NEXT: addi 3, 3, 16 ; CHECK-NEXT: b .LBB0_10 -; CHECK-NEXT: .LBB0_9: # %L.B0005 +; CHECK-NEXT: .LBB0_9: # %L.B0005 ; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha ; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l ; CHECK-NEXT: addi 3, 3, 64 -; CHECK-NEXT: .LBB0_10: # %L.B0003 +; CHECK-NEXT: .LBB0_10: # %L.B0003 ; CHECK-NEXT: bl puts ; CHECK-NEXT: nop ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB0_11: # %L.B0003 +; CHECK-NEXT: .LBB0_11: # %L.B0003 ; CHECK-NEXT: addi 1, 1, 48 ; CHECK-NEXT: ld 0, 16(1) ; CHECK-NEXT: mtlr 0 @@ -83,62 +83,62 @@ define signext i32 @main() nounwind { ; CHECK-P7-NEXT: rlwinm 4, 4, 3, 27, 27 ; CHECK-P7-NEXT: lwarx 5, 0, 3 ; CHECK-P7-NEXT: srw 6, 5, 4 -; CHECK-P7-NEXT: clrlwi 6, 6, 16 -; CHECK-P7-NEXT: cmplwi 6, 33059 -; CHECK-P7-NEXT: bne 0, .LBB0_4 -; CHECK-P7-NEXT: # %bb.1: # %cmpxchg.fencedstore +; CHECK-P7-NEXT: clrlwi 6, 6, 16 +; CHECK-P7-NEXT: cmplwi 6, 33059 +; CHECK-P7-NEXT: bne 0, .LBB0_4 +; CHECK-P7-NEXT: # %bb.1: # %cmpxchg.fencedstore ; CHECK-P7-NEXT: lis 6, 0 ; CHECK-P7-NEXT: li 7, 234 ; CHECK-P7-NEXT: sync ; CHECK-P7-NEXT: ori 6, 6, 65535 ; CHECK-P7-NEXT: slw 7, 7, 4 ; CHECK-P7-NEXT: slw 6, 6, 4 -; CHECK-P7-NEXT: not 6, 6 -; CHECK-P7-NEXT: .p2align 4 -; CHECK-P7-NEXT: .LBB0_2: # %cmpxchg.trystore -; CHECK-P7-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-P7-NEXT: not 6, 6 +; CHECK-P7-NEXT: .p2align 4 +; CHECK-P7-NEXT: .LBB0_2: # %cmpxchg.trystore +; CHECK-P7-NEXT: # ; CHECK-P7-NEXT: and 5, 5, 6 ; CHECK-P7-NEXT: or 5, 5, 7 ; CHECK-P7-NEXT: stwcx. 
5, 0, 3 -; CHECK-P7-NEXT: beq 0, .LBB0_7 -; CHECK-P7-NEXT: # %bb.3: # %cmpxchg.releasedload -; CHECK-P7-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-P7-NEXT: beq 0, .LBB0_7 +; CHECK-P7-NEXT: # %bb.3: # %cmpxchg.releasedload +; CHECK-P7-NEXT: # ; CHECK-P7-NEXT: lwarx 5, 0, 3 ; CHECK-P7-NEXT: srw 8, 5, 4 -; CHECK-P7-NEXT: clrlwi 8, 8, 16 -; CHECK-P7-NEXT: cmplwi 8, 33059 -; CHECK-P7-NEXT: beq 0, .LBB0_2 -; CHECK-P7-NEXT: .LBB0_4: # %cmpxchg.nostore +; CHECK-P7-NEXT: clrlwi 8, 8, 16 +; CHECK-P7-NEXT: cmplwi 8, 33059 +; CHECK-P7-NEXT: beq 0, .LBB0_2 +; CHECK-P7-NEXT: .LBB0_4: # %cmpxchg.nostore ; CHECK-P7-NEXT: lwsync ; CHECK-P7-NEXT: b .LBB0_8 -; CHECK-P7-NEXT: .LBB0_5: # %L.B0000 +; CHECK-P7-NEXT: .LBB0_5: # %L.B0000 ; CHECK-P7-NEXT: lhz 3, 46(1) -; CHECK-P7-NEXT: cmplwi 3, 234 -; CHECK-P7-NEXT: bne 0, .LBB0_9 -; CHECK-P7-NEXT: # %bb.6: # %L.B0001 +; CHECK-P7-NEXT: cmplwi 3, 234 +; CHECK-P7-NEXT: bne 0, .LBB0_9 +; CHECK-P7-NEXT: # %bb.6: # %L.B0001 ; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha ; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l ; CHECK-P7-NEXT: bl puts ; CHECK-P7-NEXT: nop ; CHECK-P7-NEXT: li 3, 0 ; CHECK-P7-NEXT: b .LBB0_11 -; CHECK-P7-NEXT: .LBB0_7: # %cmpxchg.success +; CHECK-P7-NEXT: .LBB0_7: # %cmpxchg.success ; CHECK-P7-NEXT: lwsync ; CHECK-P7-NEXT: b .LBB0_5 -; CHECK-P7-NEXT: .LBB0_8: # %L.B0003 +; CHECK-P7-NEXT: .LBB0_8: # %L.B0003 ; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha ; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l ; CHECK-P7-NEXT: addi 3, 3, 16 ; CHECK-P7-NEXT: b .LBB0_10 -; CHECK-P7-NEXT: .LBB0_9: # %L.B0005 +; CHECK-P7-NEXT: .LBB0_9: # %L.B0005 ; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha ; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l ; CHECK-P7-NEXT: addi 3, 3, 64 -; CHECK-P7-NEXT: .LBB0_10: # %L.B0003 +; CHECK-P7-NEXT: .LBB0_10: # %L.B0003 ; CHECK-P7-NEXT: bl puts ; CHECK-P7-NEXT: nop ; CHECK-P7-NEXT: li 3, 1 -; CHECK-P7-NEXT: .LBB0_11: # %L.B0003 +; CHECK-P7-NEXT: .LBB0_11: # %L.B0003 ; 
CHECK-P7-NEXT: addi 1, 1, 48 ; CHECK-P7-NEXT: ld 0, 16(1) ; CHECK-P7-NEXT: mtlr 0 diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll index 67cee358882f..5e14fbbb6ad6 100644 --- a/llvm/test/CodeGen/PowerPC/all-atomics.ll +++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll @@ -4336,959 +4336,959 @@ entry: define dso_local void @test_compare_and_swap() local_unnamed_addr #0 { ; CHECK-LABEL: test_compare_and_swap: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 4, 2, sc@toc@ha -; CHECK-NEXT: addis 3, 2, uc@toc@ha -; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill -; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill -; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 6, 4, sc@toc@l -; CHECK-NEXT: lbz 7, uc@toc@l(3) -; CHECK-NEXT: lbz 8, sc@toc@l(4) -; CHECK-NEXT: lbarx 5, 0, 6 -; CHECK-NEXT: clrlwi 9, 5, 24 -; CHECK-NEXT: cmplw 9, 7 -; CHECK-NEXT: bne 0, .LBB3_4 -; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore276 -; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_2: # %cmpxchg.trystore275 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: stbcx. 8, 0, 6 -; CHECK-NEXT: beq 0, .LBB3_4 -; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload274 -; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 -; CHECK-NEXT: lbarx 5, 0, 6 -; CHECK-NEXT: clrlwi 9, 5, 24 -; CHECK-NEXT: cmplw 9, 7 -; CHECK-NEXT: beq 0, .LBB3_2 -; CHECK-NEXT: .LBB3_4: # %cmpxchg.nostore272 -; CHECK-NEXT: addi 7, 3, uc@toc@l -; CHECK-NEXT: lwsync -; CHECK-NEXT: stb 5, sc@toc@l(4) -; CHECK-NEXT: lbz 9, uc@toc@l(3) -; CHECK-NEXT: lbarx 8, 0, 7 -; CHECK-NEXT: clrlwi 10, 8, 24 -; CHECK-NEXT: cmplw 10, 9 -; CHECK-NEXT: bne 0, .LBB3_8 -; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore257 -; CHECK-NEXT: sync -; CHECK-NEXT: clrlwi 5, 5, 24 -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_6: # %cmpxchg.trystore256 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: stbcx. 
5, 0, 7 -; CHECK-NEXT: beq 0, .LBB3_8 -; CHECK-NEXT: # %bb.7: # %cmpxchg.releasedload255 -; CHECK-NEXT: # in Loop: Header=BB3_6 Depth=1 -; CHECK-NEXT: lbarx 8, 0, 7 -; CHECK-NEXT: clrlwi 10, 8, 24 -; CHECK-NEXT: cmplw 10, 9 -; CHECK-NEXT: beq 0, .LBB3_6 -; CHECK-NEXT: .LBB3_8: # %cmpxchg.nostore253 -; CHECK-NEXT: addis 5, 2, ss@toc@ha -; CHECK-NEXT: lwsync -; CHECK-NEXT: stb 8, uc@toc@l(3) -; CHECK-NEXT: clrlwi 10, 8, 24 -; CHECK-NEXT: lbz 11, sc@toc@l(4) -; CHECK-NEXT: addi 8, 5, ss@toc@l -; CHECK-NEXT: lharx 9, 0, 8 -; CHECK-NEXT: clrlwi 12, 9, 16 -; CHECK-NEXT: cmplw 12, 10 -; CHECK-NEXT: bne 0, .LBB3_12 -; CHECK-NEXT: # %bb.9: # %cmpxchg.fencedstore238 -; CHECK-NEXT: extsb 11, 11 -; CHECK-NEXT: sync -; CHECK-NEXT: clrlwi 11, 11, 16 -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_10: # %cmpxchg.trystore237 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sthcx. 11, 0, 8 -; CHECK-NEXT: beq 0, .LBB3_12 -; CHECK-NEXT: # %bb.11: # %cmpxchg.releasedload236 -; CHECK-NEXT: # in Loop: Header=BB3_10 Depth=1 -; CHECK-NEXT: lharx 9, 0, 8 -; CHECK-NEXT: clrlwi 12, 9, 16 -; CHECK-NEXT: cmplw 12, 10 -; CHECK-NEXT: beq 0, .LBB3_10 -; CHECK-NEXT: .LBB3_12: # %cmpxchg.nostore234 -; CHECK-NEXT: lwsync -; CHECK-NEXT: sth 9, ss@toc@l(5) -; CHECK-NEXT: addis 5, 2, us@toc@ha -; CHECK-NEXT: lbz 11, uc@toc@l(3) -; CHECK-NEXT: lbz 12, sc@toc@l(4) -; CHECK-NEXT: addi 9, 5, us@toc@l -; CHECK-NEXT: lharx 10, 0, 9 -; CHECK-NEXT: clrlwi 0, 10, 16 -; CHECK-NEXT: cmplw 0, 11 -; CHECK-NEXT: bne 0, .LBB3_16 -; CHECK-NEXT: # %bb.13: # %cmpxchg.fencedstore219 -; CHECK-NEXT: extsb 12, 12 -; CHECK-NEXT: sync -; CHECK-NEXT: clrlwi 12, 12, 16 -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_14: # %cmpxchg.trystore218 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sthcx. 
12, 0, 9 -; CHECK-NEXT: beq 0, .LBB3_16 -; CHECK-NEXT: # %bb.15: # %cmpxchg.releasedload217 -; CHECK-NEXT: # in Loop: Header=BB3_14 Depth=1 -; CHECK-NEXT: lharx 10, 0, 9 -; CHECK-NEXT: clrlwi 0, 10, 16 -; CHECK-NEXT: cmplw 0, 11 -; CHECK-NEXT: beq 0, .LBB3_14 -; CHECK-NEXT: .LBB3_16: # %cmpxchg.nostore215 -; CHECK-NEXT: lwsync -; CHECK-NEXT: sth 10, us@toc@l(5) -; CHECK-NEXT: addis 5, 2, si@toc@ha -; CHECK-NEXT: lbz 12, uc@toc@l(3) -; CHECK-NEXT: lbz 0, sc@toc@l(4) -; CHECK-NEXT: addi 10, 5, si@toc@l -; CHECK-NEXT: lwarx 11, 0, 10 -; CHECK-NEXT: cmplw 11, 12 -; CHECK-NEXT: bne 0, .LBB3_20 -; CHECK-NEXT: # %bb.17: # %cmpxchg.fencedstore200 -; CHECK-NEXT: extsb 0, 0 -; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_18: # %cmpxchg.trystore199 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: stwcx. 0, 0, 10 -; CHECK-NEXT: beq 0, .LBB3_20 -; CHECK-NEXT: # %bb.19: # %cmpxchg.releasedload198 -; CHECK-NEXT: # in Loop: Header=BB3_18 Depth=1 -; CHECK-NEXT: lwarx 11, 0, 10 -; CHECK-NEXT: cmplw 11, 12 -; CHECK-NEXT: beq 0, .LBB3_18 -; CHECK-NEXT: .LBB3_20: # %cmpxchg.nostore196 -; CHECK-NEXT: lwsync -; CHECK-NEXT: stw 11, si@toc@l(5) -; CHECK-NEXT: addis 5, 2, ui@toc@ha -; CHECK-NEXT: lbz 0, uc@toc@l(3) -; CHECK-NEXT: lbz 30, sc@toc@l(4) -; CHECK-NEXT: addi 11, 5, ui@toc@l -; CHECK-NEXT: lwarx 12, 0, 11 -; CHECK-NEXT: cmplw 12, 0 -; CHECK-NEXT: bne 0, .LBB3_24 -; CHECK-NEXT: # %bb.21: # %cmpxchg.fencedstore181 -; CHECK-NEXT: extsb 30, 30 -; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_22: # %cmpxchg.trystore180 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: stwcx. 
30, 0, 11 -; CHECK-NEXT: beq 0, .LBB3_24 -; CHECK-NEXT: # %bb.23: # %cmpxchg.releasedload179 -; CHECK-NEXT: # in Loop: Header=BB3_22 Depth=1 -; CHECK-NEXT: lwarx 12, 0, 11 -; CHECK-NEXT: cmplw 12, 0 -; CHECK-NEXT: beq 0, .LBB3_22 -; CHECK-NEXT: .LBB3_24: # %cmpxchg.nostore177 -; CHECK-NEXT: addis 30, 2, sll@toc@ha -; CHECK-NEXT: lwsync -; CHECK-NEXT: stw 12, ui@toc@l(5) -; CHECK-NEXT: lbz 29, uc@toc@l(3) -; CHECK-NEXT: lbz 28, sc@toc@l(4) -; CHECK-NEXT: addi 12, 30, sll@toc@l -; CHECK-NEXT: ldarx 0, 0, 12 -; CHECK-NEXT: cmpld 0, 29 -; CHECK-NEXT: bne 0, .LBB3_28 -; CHECK-NEXT: # %bb.25: # %cmpxchg.fencedstore162 -; CHECK-NEXT: extsb 28, 28 -; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_26: # %cmpxchg.trystore161 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: stdcx. 28, 0, 12 -; CHECK-NEXT: beq 0, .LBB3_28 -; CHECK-NEXT: # %bb.27: # %cmpxchg.releasedload160 -; CHECK-NEXT: # in Loop: Header=BB3_26 Depth=1 -; CHECK-NEXT: ldarx 0, 0, 12 -; CHECK-NEXT: cmpld 0, 29 -; CHECK-NEXT: beq 0, .LBB3_26 -; CHECK-NEXT: .LBB3_28: # %cmpxchg.nostore158 -; CHECK-NEXT: lwsync -; CHECK-NEXT: std 0, sll@toc@l(30) -; CHECK-NEXT: addis 30, 2, ull@toc@ha -; CHECK-NEXT: lbz 28, uc@toc@l(3) -; CHECK-NEXT: lbz 27, sc@toc@l(4) -; CHECK-NEXT: addi 0, 30, ull@toc@l -; CHECK-NEXT: ldarx 29, 0, 0 -; CHECK-NEXT: cmpld 29, 28 -; CHECK-NEXT: bne 0, .LBB3_32 -; CHECK-NEXT: # %bb.29: # %cmpxchg.fencedstore143 -; CHECK-NEXT: extsb 27, 27 -; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_30: # %cmpxchg.trystore142 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: stdcx. 
27, 0, 0 -; CHECK-NEXT: beq 0, .LBB3_32 -; CHECK-NEXT: # %bb.31: # %cmpxchg.releasedload141 -; CHECK-NEXT: # in Loop: Header=BB3_30 Depth=1 -; CHECK-NEXT: ldarx 29, 0, 0 -; CHECK-NEXT: cmpld 29, 28 -; CHECK-NEXT: beq 0, .LBB3_30 -; CHECK-NEXT: .LBB3_32: # %cmpxchg.nostore139 -; CHECK-NEXT: lwsync -; CHECK-NEXT: std 29, ull@toc@l(30) -; CHECK-NEXT: lbz 30, uc@toc@l(3) -; CHECK-NEXT: lbz 29, sc@toc@l(4) -; CHECK-NEXT: lbarx 28, 0, 6 -; CHECK-NEXT: clrlwi 28, 28, 24 -; CHECK-NEXT: cmplw 28, 30 -; CHECK-NEXT: bne 0, .LBB3_36 -; CHECK-NEXT: # %bb.33: # %cmpxchg.fencedstore124 -; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_34: # %cmpxchg.trystore123 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: stbcx. 29, 0, 6 -; CHECK-NEXT: beq 0, .LBB3_37 -; CHECK-NEXT: # %bb.35: # %cmpxchg.releasedload122 -; CHECK-NEXT: # in Loop: Header=BB3_34 Depth=1 -; CHECK-NEXT: lbarx 28, 0, 6 -; CHECK-NEXT: clrlwi 28, 28, 24 -; CHECK-NEXT: cmplw 28, 30 -; CHECK-NEXT: beq 0, .LBB3_34 -; CHECK-NEXT: .LBB3_36: # %cmpxchg.nostore120 -; CHECK-NEXT: lwsync -; CHECK-NEXT: crxor 20, 20, 20 -; CHECK-NEXT: b .LBB3_38 -; CHECK-NEXT: .LBB3_37: # %cmpxchg.success121 -; CHECK-NEXT: lwsync -; CHECK-NEXT: creqv 20, 20, 20 -; CHECK-NEXT: .LBB3_38: # %cmpxchg.end118 -; CHECK-NEXT: li 6, 0 -; CHECK-NEXT: li 30, 1 -; CHECK-NEXT: isel 6, 30, 6, 20 -; CHECK-NEXT: lbz 30, sc@toc@l(4) -; CHECK-NEXT: stw 6, ui@toc@l(5) -; CHECK-NEXT: lbz 6, uc@toc@l(3) -; CHECK-NEXT: lbarx 29, 0, 7 -; CHECK-NEXT: clrlwi 29, 29, 24 -; CHECK-NEXT: cmplw 29, 6 -; CHECK-NEXT: bne 0, .LBB3_42 -; CHECK-NEXT: # %bb.39: # %cmpxchg.fencedstore105 -; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_40: # %cmpxchg.trystore104 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: stbcx. 
30, 0, 7 -; CHECK-NEXT: beq 0, .LBB3_43 -; CHECK-NEXT: # %bb.41: # %cmpxchg.releasedload103 -; CHECK-NEXT: # in Loop: Header=BB3_40 Depth=1 -; CHECK-NEXT: lbarx 29, 0, 7 -; CHECK-NEXT: clrlwi 29, 29, 24 -; CHECK-NEXT: cmplw 29, 6 -; CHECK-NEXT: beq 0, .LBB3_40 -; CHECK-NEXT: .LBB3_42: # %cmpxchg.nostore101 -; CHECK-NEXT: lwsync -; CHECK-NEXT: crxor 20, 20, 20 -; CHECK-NEXT: b .LBB3_44 -; CHECK-NEXT: .LBB3_43: # %cmpxchg.success102 -; CHECK-NEXT: lwsync -; CHECK-NEXT: creqv 20, 20, 20 -; CHECK-NEXT: .LBB3_44: # %cmpxchg.end99 -; CHECK-NEXT: li 6, 0 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: isel 6, 7, 6, 20 -; CHECK-NEXT: lbz 7, sc@toc@l(4) -; CHECK-NEXT: stw 6, ui@toc@l(5) -; CHECK-NEXT: lbz 6, uc@toc@l(3) -; CHECK-NEXT: lharx 30, 0, 8 -; CHECK-NEXT: clrlwi 30, 30, 16 -; CHECK-NEXT: cmplw 30, 6 -; CHECK-NEXT: bne 0, .LBB3_48 -; CHECK-NEXT: # %bb.45: # %cmpxchg.fencedstore86 -; CHECK-NEXT: extsb 7, 7 -; CHECK-NEXT: sync -; CHECK-NEXT: clrlwi 7, 7, 16 -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_46: # %cmpxchg.trystore85 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sthcx. 
7, 0, 8 -; CHECK-NEXT: beq 0, .LBB3_49 -; CHECK-NEXT: # %bb.47: # %cmpxchg.releasedload84 -; CHECK-NEXT: # in Loop: Header=BB3_46 Depth=1 -; CHECK-NEXT: lharx 30, 0, 8 -; CHECK-NEXT: clrlwi 30, 30, 16 -; CHECK-NEXT: cmplw 30, 6 -; CHECK-NEXT: beq 0, .LBB3_46 -; CHECK-NEXT: .LBB3_48: # %cmpxchg.nostore82 -; CHECK-NEXT: lwsync -; CHECK-NEXT: crxor 20, 20, 20 -; CHECK-NEXT: b .LBB3_50 -; CHECK-NEXT: .LBB3_49: # %cmpxchg.success83 -; CHECK-NEXT: lwsync -; CHECK-NEXT: creqv 20, 20, 20 -; CHECK-NEXT: .LBB3_50: # %cmpxchg.end80 -; CHECK-NEXT: li 6, 0 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: isel 6, 7, 6, 20 -; CHECK-NEXT: lbz 7, sc@toc@l(4) -; CHECK-NEXT: stw 6, ui@toc@l(5) -; CHECK-NEXT: lbz 6, uc@toc@l(3) -; CHECK-NEXT: lharx 8, 0, 9 -; CHECK-NEXT: clrlwi 8, 8, 16 -; CHECK-NEXT: cmplw 8, 6 -; CHECK-NEXT: bne 0, .LBB3_54 -; CHECK-NEXT: # %bb.51: # %cmpxchg.fencedstore67 -; CHECK-NEXT: extsb 7, 7 -; CHECK-NEXT: sync -; CHECK-NEXT: clrlwi 7, 7, 16 -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_52: # %cmpxchg.trystore66 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sthcx. 
7, 0, 9 -; CHECK-NEXT: beq 0, .LBB3_55 -; CHECK-NEXT: # %bb.53: # %cmpxchg.releasedload65 -; CHECK-NEXT: # in Loop: Header=BB3_52 Depth=1 -; CHECK-NEXT: lharx 8, 0, 9 -; CHECK-NEXT: clrlwi 8, 8, 16 -; CHECK-NEXT: cmplw 8, 6 -; CHECK-NEXT: beq 0, .LBB3_52 -; CHECK-NEXT: .LBB3_54: # %cmpxchg.nostore63 -; CHECK-NEXT: lwsync -; CHECK-NEXT: crxor 20, 20, 20 -; CHECK-NEXT: b .LBB3_56 -; CHECK-NEXT: .LBB3_55: # %cmpxchg.success64 -; CHECK-NEXT: lwsync -; CHECK-NEXT: creqv 20, 20, 20 -; CHECK-NEXT: .LBB3_56: # %cmpxchg.end61 -; CHECK-NEXT: li 6, 0 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: isel 6, 7, 6, 20 -; CHECK-NEXT: lbz 7, sc@toc@l(4) -; CHECK-NEXT: stw 6, ui@toc@l(5) -; CHECK-NEXT: lbz 6, uc@toc@l(3) -; CHECK-NEXT: lwarx 8, 0, 10 -; CHECK-NEXT: cmplw 8, 6 -; CHECK-NEXT: bne 0, .LBB3_60 -; CHECK-NEXT: # %bb.57: # %cmpxchg.fencedstore48 -; CHECK-NEXT: extsb 7, 7 -; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_58: # %cmpxchg.trystore47 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: stwcx. 
7, 0, 10 -; CHECK-NEXT: beq 0, .LBB3_61 -; CHECK-NEXT: # %bb.59: # %cmpxchg.releasedload46 -; CHECK-NEXT: # in Loop: Header=BB3_58 Depth=1 -; CHECK-NEXT: lwarx 8, 0, 10 -; CHECK-NEXT: cmplw 8, 6 -; CHECK-NEXT: beq 0, .LBB3_58 -; CHECK-NEXT: .LBB3_60: # %cmpxchg.nostore44 -; CHECK-NEXT: lwsync -; CHECK-NEXT: crxor 20, 20, 20 -; CHECK-NEXT: b .LBB3_62 -; CHECK-NEXT: .LBB3_61: # %cmpxchg.success45 -; CHECK-NEXT: lwsync -; CHECK-NEXT: creqv 20, 20, 20 -; CHECK-NEXT: .LBB3_62: # %cmpxchg.end42 -; CHECK-NEXT: li 6, 0 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: isel 6, 7, 6, 20 -; CHECK-NEXT: lbz 7, sc@toc@l(4) -; CHECK-NEXT: stw 6, ui@toc@l(5) -; CHECK-NEXT: lbz 6, uc@toc@l(3) -; CHECK-NEXT: lwarx 8, 0, 11 -; CHECK-NEXT: cmplw 8, 6 -; CHECK-NEXT: bne 0, .LBB3_66 -; CHECK-NEXT: # %bb.63: # %cmpxchg.fencedstore29 -; CHECK-NEXT: extsb 7, 7 -; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_64: # %cmpxchg.trystore28 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: stwcx. 
7, 0, 11 -; CHECK-NEXT: beq 0, .LBB3_67 -; CHECK-NEXT: # %bb.65: # %cmpxchg.releasedload27 -; CHECK-NEXT: # in Loop: Header=BB3_64 Depth=1 -; CHECK-NEXT: lwarx 8, 0, 11 -; CHECK-NEXT: cmplw 8, 6 -; CHECK-NEXT: beq 0, .LBB3_64 -; CHECK-NEXT: .LBB3_66: # %cmpxchg.nostore25 -; CHECK-NEXT: lwsync -; CHECK-NEXT: crxor 20, 20, 20 -; CHECK-NEXT: b .LBB3_68 -; CHECK-NEXT: .LBB3_67: # %cmpxchg.success26 -; CHECK-NEXT: lwsync -; CHECK-NEXT: creqv 20, 20, 20 -; CHECK-NEXT: .LBB3_68: # %cmpxchg.end23 -; CHECK-NEXT: li 6, 0 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: isel 6, 7, 6, 20 -; CHECK-NEXT: lbz 7, sc@toc@l(4) -; CHECK-NEXT: stw 6, ui@toc@l(5) -; CHECK-NEXT: lbz 6, uc@toc@l(3) -; CHECK-NEXT: ldarx 8, 0, 12 -; CHECK-NEXT: cmpld 8, 6 -; CHECK-NEXT: bne 0, .LBB3_72 -; CHECK-NEXT: # %bb.69: # %cmpxchg.fencedstore10 -; CHECK-NEXT: extsb 7, 7 -; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_70: # %cmpxchg.trystore9 -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: stdcx. 7, 0, 12 -; CHECK-NEXT: beq 0, .LBB3_73 -; CHECK-NEXT: # %bb.71: # %cmpxchg.releasedload8 -; CHECK-NEXT: # in Loop: Header=BB3_70 Depth=1 -; CHECK-NEXT: ldarx 8, 0, 12 -; CHECK-NEXT: cmpld 8, 6 -; CHECK-NEXT: beq 0, .LBB3_70 -; CHECK-NEXT: .LBB3_72: # %cmpxchg.nostore6 -; CHECK-NEXT: lwsync -; CHECK-NEXT: crxor 20, 20, 20 -; CHECK-NEXT: b .LBB3_74 -; CHECK-NEXT: .LBB3_73: # %cmpxchg.success7 -; CHECK-NEXT: lwsync -; CHECK-NEXT: creqv 20, 20, 20 -; CHECK-NEXT: .LBB3_74: # %cmpxchg.end4 -; CHECK-NEXT: li 6, 0 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: lbz 3, uc@toc@l(3) -; CHECK-NEXT: lbz 4, sc@toc@l(4) -; CHECK-NEXT: isel 6, 7, 6, 20 -; CHECK-NEXT: stw 6, ui@toc@l(5) -; CHECK-NEXT: ldarx 6, 0, 0 -; CHECK-NEXT: cmpld 6, 3 -; CHECK-NEXT: bne 0, .LBB3_78 -; CHECK-NEXT: # %bb.75: # %cmpxchg.fencedstore -; CHECK-NEXT: extsb 4, 4 -; CHECK-NEXT: sync -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB3_76: # %cmpxchg.trystore -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: stdcx. 
4, 0, 0 -; CHECK-NEXT: beq 0, .LBB3_79 -; CHECK-NEXT: # %bb.77: # %cmpxchg.releasedload -; CHECK-NEXT: # in Loop: Header=BB3_76 Depth=1 -; CHECK-NEXT: ldarx 6, 0, 0 -; CHECK-NEXT: cmpld 6, 3 -; CHECK-NEXT: beq 0, .LBB3_76 -; CHECK-NEXT: .LBB3_78: # %cmpxchg.nostore -; CHECK-NEXT: lwsync -; CHECK-NEXT: crxor 20, 20, 20 -; CHECK-NEXT: b .LBB3_80 -; CHECK-NEXT: .LBB3_79: # %cmpxchg.success -; CHECK-NEXT: lwsync -; CHECK-NEXT: creqv 20, 20, 20 -; CHECK-NEXT: .LBB3_80: # %cmpxchg.end -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: li 4, 1 -; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload -; CHECK-NEXT: isel 3, 4, 3, 20 -; CHECK-NEXT: stw 3, ui@toc@l(5) -; CHECK-NEXT: blr +; CHECK-NEXT: addis 4, 2, sc@toc@ha +; CHECK-NEXT: addis 3, 2, uc@toc@ha +; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: addi 6, 4, sc@toc@l +; CHECK-NEXT: lbz 7, uc@toc@l(3) +; CHECK-NEXT: lbz 8, sc@toc@l(4) +; CHECK-NEXT: lbarx 5, 0, 6 +; CHECK-NEXT: clrlwi 9, 5, 24 +; CHECK-NEXT: cmplw 9, 7 +; CHECK-NEXT: bne 0, .LBB3_4 +; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore276 +; CHECK-NEXT: sync +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_2: # %cmpxchg.trystore275 +; CHECK-NEXT: # +; CHECK-NEXT: stbcx. 
8, 0, 6 +; CHECK-NEXT: beq 0, .LBB3_4 +; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload274 +; CHECK-NEXT: # +; CHECK-NEXT: lbarx 5, 0, 6 +; CHECK-NEXT: clrlwi 9, 5, 24 +; CHECK-NEXT: cmplw 9, 7 +; CHECK-NEXT: beq 0, .LBB3_2 +; CHECK-NEXT: .LBB3_4: # %cmpxchg.nostore272 +; CHECK-NEXT: addi 7, 3, uc@toc@l +; CHECK-NEXT: lwsync +; CHECK-NEXT: stb 5, sc@toc@l(4) +; CHECK-NEXT: lbz 9, uc@toc@l(3) +; CHECK-NEXT: lbarx 8, 0, 7 +; CHECK-NEXT: clrlwi 10, 8, 24 +; CHECK-NEXT: cmplw 10, 9 +; CHECK-NEXT: bne 0, .LBB3_8 +; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore257 +; CHECK-NEXT: sync +; CHECK-NEXT: clrlwi 5, 5, 24 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_6: # %cmpxchg.trystore256 +; CHECK-NEXT: # +; CHECK-NEXT: stbcx. 5, 0, 7 +; CHECK-NEXT: beq 0, .LBB3_8 +; CHECK-NEXT: # %bb.7: # %cmpxchg.releasedload255 +; CHECK-NEXT: # +; CHECK-NEXT: lbarx 8, 0, 7 +; CHECK-NEXT: clrlwi 10, 8, 24 +; CHECK-NEXT: cmplw 10, 9 +; CHECK-NEXT: beq 0, .LBB3_6 +; CHECK-NEXT: .LBB3_8: # %cmpxchg.nostore253 +; CHECK-NEXT: addis 5, 2, ss@toc@ha +; CHECK-NEXT: lwsync +; CHECK-NEXT: stb 8, uc@toc@l(3) +; CHECK-NEXT: clrlwi 10, 8, 24 +; CHECK-NEXT: lbz 11, sc@toc@l(4) +; CHECK-NEXT: addi 8, 5, ss@toc@l +; CHECK-NEXT: lharx 9, 0, 8 +; CHECK-NEXT: clrlwi 12, 9, 16 +; CHECK-NEXT: cmplw 12, 10 +; CHECK-NEXT: bne 0, .LBB3_12 +; CHECK-NEXT: # %bb.9: # %cmpxchg.fencedstore238 +; CHECK-NEXT: extsb 11, 11 +; CHECK-NEXT: sync +; CHECK-NEXT: clrlwi 11, 11, 16 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_10: # %cmpxchg.trystore237 +; CHECK-NEXT: # +; CHECK-NEXT: sthcx. 
11, 0, 8 +; CHECK-NEXT: beq 0, .LBB3_12 +; CHECK-NEXT: # %bb.11: # %cmpxchg.releasedload236 +; CHECK-NEXT: # +; CHECK-NEXT: lharx 9, 0, 8 +; CHECK-NEXT: clrlwi 12, 9, 16 +; CHECK-NEXT: cmplw 12, 10 +; CHECK-NEXT: beq 0, .LBB3_10 +; CHECK-NEXT: .LBB3_12: # %cmpxchg.nostore234 +; CHECK-NEXT: lwsync +; CHECK-NEXT: sth 9, ss@toc@l(5) +; CHECK-NEXT: addis 5, 2, us@toc@ha +; CHECK-NEXT: lbz 11, uc@toc@l(3) +; CHECK-NEXT: lbz 12, sc@toc@l(4) +; CHECK-NEXT: addi 9, 5, us@toc@l +; CHECK-NEXT: lharx 10, 0, 9 +; CHECK-NEXT: clrlwi 0, 10, 16 +; CHECK-NEXT: cmplw 0, 11 +; CHECK-NEXT: bne 0, .LBB3_16 +; CHECK-NEXT: # %bb.13: # %cmpxchg.fencedstore219 +; CHECK-NEXT: extsb 12, 12 +; CHECK-NEXT: sync +; CHECK-NEXT: clrlwi 12, 12, 16 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_14: # %cmpxchg.trystore218 +; CHECK-NEXT: # +; CHECK-NEXT: sthcx. 12, 0, 9 +; CHECK-NEXT: beq 0, .LBB3_16 +; CHECK-NEXT: # %bb.15: # %cmpxchg.releasedload217 +; CHECK-NEXT: # +; CHECK-NEXT: lharx 10, 0, 9 +; CHECK-NEXT: clrlwi 0, 10, 16 +; CHECK-NEXT: cmplw 0, 11 +; CHECK-NEXT: beq 0, .LBB3_14 +; CHECK-NEXT: .LBB3_16: # %cmpxchg.nostore215 +; CHECK-NEXT: lwsync +; CHECK-NEXT: sth 10, us@toc@l(5) +; CHECK-NEXT: addis 5, 2, si@toc@ha +; CHECK-NEXT: lbz 12, uc@toc@l(3) +; CHECK-NEXT: lbz 0, sc@toc@l(4) +; CHECK-NEXT: addi 10, 5, si@toc@l +; CHECK-NEXT: lwarx 11, 0, 10 +; CHECK-NEXT: cmplw 11, 12 +; CHECK-NEXT: bne 0, .LBB3_20 +; CHECK-NEXT: # %bb.17: # %cmpxchg.fencedstore200 +; CHECK-NEXT: extsb 0, 0 +; CHECK-NEXT: sync +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_18: # %cmpxchg.trystore199 +; CHECK-NEXT: # +; CHECK-NEXT: stwcx. 
0, 0, 10 +; CHECK-NEXT: beq 0, .LBB3_20 +; CHECK-NEXT: # %bb.19: # %cmpxchg.releasedload198 +; CHECK-NEXT: # +; CHECK-NEXT: lwarx 11, 0, 10 +; CHECK-NEXT: cmplw 11, 12 +; CHECK-NEXT: beq 0, .LBB3_18 +; CHECK-NEXT: .LBB3_20: # %cmpxchg.nostore196 +; CHECK-NEXT: lwsync +; CHECK-NEXT: stw 11, si@toc@l(5) +; CHECK-NEXT: addis 5, 2, ui@toc@ha +; CHECK-NEXT: lbz 0, uc@toc@l(3) +; CHECK-NEXT: lbz 30, sc@toc@l(4) +; CHECK-NEXT: addi 11, 5, ui@toc@l +; CHECK-NEXT: lwarx 12, 0, 11 +; CHECK-NEXT: cmplw 12, 0 +; CHECK-NEXT: bne 0, .LBB3_24 +; CHECK-NEXT: # %bb.21: # %cmpxchg.fencedstore181 +; CHECK-NEXT: extsb 30, 30 +; CHECK-NEXT: sync +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_22: # %cmpxchg.trystore180 +; CHECK-NEXT: # +; CHECK-NEXT: stwcx. 30, 0, 11 +; CHECK-NEXT: beq 0, .LBB3_24 +; CHECK-NEXT: # %bb.23: # %cmpxchg.releasedload179 +; CHECK-NEXT: # +; CHECK-NEXT: lwarx 12, 0, 11 +; CHECK-NEXT: cmplw 12, 0 +; CHECK-NEXT: beq 0, .LBB3_22 +; CHECK-NEXT: .LBB3_24: # %cmpxchg.nostore177 +; CHECK-NEXT: addis 30, 2, sll@toc@ha +; CHECK-NEXT: lwsync +; CHECK-NEXT: stw 12, ui@toc@l(5) +; CHECK-NEXT: lbz 29, uc@toc@l(3) +; CHECK-NEXT: lbz 28, sc@toc@l(4) +; CHECK-NEXT: addi 12, 30, sll@toc@l +; CHECK-NEXT: ldarx 0, 0, 12 +; CHECK-NEXT: cmpld 0, 29 +; CHECK-NEXT: bne 0, .LBB3_28 +; CHECK-NEXT: # %bb.25: # %cmpxchg.fencedstore162 +; CHECK-NEXT: extsb 28, 28 +; CHECK-NEXT: sync +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_26: # %cmpxchg.trystore161 +; CHECK-NEXT: # +; CHECK-NEXT: stdcx. 
28, 0, 12 +; CHECK-NEXT: beq 0, .LBB3_28 +; CHECK-NEXT: # %bb.27: # %cmpxchg.releasedload160 +; CHECK-NEXT: # +; CHECK-NEXT: ldarx 0, 0, 12 +; CHECK-NEXT: cmpld 0, 29 +; CHECK-NEXT: beq 0, .LBB3_26 +; CHECK-NEXT: .LBB3_28: # %cmpxchg.nostore158 +; CHECK-NEXT: lwsync +; CHECK-NEXT: std 0, sll@toc@l(30) +; CHECK-NEXT: addis 30, 2, ull@toc@ha +; CHECK-NEXT: lbz 28, uc@toc@l(3) +; CHECK-NEXT: lbz 27, sc@toc@l(4) +; CHECK-NEXT: addi 0, 30, ull@toc@l +; CHECK-NEXT: ldarx 29, 0, 0 +; CHECK-NEXT: cmpld 29, 28 +; CHECK-NEXT: bne 0, .LBB3_32 +; CHECK-NEXT: # %bb.29: # %cmpxchg.fencedstore143 +; CHECK-NEXT: extsb 27, 27 +; CHECK-NEXT: sync +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_30: # %cmpxchg.trystore142 +; CHECK-NEXT: # +; CHECK-NEXT: stdcx. 27, 0, 0 +; CHECK-NEXT: beq 0, .LBB3_32 +; CHECK-NEXT: # %bb.31: # %cmpxchg.releasedload141 +; CHECK-NEXT: # +; CHECK-NEXT: ldarx 29, 0, 0 +; CHECK-NEXT: cmpld 29, 28 +; CHECK-NEXT: beq 0, .LBB3_30 +; CHECK-NEXT: .LBB3_32: # %cmpxchg.nostore139 +; CHECK-NEXT: lwsync +; CHECK-NEXT: std 29, ull@toc@l(30) +; CHECK-NEXT: lbz 30, uc@toc@l(3) +; CHECK-NEXT: lbz 29, sc@toc@l(4) +; CHECK-NEXT: lbarx 28, 0, 6 +; CHECK-NEXT: clrlwi 28, 28, 24 +; CHECK-NEXT: cmplw 28, 30 +; CHECK-NEXT: bne 0, .LBB3_36 +; CHECK-NEXT: # %bb.33: # %cmpxchg.fencedstore124 +; CHECK-NEXT: sync +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_34: # %cmpxchg.trystore123 +; CHECK-NEXT: # +; CHECK-NEXT: stbcx. 
29, 0, 6 +; CHECK-NEXT: beq 0, .LBB3_37 +; CHECK-NEXT: # %bb.35: # %cmpxchg.releasedload122 +; CHECK-NEXT: # +; CHECK-NEXT: lbarx 28, 0, 6 +; CHECK-NEXT: clrlwi 28, 28, 24 +; CHECK-NEXT: cmplw 28, 30 +; CHECK-NEXT: beq 0, .LBB3_34 +; CHECK-NEXT: .LBB3_36: # %cmpxchg.nostore120 +; CHECK-NEXT: lwsync +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: b .LBB3_38 +; CHECK-NEXT: .LBB3_37: # %cmpxchg.success121 +; CHECK-NEXT: lwsync +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: .LBB3_38: # %cmpxchg.end118 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: li 30, 1 +; CHECK-NEXT: isel 6, 30, 6, 20 +; CHECK-NEXT: lbz 30, sc@toc@l(4) +; CHECK-NEXT: stw 6, ui@toc@l(5) +; CHECK-NEXT: lbz 6, uc@toc@l(3) +; CHECK-NEXT: lbarx 29, 0, 7 +; CHECK-NEXT: clrlwi 29, 29, 24 +; CHECK-NEXT: cmplw 29, 6 +; CHECK-NEXT: bne 0, .LBB3_42 +; CHECK-NEXT: # %bb.39: # %cmpxchg.fencedstore105 +; CHECK-NEXT: sync +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_40: # %cmpxchg.trystore104 +; CHECK-NEXT: # +; CHECK-NEXT: stbcx. 30, 0, 7 +; CHECK-NEXT: beq 0, .LBB3_43 +; CHECK-NEXT: # %bb.41: # %cmpxchg.releasedload103 +; CHECK-NEXT: # +; CHECK-NEXT: lbarx 29, 0, 7 +; CHECK-NEXT: clrlwi 29, 29, 24 +; CHECK-NEXT: cmplw 29, 6 +; CHECK-NEXT: beq 0, .LBB3_40 +; CHECK-NEXT: .LBB3_42: # %cmpxchg.nostore101 +; CHECK-NEXT: lwsync +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: b .LBB3_44 +; CHECK-NEXT: .LBB3_43: # %cmpxchg.success102 +; CHECK-NEXT: lwsync +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: .LBB3_44: # %cmpxchg.end99 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: li 7, 1 +; CHECK-NEXT: isel 6, 7, 6, 20 +; CHECK-NEXT: lbz 7, sc@toc@l(4) +; CHECK-NEXT: stw 6, ui@toc@l(5) +; CHECK-NEXT: lbz 6, uc@toc@l(3) +; CHECK-NEXT: lharx 30, 0, 8 +; CHECK-NEXT: clrlwi 30, 30, 16 +; CHECK-NEXT: cmplw 30, 6 +; CHECK-NEXT: bne 0, .LBB3_48 +; CHECK-NEXT: # %bb.45: # %cmpxchg.fencedstore86 +; CHECK-NEXT: extsb 7, 7 +; CHECK-NEXT: sync +; CHECK-NEXT: clrlwi 7, 7, 16 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_46: # %cmpxchg.trystore85 +; 
CHECK-NEXT: # +; CHECK-NEXT: sthcx. 7, 0, 8 +; CHECK-NEXT: beq 0, .LBB3_49 +; CHECK-NEXT: # %bb.47: # %cmpxchg.releasedload84 +; CHECK-NEXT: # +; CHECK-NEXT: lharx 30, 0, 8 +; CHECK-NEXT: clrlwi 30, 30, 16 +; CHECK-NEXT: cmplw 30, 6 +; CHECK-NEXT: beq 0, .LBB3_46 +; CHECK-NEXT: .LBB3_48: # %cmpxchg.nostore82 +; CHECK-NEXT: lwsync +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: b .LBB3_50 +; CHECK-NEXT: .LBB3_49: # %cmpxchg.success83 +; CHECK-NEXT: lwsync +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: .LBB3_50: # %cmpxchg.end80 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: li 7, 1 +; CHECK-NEXT: isel 6, 7, 6, 20 +; CHECK-NEXT: lbz 7, sc@toc@l(4) +; CHECK-NEXT: stw 6, ui@toc@l(5) +; CHECK-NEXT: lbz 6, uc@toc@l(3) +; CHECK-NEXT: lharx 8, 0, 9 +; CHECK-NEXT: clrlwi 8, 8, 16 +; CHECK-NEXT: cmplw 8, 6 +; CHECK-NEXT: bne 0, .LBB3_54 +; CHECK-NEXT: # %bb.51: # %cmpxchg.fencedstore67 +; CHECK-NEXT: extsb 7, 7 +; CHECK-NEXT: sync +; CHECK-NEXT: clrlwi 7, 7, 16 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_52: # %cmpxchg.trystore66 +; CHECK-NEXT: # +; CHECK-NEXT: sthcx. 
7, 0, 9 +; CHECK-NEXT: beq 0, .LBB3_55 +; CHECK-NEXT: # %bb.53: # %cmpxchg.releasedload65 +; CHECK-NEXT: # +; CHECK-NEXT: lharx 8, 0, 9 +; CHECK-NEXT: clrlwi 8, 8, 16 +; CHECK-NEXT: cmplw 8, 6 +; CHECK-NEXT: beq 0, .LBB3_52 +; CHECK-NEXT: .LBB3_54: # %cmpxchg.nostore63 +; CHECK-NEXT: lwsync +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: b .LBB3_56 +; CHECK-NEXT: .LBB3_55: # %cmpxchg.success64 +; CHECK-NEXT: lwsync +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: .LBB3_56: # %cmpxchg.end61 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: li 7, 1 +; CHECK-NEXT: isel 6, 7, 6, 20 +; CHECK-NEXT: lbz 7, sc@toc@l(4) +; CHECK-NEXT: stw 6, ui@toc@l(5) +; CHECK-NEXT: lbz 6, uc@toc@l(3) +; CHECK-NEXT: lwarx 8, 0, 10 +; CHECK-NEXT: cmplw 8, 6 +; CHECK-NEXT: bne 0, .LBB3_60 +; CHECK-NEXT: # %bb.57: # %cmpxchg.fencedstore48 +; CHECK-NEXT: extsb 7, 7 +; CHECK-NEXT: sync +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_58: # %cmpxchg.trystore47 +; CHECK-NEXT: # +; CHECK-NEXT: stwcx. 7, 0, 10 +; CHECK-NEXT: beq 0, .LBB3_61 +; CHECK-NEXT: # %bb.59: # %cmpxchg.releasedload46 +; CHECK-NEXT: # +; CHECK-NEXT: lwarx 8, 0, 10 +; CHECK-NEXT: cmplw 8, 6 +; CHECK-NEXT: beq 0, .LBB3_58 +; CHECK-NEXT: .LBB3_60: # %cmpxchg.nostore44 +; CHECK-NEXT: lwsync +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: b .LBB3_62 +; CHECK-NEXT: .LBB3_61: # %cmpxchg.success45 +; CHECK-NEXT: lwsync +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: .LBB3_62: # %cmpxchg.end42 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: li 7, 1 +; CHECK-NEXT: isel 6, 7, 6, 20 +; CHECK-NEXT: lbz 7, sc@toc@l(4) +; CHECK-NEXT: stw 6, ui@toc@l(5) +; CHECK-NEXT: lbz 6, uc@toc@l(3) +; CHECK-NEXT: lwarx 8, 0, 11 +; CHECK-NEXT: cmplw 8, 6 +; CHECK-NEXT: bne 0, .LBB3_66 +; CHECK-NEXT: # %bb.63: # %cmpxchg.fencedstore29 +; CHECK-NEXT: extsb 7, 7 +; CHECK-NEXT: sync +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_64: # %cmpxchg.trystore28 +; CHECK-NEXT: # +; CHECK-NEXT: stwcx. 
7, 0, 11 +; CHECK-NEXT: beq 0, .LBB3_67 +; CHECK-NEXT: # %bb.65: # %cmpxchg.releasedload27 +; CHECK-NEXT: # +; CHECK-NEXT: lwarx 8, 0, 11 +; CHECK-NEXT: cmplw 8, 6 +; CHECK-NEXT: beq 0, .LBB3_64 +; CHECK-NEXT: .LBB3_66: # %cmpxchg.nostore25 +; CHECK-NEXT: lwsync +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: b .LBB3_68 +; CHECK-NEXT: .LBB3_67: # %cmpxchg.success26 +; CHECK-NEXT: lwsync +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: .LBB3_68: # %cmpxchg.end23 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: li 7, 1 +; CHECK-NEXT: isel 6, 7, 6, 20 +; CHECK-NEXT: lbz 7, sc@toc@l(4) +; CHECK-NEXT: stw 6, ui@toc@l(5) +; CHECK-NEXT: lbz 6, uc@toc@l(3) +; CHECK-NEXT: ldarx 8, 0, 12 +; CHECK-NEXT: cmpld 8, 6 +; CHECK-NEXT: bne 0, .LBB3_72 +; CHECK-NEXT: # %bb.69: # %cmpxchg.fencedstore10 +; CHECK-NEXT: extsb 7, 7 +; CHECK-NEXT: sync +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_70: # %cmpxchg.trystore9 +; CHECK-NEXT: # +; CHECK-NEXT: stdcx. 7, 0, 12 +; CHECK-NEXT: beq 0, .LBB3_73 +; CHECK-NEXT: # %bb.71: # %cmpxchg.releasedload8 +; CHECK-NEXT: # +; CHECK-NEXT: ldarx 8, 0, 12 +; CHECK-NEXT: cmpld 8, 6 +; CHECK-NEXT: beq 0, .LBB3_70 +; CHECK-NEXT: .LBB3_72: # %cmpxchg.nostore6 +; CHECK-NEXT: lwsync +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: b .LBB3_74 +; CHECK-NEXT: .LBB3_73: # %cmpxchg.success7 +; CHECK-NEXT: lwsync +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: .LBB3_74: # %cmpxchg.end4 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: li 7, 1 +; CHECK-NEXT: lbz 3, uc@toc@l(3) +; CHECK-NEXT: lbz 4, sc@toc@l(4) +; CHECK-NEXT: isel 6, 7, 6, 20 +; CHECK-NEXT: stw 6, ui@toc@l(5) +; CHECK-NEXT: ldarx 6, 0, 0 +; CHECK-NEXT: cmpld 6, 3 +; CHECK-NEXT: bne 0, .LBB3_78 +; CHECK-NEXT: # %bb.75: # %cmpxchg.fencedstore +; CHECK-NEXT: extsb 4, 4 +; CHECK-NEXT: sync +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB3_76: # %cmpxchg.trystore +; CHECK-NEXT: # +; CHECK-NEXT: stdcx. 
4, 0, 0 +; CHECK-NEXT: beq 0, .LBB3_79 +; CHECK-NEXT: # %bb.77: # %cmpxchg.releasedload +; CHECK-NEXT: # +; CHECK-NEXT: ldarx 6, 0, 0 +; CHECK-NEXT: cmpld 6, 3 +; CHECK-NEXT: beq 0, .LBB3_76 +; CHECK-NEXT: .LBB3_78: # %cmpxchg.nostore +; CHECK-NEXT: lwsync +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: b .LBB3_80 +; CHECK-NEXT: .LBB3_79: # %cmpxchg.success +; CHECK-NEXT: lwsync +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: .LBB3_80: # %cmpxchg.end +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: li 4, 1 +; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; CHECK-NEXT: isel 3, 4, 3, 20 +; CHECK-NEXT: stw 3, ui@toc@l(5) +; CHECK-NEXT: blr ; ; AIX32-LABEL: test_compare_and_swap: ; AIX32: # %bb.0: # %entry ; AIX32-NEXT: mflr 0 -; AIX32-NEXT: stwu 1, -144(1) -; AIX32-NEXT: stw 0, 152(1) -; AIX32-NEXT: stw 29, 132(1) # 4-byte Folded Spill -; AIX32-NEXT: lwz 29, L..C0(2) # @sc -; AIX32-NEXT: stw 26, 120(1) # 4-byte Folded Spill -; AIX32-NEXT: not 3, 29 -; AIX32-NEXT: stw 30, 136(1) # 4-byte Folded Spill -; AIX32-NEXT: lwz 30, L..C1(2) # @uc -; AIX32-NEXT: lbz 4, 0(30) -; AIX32-NEXT: lbz 5, 0(29) -; AIX32-NEXT: stw 27, 124(1) # 4-byte Folded Spill -; AIX32-NEXT: rlwinm 27, 29, 0, 0, 29 -; AIX32-NEXT: stw 14, 72(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 15, 76(1) # 4-byte Folded Spill -; AIX32-NEXT: rlwinm 26, 3, 3, 27, 28 -; AIX32-NEXT: li 3, 255 -; AIX32-NEXT: slw 3, 3, 26 -; AIX32-NEXT: stw 16, 80(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 17, 84(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 18, 88(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 19, 92(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 20, 96(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 21, 100(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 22, 104(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 23, 108(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 24, 112(1) # 4-byte Folded 
Spill -; AIX32-NEXT: stw 25, 116(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 28, 128(1) # 4-byte Folded Spill -; AIX32-NEXT: stw 31, 140(1) # 4-byte Folded Spill -; AIX32-NEXT: not 25, 3 -; AIX32-NEXT: lwarx 3, 0, 27 -; AIX32-NEXT: srw 6, 3, 26 -; AIX32-NEXT: clrlwi 6, 6, 24 -; AIX32-NEXT: cmplw 6, 4 -; AIX32-NEXT: bne 0, L..BB3_4 -; AIX32-NEXT: # %bb.1: # %cmpxchg.fencedstore289 -; AIX32-NEXT: sync -; AIX32-NEXT: slw 5, 5, 26 -; AIX32-NEXT: .align 4 -; AIX32-NEXT: L..BB3_2: # %cmpxchg.trystore288 -; AIX32-NEXT: # =>This Inner Loop Header: Depth=1 -; AIX32-NEXT: and 6, 3, 25 -; AIX32-NEXT: or 6, 6, 5 -; AIX32-NEXT: stwcx. 6, 0, 27 -; AIX32-NEXT: beq 0, L..BB3_4 -; AIX32-NEXT: # %bb.3: # %cmpxchg.releasedload287 -; AIX32-NEXT: # in Loop: Header=BB3_2 Depth=1 -; AIX32-NEXT: lwarx 3, 0, 27 -; AIX32-NEXT: srw 6, 3, 26 -; AIX32-NEXT: clrlwi 6, 6, 24 -; AIX32-NEXT: cmplw 6, 4 -; AIX32-NEXT: beq 0, L..BB3_2 -; AIX32-NEXT: L..BB3_4: # %cmpxchg.nostore285 -; AIX32-NEXT: not 4, 30 -; AIX32-NEXT: srw 5, 3, 26 -; AIX32-NEXT: lwsync -; AIX32-NEXT: lbz 3, 0(30) -; AIX32-NEXT: rlwinm 24, 30, 0, 0, 29 -; AIX32-NEXT: rlwinm 23, 4, 3, 27, 28 -; AIX32-NEXT: li 4, 255 -; AIX32-NEXT: stb 5, 0(29) -; AIX32-NEXT: slw 4, 4, 23 -; AIX32-NEXT: not 22, 4 -; AIX32-NEXT: lwarx 4, 0, 24 -; AIX32-NEXT: srw 6, 4, 23 -; AIX32-NEXT: clrlwi 6, 6, 24 -; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: bne 0, L..BB3_8 -; AIX32-NEXT: # %bb.5: # %cmpxchg.fencedstore256 -; AIX32-NEXT: clrlwi 5, 5, 24 -; AIX32-NEXT: sync -; AIX32-NEXT: slw 5, 5, 23 -; AIX32-NEXT: .align 4 -; AIX32-NEXT: L..BB3_6: # %cmpxchg.trystore255 -; AIX32-NEXT: # =>This Inner Loop Header: Depth=1 -; AIX32-NEXT: and 6, 4, 22 -; AIX32-NEXT: or 6, 6, 5 -; AIX32-NEXT: stwcx. 
6, 0, 24 -; AIX32-NEXT: beq 0, L..BB3_8 -; AIX32-NEXT: # %bb.7: # %cmpxchg.releasedload254 -; AIX32-NEXT: # in Loop: Header=BB3_6 Depth=1 -; AIX32-NEXT: lwarx 4, 0, 24 -; AIX32-NEXT: srw 6, 4, 23 -; AIX32-NEXT: clrlwi 6, 6, 24 -; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: beq 0, L..BB3_6 -; AIX32-NEXT: L..BB3_8: # %cmpxchg.nostore252 -; AIX32-NEXT: srw 4, 4, 23 -; AIX32-NEXT: lwsync -; AIX32-NEXT: lis 3, 0 -; AIX32-NEXT: lbz 7, 0(29) -; AIX32-NEXT: stb 4, 0(30) -; AIX32-NEXT: clrlwi 6, 4, 24 -; AIX32-NEXT: lwz 4, L..C2(2) # @ss -; AIX32-NEXT: ori 3, 3, 65535 -; AIX32-NEXT: clrlwi 5, 4, 30 -; AIX32-NEXT: rlwinm 21, 4, 0, 0, 29 -; AIX32-NEXT: xori 5, 5, 2 -; AIX32-NEXT: slwi 20, 5, 3 -; AIX32-NEXT: slw 5, 3, 20 -; AIX32-NEXT: not 19, 5 -; AIX32-NEXT: lwarx 5, 0, 21 -; AIX32-NEXT: srw 8, 5, 20 -; AIX32-NEXT: clrlwi 8, 8, 16 -; AIX32-NEXT: cmplw 8, 6 -; AIX32-NEXT: bne 0, L..BB3_12 -; AIX32-NEXT: # %bb.9: # %cmpxchg.fencedstore223 -; AIX32-NEXT: extsb 7, 7 -; AIX32-NEXT: sync -; AIX32-NEXT: clrlwi 7, 7, 16 -; AIX32-NEXT: slw 7, 7, 20 -; AIX32-NEXT: .align 4 -; AIX32-NEXT: L..BB3_10: # %cmpxchg.trystore222 -; AIX32-NEXT: # =>This Inner Loop Header: Depth=1 -; AIX32-NEXT: and 8, 5, 19 -; AIX32-NEXT: or 8, 8, 7 -; AIX32-NEXT: stwcx. 
8, 0, 21 -; AIX32-NEXT: beq 0, L..BB3_12 -; AIX32-NEXT: # %bb.11: # %cmpxchg.releasedload221 -; AIX32-NEXT: # in Loop: Header=BB3_10 Depth=1 -; AIX32-NEXT: lwarx 5, 0, 21 -; AIX32-NEXT: srw 8, 5, 20 -; AIX32-NEXT: clrlwi 8, 8, 16 -; AIX32-NEXT: cmplw 8, 6 -; AIX32-NEXT: beq 0, L..BB3_10 -; AIX32-NEXT: L..BB3_12: # %cmpxchg.nostore219 -; AIX32-NEXT: srw 5, 5, 20 -; AIX32-NEXT: lwsync -; AIX32-NEXT: lbz 6, 0(29) -; AIX32-NEXT: sth 5, 0(4) -; AIX32-NEXT: lwz 4, L..C3(2) # @us -; AIX32-NEXT: lbz 5, 0(30) -; AIX32-NEXT: clrlwi 7, 4, 30 -; AIX32-NEXT: rlwinm 18, 4, 0, 0, 29 -; AIX32-NEXT: xori 7, 7, 2 -; AIX32-NEXT: slwi 17, 7, 3 -; AIX32-NEXT: slw 3, 3, 17 -; AIX32-NEXT: not 16, 3 -; AIX32-NEXT: lwarx 3, 0, 18 -; AIX32-NEXT: srw 7, 3, 17 -; AIX32-NEXT: clrlwi 7, 7, 16 -; AIX32-NEXT: cmplw 7, 5 -; AIX32-NEXT: bne 0, L..BB3_16 -; AIX32-NEXT: # %bb.13: # %cmpxchg.fencedstore190 -; AIX32-NEXT: extsb 6, 6 -; AIX32-NEXT: sync -; AIX32-NEXT: clrlwi 6, 6, 16 -; AIX32-NEXT: slw 6, 6, 17 -; AIX32-NEXT: .align 4 -; AIX32-NEXT: L..BB3_14: # %cmpxchg.trystore189 -; AIX32-NEXT: # =>This Inner Loop Header: Depth=1 -; AIX32-NEXT: and 7, 3, 16 -; AIX32-NEXT: or 7, 7, 6 -; AIX32-NEXT: stwcx. 
7, 0, 18 -; AIX32-NEXT: beq 0, L..BB3_16 -; AIX32-NEXT: # %bb.15: # %cmpxchg.releasedload188 -; AIX32-NEXT: # in Loop: Header=BB3_14 Depth=1 -; AIX32-NEXT: lwarx 3, 0, 18 -; AIX32-NEXT: srw 7, 3, 17 -; AIX32-NEXT: clrlwi 7, 7, 16 -; AIX32-NEXT: cmplw 7, 5 -; AIX32-NEXT: beq 0, L..BB3_14 -; AIX32-NEXT: L..BB3_16: # %cmpxchg.nostore186 -; AIX32-NEXT: srw 3, 3, 17 -; AIX32-NEXT: lwsync -; AIX32-NEXT: lwz 15, L..C4(2) # @si -; AIX32-NEXT: lbz 5, 0(29) -; AIX32-NEXT: sth 3, 0(4) -; AIX32-NEXT: lbz 4, 0(30) -; AIX32-NEXT: lwarx 3, 0, 15 -; AIX32-NEXT: cmplw 3, 4 -; AIX32-NEXT: bne 0, L..BB3_20 -; AIX32-NEXT: # %bb.17: # %cmpxchg.fencedstore171 -; AIX32-NEXT: extsb 5, 5 -; AIX32-NEXT: sync -; AIX32-NEXT: .align 5 -; AIX32-NEXT: L..BB3_18: # %cmpxchg.trystore170 -; AIX32-NEXT: # =>This Inner Loop Header: Depth=1 -; AIX32-NEXT: stwcx. 5, 0, 15 -; AIX32-NEXT: beq 0, L..BB3_20 -; AIX32-NEXT: # %bb.19: # %cmpxchg.releasedload169 -; AIX32-NEXT: # in Loop: Header=BB3_18 Depth=1 -; AIX32-NEXT: lwarx 3, 0, 15 -; AIX32-NEXT: cmplw 3, 4 -; AIX32-NEXT: beq 0, L..BB3_18 -; AIX32-NEXT: L..BB3_20: # %cmpxchg.nostore167 -; AIX32-NEXT: lwsync -; AIX32-NEXT: lwz 28, L..C5(2) # @ui -; AIX32-NEXT: stw 3, 0(15) -; AIX32-NEXT: lbz 4, 0(30) -; AIX32-NEXT: lbz 5, 0(29) -; AIX32-NEXT: lwarx 3, 0, 28 -; AIX32-NEXT: cmplw 3, 4 -; AIX32-NEXT: bne 0, L..BB3_24 -; AIX32-NEXT: # %bb.21: # %cmpxchg.fencedstore152 -; AIX32-NEXT: extsb 5, 5 -; AIX32-NEXT: sync -; AIX32-NEXT: .align 5 -; AIX32-NEXT: L..BB3_22: # %cmpxchg.trystore151 -; AIX32-NEXT: # =>This Inner Loop Header: Depth=1 -; AIX32-NEXT: stwcx. 
5, 0, 28 -; AIX32-NEXT: beq 0, L..BB3_24 -; AIX32-NEXT: # %bb.23: # %cmpxchg.releasedload150 -; AIX32-NEXT: # in Loop: Header=BB3_22 Depth=1 -; AIX32-NEXT: lwarx 3, 0, 28 -; AIX32-NEXT: cmplw 3, 4 -; AIX32-NEXT: beq 0, L..BB3_22 -; AIX32-NEXT: L..BB3_24: # %cmpxchg.nostore148 -; AIX32-NEXT: lwsync -; AIX32-NEXT: stw 3, 0(28) -; AIX32-NEXT: lwz 31, L..C6(2) # @sll -; AIX32-NEXT: lbz 3, 0(29) -; AIX32-NEXT: li 14, 0 -; AIX32-NEXT: addi 4, 1, 64 -; AIX32-NEXT: li 7, 5 -; AIX32-NEXT: li 8, 5 -; AIX32-NEXT: stw 14, 64(1) -; AIX32-NEXT: extsb 6, 3 -; AIX32-NEXT: lbz 3, 0(30) -; AIX32-NEXT: srawi 5, 6, 31 -; AIX32-NEXT: stw 3, 68(1) -; AIX32-NEXT: mr 3, 31 -; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR] -; AIX32-NEXT: nop -; AIX32-NEXT: lwz 3, 68(1) -; AIX32-NEXT: lbz 4, 0(29) -; AIX32-NEXT: li 7, 5 -; AIX32-NEXT: li 8, 5 -; AIX32-NEXT: stw 3, 4(31) -; AIX32-NEXT: lwz 3, 64(1) -; AIX32-NEXT: extsb 6, 4 -; AIX32-NEXT: addi 4, 1, 64 -; AIX32-NEXT: stw 14, 64(1) -; AIX32-NEXT: srawi 5, 6, 31 -; AIX32-NEXT: stw 3, 0(31) -; AIX32-NEXT: lbz 3, 0(30) -; AIX32-NEXT: lwz 31, L..C7(2) # @ull -; AIX32-NEXT: stw 3, 68(1) -; AIX32-NEXT: mr 3, 31 -; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR] -; AIX32-NEXT: nop -; AIX32-NEXT: lwz 3, 64(1) -; AIX32-NEXT: lwz 4, 68(1) -; AIX32-NEXT: lbz 5, 0(29) -; AIX32-NEXT: stw 4, 4(31) -; AIX32-NEXT: stw 3, 0(31) -; AIX32-NEXT: lbz 3, 0(30) -; AIX32-NEXT: lwarx 4, 0, 27 -; AIX32-NEXT: srw 6, 4, 26 -; AIX32-NEXT: clrlwi 6, 6, 24 -; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: bne 0, L..BB3_28 -; AIX32-NEXT: # %bb.25: # %cmpxchg.fencedstore119 -; AIX32-NEXT: sync -; AIX32-NEXT: slw 5, 5, 26 -; AIX32-NEXT: .align 4 -; AIX32-NEXT: L..BB3_26: # %cmpxchg.trystore118 -; AIX32-NEXT: # =>This Inner Loop Header: Depth=1 -; AIX32-NEXT: and 4, 4, 25 -; AIX32-NEXT: or 4, 4, 5 -; AIX32-NEXT: stwcx. 
4, 0, 27 -; AIX32-NEXT: beq 0, L..BB3_29 -; AIX32-NEXT: # %bb.27: # %cmpxchg.releasedload117 -; AIX32-NEXT: # in Loop: Header=BB3_26 Depth=1 -; AIX32-NEXT: lwarx 4, 0, 27 -; AIX32-NEXT: srw 6, 4, 26 -; AIX32-NEXT: clrlwi 6, 6, 24 -; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: beq 0, L..BB3_26 -; AIX32-NEXT: L..BB3_28: # %cmpxchg.nostore115 -; AIX32-NEXT: crxor 20, 20, 20 -; AIX32-NEXT: lwsync -; AIX32-NEXT: b L..BB3_30 -; AIX32-NEXT: L..BB3_29: # %cmpxchg.success116 -; AIX32-NEXT: lwsync -; AIX32-NEXT: creqv 20, 20, 20 -; AIX32-NEXT: L..BB3_30: # %cmpxchg.end113 -; AIX32-NEXT: li 3, 0 -; AIX32-NEXT: li 4, 1 -; AIX32-NEXT: lbz 5, 0(29) -; AIX32-NEXT: isel 3, 4, 3, 20 -; AIX32-NEXT: stw 3, 0(28) -; AIX32-NEXT: lbz 3, 0(30) -; AIX32-NEXT: lwarx 4, 0, 24 -; AIX32-NEXT: srw 6, 4, 23 -; AIX32-NEXT: clrlwi 6, 6, 24 -; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: bne 0, L..BB3_34 -; AIX32-NEXT: # %bb.31: # %cmpxchg.fencedstore86 -; AIX32-NEXT: sync -; AIX32-NEXT: slw 5, 5, 23 -; AIX32-NEXT: .align 4 -; AIX32-NEXT: L..BB3_32: # %cmpxchg.trystore85 -; AIX32-NEXT: # =>This Inner Loop Header: Depth=1 -; AIX32-NEXT: and 4, 4, 22 -; AIX32-NEXT: or 4, 4, 5 -; AIX32-NEXT: stwcx. 
4, 0, 24 -; AIX32-NEXT: beq 0, L..BB3_35 -; AIX32-NEXT: # %bb.33: # %cmpxchg.releasedload84 -; AIX32-NEXT: # in Loop: Header=BB3_32 Depth=1 -; AIX32-NEXT: lwarx 4, 0, 24 -; AIX32-NEXT: srw 6, 4, 23 -; AIX32-NEXT: clrlwi 6, 6, 24 -; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: beq 0, L..BB3_32 -; AIX32-NEXT: L..BB3_34: # %cmpxchg.nostore82 -; AIX32-NEXT: crxor 20, 20, 20 -; AIX32-NEXT: lwsync -; AIX32-NEXT: b L..BB3_36 -; AIX32-NEXT: L..BB3_35: # %cmpxchg.success83 -; AIX32-NEXT: lwsync -; AIX32-NEXT: creqv 20, 20, 20 -; AIX32-NEXT: L..BB3_36: # %cmpxchg.end80 -; AIX32-NEXT: li 3, 0 -; AIX32-NEXT: li 4, 1 -; AIX32-NEXT: lbz 5, 0(29) -; AIX32-NEXT: isel 3, 4, 3, 20 -; AIX32-NEXT: stw 3, 0(28) -; AIX32-NEXT: lbz 3, 0(30) -; AIX32-NEXT: lwarx 4, 0, 21 -; AIX32-NEXT: srw 6, 4, 20 -; AIX32-NEXT: clrlwi 6, 6, 16 -; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: bne 0, L..BB3_40 -; AIX32-NEXT: # %bb.37: # %cmpxchg.fencedstore53 -; AIX32-NEXT: extsb 5, 5 -; AIX32-NEXT: sync -; AIX32-NEXT: clrlwi 5, 5, 16 -; AIX32-NEXT: slw 5, 5, 20 -; AIX32-NEXT: .align 4 -; AIX32-NEXT: L..BB3_38: # %cmpxchg.trystore52 -; AIX32-NEXT: # =>This Inner Loop Header: Depth=1 -; AIX32-NEXT: and 4, 4, 19 -; AIX32-NEXT: or 4, 4, 5 -; AIX32-NEXT: stwcx. 
4, 0, 21 -; AIX32-NEXT: beq 0, L..BB3_41 -; AIX32-NEXT: # %bb.39: # %cmpxchg.releasedload51 -; AIX32-NEXT: # in Loop: Header=BB3_38 Depth=1 -; AIX32-NEXT: lwarx 4, 0, 21 -; AIX32-NEXT: srw 6, 4, 20 -; AIX32-NEXT: clrlwi 6, 6, 16 -; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: beq 0, L..BB3_38 -; AIX32-NEXT: L..BB3_40: # %cmpxchg.nostore49 -; AIX32-NEXT: crxor 20, 20, 20 -; AIX32-NEXT: lwsync -; AIX32-NEXT: b L..BB3_42 -; AIX32-NEXT: L..BB3_41: # %cmpxchg.success50 -; AIX32-NEXT: lwsync -; AIX32-NEXT: creqv 20, 20, 20 -; AIX32-NEXT: L..BB3_42: # %cmpxchg.end47 -; AIX32-NEXT: li 3, 0 -; AIX32-NEXT: li 4, 1 -; AIX32-NEXT: lbz 5, 0(29) -; AIX32-NEXT: isel 3, 4, 3, 20 -; AIX32-NEXT: stw 3, 0(28) -; AIX32-NEXT: lbz 3, 0(30) -; AIX32-NEXT: lwarx 4, 0, 18 -; AIX32-NEXT: srw 6, 4, 17 -; AIX32-NEXT: clrlwi 6, 6, 16 -; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: bne 0, L..BB3_46 -; AIX32-NEXT: # %bb.43: # %cmpxchg.fencedstore29 -; AIX32-NEXT: extsb 5, 5 -; AIX32-NEXT: sync -; AIX32-NEXT: clrlwi 5, 5, 16 -; AIX32-NEXT: slw 5, 5, 17 -; AIX32-NEXT: .align 4 -; AIX32-NEXT: L..BB3_44: # %cmpxchg.trystore28 -; AIX32-NEXT: # =>This Inner Loop Header: Depth=1 -; AIX32-NEXT: and 4, 4, 16 -; AIX32-NEXT: or 4, 4, 5 -; AIX32-NEXT: stwcx. 
4, 0, 18 -; AIX32-NEXT: beq 0, L..BB3_47 -; AIX32-NEXT: # %bb.45: # %cmpxchg.releasedload27 -; AIX32-NEXT: # in Loop: Header=BB3_44 Depth=1 -; AIX32-NEXT: lwarx 4, 0, 18 -; AIX32-NEXT: srw 6, 4, 17 -; AIX32-NEXT: clrlwi 6, 6, 16 -; AIX32-NEXT: cmplw 6, 3 -; AIX32-NEXT: beq 0, L..BB3_44 -; AIX32-NEXT: L..BB3_46: # %cmpxchg.nostore25 -; AIX32-NEXT: crxor 20, 20, 20 -; AIX32-NEXT: lwsync -; AIX32-NEXT: b L..BB3_48 -; AIX32-NEXT: L..BB3_47: # %cmpxchg.success26 -; AIX32-NEXT: lwsync -; AIX32-NEXT: creqv 20, 20, 20 -; AIX32-NEXT: L..BB3_48: # %cmpxchg.end23 -; AIX32-NEXT: li 3, 0 -; AIX32-NEXT: li 4, 1 -; AIX32-NEXT: isel 3, 4, 3, 20 -; AIX32-NEXT: lbz 4, 0(29) -; AIX32-NEXT: stw 3, 0(28) -; AIX32-NEXT: lbz 3, 0(30) -; AIX32-NEXT: lwarx 5, 0, 15 -; AIX32-NEXT: cmplw 5, 3 -; AIX32-NEXT: bne 0, L..BB3_52 -; AIX32-NEXT: # %bb.49: # %cmpxchg.fencedstore10 -; AIX32-NEXT: extsb 4, 4 -; AIX32-NEXT: sync -; AIX32-NEXT: .align 5 -; AIX32-NEXT: L..BB3_50: # %cmpxchg.trystore9 -; AIX32-NEXT: # =>This Inner Loop Header: Depth=1 -; AIX32-NEXT: stwcx. 
4, 0, 15 -; AIX32-NEXT: beq 0, L..BB3_53 -; AIX32-NEXT: # %bb.51: # %cmpxchg.releasedload8 -; AIX32-NEXT: # in Loop: Header=BB3_50 Depth=1 -; AIX32-NEXT: lwarx 5, 0, 15 -; AIX32-NEXT: cmplw 5, 3 -; AIX32-NEXT: beq 0, L..BB3_50 -; AIX32-NEXT: L..BB3_52: # %cmpxchg.nostore6 -; AIX32-NEXT: crxor 20, 20, 20 -; AIX32-NEXT: lwsync -; AIX32-NEXT: b L..BB3_54 -; AIX32-NEXT: L..BB3_53: # %cmpxchg.success7 -; AIX32-NEXT: lwsync -; AIX32-NEXT: creqv 20, 20, 20 -; AIX32-NEXT: L..BB3_54: # %cmpxchg.end4 -; AIX32-NEXT: li 3, 0 -; AIX32-NEXT: li 4, 1 -; AIX32-NEXT: isel 3, 4, 3, 20 -; AIX32-NEXT: lbz 4, 0(29) -; AIX32-NEXT: stw 3, 0(28) -; AIX32-NEXT: lbz 3, 0(30) -; AIX32-NEXT: lwarx 5, 0, 28 -; AIX32-NEXT: cmplw 5, 3 -; AIX32-NEXT: bne 0, L..BB3_58 -; AIX32-NEXT: # %bb.55: # %cmpxchg.fencedstore -; AIX32-NEXT: extsb 4, 4 -; AIX32-NEXT: sync -; AIX32-NEXT: .align 5 -; AIX32-NEXT: L..BB3_56: # %cmpxchg.trystore -; AIX32-NEXT: # =>This Inner Loop Header: Depth=1 -; AIX32-NEXT: stwcx. 4, 0, 28 -; AIX32-NEXT: beq 0, L..BB3_59 -; AIX32-NEXT: # %bb.57: # %cmpxchg.releasedload -; AIX32-NEXT: # in Loop: Header=BB3_56 Depth=1 -; AIX32-NEXT: lwarx 5, 0, 28 -; AIX32-NEXT: cmplw 5, 3 -; AIX32-NEXT: beq 0, L..BB3_56 -; AIX32-NEXT: L..BB3_58: # %cmpxchg.nostore -; AIX32-NEXT: crxor 20, 20, 20 -; AIX32-NEXT: lwsync -; AIX32-NEXT: b L..BB3_60 -; AIX32-NEXT: L..BB3_59: # %cmpxchg.success -; AIX32-NEXT: lwsync -; AIX32-NEXT: creqv 20, 20, 20 -; AIX32-NEXT: L..BB3_60: # %cmpxchg.end -; AIX32-NEXT: li 3, 1 -; AIX32-NEXT: li 31, 0 -; AIX32-NEXT: lbz 4, 0(29) -; AIX32-NEXT: isel 3, 3, 31, 20 -; AIX32-NEXT: li 7, 5 -; AIX32-NEXT: li 8, 5 -; AIX32-NEXT: extsb 6, 4 -; AIX32-NEXT: stw 3, 0(28) -; AIX32-NEXT: lbz 3, 0(30) -; AIX32-NEXT: addi 4, 1, 64 -; AIX32-NEXT: stw 31, 64(1) -; AIX32-NEXT: srawi 5, 6, 31 -; AIX32-NEXT: stw 3, 68(1) -; AIX32-NEXT: lwz 3, L..C6(2) # @sll -; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR] -; AIX32-NEXT: nop -; AIX32-NEXT: lbz 4, 0(29) -; AIX32-NEXT: stw 3, 0(28) -; 
AIX32-NEXT: lbz 3, 0(30) -; AIX32-NEXT: li 7, 5 -; AIX32-NEXT: li 8, 5 -; AIX32-NEXT: extsb 6, 4 -; AIX32-NEXT: stw 3, 68(1) -; AIX32-NEXT: lwz 3, L..C7(2) # @ull -; AIX32-NEXT: addi 4, 1, 64 -; AIX32-NEXT: stw 31, 64(1) -; AIX32-NEXT: srawi 5, 6, 31 -; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR] -; AIX32-NEXT: nop -; AIX32-NEXT: stw 3, 0(28) -; AIX32-NEXT: lwz 31, 140(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 30, 136(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 29, 132(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 28, 128(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 27, 124(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 26, 120(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 25, 116(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 24, 112(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 23, 108(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 22, 104(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 21, 100(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 20, 96(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 19, 92(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 18, 88(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 17, 84(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 16, 80(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 15, 76(1) # 4-byte Folded Reload -; AIX32-NEXT: lwz 14, 72(1) # 4-byte Folded Reload -; AIX32-NEXT: addi 1, 1, 144 -; AIX32-NEXT: lwz 0, 8(1) -; AIX32-NEXT: mtlr 0 +; AIX32-NEXT: stwu 1, -144(1) +; AIX32-NEXT: stw 0, 152(1) +; AIX32-NEXT: stw 29, 132(1) # 4-byte Folded Spill +; AIX32-NEXT: lwz 29, L..C0(2) # @sc +; AIX32-NEXT: stw 26, 120(1) # 4-byte Folded Spill +; AIX32-NEXT: not 3, 29 +; AIX32-NEXT: stw 30, 136(1) # 4-byte Folded Spill +; AIX32-NEXT: lwz 30, L..C1(2) # @uc +; AIX32-NEXT: lbz 4, 0(30) +; AIX32-NEXT: lbz 5, 0(29) +; AIX32-NEXT: stw 27, 124(1) # 4-byte Folded Spill +; AIX32-NEXT: rlwinm 27, 29, 0, 0, 29 +; AIX32-NEXT: stw 14, 72(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 15, 76(1) # 4-byte Folded Spill +; AIX32-NEXT: rlwinm 26, 3, 3, 27, 28 +; AIX32-NEXT: li 
3, 255 +; AIX32-NEXT: slw 3, 3, 26 +; AIX32-NEXT: stw 16, 80(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 17, 84(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 18, 88(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 19, 92(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 20, 96(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 21, 100(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 22, 104(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 23, 108(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 24, 112(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 25, 116(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 28, 128(1) # 4-byte Folded Spill +; AIX32-NEXT: stw 31, 140(1) # 4-byte Folded Spill +; AIX32-NEXT: not 25, 3 +; AIX32-NEXT: lwarx 3, 0, 27 +; AIX32-NEXT: srw 6, 3, 26 +; AIX32-NEXT: clrlwi 6, 6, 24 +; AIX32-NEXT: cmplw 6, 4 +; AIX32-NEXT: bne 0, L..BB3_4 +; AIX32-NEXT: # %bb.1: # %cmpxchg.fencedstore289 +; AIX32-NEXT: sync +; AIX32-NEXT: slw 5, 5, 26 +; AIX32-NEXT: .align 4 +; AIX32-NEXT: L..BB3_2: # %cmpxchg.trystore288 +; AIX32-NEXT: # +; AIX32-NEXT: and 6, 3, 25 +; AIX32-NEXT: or 6, 6, 5 +; AIX32-NEXT: stwcx. 
6, 0, 27 +; AIX32-NEXT: beq 0, L..BB3_4 +; AIX32-NEXT: # %bb.3: # %cmpxchg.releasedload287 +; AIX32-NEXT: # +; AIX32-NEXT: lwarx 3, 0, 27 +; AIX32-NEXT: srw 6, 3, 26 +; AIX32-NEXT: clrlwi 6, 6, 24 +; AIX32-NEXT: cmplw 6, 4 +; AIX32-NEXT: beq 0, L..BB3_2 +; AIX32-NEXT: L..BB3_4: # %cmpxchg.nostore285 +; AIX32-NEXT: not 4, 30 +; AIX32-NEXT: srw 5, 3, 26 +; AIX32-NEXT: lwsync +; AIX32-NEXT: lbz 3, 0(30) +; AIX32-NEXT: rlwinm 24, 30, 0, 0, 29 +; AIX32-NEXT: rlwinm 23, 4, 3, 27, 28 +; AIX32-NEXT: li 4, 255 +; AIX32-NEXT: stb 5, 0(29) +; AIX32-NEXT: slw 4, 4, 23 +; AIX32-NEXT: not 22, 4 +; AIX32-NEXT: lwarx 4, 0, 24 +; AIX32-NEXT: srw 6, 4, 23 +; AIX32-NEXT: clrlwi 6, 6, 24 +; AIX32-NEXT: cmplw 6, 3 +; AIX32-NEXT: bne 0, L..BB3_8 +; AIX32-NEXT: # %bb.5: # %cmpxchg.fencedstore256 +; AIX32-NEXT: clrlwi 5, 5, 24 +; AIX32-NEXT: sync +; AIX32-NEXT: slw 5, 5, 23 +; AIX32-NEXT: .align 4 +; AIX32-NEXT: L..BB3_6: # %cmpxchg.trystore255 +; AIX32-NEXT: # +; AIX32-NEXT: and 6, 4, 22 +; AIX32-NEXT: or 6, 6, 5 +; AIX32-NEXT: stwcx. 
6, 0, 24 +; AIX32-NEXT: beq 0, L..BB3_8 +; AIX32-NEXT: # %bb.7: # %cmpxchg.releasedload254 +; AIX32-NEXT: # +; AIX32-NEXT: lwarx 4, 0, 24 +; AIX32-NEXT: srw 6, 4, 23 +; AIX32-NEXT: clrlwi 6, 6, 24 +; AIX32-NEXT: cmplw 6, 3 +; AIX32-NEXT: beq 0, L..BB3_6 +; AIX32-NEXT: L..BB3_8: # %cmpxchg.nostore252 +; AIX32-NEXT: srw 4, 4, 23 +; AIX32-NEXT: lwsync +; AIX32-NEXT: lis 3, 0 +; AIX32-NEXT: lbz 7, 0(29) +; AIX32-NEXT: stb 4, 0(30) +; AIX32-NEXT: clrlwi 6, 4, 24 +; AIX32-NEXT: lwz 4, L..C2(2) # @ss +; AIX32-NEXT: ori 3, 3, 65535 +; AIX32-NEXT: clrlwi 5, 4, 30 +; AIX32-NEXT: rlwinm 21, 4, 0, 0, 29 +; AIX32-NEXT: xori 5, 5, 2 +; AIX32-NEXT: slwi 20, 5, 3 +; AIX32-NEXT: slw 5, 3, 20 +; AIX32-NEXT: not 19, 5 +; AIX32-NEXT: lwarx 5, 0, 21 +; AIX32-NEXT: srw 8, 5, 20 +; AIX32-NEXT: clrlwi 8, 8, 16 +; AIX32-NEXT: cmplw 8, 6 +; AIX32-NEXT: bne 0, L..BB3_12 +; AIX32-NEXT: # %bb.9: # %cmpxchg.fencedstore223 +; AIX32-NEXT: extsb 7, 7 +; AIX32-NEXT: sync +; AIX32-NEXT: clrlwi 7, 7, 16 +; AIX32-NEXT: slw 7, 7, 20 +; AIX32-NEXT: .align 4 +; AIX32-NEXT: L..BB3_10: # %cmpxchg.trystore222 +; AIX32-NEXT: # +; AIX32-NEXT: and 8, 5, 19 +; AIX32-NEXT: or 8, 8, 7 +; AIX32-NEXT: stwcx. 
8, 0, 21 +; AIX32-NEXT: beq 0, L..BB3_12 +; AIX32-NEXT: # %bb.11: # %cmpxchg.releasedload221 +; AIX32-NEXT: # +; AIX32-NEXT: lwarx 5, 0, 21 +; AIX32-NEXT: srw 8, 5, 20 +; AIX32-NEXT: clrlwi 8, 8, 16 +; AIX32-NEXT: cmplw 8, 6 +; AIX32-NEXT: beq 0, L..BB3_10 +; AIX32-NEXT: L..BB3_12: # %cmpxchg.nostore219 +; AIX32-NEXT: srw 5, 5, 20 +; AIX32-NEXT: lwsync +; AIX32-NEXT: lbz 6, 0(29) +; AIX32-NEXT: sth 5, 0(4) +; AIX32-NEXT: lwz 4, L..C3(2) # @us +; AIX32-NEXT: lbz 5, 0(30) +; AIX32-NEXT: clrlwi 7, 4, 30 +; AIX32-NEXT: rlwinm 18, 4, 0, 0, 29 +; AIX32-NEXT: xori 7, 7, 2 +; AIX32-NEXT: slwi 17, 7, 3 +; AIX32-NEXT: slw 3, 3, 17 +; AIX32-NEXT: not 16, 3 +; AIX32-NEXT: lwarx 3, 0, 18 +; AIX32-NEXT: srw 7, 3, 17 +; AIX32-NEXT: clrlwi 7, 7, 16 +; AIX32-NEXT: cmplw 7, 5 +; AIX32-NEXT: bne 0, L..BB3_16 +; AIX32-NEXT: # %bb.13: # %cmpxchg.fencedstore190 +; AIX32-NEXT: extsb 6, 6 +; AIX32-NEXT: sync +; AIX32-NEXT: clrlwi 6, 6, 16 +; AIX32-NEXT: slw 6, 6, 17 +; AIX32-NEXT: .align 4 +; AIX32-NEXT: L..BB3_14: # %cmpxchg.trystore189 +; AIX32-NEXT: # +; AIX32-NEXT: and 7, 3, 16 +; AIX32-NEXT: or 7, 7, 6 +; AIX32-NEXT: stwcx. 7, 0, 18 +; AIX32-NEXT: beq 0, L..BB3_16 +; AIX32-NEXT: # %bb.15: # %cmpxchg.releasedload188 +; AIX32-NEXT: # +; AIX32-NEXT: lwarx 3, 0, 18 +; AIX32-NEXT: srw 7, 3, 17 +; AIX32-NEXT: clrlwi 7, 7, 16 +; AIX32-NEXT: cmplw 7, 5 +; AIX32-NEXT: beq 0, L..BB3_14 +; AIX32-NEXT: L..BB3_16: # %cmpxchg.nostore186 +; AIX32-NEXT: srw 3, 3, 17 +; AIX32-NEXT: lwsync +; AIX32-NEXT: lwz 15, L..C4(2) # @si +; AIX32-NEXT: lbz 5, 0(29) +; AIX32-NEXT: sth 3, 0(4) +; AIX32-NEXT: lbz 4, 0(30) +; AIX32-NEXT: lwarx 3, 0, 15 +; AIX32-NEXT: cmplw 3, 4 +; AIX32-NEXT: bne 0, L..BB3_20 +; AIX32-NEXT: # %bb.17: # %cmpxchg.fencedstore171 +; AIX32-NEXT: extsb 5, 5 +; AIX32-NEXT: sync +; AIX32-NEXT: .align 5 +; AIX32-NEXT: L..BB3_18: # %cmpxchg.trystore170 +; AIX32-NEXT: # +; AIX32-NEXT: stwcx. 
5, 0, 15 +; AIX32-NEXT: beq 0, L..BB3_20 +; AIX32-NEXT: # %bb.19: # %cmpxchg.releasedload169 +; AIX32-NEXT: # +; AIX32-NEXT: lwarx 3, 0, 15 +; AIX32-NEXT: cmplw 3, 4 +; AIX32-NEXT: beq 0, L..BB3_18 +; AIX32-NEXT: L..BB3_20: # %cmpxchg.nostore167 +; AIX32-NEXT: lwsync +; AIX32-NEXT: lwz 28, L..C5(2) # @ui +; AIX32-NEXT: stw 3, 0(15) +; AIX32-NEXT: lbz 4, 0(30) +; AIX32-NEXT: lbz 5, 0(29) +; AIX32-NEXT: lwarx 3, 0, 28 +; AIX32-NEXT: cmplw 3, 4 +; AIX32-NEXT: bne 0, L..BB3_24 +; AIX32-NEXT: # %bb.21: # %cmpxchg.fencedstore152 +; AIX32-NEXT: extsb 5, 5 +; AIX32-NEXT: sync +; AIX32-NEXT: .align 5 +; AIX32-NEXT: L..BB3_22: # %cmpxchg.trystore151 +; AIX32-NEXT: # +; AIX32-NEXT: stwcx. 5, 0, 28 +; AIX32-NEXT: beq 0, L..BB3_24 +; AIX32-NEXT: # %bb.23: # %cmpxchg.releasedload150 +; AIX32-NEXT: # +; AIX32-NEXT: lwarx 3, 0, 28 +; AIX32-NEXT: cmplw 3, 4 +; AIX32-NEXT: beq 0, L..BB3_22 +; AIX32-NEXT: L..BB3_24: # %cmpxchg.nostore148 +; AIX32-NEXT: lwsync +; AIX32-NEXT: stw 3, 0(28) +; AIX32-NEXT: lwz 31, L..C6(2) # @sll +; AIX32-NEXT: lbz 3, 0(29) +; AIX32-NEXT: li 14, 0 +; AIX32-NEXT: addi 4, 1, 64 +; AIX32-NEXT: li 7, 5 +; AIX32-NEXT: li 8, 5 +; AIX32-NEXT: stw 14, 64(1) +; AIX32-NEXT: extsb 6, 3 +; AIX32-NEXT: lbz 3, 0(30) +; AIX32-NEXT: srawi 5, 6, 31 +; AIX32-NEXT: stw 3, 68(1) +; AIX32-NEXT: mr 3, 31 +; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR] +; AIX32-NEXT: nop +; AIX32-NEXT: lwz 3, 68(1) +; AIX32-NEXT: lbz 4, 0(29) +; AIX32-NEXT: li 7, 5 +; AIX32-NEXT: li 8, 5 +; AIX32-NEXT: stw 3, 4(31) +; AIX32-NEXT: lwz 3, 64(1) +; AIX32-NEXT: extsb 6, 4 +; AIX32-NEXT: addi 4, 1, 64 +; AIX32-NEXT: stw 14, 64(1) +; AIX32-NEXT: srawi 5, 6, 31 +; AIX32-NEXT: stw 3, 0(31) +; AIX32-NEXT: lbz 3, 0(30) +; AIX32-NEXT: lwz 31, L..C7(2) # @ull +; AIX32-NEXT: stw 3, 68(1) +; AIX32-NEXT: mr 3, 31 +; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR] +; AIX32-NEXT: nop +; AIX32-NEXT: lwz 3, 64(1) +; AIX32-NEXT: lwz 4, 68(1) +; AIX32-NEXT: lbz 5, 0(29) +; AIX32-NEXT: stw 4, 4(31) +; AIX32-NEXT: 
stw 3, 0(31) +; AIX32-NEXT: lbz 3, 0(30) +; AIX32-NEXT: lwarx 4, 0, 27 +; AIX32-NEXT: srw 6, 4, 26 +; AIX32-NEXT: clrlwi 6, 6, 24 +; AIX32-NEXT: cmplw 6, 3 +; AIX32-NEXT: bne 0, L..BB3_28 +; AIX32-NEXT: # %bb.25: # %cmpxchg.fencedstore119 +; AIX32-NEXT: sync +; AIX32-NEXT: slw 5, 5, 26 +; AIX32-NEXT: .align 4 +; AIX32-NEXT: L..BB3_26: # %cmpxchg.trystore118 +; AIX32-NEXT: # +; AIX32-NEXT: and 4, 4, 25 +; AIX32-NEXT: or 4, 4, 5 +; AIX32-NEXT: stwcx. 4, 0, 27 +; AIX32-NEXT: beq 0, L..BB3_29 +; AIX32-NEXT: # %bb.27: # %cmpxchg.releasedload117 +; AIX32-NEXT: # +; AIX32-NEXT: lwarx 4, 0, 27 +; AIX32-NEXT: srw 6, 4, 26 +; AIX32-NEXT: clrlwi 6, 6, 24 +; AIX32-NEXT: cmplw 6, 3 +; AIX32-NEXT: beq 0, L..BB3_26 +; AIX32-NEXT: L..BB3_28: # %cmpxchg.nostore115 +; AIX32-NEXT: crxor 20, 20, 20 +; AIX32-NEXT: lwsync +; AIX32-NEXT: b L..BB3_30 +; AIX32-NEXT: L..BB3_29: # %cmpxchg.success116 +; AIX32-NEXT: lwsync +; AIX32-NEXT: creqv 20, 20, 20 +; AIX32-NEXT: L..BB3_30: # %cmpxchg.end113 +; AIX32-NEXT: li 3, 0 +; AIX32-NEXT: li 4, 1 +; AIX32-NEXT: lbz 5, 0(29) +; AIX32-NEXT: isel 3, 4, 3, 20 +; AIX32-NEXT: stw 3, 0(28) +; AIX32-NEXT: lbz 3, 0(30) +; AIX32-NEXT: lwarx 4, 0, 24 +; AIX32-NEXT: srw 6, 4, 23 +; AIX32-NEXT: clrlwi 6, 6, 24 +; AIX32-NEXT: cmplw 6, 3 +; AIX32-NEXT: bne 0, L..BB3_34 +; AIX32-NEXT: # %bb.31: # %cmpxchg.fencedstore86 +; AIX32-NEXT: sync +; AIX32-NEXT: slw 5, 5, 23 +; AIX32-NEXT: .align 4 +; AIX32-NEXT: L..BB3_32: # %cmpxchg.trystore85 +; AIX32-NEXT: # +; AIX32-NEXT: and 4, 4, 22 +; AIX32-NEXT: or 4, 4, 5 +; AIX32-NEXT: stwcx. 
4, 0, 24 +; AIX32-NEXT: beq 0, L..BB3_35 +; AIX32-NEXT: # %bb.33: # %cmpxchg.releasedload84 +; AIX32-NEXT: # +; AIX32-NEXT: lwarx 4, 0, 24 +; AIX32-NEXT: srw 6, 4, 23 +; AIX32-NEXT: clrlwi 6, 6, 24 +; AIX32-NEXT: cmplw 6, 3 +; AIX32-NEXT: beq 0, L..BB3_32 +; AIX32-NEXT: L..BB3_34: # %cmpxchg.nostore82 +; AIX32-NEXT: crxor 20, 20, 20 +; AIX32-NEXT: lwsync +; AIX32-NEXT: b L..BB3_36 +; AIX32-NEXT: L..BB3_35: # %cmpxchg.success83 +; AIX32-NEXT: lwsync +; AIX32-NEXT: creqv 20, 20, 20 +; AIX32-NEXT: L..BB3_36: # %cmpxchg.end80 +; AIX32-NEXT: li 3, 0 +; AIX32-NEXT: li 4, 1 +; AIX32-NEXT: lbz 5, 0(29) +; AIX32-NEXT: isel 3, 4, 3, 20 +; AIX32-NEXT: stw 3, 0(28) +; AIX32-NEXT: lbz 3, 0(30) +; AIX32-NEXT: lwarx 4, 0, 21 +; AIX32-NEXT: srw 6, 4, 20 +; AIX32-NEXT: clrlwi 6, 6, 16 +; AIX32-NEXT: cmplw 6, 3 +; AIX32-NEXT: bne 0, L..BB3_40 +; AIX32-NEXT: # %bb.37: # %cmpxchg.fencedstore53 +; AIX32-NEXT: extsb 5, 5 +; AIX32-NEXT: sync +; AIX32-NEXT: clrlwi 5, 5, 16 +; AIX32-NEXT: slw 5, 5, 20 +; AIX32-NEXT: .align 4 +; AIX32-NEXT: L..BB3_38: # %cmpxchg.trystore52 +; AIX32-NEXT: # +; AIX32-NEXT: and 4, 4, 19 +; AIX32-NEXT: or 4, 4, 5 +; AIX32-NEXT: stwcx. 
4, 0, 21 +; AIX32-NEXT: beq 0, L..BB3_41 +; AIX32-NEXT: # %bb.39: # %cmpxchg.releasedload51 +; AIX32-NEXT: # +; AIX32-NEXT: lwarx 4, 0, 21 +; AIX32-NEXT: srw 6, 4, 20 +; AIX32-NEXT: clrlwi 6, 6, 16 +; AIX32-NEXT: cmplw 6, 3 +; AIX32-NEXT: beq 0, L..BB3_38 +; AIX32-NEXT: L..BB3_40: # %cmpxchg.nostore49 +; AIX32-NEXT: crxor 20, 20, 20 +; AIX32-NEXT: lwsync +; AIX32-NEXT: b L..BB3_42 +; AIX32-NEXT: L..BB3_41: # %cmpxchg.success50 +; AIX32-NEXT: lwsync +; AIX32-NEXT: creqv 20, 20, 20 +; AIX32-NEXT: L..BB3_42: # %cmpxchg.end47 +; AIX32-NEXT: li 3, 0 +; AIX32-NEXT: li 4, 1 +; AIX32-NEXT: lbz 5, 0(29) +; AIX32-NEXT: isel 3, 4, 3, 20 +; AIX32-NEXT: stw 3, 0(28) +; AIX32-NEXT: lbz 3, 0(30) +; AIX32-NEXT: lwarx 4, 0, 18 +; AIX32-NEXT: srw 6, 4, 17 +; AIX32-NEXT: clrlwi 6, 6, 16 +; AIX32-NEXT: cmplw 6, 3 +; AIX32-NEXT: bne 0, L..BB3_46 +; AIX32-NEXT: # %bb.43: # %cmpxchg.fencedstore29 +; AIX32-NEXT: extsb 5, 5 +; AIX32-NEXT: sync +; AIX32-NEXT: clrlwi 5, 5, 16 +; AIX32-NEXT: slw 5, 5, 17 +; AIX32-NEXT: .align 4 +; AIX32-NEXT: L..BB3_44: # %cmpxchg.trystore28 +; AIX32-NEXT: # +; AIX32-NEXT: and 4, 4, 16 +; AIX32-NEXT: or 4, 4, 5 +; AIX32-NEXT: stwcx. 
4, 0, 18 +; AIX32-NEXT: beq 0, L..BB3_47 +; AIX32-NEXT: # %bb.45: # %cmpxchg.releasedload27 +; AIX32-NEXT: # +; AIX32-NEXT: lwarx 4, 0, 18 +; AIX32-NEXT: srw 6, 4, 17 +; AIX32-NEXT: clrlwi 6, 6, 16 +; AIX32-NEXT: cmplw 6, 3 +; AIX32-NEXT: beq 0, L..BB3_44 +; AIX32-NEXT: L..BB3_46: # %cmpxchg.nostore25 +; AIX32-NEXT: crxor 20, 20, 20 +; AIX32-NEXT: lwsync +; AIX32-NEXT: b L..BB3_48 +; AIX32-NEXT: L..BB3_47: # %cmpxchg.success26 +; AIX32-NEXT: lwsync +; AIX32-NEXT: creqv 20, 20, 20 +; AIX32-NEXT: L..BB3_48: # %cmpxchg.end23 +; AIX32-NEXT: li 3, 0 +; AIX32-NEXT: li 4, 1 +; AIX32-NEXT: isel 3, 4, 3, 20 +; AIX32-NEXT: lbz 4, 0(29) +; AIX32-NEXT: stw 3, 0(28) +; AIX32-NEXT: lbz 3, 0(30) +; AIX32-NEXT: lwarx 5, 0, 15 +; AIX32-NEXT: cmplw 5, 3 +; AIX32-NEXT: bne 0, L..BB3_52 +; AIX32-NEXT: # %bb.49: # %cmpxchg.fencedstore10 +; AIX32-NEXT: extsb 4, 4 +; AIX32-NEXT: sync +; AIX32-NEXT: .align 5 +; AIX32-NEXT: L..BB3_50: # %cmpxchg.trystore9 +; AIX32-NEXT: # +; AIX32-NEXT: stwcx. 4, 0, 15 +; AIX32-NEXT: beq 0, L..BB3_53 +; AIX32-NEXT: # %bb.51: # %cmpxchg.releasedload8 +; AIX32-NEXT: # +; AIX32-NEXT: lwarx 5, 0, 15 +; AIX32-NEXT: cmplw 5, 3 +; AIX32-NEXT: beq 0, L..BB3_50 +; AIX32-NEXT: L..BB3_52: # %cmpxchg.nostore6 +; AIX32-NEXT: crxor 20, 20, 20 +; AIX32-NEXT: lwsync +; AIX32-NEXT: b L..BB3_54 +; AIX32-NEXT: L..BB3_53: # %cmpxchg.success7 +; AIX32-NEXT: lwsync +; AIX32-NEXT: creqv 20, 20, 20 +; AIX32-NEXT: L..BB3_54: # %cmpxchg.end4 +; AIX32-NEXT: li 3, 0 +; AIX32-NEXT: li 4, 1 +; AIX32-NEXT: isel 3, 4, 3, 20 +; AIX32-NEXT: lbz 4, 0(29) +; AIX32-NEXT: stw 3, 0(28) +; AIX32-NEXT: lbz 3, 0(30) +; AIX32-NEXT: lwarx 5, 0, 28 +; AIX32-NEXT: cmplw 5, 3 +; AIX32-NEXT: bne 0, L..BB3_58 +; AIX32-NEXT: # %bb.55: # %cmpxchg.fencedstore +; AIX32-NEXT: extsb 4, 4 +; AIX32-NEXT: sync +; AIX32-NEXT: .align 5 +; AIX32-NEXT: L..BB3_56: # %cmpxchg.trystore +; AIX32-NEXT: # +; AIX32-NEXT: stwcx. 
4, 0, 28 +; AIX32-NEXT: beq 0, L..BB3_59 +; AIX32-NEXT: # %bb.57: # %cmpxchg.releasedload +; AIX32-NEXT: # +; AIX32-NEXT: lwarx 5, 0, 28 +; AIX32-NEXT: cmplw 5, 3 +; AIX32-NEXT: beq 0, L..BB3_56 +; AIX32-NEXT: L..BB3_58: # %cmpxchg.nostore +; AIX32-NEXT: crxor 20, 20, 20 +; AIX32-NEXT: lwsync +; AIX32-NEXT: b L..BB3_60 +; AIX32-NEXT: L..BB3_59: # %cmpxchg.success +; AIX32-NEXT: lwsync +; AIX32-NEXT: creqv 20, 20, 20 +; AIX32-NEXT: L..BB3_60: # %cmpxchg.end +; AIX32-NEXT: li 3, 1 +; AIX32-NEXT: li 31, 0 +; AIX32-NEXT: lbz 4, 0(29) +; AIX32-NEXT: isel 3, 3, 31, 20 +; AIX32-NEXT: li 7, 5 +; AIX32-NEXT: li 8, 5 +; AIX32-NEXT: extsb 6, 4 +; AIX32-NEXT: stw 3, 0(28) +; AIX32-NEXT: lbz 3, 0(30) +; AIX32-NEXT: addi 4, 1, 64 +; AIX32-NEXT: stw 31, 64(1) +; AIX32-NEXT: srawi 5, 6, 31 +; AIX32-NEXT: stw 3, 68(1) +; AIX32-NEXT: lwz 3, L..C6(2) # @sll +; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR] +; AIX32-NEXT: nop +; AIX32-NEXT: lbz 4, 0(29) +; AIX32-NEXT: stw 3, 0(28) +; AIX32-NEXT: lbz 3, 0(30) +; AIX32-NEXT: li 7, 5 +; AIX32-NEXT: li 8, 5 +; AIX32-NEXT: extsb 6, 4 +; AIX32-NEXT: stw 3, 68(1) +; AIX32-NEXT: lwz 3, L..C7(2) # @ull +; AIX32-NEXT: addi 4, 1, 64 +; AIX32-NEXT: stw 31, 64(1) +; AIX32-NEXT: srawi 5, 6, 31 +; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR] +; AIX32-NEXT: nop +; AIX32-NEXT: stw 3, 0(28) +; AIX32-NEXT: lwz 31, 140(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 30, 136(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 29, 132(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 28, 128(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 27, 124(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 26, 120(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 25, 116(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 24, 112(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 23, 108(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 22, 104(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 21, 100(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 20, 96(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 19, 92(1) # 
4-byte Folded Reload +; AIX32-NEXT: lwz 18, 88(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 17, 84(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 16, 80(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 15, 76(1) # 4-byte Folded Reload +; AIX32-NEXT: lwz 14, 72(1) # 4-byte Folded Reload +; AIX32-NEXT: addi 1, 1, 144 +; AIX32-NEXT: lwz 0, 8(1) +; AIX32-NEXT: mtlr 0 ; AIX32-NEXT: blr entry: %0 = load i8, ptr @uc, align 1 @@ -5852,20 +5852,23 @@ entry: define dso_local i64 @cmpswplp(ptr noundef %ptr, ptr nocapture noundef readnone %oldval, i64 noundef %newval) local_unnamed_addr #0 { ; CHECK-LABEL: cmpswplp: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ldarx 4, 0, 3 -; CHECK-NEXT: cmpld 4, 5 -; CHECK-NEXT: bne 0, .LBB6_2 -; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore -; CHECK-NEXT: addi 4, 5, 1 -; CHECK-NEXT: stdcx. 4, 0, 3 -; CHECK-NEXT: beq 0, .LBB6_4 -; CHECK-NEXT: .LBB6_2: # %cmpxchg.failure -; CHECK-NEXT: crxor 20, 20, 20 -; CHECK-NEXT: .LBB6_3: # %cmpxchg.end -; CHECK-NEXT: li 3, 66 -; CHECK-NEXT: li 4, 55 -; CHECK-NEXT: isel 3, 4, 3, 20 -; CHECK-NEXT: blr +; CHECK-NEXT: ldarx 4, 0, 3 +; CHECK-NEXT: cmpld 4, 5 +; CHECK-NEXT: bne 0, .LBB6_2 +; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore +; CHECK-NEXT: addi 4, 5, 1 +; CHECK-NEXT: stdcx. 
4, 0, 3 +; CHECK-NEXT: beq 0, .LBB6_4 +; CHECK-NEXT: .LBB6_2: # %cmpxchg.failure +; CHECK-NEXT: crxor 20, 20, 20 +; CHECK-NEXT: .LBB6_3: # %cmpxchg.end +; CHECK-NEXT: li 3, 66 +; CHECK-NEXT: li 4, 55 +; CHECK-NEXT: isel 3, 4, 3, 20 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB6_4: +; CHECK-NEXT: creqv 20, 20, 20 +; CHECK-NEXT: b .LBB6_3 ; ; AIX32-LABEL: cmpswplp: ; AIX32: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/PowerPC/loop-comment.ll b/llvm/test/CodeGen/PowerPC/loop-comment.ll index 1fa9dda51ef9..34b29cbe901e 100644 --- a/llvm/test/CodeGen/PowerPC/loop-comment.ll +++ b/llvm/test/CodeGen/PowerPC/loop-comment.ll @@ -6,18 +6,18 @@ define void @test(ptr %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: clrlwi 5, 5, 24 ; PPC64LE-NEXT: clrlwi 4, 4, 24 -; PPC64LE-NEXT: .p2align 5 -; PPC64LE-NEXT: .LBB0_1: # %cmpxchg.start -; PPC64LE-NEXT: # =>This Inner Loop Header: Depth=1 +; PPC64LE-NEXT: .p2align 5 +; PPC64LE-NEXT: .LBB0_1: # %cmpxchg.start +; PPC64LE-NEXT: # ; PPC64LE-NEXT: lbarx 6, 0, 3 -; PPC64LE-NEXT: clrlwi 6, 6, 24 +; PPC64LE-NEXT: clrlwi 6, 6, 24 ; PPC64LE-NEXT: cmplw 6, 4 ; PPC64LE-NEXT: bnelr 0 -; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: # in Loop: Header=BB0_1 Depth=1 +; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore +; PPC64LE-NEXT: # ; PPC64LE-NEXT: stbcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB0_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end ; PPC64LE-NEXT: blr %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic ret void -- cgit v1.2.3 From 17f5b8b52a3552de1143efb42af6a94d47d8c7fd Mon Sep 17 00:00:00 2001 From: Andre Kuhlenschmidt Date: Wed, 18 Jun 2025 11:21:35 -0700 Subject: [flang][driver] add ability to look up feature flags without setting them (#144559) This just adds some convenience methods to feature control and rewrites old code in terms of those methods. Also cleans up some names that I just realized were overloads of another method. 
--- flang/include/flang/Support/Fortran-features.h | 14 +++++++++++++- flang/lib/Frontend/CompilerInvocation.cpp | 2 +- flang/lib/Support/Fortran-features.cpp | 23 ++++++++++++----------- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/flang/include/flang/Support/Fortran-features.h b/flang/include/flang/Support/Fortran-features.h index ea0845b7d605..39356daa3606 100644 --- a/flang/include/flang/Support/Fortran-features.h +++ b/flang/include/flang/Support/Fortran-features.h @@ -81,6 +81,9 @@ ENUM_CLASS(UsageWarning, Portability, PointerToUndefinable, using LanguageFeatures = EnumSet; using UsageWarnings = EnumSet; +using LanguageFeatureOrWarning = std::variant; +using LanguageControlFlag = + std::pair; class LanguageFeatureControl { public: @@ -94,6 +97,13 @@ public: void EnableWarning(UsageWarning w, bool yes = true) { warnUsage_.set(w, yes); } + void EnableWarning(LanguageFeatureOrWarning flag, bool yes = true) { + if (std::holds_alternative(flag)) { + EnableWarning(std::get(flag), yes); + } else { + EnableWarning(std::get(flag), yes); + } + } void WarnOnAllNonstandard(bool yes = true); bool IsWarnOnAllNonstandard() const { return warnAllLanguage_; } void WarnOnAllUsage(bool yes = true); @@ -116,9 +126,11 @@ public: bool ShouldWarn(LanguageFeature f) const { return warnLanguage_.test(f); } bool ShouldWarn(UsageWarning w) const { return warnUsage_.test(w); } // Cli options + // Find a warning by its Cli spelling, i.e. '[no-]warning-name'. + std::optional FindWarning(std::string_view input); // Take a string from the Cli and apply it to the LanguageFeatureControl. // Return true if the option was recognized (and hence applied). - bool ApplyCliOption(std::string input); + bool EnableWarning(std::string_view input); // The add and replace functions are not currently used but are provided // to allow a flexible many-to-one mapping from Cli spellings to enum values. 
// Taking a string by value because the functions own this string after the diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 147849b0b7d2..2603a3f6dc64 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1011,7 +1011,7 @@ static bool parseDiagArgs(CompilerInvocation &res, llvm::opt::ArgList &args, if (wArg == "error") { res.setWarnAsErr(true); // -W(no-) - } else if (!features.ApplyCliOption(wArg)) { + } else if (!features.EnableWarning(wArg)) { const unsigned diagID = diags.getCustomDiagID( clang::DiagnosticsEngine::Error, "Unknown diagnostic option: -W%0"); diags.Report(diagID) << wArg; diff --git a/flang/lib/Support/Fortran-features.cpp b/flang/lib/Support/Fortran-features.cpp index 08ded173de51..17b5f8368916 100644 --- a/flang/lib/Support/Fortran-features.cpp +++ b/flang/lib/Support/Fortran-features.cpp @@ -151,22 +151,23 @@ LanguageFeatureControl::LanguageFeatureControl() { warnLanguage_.set(LanguageFeature::NullActualForAllocatable); } -// Take a string from the Cli and apply it to the LanguageFeatureControl. 
-bool LanguageFeatureControl::ApplyCliOption(std::string input) { +std::optional LanguageFeatureControl::FindWarning( + std::string_view input) { bool negated{false}; if (input.size() > 3 && input.substr(0, 3) == "no-") { negated = true; input = input.substr(3); } - if (auto it{cliOptions_.find(input)}; it != cliOptions_.end()) { - if (std::holds_alternative(it->second)) { - EnableWarning(std::get(it->second), !negated); - return true; - } - if (std::holds_alternative(it->second)) { - EnableWarning(std::get(it->second), !negated); - return true; - } + if (auto it{cliOptions_.find(std::string{input})}; it != cliOptions_.end()) { + return std::make_pair(it->second, !negated); + } + return std::nullopt; +} + +bool LanguageFeatureControl::EnableWarning(std::string_view input) { + if (auto warningAndEnabled{FindWarning(input)}) { + EnableWarning(warningAndEnabled->first, warningAndEnabled->second); + return true; } return false; } -- cgit v1.2.3 From 8c3fbaf0ee7322e948403d2234a7230bd6137c98 Mon Sep 17 00:00:00 2001 From: "Walter J.T.V" <81811777+eZWALT@users.noreply.github.com> Date: Wed, 18 Jun 2025 20:52:41 +0200 Subject: [Clang][OpenMP][LoopTransformations] Fix incorrect number of generated loops for Tile and Reverse directives (#140532) This patch is closely related to #139293 and addresses an existing issue in the loop transformation codebase. Specifically, it corrects the handling of the `NumGeneratedLoops` variable in `OMPLoopTransformationDirective` AST nodes and its inheritors (such as OMPUnrollDirective, OMPTileDirective, etc.). Previously, this variable was inaccurately set for certain transformations like reverse or tile. While this did not lead to functional bugs, since the value was only checked to determine whether it was greater than zero or equal to zero, the inconsistency could introduce problems when supporting more complex directives in the future. 
--- clang/include/clang/AST/StmtOpenMP.h | 23 +++++++++++++++-------- clang/lib/AST/StmtOpenMP.cpp | 11 +++++++---- clang/lib/Sema/SemaOpenMP.cpp | 4 ++-- clang/lib/Serialization/ASTReaderStmt.cpp | 5 ++--- 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 736bcabbad1f..e2fd2114026f 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -5787,10 +5787,13 @@ class OMPReverseDirective final : public OMPLoopTransformationDirective { TransformedStmtOffset, }; - explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc) + explicit OMPReverseDirective(SourceLocation StartLoc, SourceLocation EndLoc, + unsigned NumLoops) : OMPLoopTransformationDirective(OMPReverseDirectiveClass, llvm::omp::OMPD_reverse, StartLoc, - EndLoc, 1) {} + EndLoc, NumLoops) { + setNumGeneratedLoops(NumLoops); + } void setPreInits(Stmt *PreInits) { Data->getChildren()[PreInitsOffset] = PreInits; @@ -5806,19 +5809,23 @@ public: /// \param C Context of the AST. /// \param StartLoc Location of the introducer (e.g. the 'omp' token). /// \param EndLoc Location of the directive's end (e.g. the tok::eod). + /// \param NumLoops Number of affected loops /// \param AssociatedStmt The outermost associated loop. /// \param TransformedStmt The loop nest after tiling, or nullptr in /// dependent contexts. /// \param PreInits Helper preinits statements for the loop nest. - static OMPReverseDirective * - Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, - Stmt *AssociatedStmt, Stmt *TransformedStmt, Stmt *PreInits); + static OMPReverseDirective *Create(const ASTContext &C, + SourceLocation StartLoc, + SourceLocation EndLoc, + Stmt *AssociatedStmt, unsigned NumLoops, + Stmt *TransformedStmt, Stmt *PreInits); /// Build an empty '#pragma omp reverse' AST node for deserialization. /// /// \param C Context of the AST. 
- /// \param NumClauses Number of clauses to allocate. - static OMPReverseDirective *CreateEmpty(const ASTContext &C); + /// \param NumLoops Number of associated loops to allocate + static OMPReverseDirective *CreateEmpty(const ASTContext &C, + unsigned NumLoops); /// Gets/sets the associated loops after the transformation, i.e. after /// de-sugaring. @@ -5857,7 +5864,7 @@ class OMPInterchangeDirective final : public OMPLoopTransformationDirective { : OMPLoopTransformationDirective(OMPInterchangeDirectiveClass, llvm::omp::OMPD_interchange, StartLoc, EndLoc, NumLoops) { - setNumGeneratedLoops(3 * NumLoops); + setNumGeneratedLoops(NumLoops); } void setPreInits(Stmt *PreInits) { diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index 093e1f659916..2eeb5e45ab51 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -471,18 +471,21 @@ OMPUnrollDirective *OMPUnrollDirective::CreateEmpty(const ASTContext &C, OMPReverseDirective * OMPReverseDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc, Stmt *AssociatedStmt, - Stmt *TransformedStmt, Stmt *PreInits) { + unsigned NumLoops, Stmt *TransformedStmt, + Stmt *PreInits) { OMPReverseDirective *Dir = createDirective( - C, {}, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc); + C, {}, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc, + NumLoops); Dir->setTransformedStmt(TransformedStmt); Dir->setPreInits(PreInits); return Dir; } -OMPReverseDirective *OMPReverseDirective::CreateEmpty(const ASTContext &C) { +OMPReverseDirective *OMPReverseDirective::CreateEmpty(const ASTContext &C, + unsigned NumLoops) { return createEmptyDirective( C, /*NumClauses=*/0, /*HasAssociatedStmt=*/true, - TransformedStmtOffset + 1, SourceLocation(), SourceLocation()); + TransformedStmtOffset + 1, SourceLocation(), SourceLocation(), NumLoops); } OMPInterchangeDirective *OMPInterchangeDirective::Create( diff --git a/clang/lib/Sema/SemaOpenMP.cpp 
b/clang/lib/Sema/SemaOpenMP.cpp index d928b7ae2b4c..00f465818080 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -15140,7 +15140,7 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, // instantiated. if (SemaRef.CurContext->isDependentContext()) return OMPReverseDirective::Create(Context, StartLoc, EndLoc, AStmt, - nullptr, nullptr); + NumLoops, nullptr, nullptr); assert(LoopHelpers.size() == NumLoops && "Expecting a single-dimensional loop iteration space"); @@ -15299,7 +15299,7 @@ StmtResult SemaOpenMP::ActOnOpenMPReverseDirective(Stmt *AStmt, ForStmt(Context, Init.get(), Cond.get(), nullptr, Incr.get(), ReversedBody, LoopHelper.Init->getBeginLoc(), LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc()); - return OMPReverseDirective::Create(Context, StartLoc, EndLoc, AStmt, + return OMPReverseDirective::Create(Context, StartLoc, EndLoc, AStmt, NumLoops, ReversedFor, buildPreInits(Context, PreInits)); } diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 65102b64030c..44cfb83ad2db 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -3602,11 +3602,10 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { } case STMT_OMP_REVERSE_DIRECTIVE: { - assert(Record[ASTStmtReader::NumStmtFields] == 1 && - "Reverse directive accepts only a single loop"); + unsigned NumLoops = Record[ASTStmtReader::NumStmtFields]; assert(Record[ASTStmtReader::NumStmtFields + 1] == 0 && "Reverse directive has no clauses"); - S = OMPReverseDirective::CreateEmpty(Context); + S = OMPReverseDirective::CreateEmpty(Context, NumLoops); break; } -- cgit v1.2.3 From ab6beeca9ccc1968661eea27c1a55e8734f7437b Mon Sep 17 00:00:00 2001 From: uthmanna <114300283+uthmanna@users.noreply.github.com> Date: Wed, 18 Jun 2025 21:00:10 +0200 Subject: [llvm-cov] Export decision coverage to output json (#144335) This commit adds decision coverage counts 
derived from MC/DC test vector execution to the JSON output of llvm-cov, as discussed here: [Missing Decision Coverage (DC) in output json](https://discourse.llvm.org/t/missing-decision-coverage-dc-in-output-json/86783) with @evodius96 --------- Co-authored-by: uthmanna --- llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h | 12 ++++++++++++ llvm/tools/llvm-cov/CoverageExporterJson.cpp | 6 ++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index d1230b0ba7c5..8e6180be25b5 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -31,6 +31,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -494,6 +495,17 @@ public: return TV[TestVectorIndex].first[PosToID[Condition]]; } + /// Return the number of True and False decisions for all executed test + /// vectors. + std::pair getDecisions() const { + const unsigned TrueDecisions = + std::count_if(TV.begin(), TV.end(), [](const auto &TestVec) { + return TestVec.second == CondState::MCDC_True; + }); + + return {TrueDecisions, TV.size() - TrueDecisions}; + } + /// Return the Result evaluation for an executed test vector. /// See MCDCRecordProcessor::RecordTestVector(). CondState getTVResult(unsigned TestVectorIndex) { diff --git a/llvm/tools/llvm-cov/CoverageExporterJson.cpp b/llvm/tools/llvm-cov/CoverageExporterJson.cpp index 4088c1b053aa..024693a24cc2 100644 --- a/llvm/tools/llvm-cov/CoverageExporterJson.cpp +++ b/llvm/tools/llvm-cov/CoverageExporterJson.cpp @@ -62,7 +62,7 @@ #include /// The semantic version combined as a string. -#define LLVM_COVERAGE_EXPORT_JSON_STR "2.0.1" +#define LLVM_COVERAGE_EXPORT_JSON_STR "3.0.0" /// Unique type identifier for JSON coverage export. 
#define LLVM_COVERAGE_EXPORT_JSON_TYPE_STR "llvm.coverage.json.export" @@ -110,8 +110,10 @@ json::Array gatherConditions(const coverage::MCDCRecord &Record) { json::Array renderMCDCRecord(const coverage::MCDCRecord &Record) { const llvm::coverage::CounterMappingRegion &CMR = Record.getDecisionRegion(); + const auto [TrueDecisions, FalseDecisions] = Record.getDecisions(); return json::Array({CMR.LineStart, CMR.ColumnStart, CMR.LineEnd, - CMR.ColumnEnd, CMR.ExpandedFileID, int64_t(CMR.Kind), + CMR.ColumnEnd, TrueDecisions, FalseDecisions, + CMR.ExpandedFileID, int64_t(CMR.Kind), gatherConditions(Record)}); } -- cgit v1.2.3 From ca9a09dbe679dbdd4d47cb7894977e04c3bb914e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 18 Jun 2025 12:03:17 -0700 Subject: [libc++] Fix a typo in documentation (#144763) --- libcxx/docs/ABIGuarantees.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/docs/ABIGuarantees.rst b/libcxx/docs/ABIGuarantees.rst index e6ac4f2b5b23..c7d5afe1080b 100644 --- a/libcxx/docs/ABIGuarantees.rst +++ b/libcxx/docs/ABIGuarantees.rst @@ -92,7 +92,7 @@ Linking TUs which have been compiled with different flags affecting code gen There are a lot of compiler (and library) flags which change the code generated for functions. This includes flags like ``-O1``, which are guaranteed by the compiler to not change the observable behaviour of a correct program, as well as flags like ``-fexceptions``, which **do** change the observable behaviour. libc++ allows linking of TUs which have been -compiled whith specific flags only and makes no guarantees for any of the flags not listed below. +compiled with specific flags only and makes no guarantees for any of the flags not listed below. 
The flags allowed (in any combination) are: - ``-f[no-]exceptions`` -- cgit v1.2.3 From a94eb27a29ef3aee5ccafc1d7bebee1c8efbaf38 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 18 Jun 2025 20:18:22 +0100 Subject: [HashRecognize] Fix big-endian CRC tables (#144754) Big-endian CRC tables are incorrect due to the initial value of CRC in genSarwateTable being hard-coded for CRC-8. 128 is the signed-min value for CRC-8, but it should be generalized to APInt::getSignedMinValue. The issue was found when writing CRC verification tests for llvm-test-suite. --- llvm/lib/Analysis/HashRecognize.cpp | 2 +- .../HashRecognize/cyclic-redundancy-check.ll | 96 +++++++++++----------- 2 files changed, 49 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp index 1edb8b3bdc9a..987d13731276 100644 --- a/llvm/lib/Analysis/HashRecognize.cpp +++ b/llvm/lib/Analysis/HashRecognize.cpp @@ -478,7 +478,7 @@ CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly, Table[0] = APInt::getZero(BW); if (ByteOrderSwapped) { - APInt CRCInit(BW, 128); + APInt CRCInit = APInt::getSignedMinValue(BW); for (unsigned I = 1; I < 256; I <<= 1) { CRCInit = CRCInit.shl(1) ^ (CRCInit.isSignBitSet() ? 
GenPoly : APInt::getZero(BW)); diff --git a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll index 7a3082056ad2..0fbc376c40d7 100644 --- a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll +++ b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll @@ -151,22 +151,22 @@ define i16 @crc16.be.tc8.crc.init.li(i16 %checksum, i8 %msg) { ; CHECK-NEXT: Generating polynomial: 4129 ; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl ; CHECK-NEXT: Computed CRC lookup table: -; CHECK-NEXT: 0 256 512 768 1024 1280 1536 1792 2048 2304 2560 2816 3072 3328 3584 3840 -; CHECK-NEXT: 4096 4352 4608 4864 5120 5376 5632 5888 6144 6400 6656 6912 7168 7424 7680 7936 -; CHECK-NEXT: 8192 8448 8704 8960 9216 9472 9728 9984 10240 10496 10752 11008 11264 11520 11776 12032 -; CHECK-NEXT: 12288 12544 12800 13056 13312 13568 13824 14080 14336 14592 14848 15104 15360 15616 15872 16128 -; CHECK-NEXT: 16384 16640 16896 17152 17408 17664 17920 18176 18432 18688 18944 19200 19456 19712 19968 20224 -; CHECK-NEXT: 20480 20736 20992 21248 21504 21760 22016 22272 22528 22784 23040 23296 23552 23808 24064 24320 -; CHECK-NEXT: 24576 24832 25088 25344 25600 25856 26112 26368 26624 26880 27136 27392 27648 27904 28160 28416 -; CHECK-NEXT: 28672 28928 29184 29440 29696 29952 30208 30464 30720 30976 31232 31488 31744 32000 32256 32512 -; CHECK-NEXT: 32768 33024 33280 33536 33792 34048 34304 34560 34816 35072 35328 35584 35840 36096 36352 36608 -; CHECK-NEXT: 36864 37120 37376 37632 37888 38144 38400 38656 38912 39168 39424 39680 39936 40192 40448 40704 -; CHECK-NEXT: 40960 41216 41472 41728 41984 42240 42496 42752 43008 43264 43520 43776 44032 44288 44544 44800 -; CHECK-NEXT: 45056 45312 45568 45824 46080 46336 46592 46848 47104 47360 47616 47872 48128 48384 48640 48896 -; CHECK-NEXT: 49152 49408 49664 49920 50176 50432 50688 50944 51200 51456 51712 51968 52224 52480 52736 52992 -; 
CHECK-NEXT: 53248 53504 53760 54016 54272 54528 54784 55040 55296 55552 55808 56064 56320 56576 56832 57088 -; CHECK-NEXT: 57344 57600 57856 58112 58368 58624 58880 59136 59392 59648 59904 60160 60416 60672 60928 61184 -; CHECK-NEXT: 61440 61696 61952 62208 62464 62720 62976 63232 63488 63744 64000 64256 64512 64768 65024 65280 +; CHECK-NEXT: 0 4129 8258 12387 16516 20645 24774 28903 33032 37161 41290 45419 49548 53677 57806 61935 +; CHECK-NEXT: 4657 528 12915 8786 21173 17044 29431 25302 37689 33560 45947 41818 54205 50076 62463 58334 +; CHECK-NEXT: 9314 13379 1056 5121 25830 29895 17572 21637 42346 46411 34088 38153 58862 62927 50604 54669 +; CHECK-NEXT: 13907 9842 5649 1584 30423 26358 22165 18100 46939 42874 38681 34616 63455 59390 55197 51132 +; CHECK-NEXT: 18628 22757 26758 30887 2112 6241 10242 14371 51660 55789 59790 63919 35144 39273 43274 47403 +; CHECK-NEXT: 23285 19156 31415 27286 6769 2640 14899 10770 56317 52188 64447 60318 39801 35672 47931 43802 +; CHECK-NEXT: 27814 31879 19684 23749 11298 15363 3168 7233 60846 64911 52716 56781 44330 48395 36200 40265 +; CHECK-NEXT: 32407 28342 24277 20212 15891 11826 7761 3696 65439 61374 57309 53244 48923 44858 40793 36728 +; CHECK-NEXT: 37256 33193 45514 41451 53516 49453 61774 57711 4224 161 12482 8419 20484 16421 28742 24679 +; CHECK-NEXT: 33721 37784 41979 46042 49981 54044 58239 62302 689 4752 8947 13010 16949 21012 25207 29270 +; CHECK-NEXT: 46570 42443 38312 34185 62830 58703 54572 50445 13538 9411 5280 1153 29798 25671 21540 17413 +; CHECK-NEXT: 42971 47098 34713 38840 59231 63358 50973 55100 9939 14066 1681 5808 26199 30326 17941 22068 +; CHECK-NEXT: 55628 51565 63758 59695 39368 35305 47498 43435 22596 18533 30726 26663 6336 2273 14466 10403 +; CHECK-NEXT: 52093 56156 60223 64286 35833 39896 43963 48026 19061 23124 27191 31254 2801 6864 10931 14994 +; CHECK-NEXT: 64814 60687 56684 52557 48554 44427 40424 36297 31782 27655 23652 19525 15522 11395 7392 3265 +; CHECK-NEXT: 61215 65342 53085 57212 44955 
49082 36825 40952 28183 32310 20053 24180 11923 16050 3793 7920 ; entry: %msg.ext = zext i8 %msg to i16 @@ -196,22 +196,22 @@ define i16 @crc16.be.tc8.crc.init.arg(i16 %crc.init) { ; CHECK-NEXT: Generating polynomial: 4129 ; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl ; CHECK-NEXT: Computed CRC lookup table: -; CHECK-NEXT: 0 256 512 768 1024 1280 1536 1792 2048 2304 2560 2816 3072 3328 3584 3840 -; CHECK-NEXT: 4096 4352 4608 4864 5120 5376 5632 5888 6144 6400 6656 6912 7168 7424 7680 7936 -; CHECK-NEXT: 8192 8448 8704 8960 9216 9472 9728 9984 10240 10496 10752 11008 11264 11520 11776 12032 -; CHECK-NEXT: 12288 12544 12800 13056 13312 13568 13824 14080 14336 14592 14848 15104 15360 15616 15872 16128 -; CHECK-NEXT: 16384 16640 16896 17152 17408 17664 17920 18176 18432 18688 18944 19200 19456 19712 19968 20224 -; CHECK-NEXT: 20480 20736 20992 21248 21504 21760 22016 22272 22528 22784 23040 23296 23552 23808 24064 24320 -; CHECK-NEXT: 24576 24832 25088 25344 25600 25856 26112 26368 26624 26880 27136 27392 27648 27904 28160 28416 -; CHECK-NEXT: 28672 28928 29184 29440 29696 29952 30208 30464 30720 30976 31232 31488 31744 32000 32256 32512 -; CHECK-NEXT: 32768 33024 33280 33536 33792 34048 34304 34560 34816 35072 35328 35584 35840 36096 36352 36608 -; CHECK-NEXT: 36864 37120 37376 37632 37888 38144 38400 38656 38912 39168 39424 39680 39936 40192 40448 40704 -; CHECK-NEXT: 40960 41216 41472 41728 41984 42240 42496 42752 43008 43264 43520 43776 44032 44288 44544 44800 -; CHECK-NEXT: 45056 45312 45568 45824 46080 46336 46592 46848 47104 47360 47616 47872 48128 48384 48640 48896 -; CHECK-NEXT: 49152 49408 49664 49920 50176 50432 50688 50944 51200 51456 51712 51968 52224 52480 52736 52992 -; CHECK-NEXT: 53248 53504 53760 54016 54272 54528 54784 55040 55296 55552 55808 56064 56320 56576 56832 57088 -; CHECK-NEXT: 57344 57600 57856 58112 58368 58624 58880 59136 59392 59648 59904 60160 60416 60672 60928 61184 -; CHECK-NEXT: 61440 61696 
61952 62208 62464 62720 62976 63232 63488 63744 64000 64256 64512 64768 65024 65280 +; CHECK-NEXT: 0 4129 8258 12387 16516 20645 24774 28903 33032 37161 41290 45419 49548 53677 57806 61935 +; CHECK-NEXT: 4657 528 12915 8786 21173 17044 29431 25302 37689 33560 45947 41818 54205 50076 62463 58334 +; CHECK-NEXT: 9314 13379 1056 5121 25830 29895 17572 21637 42346 46411 34088 38153 58862 62927 50604 54669 +; CHECK-NEXT: 13907 9842 5649 1584 30423 26358 22165 18100 46939 42874 38681 34616 63455 59390 55197 51132 +; CHECK-NEXT: 18628 22757 26758 30887 2112 6241 10242 14371 51660 55789 59790 63919 35144 39273 43274 47403 +; CHECK-NEXT: 23285 19156 31415 27286 6769 2640 14899 10770 56317 52188 64447 60318 39801 35672 47931 43802 +; CHECK-NEXT: 27814 31879 19684 23749 11298 15363 3168 7233 60846 64911 52716 56781 44330 48395 36200 40265 +; CHECK-NEXT: 32407 28342 24277 20212 15891 11826 7761 3696 65439 61374 57309 53244 48923 44858 40793 36728 +; CHECK-NEXT: 37256 33193 45514 41451 53516 49453 61774 57711 4224 161 12482 8419 20484 16421 28742 24679 +; CHECK-NEXT: 33721 37784 41979 46042 49981 54044 58239 62302 689 4752 8947 13010 16949 21012 25207 29270 +; CHECK-NEXT: 46570 42443 38312 34185 62830 58703 54572 50445 13538 9411 5280 1153 29798 25671 21540 17413 +; CHECK-NEXT: 42971 47098 34713 38840 59231 63358 50973 55100 9939 14066 1681 5808 26199 30326 17941 22068 +; CHECK-NEXT: 55628 51565 63758 59695 39368 35305 47498 43435 22596 18533 30726 26663 6336 2273 14466 10403 +; CHECK-NEXT: 52093 56156 60223 64286 35833 39896 43963 48026 19061 23124 27191 31254 2801 6864 10931 14994 +; CHECK-NEXT: 64814 60687 56684 52557 48554 44427 40424 36297 31782 27655 23652 19525 15522 11395 7392 3265 +; CHECK-NEXT: 61215 65342 53085 57212 44955 49082 36825 40952 28183 32310 20053 24180 11923 16050 3793 7920 ; entry: br label %loop @@ -238,22 +238,22 @@ define i16 @crc16.be.tc8.crc.init.arg.flipped.sb.check(i16 %crc.init) { ; CHECK-NEXT: Generating polynomial: 4129 ; CHECK-NEXT: Computed 
CRC: %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor ; CHECK-NEXT: Computed CRC lookup table: -; CHECK-NEXT: 0 256 512 768 1024 1280 1536 1792 2048 2304 2560 2816 3072 3328 3584 3840 -; CHECK-NEXT: 4096 4352 4608 4864 5120 5376 5632 5888 6144 6400 6656 6912 7168 7424 7680 7936 -; CHECK-NEXT: 8192 8448 8704 8960 9216 9472 9728 9984 10240 10496 10752 11008 11264 11520 11776 12032 -; CHECK-NEXT: 12288 12544 12800 13056 13312 13568 13824 14080 14336 14592 14848 15104 15360 15616 15872 16128 -; CHECK-NEXT: 16384 16640 16896 17152 17408 17664 17920 18176 18432 18688 18944 19200 19456 19712 19968 20224 -; CHECK-NEXT: 20480 20736 20992 21248 21504 21760 22016 22272 22528 22784 23040 23296 23552 23808 24064 24320 -; CHECK-NEXT: 24576 24832 25088 25344 25600 25856 26112 26368 26624 26880 27136 27392 27648 27904 28160 28416 -; CHECK-NEXT: 28672 28928 29184 29440 29696 29952 30208 30464 30720 30976 31232 31488 31744 32000 32256 32512 -; CHECK-NEXT: 32768 33024 33280 33536 33792 34048 34304 34560 34816 35072 35328 35584 35840 36096 36352 36608 -; CHECK-NEXT: 36864 37120 37376 37632 37888 38144 38400 38656 38912 39168 39424 39680 39936 40192 40448 40704 -; CHECK-NEXT: 40960 41216 41472 41728 41984 42240 42496 42752 43008 43264 43520 43776 44032 44288 44544 44800 -; CHECK-NEXT: 45056 45312 45568 45824 46080 46336 46592 46848 47104 47360 47616 47872 48128 48384 48640 48896 -; CHECK-NEXT: 49152 49408 49664 49920 50176 50432 50688 50944 51200 51456 51712 51968 52224 52480 52736 52992 -; CHECK-NEXT: 53248 53504 53760 54016 54272 54528 54784 55040 55296 55552 55808 56064 56320 56576 56832 57088 -; CHECK-NEXT: 57344 57600 57856 58112 58368 58624 58880 59136 59392 59648 59904 60160 60416 60672 60928 61184 -; CHECK-NEXT: 61440 61696 61952 62208 62464 62720 62976 63232 63488 63744 64000 64256 64512 64768 65024 65280 +; CHECK-NEXT: 0 4129 8258 12387 16516 20645 24774 28903 33032 37161 41290 45419 49548 53677 57806 61935 +; CHECK-NEXT: 4657 528 12915 8786 21173 17044 29431 25302 
37689 33560 45947 41818 54205 50076 62463 58334 +; CHECK-NEXT: 9314 13379 1056 5121 25830 29895 17572 21637 42346 46411 34088 38153 58862 62927 50604 54669 +; CHECK-NEXT: 13907 9842 5649 1584 30423 26358 22165 18100 46939 42874 38681 34616 63455 59390 55197 51132 +; CHECK-NEXT: 18628 22757 26758 30887 2112 6241 10242 14371 51660 55789 59790 63919 35144 39273 43274 47403 +; CHECK-NEXT: 23285 19156 31415 27286 6769 2640 14899 10770 56317 52188 64447 60318 39801 35672 47931 43802 +; CHECK-NEXT: 27814 31879 19684 23749 11298 15363 3168 7233 60846 64911 52716 56781 44330 48395 36200 40265 +; CHECK-NEXT: 32407 28342 24277 20212 15891 11826 7761 3696 65439 61374 57309 53244 48923 44858 40793 36728 +; CHECK-NEXT: 37256 33193 45514 41451 53516 49453 61774 57711 4224 161 12482 8419 20484 16421 28742 24679 +; CHECK-NEXT: 33721 37784 41979 46042 49981 54044 58239 62302 689 4752 8947 13010 16949 21012 25207 29270 +; CHECK-NEXT: 46570 42443 38312 34185 62830 58703 54572 50445 13538 9411 5280 1153 29798 25671 21540 17413 +; CHECK-NEXT: 42971 47098 34713 38840 59231 63358 50973 55100 9939 14066 1681 5808 26199 30326 17941 22068 +; CHECK-NEXT: 55628 51565 63758 59695 39368 35305 47498 43435 22596 18533 30726 26663 6336 2273 14466 10403 +; CHECK-NEXT: 52093 56156 60223 64286 35833 39896 43963 48026 19061 23124 27191 31254 2801 6864 10931 14994 +; CHECK-NEXT: 64814 60687 56684 52557 48554 44427 40424 36297 31782 27655 23652 19525 15522 11395 7392 3265 +; CHECK-NEXT: 61215 65342 53085 57212 44955 49082 36825 40952 28183 32310 20053 24180 11923 16050 3793 7920 ; entry: br label %loop -- cgit v1.2.3 From f13b9e3643661ea2cda252c7e2c59ace036407c7 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 18 Jun 2025 20:18:53 +0100 Subject: [HashRecognize] Don't const-qualify Values in result (#144752) Const-qualifying Values in the analysis result makes them unusable with IRBuilder. The issue was discovered when attempting to use the result of the analysis for a transform. 
--- llvm/include/llvm/Analysis/HashRecognize.h | 12 ++++++------ llvm/lib/Analysis/HashRecognize.cpp | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/Analysis/HashRecognize.h b/llvm/include/llvm/Analysis/HashRecognize.h index 8ab68a5dc2cb..c169383bf7b0 100644 --- a/llvm/include/llvm/Analysis/HashRecognize.h +++ b/llvm/include/llvm/Analysis/HashRecognize.h @@ -53,7 +53,7 @@ struct PolynomialInfo { // division in the case of CRC. Since polynomial division is an XOR in // GF(2^m), this variable must be XOR'ed with RHS in a loop to yield the // ComputedValue. - const Value *LHS; + Value *LHS; // The generating polynomial, or the RHS of the polynomial division in the // case of CRC. @@ -61,7 +61,7 @@ struct PolynomialInfo { // The final computed value. This is a remainder of a polynomial division in // the case of CRC, which must be zero. - const Value *ComputedValue; + Value *ComputedValue; // Set to true in the case of big-endian. bool ByteOrderSwapped; @@ -69,11 +69,11 @@ struct PolynomialInfo { // An optional auxiliary checksum that augments the LHS. In the case of CRC, // it is XOR'ed with the LHS, so that the computation's final remainder is // zero. - const Value *LHSAux; + Value *LHSAux; - PolynomialInfo(unsigned TripCount, const Value *LHS, const APInt &RHS, - const Value *ComputedValue, bool ByteOrderSwapped, - const Value *LHSAux = nullptr); + PolynomialInfo(unsigned TripCount, Value *LHS, const APInt &RHS, + Value *ComputedValue, bool ByteOrderSwapped, + Value *LHSAux = nullptr); }; /// The analysis. 
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp index 987d13731276..06a3738018e9 100644 --- a/llvm/lib/Analysis/HashRecognize.cpp +++ b/llvm/lib/Analysis/HashRecognize.cpp @@ -442,9 +442,9 @@ getRecurrences(BasicBlock *LoopLatch, const PHINode *IndVar, const Loop &L) { return std::make_pair(SimpleRecurrence, ConditionalRecurrence); } -PolynomialInfo::PolynomialInfo(unsigned TripCount, const Value *LHS, - const APInt &RHS, const Value *ComputedValue, - bool ByteOrderSwapped, const Value *LHSAux) +PolynomialInfo::PolynomialInfo(unsigned TripCount, Value *LHS, const APInt &RHS, + Value *ComputedValue, bool ByteOrderSwapped, + Value *LHSAux) : TripCount(TripCount), LHS(LHS), RHS(RHS), ComputedValue(ComputedValue), ByteOrderSwapped(ByteOrderSwapped), LHSAux(LHSAux) {} @@ -623,7 +623,7 @@ HashRecognize::recognizeCRC() const { if (!checkExtractBits(ResultBits, TC, IsZero, *ByteOrderSwapped)) return ErrBits(ResultBits, TC, *ByteOrderSwapped); - const Value *LHSAux = SimpleRecurrence ? SimpleRecurrence.Start : nullptr; + Value *LHSAux = SimpleRecurrence ? SimpleRecurrence.Start : nullptr; return PolynomialInfo(TC, ConditionalRecurrence.Start, GenPoly, ComputedValue, *ByteOrderSwapped, LHSAux); } -- cgit v1.2.3 From 156a64c585faf0870936b62ec85fae19ceb9ad3f Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 18 Jun 2025 20:19:25 +0100 Subject: [HashRecognize] Tighten pre-conditions for analysis (#144757) Exit early if the TC is not a byte-multiple, as optimization works by dividing TC by 8. Also delay the SCEV TC query. 
--- llvm/lib/Analysis/HashRecognize.cpp | 8 +-- .../HashRecognize/cyclic-redundancy-check.ll | 62 ++++++++++++++++++++-- 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp index 06a3738018e9..d11602f92187 100644 --- a/llvm/lib/Analysis/HashRecognize.cpp +++ b/llvm/lib/Analysis/HashRecognize.cpp @@ -561,14 +561,14 @@ std::variant HashRecognize::recognizeCRC() const { if (!L.isInnermost()) return "Loop is not innermost"; - unsigned TC = SE.getSmallConstantMaxTripCount(&L); - if (!TC || TC > 256) - return "Unable to find a small constant trip count"; BasicBlock *Latch = L.getLoopLatch(); BasicBlock *Exit = L.getExitBlock(); const PHINode *IndVar = L.getCanonicalInductionVariable(); - if (!Latch || !Exit || !IndVar) + if (!Latch || !Exit || !IndVar || L.getNumBlocks() != 1) return "Loop not in canonical form"; + unsigned TC = SE.getSmallConstantTripCount(&L); + if (!TC || TC > 256 || TC % 8) + return "Unable to find a small constant byte-multiple trip count"; auto R = getRecurrences(Latch, IndVar, L); if (!R) diff --git a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll index 0fbc376c40d7..0366684a13b5 100644 --- a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll +++ b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll @@ -384,7 +384,7 @@ exit: ; preds = %loop define i16 @not.crc.non.const.tc(i16 %crc.init, i32 %loop.limit) { ; CHECK-LABEL: 'not.crc.non.const.tc' ; CHECK-NEXT: Did not find a hash algorithm -; CHECK-NEXT: Reason: Unable to find a small constant trip count +; CHECK-NEXT: Reason: Unable to find a small constant byte-multiple trip count ; entry: br label %loop @@ -404,8 +404,31 @@ exit: ; preds = %loop ret i16 %crc.next } -define i16 @not.crc.non.canonical.loop(i16 %crc.init) { -; CHECK-LABEL: 'not.crc.non.canonical.loop' +define i16 @not.crc.non.canonical.not.multiple.8(i16 
%crc.init) { +; CHECK-LABEL: 'not.crc.non.canonical.not.multiple.8' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Unable to find a small constant byte-multiple trip count +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] + %crc.shl = shl i16 %crc, 1 + %crc.xor = xor i16 %crc.shl, 4129 + %check.sb = icmp slt i16 %crc, 0 + %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp samesign eq i32 %iv, 3 + br i1 %exit.cond, label %exit, label %loop + +exit: ; preds = %loop + ret i16 %crc.next +} + +define i16 @not.crc.non.canonical.loop.countdown(i16 %crc.init) { +; CHECK-LABEL: 'not.crc.non.canonical.loop.countdown' ; CHECK-NEXT: Did not find a hash algorithm ; CHECK-NEXT: Reason: Loop not in canonical form ; @@ -427,10 +450,39 @@ exit: ; preds = %loop ret i16 %crc.next } +define i16 @not.crc.non.canonical.loop.multiple.blocks(i16 %crc.init) { +; CHECK-LABEL: 'not.crc.non.canonical.loop.multiple.blocks' +; CHECK-NEXT: Did not find a hash algorithm +; CHECK-NEXT: Reason: Loop not in canonical form +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %continue ] + %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %continue ] + %check.sb = icmp slt i16 %crc, 0 + %crc.shl = shl i16 %crc, 1 + br i1 %check.sb, label %xor, label %continue + +xor: + %crc.xor = xor i16 %crc.shl, 4129 + br label %continue + +continue: + %crc.next = phi i16 [ %crc.xor, %xor ], [ %crc.shl, %loop ] + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp samesign eq i32 %iv, 7 + br i1 %exit.cond, label %exit, label %loop + +exit: ; preds = %loop + ret i16 %crc.next +} + define i16 @not.crc.tc.limit(i16 %crc.init) { ; CHECK-LABEL: 'not.crc.tc.limit' ; CHECK-NEXT: Did not find a hash algorithm -; CHECK-NEXT: Reason: Unable to find a small constant 
trip count +; CHECK-NEXT: Reason: Unable to find a small constant byte-multiple trip count ; entry: br label %loop @@ -617,7 +669,7 @@ loop: ; preds = %loop, %entry %crc.xor = xor i16 %crc.lshr, -24575 %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor %iv.next = add nuw nsw i8 %iv, 1 - %exit.cond = icmp samesign ult i8 %iv, 20 + %exit.cond = icmp samesign ult i8 %iv, 31 br i1 %exit.cond, label %loop, label %exit exit: ; preds = %loop -- cgit v1.2.3 From 88d250729eb00842a41c946632bcacf1af106f64 Mon Sep 17 00:00:00 2001 From: Alan Phipps Date: Wed, 18 Jun 2025 14:33:59 -0500 Subject: Revert "[llvm-cov] Export decision coverage to output json" (#144783) Reverts llvm/llvm-project#144335 Need to resolve test failures --- llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h | 12 ------------ llvm/tools/llvm-cov/CoverageExporterJson.cpp | 6 ++---- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index 8e6180be25b5..d1230b0ba7c5 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -31,7 +31,6 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #include @@ -495,17 +494,6 @@ public: return TV[TestVectorIndex].first[PosToID[Condition]]; } - /// Return the number of True and False decisions for all executed test - /// vectors. - std::pair getDecisions() const { - const unsigned TrueDecisions = - std::count_if(TV.begin(), TV.end(), [](const auto &TestVec) { - return TestVec.second == CondState::MCDC_True; - }); - - return {TrueDecisions, TV.size() - TrueDecisions}; - } - /// Return the Result evaluation for an executed test vector. /// See MCDCRecordProcessor::RecordTestVector(). 
CondState getTVResult(unsigned TestVectorIndex) { diff --git a/llvm/tools/llvm-cov/CoverageExporterJson.cpp b/llvm/tools/llvm-cov/CoverageExporterJson.cpp index 024693a24cc2..4088c1b053aa 100644 --- a/llvm/tools/llvm-cov/CoverageExporterJson.cpp +++ b/llvm/tools/llvm-cov/CoverageExporterJson.cpp @@ -62,7 +62,7 @@ #include /// The semantic version combined as a string. -#define LLVM_COVERAGE_EXPORT_JSON_STR "3.0.0" +#define LLVM_COVERAGE_EXPORT_JSON_STR "2.0.1" /// Unique type identifier for JSON coverage export. #define LLVM_COVERAGE_EXPORT_JSON_TYPE_STR "llvm.coverage.json.export" @@ -110,10 +110,8 @@ json::Array gatherConditions(const coverage::MCDCRecord &Record) { json::Array renderMCDCRecord(const coverage::MCDCRecord &Record) { const llvm::coverage::CounterMappingRegion &CMR = Record.getDecisionRegion(); - const auto [TrueDecisions, FalseDecisions] = Record.getDecisions(); return json::Array({CMR.LineStart, CMR.ColumnStart, CMR.LineEnd, - CMR.ColumnEnd, TrueDecisions, FalseDecisions, - CMR.ExpandedFileID, int64_t(CMR.Kind), + CMR.ColumnEnd, CMR.ExpandedFileID, int64_t(CMR.Kind), gatherConditions(Record)}); } -- cgit v1.2.3 From fb0651959b1b6ae64f84cf5840adc95923af991f Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 18 Jun 2025 15:37:56 -0400 Subject: [AArch64] fix trampoline implementation: actually use X15 (#143892) A incorrect switch statement caused it to try to use X4 instead of X15 in #126743, which would have not worked. 
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 3 +-- llvm/test/CodeGen/AArch64/trampoline.ll | 12 ++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d8b574719dae..581f15277602 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7138,8 +7138,7 @@ SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, switch (CC) { default: NestReg = 0x0f; // X15 - LLVM_FALLTHROUGH; - case CallingConv::ARM64EC_Thunk_Native: + break; case CallingConv::ARM64EC_Thunk_X64: // Must be kept in sync with AArch64CallingConv.td NestReg = 0x04; // X4 diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll index d9016b02a0f8..0e682704afbf 100644 --- a/llvm/test/CodeGen/AArch64/trampoline.ll +++ b/llvm/test/CodeGen/AArch64/trampoline.ll @@ -83,7 +83,7 @@ define i64 @func1() { ; CHECK-LINUX-NEXT: str w9, [sp, #16] ; CHECK-LINUX-NEXT: add x9, sp, #56 ; CHECK-LINUX-NEXT: stp x9, x8, [sp, #24] -; CHECK-LINUX-NEXT: mov x8, #132 // =0x84 +; CHECK-LINUX-NEXT: mov x8, #143 // =0x8f ; CHECK-LINUX-NEXT: movk x8, #22528, lsl #16 ; CHECK-LINUX-NEXT: movk x8, #177, lsl #32 ; CHECK-LINUX-NEXT: movk x8, #22528, lsl #48 @@ -112,7 +112,7 @@ define i64 @func1() { ; CHECK-PC-NEXT: add x0, sp, #8 ; CHECK-PC-NEXT: movk w8, #54815, lsl #16 ; CHECK-PC-NEXT: str w8, [sp, #16] -; CHECK-PC-NEXT: mov x8, #132 // =0x84 +; CHECK-PC-NEXT: mov x8, #143 // =0x8f ; CHECK-PC-NEXT: movk x8, #22528, lsl #16 ; CHECK-PC-NEXT: movk x8, #177, lsl #32 ; CHECK-PC-NEXT: movk x8, #22528, lsl #48 @@ -148,7 +148,7 @@ define i64 @func1() { ; CHECK-APPLE-NEXT: mov x0, sp ; CHECK-APPLE-NEXT: movk w8, #54815, lsl #16 ; CHECK-APPLE-NEXT: str w8, [sp, #8] -; CHECK-APPLE-NEXT: mov x8, #132 ; =0x84 +; CHECK-APPLE-NEXT: mov x8, #143 ; =0x8f ; CHECK-APPLE-NEXT: movk x8, #22528, lsl #16 ; CHECK-APPLE-NEXT: 
movk x8, #177, lsl #32 ; CHECK-APPLE-NEXT: movk x8, #22528, lsl #48 @@ -184,7 +184,7 @@ define i64 @func2() { ; CHECK-LINUX-NEXT: add x9, sp, #8 ; CHECK-LINUX-NEXT: add x1, x0, #12 ; CHECK-LINUX-NEXT: stp x9, x8, [x0, #16] -; CHECK-LINUX-NEXT: mov x8, #132 // =0x84 +; CHECK-LINUX-NEXT: mov x8, #143 // =0x8f ; CHECK-LINUX-NEXT: movk x8, #22528, lsl #16 ; CHECK-LINUX-NEXT: movk x8, #177, lsl #32 ; CHECK-LINUX-NEXT: movk x8, #22528, lsl #48 @@ -210,7 +210,7 @@ define i64 @func2() { ; CHECK-PC-NEXT: mov w8, #544 // =0x220 ; CHECK-PC-NEXT: movk w8, #54815, lsl #16 ; CHECK-PC-NEXT: str w8, [x0, #8] -; CHECK-PC-NEXT: mov x8, #132 // =0x84 +; CHECK-PC-NEXT: mov x8, #143 // =0x8f ; CHECK-PC-NEXT: movk x8, #22528, lsl #16 ; CHECK-PC-NEXT: movk x8, #177, lsl #32 ; CHECK-PC-NEXT: movk x8, #22528, lsl #48 @@ -246,7 +246,7 @@ define i64 @func2() { ; CHECK-APPLE-NEXT: mov w8, #544 ; =0x220 ; CHECK-APPLE-NEXT: movk w8, #54815, lsl #16 ; CHECK-APPLE-NEXT: str w8, [x0, #8] -; CHECK-APPLE-NEXT: mov x8, #132 ; =0x84 +; CHECK-APPLE-NEXT: mov x8, #143 ; =0x8f ; CHECK-APPLE-NEXT: movk x8, #22528, lsl #16 ; CHECK-APPLE-NEXT: movk x8, #177, lsl #32 ; CHECK-APPLE-NEXT: movk x8, #22528, lsl #48 -- cgit v1.2.3 From c04fc5596ec8c197c75b92a086c31438bfb08faf Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 18 Jun 2025 15:38:34 -0400 Subject: [MemCpyOpt] allow some undef contents overread in processMemCpyMemCpyDependence (#143745) Allows memcpy to memcpy forwarding in cases where the second memcpy is larger, but the overread is known to be undef, by shrinking the memcpy size. Refs https://github.com/llvm/llvm-project/pull/140954 which laid some of the groundwork for this. 
--- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 52 +++++++++++++++------- .../Transforms/MemCpyOpt/memcpy-memcpy-offset.ll | 33 +++++++++++--- .../MemCpyOpt/variable-sized-memcpy-memcpy.ll | 37 ++++++++++++++- 3 files changed, 98 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 1c4ec6aa08b4..2b0e221f341e 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -107,6 +107,9 @@ struct MemsetRange { } // end anonymous namespace +static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy, + MemIntrinsic *MemSrc, BatchAAResults &BAA); + bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { // If we found more than 4 stores to merge or 16 bytes, use memset. if (TheStores.size() >= 4 || End - Start >= 16) @@ -1129,14 +1132,29 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, MForwardOffset = *Offset; } - // The length of the memcpy's must be the same, or the preceding one - // must be larger than the following one. - if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) { + Value *CopyLength = M->getLength(); + + // The length of the memcpy's must be the same, or the preceding one must be + // larger than the following one, or the contents of the overread must be + // undefined bytes of a defined size. 
+ if (MForwardOffset != 0 || MDep->getLength() != CopyLength) { auto *MDepLen = dyn_cast(MDep->getLength()); - auto *MLen = dyn_cast(M->getLength()); - if (!MDepLen || !MLen || - MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) + auto *MLen = dyn_cast(CopyLength); + // This could be converted to a runtime test (%CopyLength = + // min(max(0, MDepLen - MForwardOffset), MLen)), but it is + // unclear if that is useful + if (!MDepLen || !MLen) return false; + if (MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) { + if (!overreadUndefContents(MSSA, M, MDep, BAA)) + return false; + if (MDepLen->getZExtValue() <= (uint64_t)MForwardOffset) + return false; // Should not reach here (there is obviously no aliasing + // with MDep), so just bail in case it had incomplete info + // somehow + CopyLength = ConstantInt::get(CopyLength->getType(), + MDepLen->getZExtValue() - MForwardOffset); + } } IRBuilder<> Builder(M); @@ -1152,9 +1170,13 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, eraseInstruction(NewCopySource); }); MaybeAlign CopySourceAlign = MDep->getSourceAlign(); - // We just need to calculate the actual size of the copy. - auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize( - MemoryLocation::getForSource(M).Size); + auto MCopyLoc = MemoryLocation::getForSource(MDep); + // Truncate the size of the MDep access to just the bytes read + if (MDep->getLength() != CopyLength) { + auto *ConstLength = cast(CopyLength); + MCopyLoc = MCopyLoc.getWithNewSize( + LocationSize::precise(ConstLength->getZExtValue())); + } // When the forwarding offset is greater than 0, we transform // memcpy(d1 <- s1) @@ -1223,20 +1245,18 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // example we could be moving from movaps -> movq on x86. 
Instruction *NewM; if (UseMemMove) - NewM = - Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource, - CopySourceAlign, M->getLength(), M->isVolatile()); + NewM = Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource, + CopySourceAlign, CopyLength, M->isVolatile()); else if (M->isForceInlined()) // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is // never allowed since that would allow the latter to be lowered as a call // to an external function. NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(), - CopySource, CopySourceAlign, - M->getLength(), M->isVolatile()); + CopySource, CopySourceAlign, CopyLength, + M->isVolatile()); else NewM = Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource, - CopySourceAlign, M->getLength(), - M->isVolatile()); + CopySourceAlign, CopyLength, M->isVolatile()); NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID); diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll index da654438d7bd..7dc579aad02f 100644 --- a/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy-memcpy-offset.ll @@ -134,14 +134,15 @@ define void @forward_offset_memcpy_inline(ptr %src, ptr %dest) { ret void } -; We cannot forward `memcpy` because it exceeds the size of `memcpy` it depends on. -define void @do_not_forward_oversize_offset(ptr %src, ptr %dest) { -; CHECK-LABEL: define void @do_not_forward_oversize_offset( +; We can forward `memcpy` by shrinking it to the size of the `memcpy` it depends on. 
+define void @forward_oversize_offset(ptr %src, ptr %dest) { +; CHECK-LABEL: define void @forward_oversize_offset( ; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DEST:%.*]]) { -; CHECK-NEXT: [[DEP_DEST:%.*]] = alloca [9 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEP_DEST]], ptr align 1 [[SRC]], i64 6, i1 false) -; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[DEP_DEST]], i64 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP_OFFSET]], i64 6, i1 false) +; CHECK-NEXT: [[CPY_TMP:%.*]] = alloca [9 x i8], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[CPY_TMP]], ptr align 1 [[SRC]], i64 6, i1 false) +; CHECK-NEXT: [[CPY_TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[CPY_TMP]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[TMP1]], i64 5, i1 false) ; CHECK-NEXT: ret void ; %cpy_tmp = alloca %buf, align 1 @@ -214,6 +215,24 @@ define void @pr98675(ptr noalias %p1, ptr noalias %p2) { ret void } +define void @over_offset_cpy(ptr %src) { +; CHECK-LABEL: define void @over_offset_cpy( +; CHECK-SAME: ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[TMP:%.*]] = alloca [2 x i8], align 1 +; CHECK-NEXT: [[DST:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC]], i64 1, i1 false) +; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1 +; CHECK-NEXT: ret void +; + %tmp = alloca [2 x i8] + %dst = alloca i8 + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 1, i1 false) + %tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1 + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 1, i1 false) + + ret void +} + declare void @use(ptr) declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) diff --git 
a/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll index 4f6b734ec057..95402a8ea686 100644 --- a/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll @@ -18,7 +18,42 @@ define void @test(ptr %src, i64 %size) { ret void } -; Differing sizes, so left as it is. +define void @dynalloca_test(ptr %src, i64 %size1) { +; CHECK-LABEL: @dynalloca_test( +; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1 +; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE1]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC:%.*]], i64 31, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST]], ptr align 8 [[SRC]], i64 31, i1 false) +; CHECK-NEXT: ret void +; + %tmp = alloca i8, i64 %size1 + %dst = alloca i8, i64 %size1 + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 31, i1 false) + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp, i64 32, i1 false) + + ret void +} + +define void @dynalloca_offset_test(ptr %src, i64 %size1) { +; CHECK-LABEL: @dynalloca_offset_test( +; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1 +; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE1]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP]], ptr align 8 [[SRC:%.*]], i64 31, i1 false) +; CHECK-NEXT: [[TMP_OFFSET:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST]], ptr align 1 [[TMP1]], i64 30, i1 false) +; CHECK-NEXT: ret void +; + %tmp = alloca i8, i64 %size1 + %dst = alloca i8, i64 %size1 + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src, i64 31, i1 false) + %tmp_offset = getelementptr inbounds i8, ptr %tmp, i64 1 + call void 
@llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %tmp_offset, i64 31, i1 false) + + ret void +} + +; Dynamic sizes, so left as it is. define void @negative_test(ptr %src, i64 %size1, i64 %size2) { ; CHECK-LABEL: @negative_test( ; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1 -- cgit v1.2.3 From 67c52aacae2aa698eb1d31d81d2376bd77723d3a Mon Sep 17 00:00:00 2001 From: Amr Hesham Date: Wed, 18 Jun 2025 21:47:50 +0200 Subject: [CIR] Upstream support for IncompleteArrayType (#144138) This change adds the basic support for IncompleteArray Issue https://github.com/llvm/llvm-project/issues/130197 --- clang/lib/CIR/CodeGen/CIRGenTypes.cpp | 16 ++++++++++++++++ clang/test/CIR/CodeGen/struct.c | 14 ++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp index bab47924dd71..621eb66962bf 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp @@ -416,6 +416,22 @@ mlir::Type CIRGenTypes::convertType(QualType type) { break; } + case Type::IncompleteArray: { + const IncompleteArrayType *arrTy = cast(ty); + if (arrTy->getIndexTypeCVRQualifiers() != 0) + cgm.errorNYI(SourceLocation(), "non trivial array types", type); + + mlir::Type elemTy = convertTypeForMem(arrTy->getElementType()); + // int X[] -> [0 x int], unless the element type is not sized. If it is + // unsized (e.g. an incomplete record) just use [0 x i8]. 
+ if (!builder.isSized(elemTy)) { + elemTy = cgm.SInt8Ty; + } + + resultType = cir::ArrayType::get(elemTy, 0); + break; + } + case Type::ConstantArray: { const ConstantArrayType *arrTy = cast(ty); mlir::Type elemTy = convertTypeForMem(arrTy->getElementType()); diff --git a/clang/test/CIR/CodeGen/struct.c b/clang/test/CIR/CodeGen/struct.c index ed84edd97e5d..b722b64eeb58 100644 --- a/clang/test/CIR/CodeGen/struct.c +++ b/clang/test/CIR/CodeGen/struct.c @@ -19,6 +19,7 @@ // CIR-DAG: !rec_CycleEnd = !cir.record>}>>}>>}> // CIR-DAG: !rec_CycleMiddle = !cir.record}> // CIR-DAG: !rec_CycleStart = !cir.record}> +// CIR-DAG: !rec_IncompleteArray = !cir.record}> // LLVM-DAG: %struct.CompleteS = type { i32, i8 } // LLVM-DAG: %struct.OuterS = type { %struct.InnerS, i32 } // LLVM-DAG: %struct.InnerS = type { i32, i8 } @@ -30,6 +31,7 @@ // LLVM-DAG: %struct.CycleStart = type { ptr } // LLVM-DAG: %struct.CycleMiddle = type { ptr } // LLVM-DAG: %struct.CycleEnd = type { ptr } +// LLVM-DAG: %struct.IncompleteArray = type { [0 x i32] } // OGCG-DAG: %struct.CompleteS = type { i32, i8 } // OGCG-DAG: %struct.OuterS = type { %struct.InnerS, i32 } // OGCG-DAG: %struct.InnerS = type { i32, i8 } @@ -41,6 +43,7 @@ // OGCG-DAG: %struct.CycleStart = type { ptr } // OGCG-DAG: %struct.CycleMiddle = type { ptr } // OGCG-DAG: %struct.CycleEnd = type { ptr } +// OGCG-DAG: %struct.IncompleteArray = type { [0 x i32] } struct CompleteS { int a; @@ -149,6 +152,16 @@ struct CycleEnd { // LLVM-DAG: @end = global %struct.CycleEnd zeroinitializer // OGCG-DAG: @end = global %struct.CycleEnd zeroinitializer +struct IncompleteArray { + int array[]; +} incomplete; + +// CIR: cir.global external @incomplete = #cir.zero : !rec_IncompleteArray + +// LLVM-DAG: global %struct.IncompleteArray zeroinitializer + +// OGCG-DAG: global %struct.IncompleteArray zeroinitializer + void f(void) { struct IncompleteS *p; } @@ -313,3 +326,4 @@ void f6(struct CycleStart *start) { // OGCG: %[[MIDDLE:.*]] = getelementptr inbounds 
nuw %struct.CycleStart, ptr %{{.*}}, i32 0, i32 0 // OGCG: %[[END:.*]] = getelementptr inbounds nuw %struct.CycleMiddle, ptr %{{.*}}, i32 0, i32 0 // OGCG: %[[START2:.*]] = getelementptr inbounds nuw %struct.CycleEnd, ptr %{{.*}}, i32 0, i32 0 + -- cgit v1.2.3 From d4b7c0d8b437f50ea254d814a1aeecf87a17be91 Mon Sep 17 00:00:00 2001 From: Tobias Stadler Date: Wed, 18 Jun 2025 20:49:55 +0100 Subject: [Remarks] Auto-detect remark parser format (#144554) Add remark format 'Auto', which performs automatic detection of the remark format using the magic numbers at the beginning of the remarks files. The RemarkLinker already did something similar, so we streamlined this and exposed this to llvm-remarkutil. --- llvm/include/llvm/Remarks/RemarkFormat.h | 5 ++++- llvm/include/llvm/Remarks/RemarkLinker.h | 5 ++--- llvm/lib/Remarks/RemarkFormat.cpp | 18 +++++++++++++++++- llvm/lib/Remarks/RemarkLinker.cpp | 14 +++----------- llvm/lib/Remarks/RemarkParser.cpp | 21 +++++++++++++++------ llvm/lib/Remarks/RemarkSerializer.cpp | 6 ++++-- .../Inputs/broken-remark-magic.bitstream | 1 + .../tools/llvm-remarkutil/annotation-count.test | 2 ++ .../broken-bitstream-remark-magic.test | 6 ++++++ llvm/test/tools/llvm-remarkutil/empty-file.test | 5 +++++ .../tools/llvm-remarkutil/instruction-count.test | 4 +++- .../test/tools/llvm-remarkutil/instruction-mix.test | 4 +++- .../llvm-remarkutil/size-diff/no-difference.test | 3 +++ llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h | 9 ++++++--- llvm/unittests/Remarks/RemarksLinkingTest.cpp | 6 ++---- 15 files changed, 76 insertions(+), 33 deletions(-) create mode 100644 llvm/test/tools/llvm-remarkutil/Inputs/broken-remark-magic.bitstream create mode 100644 llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test diff --git a/llvm/include/llvm/Remarks/RemarkFormat.h b/llvm/include/llvm/Remarks/RemarkFormat.h index a39a013dcf90..eda201d4ee6f 100644 --- a/llvm/include/llvm/Remarks/RemarkFormat.h +++ b/llvm/include/llvm/Remarks/RemarkFormat.h @@ 
-23,7 +23,7 @@ namespace remarks { constexpr StringLiteral Magic("REMARKS"); /// The format used for serializing/deserializing remarks. -enum class Format { Unknown, YAML, Bitstream }; +enum class Format { Unknown, Auto, YAML, Bitstream }; /// Parse and validate a string for the remark format. LLVM_ABI Expected parseFormat(StringRef FormatStr); @@ -31,6 +31,9 @@ LLVM_ABI Expected parseFormat(StringRef FormatStr); /// Parse and validate a magic number to a remark format. LLVM_ABI Expected magicToFormat(StringRef Magic); +/// Detect format based on selected format and magic number +LLVM_ABI Expected detectFormat(Format Selected, StringRef Magic); + } // end namespace remarks } // end namespace llvm diff --git a/llvm/include/llvm/Remarks/RemarkLinker.h b/llvm/include/llvm/Remarks/RemarkLinker.h index 5343c6214470..67208f40592a 100644 --- a/llvm/include/llvm/Remarks/RemarkLinker.h +++ b/llvm/include/llvm/Remarks/RemarkLinker.h @@ -80,13 +80,12 @@ public: /// \p Buffer. /// \p Buffer can be either a standalone remark container or just /// metadata. This takes care of uniquing and merging the remarks. - LLVM_ABI Error link(StringRef Buffer, - std::optional RemarkFormat = std::nullopt); + LLVM_ABI Error link(StringRef Buffer, Format RemarkFormat = Format::Auto); /// Link the remarks found in \p Obj by looking for the right section and /// calling the method above. LLVM_ABI Error link(const object::ObjectFile &Obj, - std::optional RemarkFormat = std::nullopt); + Format RemarkFormat = Format::Auto); /// Serialize the linked remarks to the stream \p OS, using the format \p /// RemarkFormat. 
diff --git a/llvm/lib/Remarks/RemarkFormat.cpp b/llvm/lib/Remarks/RemarkFormat.cpp index 800f5bffe70d..1c52e352f939 100644 --- a/llvm/lib/Remarks/RemarkFormat.cpp +++ b/llvm/lib/Remarks/RemarkFormat.cpp @@ -42,6 +42,22 @@ Expected llvm::remarks::magicToFormat(StringRef MagicStr) { if (Result == Format::Unknown) return createStringError(std::make_error_code(std::errc::invalid_argument), - "Unknown remark magic: '%s'", MagicStr.data()); + "Automatic detection of remark format failed. " + "Unknown magic number: '%.4s'", + MagicStr.data()); return Result; } + +Expected llvm::remarks::detectFormat(Format Selected, + StringRef MagicStr) { + if (Selected == Format::Unknown) + return createStringError(std::make_error_code(std::errc::invalid_argument), + "Unknown remark parser format."); + if (Selected != Format::Auto) + return Selected; + + // Empty files are valid bitstream files + if (MagicStr.empty()) + return Format::Bitstream; + return magicToFormat(MagicStr); +} diff --git a/llvm/lib/Remarks/RemarkLinker.cpp b/llvm/lib/Remarks/RemarkLinker.cpp index b8395aa135d8..0ca6217edfdd 100644 --- a/llvm/lib/Remarks/RemarkLinker.cpp +++ b/llvm/lib/Remarks/RemarkLinker.cpp @@ -66,17 +66,10 @@ void RemarkLinker::setExternalFilePrependPath(StringRef PrependPathIn) { PrependPath = std::string(PrependPathIn); } -Error RemarkLinker::link(StringRef Buffer, std::optional RemarkFormat) { - if (!RemarkFormat) { - Expected ParserFormat = magicToFormat(Buffer); - if (!ParserFormat) - return ParserFormat.takeError(); - RemarkFormat = *ParserFormat; - } - +Error RemarkLinker::link(StringRef Buffer, Format RemarkFormat) { Expected> MaybeParser = createRemarkParserFromMeta( - *RemarkFormat, Buffer, + RemarkFormat, Buffer, PrependPath ? 
std::optional(StringRef(*PrependPath)) : std::optional()); if (!MaybeParser) @@ -102,8 +95,7 @@ Error RemarkLinker::link(StringRef Buffer, std::optional RemarkFormat) { return Error::success(); } -Error RemarkLinker::link(const object::ObjectFile &Obj, - std::optional RemarkFormat) { +Error RemarkLinker::link(const object::ObjectFile &Obj, Format RemarkFormat) { Expected> SectionOrErr = getRemarksSectionContents(Obj); if (!SectionOrErr) diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp index 5c1690aaa0fe..038fc1d3f485 100644 --- a/llvm/lib/Remarks/RemarkParser.cpp +++ b/llvm/lib/Remarks/RemarkParser.cpp @@ -15,6 +15,7 @@ #include "BitstreamRemarkParser.h" #include "YAMLRemarkParser.h" #include "llvm-c/Remarks.h" +#include "llvm/Remarks/RemarkFormat.h" #include "llvm/Support/CBindingWrapping.h" #include @@ -50,14 +51,18 @@ Expected ParsedStringTable::operator[](size_t Index) const { Expected> llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf) { - switch (ParserFormat) { + auto DetectedFormat = detectFormat(ParserFormat, Buf); + if (!DetectedFormat) + return DetectedFormat.takeError(); + + switch (*DetectedFormat) { case Format::YAML: return std::make_unique(Buf); case Format::Bitstream: return std::make_unique(Buf); case Format::Unknown: - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Unknown remark parser format."); + case Format::Auto: + break; } llvm_unreachable("unhandled ParseFormat"); } @@ -66,15 +71,19 @@ Expected> llvm::remarks::createRemarkParserFromMeta( Format ParserFormat, StringRef Buf, std::optional ExternalFilePrependPath) { - switch (ParserFormat) { + auto DetectedFormat = detectFormat(ParserFormat, Buf); + if (!DetectedFormat) + return DetectedFormat.takeError(); + + switch (*DetectedFormat) { case Format::YAML: return createYAMLParserFromMeta(Buf, std::move(ExternalFilePrependPath)); case Format::Bitstream: return createBitstreamParserFromMeta(Buf, 
std::move(ExternalFilePrependPath)); case Format::Unknown: - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Unknown remark parser format."); + case Format::Auto: + break; } llvm_unreachable("unhandled ParseFormat"); } diff --git a/llvm/lib/Remarks/RemarkSerializer.cpp b/llvm/lib/Remarks/RemarkSerializer.cpp index cc10b91f287a..df1da53d7c8a 100644 --- a/llvm/lib/Remarks/RemarkSerializer.cpp +++ b/llvm/lib/Remarks/RemarkSerializer.cpp @@ -22,8 +22,9 @@ remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode, raw_ostream &OS) { switch (RemarksFormat) { case Format::Unknown: + case Format::Auto: return createStringError(std::errc::invalid_argument, - "Unknown remark serializer format."); + "Invalid remark serializer format."); case Format::YAML: return std::make_unique(OS, Mode); case Format::Bitstream: @@ -37,8 +38,9 @@ remarks::createRemarkSerializer(Format RemarksFormat, SerializerMode Mode, raw_ostream &OS, remarks::StringTable StrTab) { switch (RemarksFormat) { case Format::Unknown: + case Format::Auto: return createStringError(std::errc::invalid_argument, - "Unknown remark serializer format."); + "Invalid remark serializer format."); case Format::YAML: return std::make_unique(OS, Mode, std::move(StrTab)); case Format::Bitstream: diff --git a/llvm/test/tools/llvm-remarkutil/Inputs/broken-remark-magic.bitstream b/llvm/test/tools/llvm-remarkutil/Inputs/broken-remark-magic.bitstream new file mode 100644 index 000000000000..97b5955f788b --- /dev/null +++ b/llvm/test/tools/llvm-remarkutil/Inputs/broken-remark-magic.bitstream @@ -0,0 +1 @@ +12345678 diff --git a/llvm/test/tools/llvm-remarkutil/annotation-count.test b/llvm/test/tools/llvm-remarkutil/annotation-count.test index e006220c64f3..ee44ed2035c8 100644 --- a/llvm/test/tools/llvm-remarkutil/annotation-count.test +++ b/llvm/test/tools/llvm-remarkutil/annotation-count.test @@ -1,5 +1,7 @@ RUN: llvm-remarkutil annotation-count --parser=yaml --annotation-type=remark 
%p/Inputs/annotation-count.yaml | FileCheck %s +RUN: llvm-remarkutil annotation-count --annotation-type=remark %p/Inputs/annotation-count.yaml | FileCheck %s RUN: llvm-remarkutil yaml2bitstream %p/Inputs/annotation-count.yaml | llvm-remarkutil annotation-count --parser=bitstream --annotation-type=remark | FileCheck %s +RUN: llvm-remarkutil yaml2bitstream %p/Inputs/annotation-count.yaml | llvm-remarkutil annotation-count --annotation-type=remark | FileCheck %s RUN: llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function --remark-name="AnnotationSummary" %p/Inputs/annotation-count.yaml | FileCheck %s --check-prefix=COUNT-CHECK RUN: llvm-remarkutil yaml2bitstream %p/Inputs/annotation-count.yaml | llvm-remarkutil count --parser=bitstream --count-by=arg --group-by=function --remark-name="AnnotationSummary" | FileCheck %s --check-prefix=COUNT-CHECK diff --git a/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test b/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test new file mode 100644 index 000000000000..f469eadc07f9 --- /dev/null +++ b/llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test @@ -0,0 +1,6 @@ +RUN: not llvm-remarkutil instruction-count %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s +RUN: not llvm-remarkutil instruction-mix %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s +RUN: not llvm-remarkutil annotation-count --annotation-type=remark %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s +RUN: not llvm-remarkutil count %p/Inputs/broken-remark-magic.bitstream -o - 2>&1 | FileCheck %s + +CHECK: error: Automatic detection of remark format failed. 
Unknown magic number: '1234' diff --git a/llvm/test/tools/llvm-remarkutil/empty-file.test b/llvm/test/tools/llvm-remarkutil/empty-file.test index bdc5fcf87f7b..d9820a088ea8 100644 --- a/llvm/test/tools/llvm-remarkutil/empty-file.test +++ b/llvm/test/tools/llvm-remarkutil/empty-file.test @@ -8,6 +8,11 @@ RUN: llvm-remarkutil instruction-count --parser=bitstream %p/Inputs/empty-file - RUN: llvm-remarkutil instruction-mix --parser=bitstream %p/Inputs/empty-file --report_style=csv -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=MIXBITSTREAM RUN: llvm-remarkutil annotation-count --parser=bitstream --annotation-type=remark %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=ANNOTATIONBITSTREAM RUN: llvm-remarkutil count --parser=bitstream %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=COUNTBITSTREAM +; Parser format auto-detection should treat empty files as bitstream files +RUN: llvm-remarkutil instruction-count %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=SIZEBITSTREAM +RUN: llvm-remarkutil instruction-mix %p/Inputs/empty-file --report_style=csv -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=MIXBITSTREAM +RUN: llvm-remarkutil annotation-count --annotation-type=remark %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=ANNOTATIONBITSTREAM +RUN: llvm-remarkutil count %p/Inputs/empty-file -o - 2>&1 | FileCheck %s --allow-empty --check-prefix=COUNTBITSTREAM ; YAMLPARSER: error: document root is not of mapping type. 
diff --git a/llvm/test/tools/llvm-remarkutil/instruction-count.test b/llvm/test/tools/llvm-remarkutil/instruction-count.test index d94f4f94cc1d..a0aa6dc98c44 100644 --- a/llvm/test/tools/llvm-remarkutil/instruction-count.test +++ b/llvm/test/tools/llvm-remarkutil/instruction-count.test @@ -1,5 +1,7 @@ RUN: llvm-remarkutil instruction-count --parser=yaml %p/Inputs/instruction-count.yaml | FileCheck %s +RUN: llvm-remarkutil instruction-count %p/Inputs/instruction-count.yaml | FileCheck %s RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-count.yaml | llvm-remarkutil instruction-count --parser=bitstream | FileCheck %s +RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-count.yaml | llvm-remarkutil instruction-count | FileCheck %s RUN: llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function --remark-name="InstructionCount" %p/Inputs/instruction-count.yaml | FileCheck %s --check-prefix=COUNT-CHECK RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-count.yaml | llvm-remarkutil count --parser=bitstream --count-by=arg --group-by=function --remark-name="InstructionCount" | FileCheck %s --check-prefix=COUNT-CHECK RUN: not llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function --rremark-name=* %p/Inputs/instruction-count.yaml 2>&1 | FileCheck %s --check-prefix=ERROR-REPOPERATOR -DARG=rremark-name @@ -18,4 +20,4 @@ RUN: not llvm-remarkutil count --parser=yaml --count-by=arg --group-by=function ; COUNT-CHECK: func3,3 ; ERROR-REPOPERATOR: error: invalid argument '--[[ARG]]=*': repetition-operator operand invalid -; ERROR-BOTHFILTERS: error: conflicting arguments: --remark-name and --rremark-name \ No newline at end of file +; ERROR-BOTHFILTERS: error: conflicting arguments: --remark-name and --rremark-name diff --git a/llvm/test/tools/llvm-remarkutil/instruction-mix.test b/llvm/test/tools/llvm-remarkutil/instruction-mix.test index 178c1311b2fe..15994679f5d4 100644 --- a/llvm/test/tools/llvm-remarkutil/instruction-mix.test +++ 
b/llvm/test/tools/llvm-remarkutil/instruction-mix.test @@ -1,5 +1,7 @@ RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml | FileCheck %s +RUN: llvm-remarkutil instruction-mix %p/Inputs/instruction-mix.yaml | FileCheck %s RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-mix.yaml | llvm-remarkutil instruction-mix --parser=bitstream | FileCheck %s +RUN: llvm-remarkutil yaml2bitstream %p/Inputs/instruction-mix.yaml | llvm-remarkutil instruction-mix | FileCheck %s RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml --report_style=human | FileCheck %s RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml --report_style=csv | FileCheck %s --check-prefix=CSV RUN: llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix.yaml --rfilter=meow | FileCheck %s --check-prefix=MEOW-RE @@ -34,4 +36,4 @@ RUN: not llvm-remarkutil instruction-mix --parser=yaml %p/Inputs/instruction-mix ; NONE-EXACT: ----------- ----- ; NONE-NOT: {{.*}} -; ERROR: error: invalid argument '--rfilter=*': repetition-operator operand invalid \ No newline at end of file +; ERROR: error: invalid argument '--rfilter=*': repetition-operator operand invalid diff --git a/llvm/test/tools/llvm-remarkutil/size-diff/no-difference.test b/llvm/test/tools/llvm-remarkutil/size-diff/no-difference.test index a9b6ba4ae256..8550339bebc4 100644 --- a/llvm/test/tools/llvm-remarkutil/size-diff/no-difference.test +++ b/llvm/test/tools/llvm-remarkutil/size-diff/no-difference.test @@ -1,4 +1,7 @@ RUN: llvm-remarkutil size-diff %p/Inputs/1-func-1-instr-1-stack.yaml %p/Inputs/1-func-1-instr-1-stack.yaml --parser=yaml | FileCheck -strict-whitespace %s +RUN: llvm-remarkutil size-diff %p/Inputs/1-func-1-instr-1-stack.yaml %p/Inputs/1-func-1-instr-1-stack.yaml | FileCheck -strict-whitespace %s +RUN: llvm-remarkutil yaml2bitstream -o %t.bitstream %p/Inputs/1-func-1-instr-1-stack.yaml +RUN: llvm-remarkutil size-diff %t.bitstream 
%p/Inputs/1-func-1-instr-1-stack.yaml | FileCheck -strict-whitespace %s ; Same file passed twice -> no changes reported. diff --git a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h index eb393bc3e304..894ac8354e18 100644 --- a/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h +++ b/llvm/tools/llvm-remarkutil/RemarkUtilHelpers.h @@ -35,9 +35,12 @@ // Keep Input format and names consistent accross the modes via a macro. #define INPUT_FORMAT_COMMAND_LINE_OPTIONS(SUBOPT) \ static cl::opt InputFormat( \ - "parser", cl::desc("Input remark format to parse"), \ - cl::values(clEnumValN(Format::YAML, "yaml", "YAML"), \ - clEnumValN(Format::Bitstream, "bitstream", "Bitstream")), \ + "parser", cl::init(Format::Auto), \ + cl::desc("Input remark format to parse"), \ + cl::values( \ + clEnumValN(Format::Auto, "auto", "Automatic detection (default)"), \ + clEnumValN(Format::YAML, "yaml", "YAML"), \ + clEnumValN(Format::Bitstream, "bitstream", "Bitstream")), \ cl::sub(SUBOPT)); #define DEBUG_LOC_INFO_COMMAND_LINE_OPTIONS(SUBOPT) \ diff --git a/llvm/unittests/Remarks/RemarksLinkingTest.cpp b/llvm/unittests/Remarks/RemarksLinkingTest.cpp index dcd598aaeb5c..89de9e8f4f95 100644 --- a/llvm/unittests/Remarks/RemarksLinkingTest.cpp +++ b/llvm/unittests/Remarks/RemarksLinkingTest.cpp @@ -243,10 +243,8 @@ TEST(Remarks, LinkingError) { // Check that the prepend path is propagated and fails with the full path. // Also ensures that the remark format is correctly auto-detected. 
RL.setExternalFilePrependPath("/baddir/"); - Error E = RL.link( - StringRef("REMARKS\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0badfile.opt.yaml", - 40), - /*RemarkFormat=*/std::nullopt); + Error E = RL.link(StringRef( + "REMARKS\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0badfile.opt.yaml", 40)); EXPECT_TRUE(static_cast(E)); std::string ErrorMessage = toString(std::move(E)); EXPECT_EQ(StringRef(ErrorMessage).lower(), -- cgit v1.2.3 From 22a69a266d8206b1585dd82d466cd96d01725a65 Mon Sep 17 00:00:00 2001 From: Justin King Date: Wed, 18 Jun 2025 12:57:49 -0700 Subject: lsan: Support free_sized and free_aligned_sized from C23 (#144604) Adds support to LSan for `free_sized` and `free_aligned_sized` from C23. Other sanitizers will be handled with their own separate PRs. For https://github.com/llvm/llvm-project/issues/144435 This is attempt number 2. Signed-off-by: Justin King --- compiler-rt/lib/lsan/lsan_allocator.cpp | 4 +++ compiler-rt/lib/lsan/lsan_allocator.h | 2 ++ compiler-rt/lib/lsan/lsan_interceptors.cpp | 31 ++++++++++++++++++++++ compiler-rt/lib/lsan/lsan_malloc_mac.cpp | 23 +++++++++------- .../lib/sanitizer_common/sanitizer_malloc_mac.inc | 16 +++++++++++ .../sanitizer_platform_interceptors.h | 11 ++++++++ .../TestCases/Linux/free_aligned_sized.c | 2 +- .../sanitizer_common/TestCases/Linux/free_sized.c | 2 +- 8 files changed, 79 insertions(+), 12 deletions(-) diff --git a/compiler-rt/lib/lsan/lsan_allocator.cpp b/compiler-rt/lib/lsan/lsan_allocator.cpp index 493bf5f9efc5..a436d9c07ac6 100644 --- a/compiler-rt/lib/lsan/lsan_allocator.cpp +++ b/compiler-rt/lib/lsan/lsan_allocator.cpp @@ -220,6 +220,10 @@ void lsan_free(void *p) { Deallocate(p); } +void lsan_free_sized(void *p, uptr) { Deallocate(p); } + +void lsan_free_aligned_sized(void *p, uptr, uptr) { Deallocate(p); } + void *lsan_realloc(void *p, uptr size, const StackTrace &stack) { return SetErrnoOnNull(Reallocate(stack, p, size, 1)); } diff --git a/compiler-rt/lib/lsan/lsan_allocator.h 
b/compiler-rt/lib/lsan/lsan_allocator.h index 5eed0cbdb309..2342f11fb5d0 100644 --- a/compiler-rt/lib/lsan/lsan_allocator.h +++ b/compiler-rt/lib/lsan/lsan_allocator.h @@ -127,6 +127,8 @@ void *lsan_aligned_alloc(uptr alignment, uptr size, const StackTrace &stack); void *lsan_memalign(uptr alignment, uptr size, const StackTrace &stack); void *lsan_malloc(uptr size, const StackTrace &stack); void lsan_free(void *p); +void lsan_free_sized(void *p, uptr size); +void lsan_free_aligned_sized(void *p, uptr alignment, uptr size); void *lsan_realloc(void *p, uptr size, const StackTrace &stack); void *lsan_reallocarray(void *p, uptr nmemb, uptr size, const StackTrace &stack); diff --git a/compiler-rt/lib/lsan/lsan_interceptors.cpp b/compiler-rt/lib/lsan/lsan_interceptors.cpp index a8252cddacf2..6da9d0d9d24f 100644 --- a/compiler-rt/lib/lsan/lsan_interceptors.cpp +++ b/compiler-rt/lib/lsan/lsan_interceptors.cpp @@ -84,6 +84,35 @@ INTERCEPTOR(void, free, void *p) { lsan_free(p); } +# if SANITIZER_INTERCEPT_FREE_SIZED +INTERCEPTOR(void, free_sized, void *p, uptr size) { + if (UNLIKELY(!p)) + return; + if (DlsymAlloc::PointerIsMine(p)) + return DlsymAlloc::Free(p); + ENSURE_LSAN_INITED; + lsan_free_sized(p, size); +} +# define LSAN_MAYBE_INTERCEPT_FREE_SIZED INTERCEPT_FUNCTION(free_sized) +# else +# define LSAN_MAYBE_INTERCEPT_FREE_SIZED +# endif + +# if SANITIZER_INTERCEPT_FREE_ALIGNED_SIZED +INTERCEPTOR(void, free_aligned_sized, void *p, uptr alignment, uptr size) { + if (UNLIKELY(!p)) + return; + if (DlsymAlloc::PointerIsMine(p)) + return DlsymAlloc::Free(p); + ENSURE_LSAN_INITED; + lsan_free_aligned_sized(p, alignment, size); +} +# define LSAN_MAYBE_INTERCEPT_FREE_ALIGNED_SIZED \ + INTERCEPT_FUNCTION(free_aligned_sized) +# else +# define LSAN_MAYBE_INTERCEPT_FREE_ALIGNED_SIZED +# endif + INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) { if (DlsymAlloc::Use()) return DlsymAlloc::Callocate(nmemb, size); @@ -547,6 +576,8 @@ void InitializeInterceptors() { 
INTERCEPT_FUNCTION(malloc); INTERCEPT_FUNCTION(free); + LSAN_MAYBE_INTERCEPT_FREE_SIZED; + LSAN_MAYBE_INTERCEPT_FREE_ALIGNED_SIZED; LSAN_MAYBE_INTERCEPT_CFREE; INTERCEPT_FUNCTION(calloc); INTERCEPT_FUNCTION(realloc); diff --git a/compiler-rt/lib/lsan/lsan_malloc_mac.cpp b/compiler-rt/lib/lsan/lsan_malloc_mac.cpp index 525c30272ccc..8a16c053da23 100644 --- a/compiler-rt/lib/lsan/lsan_malloc_mac.cpp +++ b/compiler-rt/lib/lsan/lsan_malloc_mac.cpp @@ -44,16 +44,19 @@ using namespace __lsan; void *p = lsan_valloc(size, stack) #define COMMON_MALLOC_FREE(ptr) \ lsan_free(ptr) -#define COMMON_MALLOC_SIZE(ptr) \ - uptr size = lsan_mz_size(ptr) -#define COMMON_MALLOC_FILL_STATS(zone, stats) -#define COMMON_MALLOC_REPORT_UNKNOWN_REALLOC(ptr, zone_ptr, zone_name) \ - (void)zone_name; \ - Report("mz_realloc(%p) -- attempting to realloc unallocated memory.\n", ptr); -#define COMMON_MALLOC_NAMESPACE __lsan -#define COMMON_MALLOC_HAS_ZONE_ENUMERATOR 0 -#define COMMON_MALLOC_HAS_EXTRA_INTROSPECTION_INIT 0 +# define COMMON_MALLOC_FREE_SIZED(ptr, size) lsan_free_sized(ptr, size) +# define COMMON_MALLOC_FREE_ALIGNED_SIZED(ptr, alignment, size) \ + lsan_free_aligned_sized(ptr, alignment, size) +# define COMMON_MALLOC_SIZE(ptr) uptr size = lsan_mz_size(ptr) +# define COMMON_MALLOC_FILL_STATS(zone, stats) +# define COMMON_MALLOC_REPORT_UNKNOWN_REALLOC(ptr, zone_ptr, zone_name) \ + (void)zone_name; \ + Report("mz_realloc(%p) -- attempting to realloc unallocated memory.\n", \ + ptr); +# define COMMON_MALLOC_NAMESPACE __lsan +# define COMMON_MALLOC_HAS_ZONE_ENUMERATOR 0 +# define COMMON_MALLOC_HAS_EXTRA_INTROSPECTION_INIT 0 -#include "sanitizer_common/sanitizer_malloc_mac.inc" +# include "sanitizer_common/sanitizer_malloc_mac.inc" #endif // SANITIZER_APPLE diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc b/compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc index 6343eb284afb..be27584f2053 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc +++ 
b/compiler-rt/lib/sanitizer_common/sanitizer_malloc_mac.inc @@ -144,6 +144,22 @@ INTERCEPTOR(void, free, void *ptr) { COMMON_MALLOC_FREE(ptr); } +#if SANITIZER_INTERCEPT_FREE_SIZED && defined(COMMON_MALLOC_FREE_SIZED) +INTERCEPTOR(void, free_sized, void *ptr, size_t size) { + COMMON_MALLOC_ENTER(); + COMMON_MALLOC_FREE_SIZED(ptr, size); +} +#endif + +#if SANITIZER_INTERCEPT_FREE_ALIGNED_SIZED && \ + defined(COMMON_MALLOC_FREE_ALIGNED_SIZED) +INTERCEPTOR(void, free_aligned_sized, void *ptr, size_t alignment, + size_t size) { + COMMON_MALLOC_ENTER(); + COMMON_MALLOC_FREE_ALIGNED_SIZED(ptr, alignment, size); +} +#endif + INTERCEPTOR(void *, realloc, void *ptr, size_t size) { COMMON_MALLOC_ENTER(); COMMON_MALLOC_REALLOC(ptr, size); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index ccc808b60ca7..29987decdff4 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -663,6 +663,17 @@ SANITIZER_WEAK_IMPORT void *aligned_alloc(__sanitizer::usize __alignment, #define SANITIZER_INTERCEPT_GETSERVBYNAME_R SI_GLIBC #define SANITIZER_INTERCEPT_GETSERVBYPORT_R SI_GLIBC +// Until free_sized and free_aligned_sized are more generally available, +// we can only unconditionally intercept on ELF-based platforms where it +// is okay to have undefined weak symbols. +#ifdef __ELF__ +# define SANITIZER_INTERCEPT_FREE_SIZED 1 +# define SANITIZER_INTERCEPT_FREE_ALIGNED_SIZED 1 +#else +# define SANITIZER_INTERCEPT_FREE_SIZED 0 +# define SANITIZER_INTERCEPT_FREE_ALIGNED_SIZED 0 +#endif + // This macro gives a way for downstream users to override the above // interceptor macros irrespective of the platform they are on. 
They have // to do two things: diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/free_aligned_sized.c b/compiler-rt/test/sanitizer_common/TestCases/Linux/free_aligned_sized.c index f4c6c0f973bd..e9cb6f20c5ea 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/free_aligned_sized.c +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/free_aligned_sized.c @@ -1,5 +1,5 @@ // RUN: %clang -std=c23 -O0 %s -o %t && %run %t -// UNSUPPORTED: asan, hwasan, rtsan, tsan, msan, lsan, ubsan +// UNSUPPORTED: asan, hwasan, rtsan, tsan, msan, ubsan #include #include diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/free_sized.c b/compiler-rt/test/sanitizer_common/TestCases/Linux/free_sized.c index 0ee2289684d0..8cdf3216e528 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/free_sized.c +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/free_sized.c @@ -1,5 +1,5 @@ // RUN: %clang -std=c23 -O0 %s -o %t && %run %t -// UNSUPPORTED: asan, hwasan, rtsan, tsan, msan, lsan, ubsan +// UNSUPPORTED: asan, hwasan, rtsan, tsan, msan, ubsan #include #include -- cgit v1.2.3 From 23b8f11b27f1345cfdd9d03c9ebaccaf81897764 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 18 Jun 2025 20:59:15 +0100 Subject: [VPlan] Remove redundant VPWidenRecipe constructors (NFC) Since the removal of VPWidenEVLRecipe, the constructors taking a VPDefOpcode are not needed any more. Remove them. 
--- llvm/lib/Transforms/Vectorize/VPlan.h | 21 ++++++--------------- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 5 ++--- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index f3306ad7cb8e..ab5ff82a7720 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1304,24 +1304,15 @@ protected: class VPWidenRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { unsigned Opcode; -protected: - VPWidenRecipe(unsigned VPDefOpcode, Instruction &I, - ArrayRef Operands) - : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), VPIRMetadata(I), - Opcode(I.getOpcode()) {} - - VPWidenRecipe(unsigned VPDefOpcode, unsigned Opcode, - ArrayRef Operands, bool NUW, bool NSW, DebugLoc DL) - : VPRecipeWithIRFlags(VPDefOpcode, Operands, WrapFlagsTy(NUW, NSW), DL), +public: + VPWidenRecipe(unsigned Opcode, ArrayRef Operands, + const VPIRFlags &Flags, DebugLoc DL) + : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL), Opcode(Opcode) {} -public: VPWidenRecipe(Instruction &I, ArrayRef Operands) - : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {} - - VPWidenRecipe(unsigned Opcode, ArrayRef Operands, bool NUW, - bool NSW, DebugLoc DL) - : VPWidenRecipe(VPDef::VPWidenSC, Opcode, Operands, NUW, NSW, DL) {} + : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPIRMetadata(I), + Opcode(I.getOpcode()) {} ~VPWidenRecipe() override = default; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 11f0f2a93032..cc73ae44f9c0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2703,9 +2703,8 @@ expandVPMulAccumulateReduction(VPMulAccumulateReductionRecipe *MulAcc) { } std::array MulOps = {Op0, Op1}; - auto *Mul = new VPWidenRecipe( - Instruction::Mul, ArrayRef(MulOps), MulAcc->hasNoUnsignedWrap(), - 
MulAcc->hasNoSignedWrap(), MulAcc->getDebugLoc()); + auto *Mul = new VPWidenRecipe(Instruction::Mul, ArrayRef(MulOps), *MulAcc, + MulAcc->getDebugLoc()); Mul->insertBefore(MulAcc); auto *Red = new VPReductionRecipe( -- cgit v1.2.3 From a630ca6f6c4727d852d60076d1179c3e9830ca2f Mon Sep 17 00:00:00 2001 From: Chelsea Cassanova Date: Wed, 18 Jun 2025 13:06:20 -0700 Subject: [lldb][breakpoint] Grey out disabled breakpoints (#91404) This commit adds colour settings to the list of breakpoints in order to grey out breakpoints that have been disabled. --- lldb/include/lldb/Core/Debugger.h | 4 ++++ lldb/source/Breakpoint/Breakpoint.cpp | 15 ++++++++++++++ lldb/source/Core/CoreProperties.td | 16 +++++++++++++++ lldb/source/Core/Debugger.cpp | 12 +++++++++++ lldb/test/API/terminal/TestDisabledBreakpoints.py | 25 +++++++++++++++++++++++ 5 files changed, 72 insertions(+) create mode 100644 lldb/test/API/terminal/TestDisabledBreakpoints.py diff --git a/lldb/include/lldb/Core/Debugger.h b/lldb/include/lldb/Core/Debugger.h index d73aba1e3ce5..2087ef2a1156 100644 --- a/lldb/include/lldb/Core/Debugger.h +++ b/lldb/include/lldb/Core/Debugger.h @@ -307,6 +307,10 @@ public: llvm::StringRef GetShowProgressAnsiSuffix() const; + llvm::StringRef GetDisabledAnsiPrefix() const; + + llvm::StringRef GetDisabledAnsiSuffix() const; + bool GetUseAutosuggestion() const; llvm::StringRef GetAutosuggestionAnsiPrefix() const; diff --git a/lldb/source/Breakpoint/Breakpoint.cpp b/lldb/source/Breakpoint/Breakpoint.cpp index 337c1a4ac401..2ed0c9314e3e 100644 --- a/lldb/source/Breakpoint/Breakpoint.cpp +++ b/lldb/source/Breakpoint/Breakpoint.cpp @@ -15,6 +15,7 @@ #include "lldb/Breakpoint/BreakpointResolver.h" #include "lldb/Breakpoint/BreakpointResolverFileLine.h" #include "lldb/Core/Address.h" +#include "lldb/Core/Debugger.h" #include "lldb/Core/Module.h" #include "lldb/Core/ModuleList.h" #include "lldb/Core/SearchFilter.h" @@ -26,6 +27,7 @@ #include "lldb/Target/SectionLoadList.h" #include 
"lldb/Target/Target.h" #include "lldb/Target/ThreadSpec.h" +#include "lldb/Utility/AnsiTerminal.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/Stream.h" @@ -838,6 +840,13 @@ void Breakpoint::GetDescription(Stream *s, lldb::DescriptionLevel level, bool show_locations) { assert(s != nullptr); + const bool dim_breakpoint_description = + !IsEnabled() && s->AsRawOstream().colors_enabled(); + if (dim_breakpoint_description) + s->Printf("%s", ansi::FormatAnsiTerminalCodes( + GetTarget().GetDebugger().GetDisabledAnsiPrefix()) + .c_str()); + if (!m_kind_description.empty()) { if (level == eDescriptionLevelBrief) { s->PutCString(GetBreakpointKind()); @@ -934,6 +943,12 @@ void Breakpoint::GetDescription(Stream *s, lldb::DescriptionLevel level, } s->IndentLess(); } + + // Reset the colors back to normal if they were previously greyed out. + if (dim_breakpoint_description) + s->Printf("%s", ansi::FormatAnsiTerminalCodes( + GetTarget().GetDebugger().GetDisabledAnsiSuffix()) + .c_str()); } void Breakpoint::GetResolverDescription(Stream *s) { diff --git a/lldb/source/Core/CoreProperties.td b/lldb/source/Core/CoreProperties.td index 4d1ea5dfec2e..53dd333f045c 100644 --- a/lldb/source/Core/CoreProperties.td +++ b/lldb/source/Core/CoreProperties.td @@ -191,6 +191,22 @@ let Definition = "debugger" in { "${separator}${thread.stop-reason}}{ " "${separator}{${progress.count} }${progress.message}}">, Desc<"The default statusline format string.">; + + def ShowDisabledAnsiPrefix + : Property<"disable-ansi-prefix", "String">, + Global, + DefaultStringValue<"${ansi.faint}">, + Desc<"If something has been disabled in a color-enabled terminal, use " + "the ANSI terminal code specified immediately before whatever has " + "been disabled.">; + def ShowDisabledAnsiSuffix + : Property<"disable-ansi-suffix", "String">, + Global, + DefaultStringValue<"${ansi.normal}">, + Desc<"When somehing has been disabled in a color-enabled terminal, use " + "the ANSI 
terminal code specified immediately after whatever has " + "been disabled.">; + def UseSourceCache: Property<"use-source-cache", "Boolean">, Global, DefaultTrue, diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp index 81037d3def81..c9935f2d745f 100644 --- a/lldb/source/Core/Debugger.cpp +++ b/lldb/source/Core/Debugger.cpp @@ -510,6 +510,18 @@ llvm::StringRef Debugger::GetSeparator() const { idx, g_debugger_properties[idx].default_cstr_value); } +llvm::StringRef Debugger::GetDisabledAnsiPrefix() const { + const uint32_t idx = ePropertyShowDisabledAnsiPrefix; + return GetPropertyAtIndexAs( + idx, g_debugger_properties[idx].default_cstr_value); +} + +llvm::StringRef Debugger::GetDisabledAnsiSuffix() const { + const uint32_t idx = ePropertyShowDisabledAnsiSuffix; + return GetPropertyAtIndexAs( + idx, g_debugger_properties[idx].default_cstr_value); +} + bool Debugger::SetSeparator(llvm::StringRef s) { constexpr uint32_t idx = ePropertySeparator; bool ret = SetPropertyAtIndex(idx, s); diff --git a/lldb/test/API/terminal/TestDisabledBreakpoints.py b/lldb/test/API/terminal/TestDisabledBreakpoints.py new file mode 100644 index 000000000000..a644c94c8a17 --- /dev/null +++ b/lldb/test/API/terminal/TestDisabledBreakpoints.py @@ -0,0 +1,25 @@ +""" +Test that disabling breakpoints and viewing them in a list uses the correct ANSI color settings when colors are enabled and disabled. 
+""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +from lldbsuite.test.lldbpexpect import PExpectTest + +import io + + +class DisabledBreakpointsTest(PExpectTest): + @add_test_categories(["pexpect"]) + def test_disabling_breakpoints_with_color(self): + """Test that disabling a breakpoint and viewing the breakpoints list uses the specified ANSI color prefix.""" + ansi_red_color_code = "\x1b[31m" + + self.launch(use_colors=True, dimensions=(100, 100)) + self.expect('settings set disable-ansi-prefix "${ansi.fg.red}"') + self.expect("b main") + self.expect("br dis") + self.expect("br l", substrs=[ansi_red_color_code + "1:"]) + self.quit() -- cgit v1.2.3 From a88e655809655eec8fa85366318fb3c4a0baa113 Mon Sep 17 00:00:00 2001 From: Andrew Rogers Date: Wed, 18 Jun 2025 13:08:05 -0700 Subject: [llvm] build Blake3 source with LLVM_EXPORTS defined (#144753) ## Purpose This patch ensures that the BLAKE3 implementation in the LLVM Support library exports its public interface with `__declspec(dllexport)` when building LLVM as a Windows DLL. ## Background The effort to support building LLVM as a Windows DLL is tracked in #109483. Additional context is provided in [this discourse](https://discourse.llvm.org/t/psa-annotating-llvm-public-interface/85307). ## Overview Replicate [this logic](https://github.com/llvm/llvm-project/blob/main/llvm/cmake/modules/AddLLVM.cmake#L662-L664) from `llvm_add_library()` for the `LLVMSupportBlake3` target. Without this change, the `llvm_blake_` functions will only be annotated with `__declspec(dllimport)` when building LLVM as a Windows DLL which leads to inconsistent DLL linkage warnings from MSVC and `clang-cl`. 
--- llvm/lib/Support/BLAKE3/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/lib/Support/BLAKE3/CMakeLists.txt b/llvm/lib/Support/BLAKE3/CMakeLists.txt index 99cb78881ec4..eae2b0280e5d 100644 --- a/llvm/lib/Support/BLAKE3/CMakeLists.txt +++ b/llvm/lib/Support/BLAKE3/CMakeLists.txt @@ -85,3 +85,9 @@ endif() add_library(LLVMSupportBlake3 OBJECT EXCLUDE_FROM_ALL ${LLVM_BLAKE3_FILES}) set_target_properties(LLVMSupportBlake3 PROPERTIES FOLDER "LLVM/Libraries") llvm_update_compile_flags(LLVMSupportBlake3) +if(LLVM_BUILD_LLVM_DYLIB OR BUILD_SHARED_LIBS) + # Since LLVMSupportBlake3 is not defined using llvm_add_library(), we must + # define LLVM_EXPORTS here so its public interface is annotated with + # __declspec(dllexport) when building as a DLL on Windows. + target_compile_definitions(LLVMSupportBlake3 PRIVATE LLVM_EXPORTS) +endif() -- cgit v1.2.3 From 96bbe472ef01e5f34bfeabedceea397889ff1119 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Wed, 18 Jun 2025 16:15:06 -0400 Subject: Revert "[mlir][spirv] Fix int type declaration duplication when serializing" and follow up commits (#144773) This reverts the following PRs: * https://github.com/llvm/llvm-project/pull/143108 * https://github.com/llvm/llvm-project/pull/144538 * https://github.com/llvm/llvm-project/pull/144685 Reverting because this disabled tests when building without the llvm spirv backend enabled. 
--- mlir/lib/Target/SPIRV/Serialization/Serializer.cpp | 13 ------------- mlir/test/CMakeLists.txt | 6 ------ mlir/test/Target/SPIRV/constant.mlir | 5 +---- mlir/test/Target/SPIRV/lit.local.cfg | 7 ------- mlir/test/lit.cfg.py | 1 - mlir/test/lit.site.cfg.py.in | 4 +--- utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 1 - 7 files changed, 2 insertions(+), 35 deletions(-) delete mode 100644 mlir/test/Target/SPIRV/lit.local.cfg diff --git a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp index 56c64f38fe29..d258bfd85296 100644 --- a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp +++ b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp @@ -446,19 +446,6 @@ LogicalResult Serializer::processType(Location loc, Type type, LogicalResult Serializer::processTypeImpl(Location loc, Type type, uint32_t &typeID, SetVector &serializationCtx) { - - // Map unsigned integer types to singless integer types. - // This is needed otherwise the generated spirv assembly will contain - // twice a type declaration (like OpTypeInt 32 0) which is no permitted and - // such module fails validation. Indeed at MLIR level the two types are - // different and lookup in the cache below misses. - // Note: This conversion needs to happen here before the type is looked up in - // the cache. 
- if (type.isUnsignedInteger()) { - type = IntegerType::get(loc->getContext(), type.getIntOrFloatBitWidth(), - IntegerType::SignednessSemantics::Signless); - } - typeID = getTypeID(type); if (typeID) return success(); diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt index 89568e7766ae..ac8b44f53aeb 100644 --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -68,7 +68,6 @@ endif() llvm_canonicalize_cmake_booleans( LLVM_BUILD_EXAMPLES LLVM_HAS_NVPTX_TARGET - LLVM_INCLUDE_SPIRV_TOOLS_TESTS MLIR_ENABLE_BINDINGS_PYTHON MLIR_ENABLE_CUDA_RUNNER MLIR_ENABLE_ROCM_CONVERSIONS @@ -218,11 +217,6 @@ if(MLIR_ENABLE_BINDINGS_PYTHON) ) endif() -if (LLVM_INCLUDE_SPIRV_TOOLS_TESTS) - list(APPEND MLIR_TEST_DEPENDS spirv-as) - list(APPEND MLIR_TEST_DEPENDS spirv-val) -endif() - # This target can be used to just build the dependencies # for the check-mlir target without executing the tests. # This is useful for bots when splitting the build step diff --git a/mlir/test/Target/SPIRV/constant.mlir b/mlir/test/Target/SPIRV/constant.mlir index 50d9b09ee004..8d4e53418b70 100644 --- a/mlir/test/Target/SPIRV/constant.mlir +++ b/mlir/test/Target/SPIRV/constant.mlir @@ -1,7 +1,6 @@ // RUN: mlir-translate --no-implicit-module --test-spirv-roundtrip %s | FileCheck %s -// RUN: %if spirv-tools %{ mlir-translate -no-implicit-module -serialize-spirv %s | spirv-val %} -spirv.module Logical Vulkan requires #spirv.vce { +spirv.module Logical GLSL450 requires #spirv.vce { // CHECK-LABEL: @bool_const spirv.func @bool_const() -> () "None" { // CHECK: spirv.Constant true @@ -306,6 +305,4 @@ spirv.module Logical Vulkan requires #spirv.vce : !spirv.coopmatrix<16x16xi8, Subgroup, MatrixAcc> spirv.ReturnValue %coop : !spirv.coopmatrix<16x16xi8, Subgroup, MatrixAcc> } - - spirv.EntryPoint "GLCompute" @bool_const } diff --git a/mlir/test/Target/SPIRV/lit.local.cfg b/mlir/test/Target/SPIRV/lit.local.cfg deleted file mode 100644 index 167c454db518..000000000000 --- 
a/mlir/test/Target/SPIRV/lit.local.cfg +++ /dev/null @@ -1,7 +0,0 @@ -if not "SPIRV" in config.root.targets: - config.unsupported = True - -if config.spirv_tools_tests: - config.available_features.add("spirv-tools") - config.substitutions.append(("spirv-as", os.path.join(config.llvm_tools_dir, "spirv-as"))) - config.substitutions.append(("spirv-val", os.path.join(config.llvm_tools_dir, "spirv-val"))) diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py index a6f1ac0d568f..9b5cadd62bef 100644 --- a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -332,7 +332,6 @@ if config.enable_assertions: else: config.available_features.add("noasserts") -config.targets = frozenset(config.targets_to_build.split()) def have_host_jit_feature_support(feature_name): mlir_runner_exe = lit.util.which("mlir-runner", config.mlir_tools_dir) diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in index 77f24e0f29b0..132aabe13594 100644 --- a/mlir/test/lit.site.cfg.py.in +++ b/mlir/test/lit.site.cfg.py.in @@ -5,8 +5,6 @@ import sys config.target_triple = "@LLVM_TARGET_TRIPLE@" config.llvm_src_root = "@LLVM_SOURCE_DIR@" config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") -config.spirv_tools_tests = @LLVM_INCLUDE_SPIRV_TOOLS_TESTS@ -config.targets_to_build = "@TARGETS_TO_BUILD@" config.llvm_shlib_ext = "@SHLIBEXT@" config.llvm_shlib_dir = lit_config.substitute(path(r"@SHLIBDIR@")) config.python_executable = "@Python3_EXECUTABLE@" @@ -43,7 +41,7 @@ config.mlir_run_amx_tests = @MLIR_RUN_AMX_TESTS@ config.mlir_run_arm_sve_tests = @MLIR_RUN_ARM_SVE_TESTS@ # This is a workaround for the fact that LIT's: # %if -# requires to be in the set of available features. +# requires to be in the set of available features. # TODO: Update LIT's TestRunner so that this is not required. 
if config.mlir_run_arm_sve_tests: config.available_features.add("mlir_arm_sve_tests") diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index a439fdd50d21..51731b1e8f74 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -37,7 +37,6 @@ expand_template( # All disabled, but required to substituted because they are not in quotes. "@LLVM_BUILD_EXAMPLES@": "0", "@LLVM_HAS_NVPTX_TARGET@": "0", - "@LLVM_INCLUDE_SPIRV_TOOLS_TESTS@": "0", "@MLIR_ENABLE_CUDA_RUNNER@": "0", "@MLIR_ENABLE_ROCM_CONVERSIONS@": "0", "@MLIR_ENABLE_ROCM_RUNNER@": "0", -- cgit v1.2.3 From b85e92990fdec32ec7512dec7bd36d945f8e0144 Mon Sep 17 00:00:00 2001 From: Alexey Karyakin Date: Wed, 18 Jun 2025 15:26:38 -0500 Subject: Hexagon v87 v89 elf flags (#144584) --- llvm/include/llvm/BinaryFormat/ELF.h | 64 +++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 97f417263aff..dcf1f1c6d7b2 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -614,33 +614,7 @@ enum { // Hexagon-specific e_flags enum { - // Object processor version flags, bits[11:0] - EF_HEXAGON_MACH_V2 = 0x00000001, // Hexagon V2 - EF_HEXAGON_MACH_V3 = 0x00000002, // Hexagon V3 - EF_HEXAGON_MACH_V4 = 0x00000003, // Hexagon V4 - EF_HEXAGON_MACH_V5 = 0x00000004, // Hexagon V5 - EF_HEXAGON_MACH_V55 = 0x00000005, // Hexagon V55 - EF_HEXAGON_MACH_V60 = 0x00000060, // Hexagon V60 - EF_HEXAGON_MACH_V61 = 0x00000061, // Hexagon V61 - EF_HEXAGON_MACH_V62 = 0x00000062, // Hexagon V62 - EF_HEXAGON_MACH_V65 = 0x00000065, // Hexagon V65 - EF_HEXAGON_MACH_V66 = 0x00000066, // Hexagon V66 - EF_HEXAGON_MACH_V67 = 0x00000067, // Hexagon V67 - EF_HEXAGON_MACH_V67T = 0x00008067, // Hexagon V67T - EF_HEXAGON_MACH_V68 = 0x00000068, // Hexagon 
V68 - EF_HEXAGON_MACH_V69 = 0x00000069, // Hexagon V69 - EF_HEXAGON_MACH_V71 = 0x00000071, // Hexagon V71 - EF_HEXAGON_MACH_V71T = 0x00008071, // Hexagon V71T - EF_HEXAGON_MACH_V73 = 0x00000073, // Hexagon V73 - EF_HEXAGON_MACH_V75 = 0x00000075, // Hexagon V75 - EF_HEXAGON_MACH_V77 = 0x00000077, // Hexagon V77 - EF_HEXAGON_MACH_V79 = 0x00000079, // Hexagon V79 - EF_HEXAGON_MACH_V81 = 0x00000081, // Hexagon V81 - EF_HEXAGON_MACH_V83 = 0x00000083, // Hexagon V83 - EF_HEXAGON_MACH_V85 = 0x00000085, // Hexagon V85 - EF_HEXAGON_MACH = 0x000003ff, // Hexagon V.. - - // Highest ISA version flags + // Hexagon ISA version, bits[11:0] EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[11:0] // of e_flags EF_HEXAGON_ISA_V2 = 0x00000010, // Hexagon V2 ISA @@ -664,7 +638,43 @@ enum { EF_HEXAGON_ISA_V81 = 0x00000081, // Hexagon V81 ISA EF_HEXAGON_ISA_V83 = 0x00000083, // Hexagon V83 ISA EF_HEXAGON_ISA_V85 = 0x00000085, // Hexagon V85 ISA + EF_HEXAGON_ISA_V87 = 0x00000087, // Hexagon V87 ISA + EF_HEXAGON_ISA_V89 = 0x00000089, // Hexagon V89 ISA EF_HEXAGON_ISA = 0x000003ff, // Hexagon V.. 
ISA + + // Tiny core flag, bit[15] + EF_HEXAGON_TINY_CORE = 0x00008000, // Hexagon Tiny Core + + // Hexagon processor version, bits[15:0] + EF_HEXAGON_MACH_V2 = 0x00000001, // Hexagon V2 + EF_HEXAGON_MACH_V3 = 0x00000002, // Hexagon V3 + EF_HEXAGON_MACH_V4 = 0x00000003, // Hexagon V4 + EF_HEXAGON_MACH_V5 = 0x00000004, // Hexagon V5 + EF_HEXAGON_MACH_V55 = 0x00000005, // Hexagon V55 + EF_HEXAGON_MACH_V60 = EF_HEXAGON_ISA_V60, // Hexagon V60 + EF_HEXAGON_MACH_V61 = EF_HEXAGON_ISA_V61, // Hexagon V61 + EF_HEXAGON_MACH_V62 = EF_HEXAGON_ISA_V62, // Hexagon V62 + EF_HEXAGON_MACH_V65 = EF_HEXAGON_ISA_V65, // Hexagon V65 + EF_HEXAGON_MACH_V66 = EF_HEXAGON_ISA_V66, // Hexagon V66 + EF_HEXAGON_MACH_V67 = EF_HEXAGON_ISA_V67, // Hexagon V67 + EF_HEXAGON_MACH_V67T = + EF_HEXAGON_ISA_V67 | EF_HEXAGON_TINY_CORE, // Hexagon V67T + EF_HEXAGON_MACH_V68 = EF_HEXAGON_ISA_V68, // Hexagon V68 + EF_HEXAGON_MACH_V69 = EF_HEXAGON_ISA_V69, // Hexagon V69 + EF_HEXAGON_MACH_V71 = EF_HEXAGON_ISA_V71, // Hexagon V71 + EF_HEXAGON_MACH_V71T = + EF_HEXAGON_ISA_V71 | EF_HEXAGON_TINY_CORE, // Hexagon V71T + EF_HEXAGON_MACH_V73 = EF_HEXAGON_ISA_V73, // Hexagon V73 + EF_HEXAGON_MACH_V75 = EF_HEXAGON_ISA_V75, // Hexagon V75 + EF_HEXAGON_MACH_V77 = EF_HEXAGON_ISA_V77, // Hexagon V77 + EF_HEXAGON_MACH_V79 = EF_HEXAGON_ISA_V79, // Hexagon V79 + EF_HEXAGON_MACH_V81 = EF_HEXAGON_ISA_V81, // Hexagon V81 + EF_HEXAGON_MACH_V83 = EF_HEXAGON_ISA_V83, // Hexagon V83 + EF_HEXAGON_MACH_V85 = EF_HEXAGON_ISA_V85, // Hexagon V85 + EF_HEXAGON_MACH_V87 = EF_HEXAGON_ISA_V87, // Hexagon V87 + EF_HEXAGON_MACH_V89 = EF_HEXAGON_ISA_V89, // Hexagon V89 + + EF_HEXAGON_MACH = 0x0000ffff, // Hexagon V.. 
}; // Hexagon-specific section indexes for common small data -- cgit v1.2.3 From 7aecd7ecacb4b305b94149f3cfcef306a9da6beb Mon Sep 17 00:00:00 2001 From: Diego Caballero Date: Wed, 18 Jun 2025 13:45:43 -0700 Subject: [mlir][Vector] Add `vector.to_elements` op (#141457) This PR introduces the `vector.to_elements` op, which decomposes a vector into its scalar elements. This operation is symmetrical to the existing `vector.from_elements`. Examples: ``` // Decompose a 0-D vector. %0 = vector.to_elements %v0 : vector<f32> // %0 = %v0[0] // Decompose a 1-D vector. %0:2 = vector.to_elements %v1 : vector<2xf32> // %0#0 = %v1[0] // %0#1 = %v1[1] // Decompose a 2-D vector. %0:6 = vector.to_elements %v2 : vector<2x3xf32> // %0#0 = %v2[0, 0] // %0#1 = %v2[0, 1] // %0#2 = %v2[0, 2] // %0#3 = %v2[1, 0] // %0#4 = %v2[1, 1] // %0#5 = %v2[1, 2] ``` This op is aimed at reducing code size when modeling "structured" vector extractions and simplifying canonicalizations of large sequences of `vector.extract` and `vector.insert` ops into `vector.shuffle` and other sophisticated ops that can re-arrange vector elements. 
--- mlir/include/mlir/Dialect/Vector/IR/VectorOps.td | 85 +++++++++++++++++++----- mlir/include/mlir/IR/OpBase.td | 19 ++++++ mlir/lib/TableGen/Operator.cpp | 31 +++++++++ mlir/test/Dialect/Vector/invalid.mlir | 24 +++++-- mlir/test/Dialect/Vector/ops.mlir | 18 +++++ mlir/tools/mlir-tblgen/OpFormatGen.cpp | 26 ++++++++ 6 files changed, 181 insertions(+), 22 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index 8353314ed958..125cd4645ccc 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -790,40 +790,89 @@ def Vector_FMAOp : }]; } +def Vector_ToElementsOp : Vector_Op<"to_elements", [ + Pure, + ShapedTypeMatchesElementCountAndTypes<"source", "elements">]> { + let summary = "operation that decomposes a vector into all its scalar elements"; + let description = [{ + This operation decomposes all the scalar elements from a vector. The + decomposed scalar elements are returned in row-major order. The number of + scalar results must match the number of elements in the input vector type. + All the result elements have the same result type, which must match the + element type of the input vector. Scalable vectors are not supported. + + Examples: + + ```mlir + // Decompose a 0-D vector. + %0 = vector.to_elements %v0 : vector + // %0 = %v0[0] + + // Decompose a 1-D vector. + %0:2 = vector.to_elements %v1 : vector<2xf32> + // %0#0 = %v1[0] + // %0#1 = %v1[1] + + // Decompose a 2-D. + %0:6 = vector.to_elements %v2 : vector<2x3xf32> + // %0#0 = %v2[0, 0] + // %0#1 = %v2[0, 1] + // %0#2 = %v2[0, 2] + // %0#3 = %v2[1, 0] + // %0#4 = %v2[1, 1] + // %0#5 = %v2[1, 2] + + // Decompose a 3-D vector. 
+ %0:6 = vector.to_elements %v3 : vector<3x1x2xf32> + // %0#0 = %v3[0, 0, 0] + // %0#1 = %v3[0, 0, 1] + // %0#2 = %v3[1, 0, 0] + // %0#3 = %v3[1, 0, 1] + // %0#4 = %v3[2, 0, 0] + // %0#5 = %v3[2, 0, 1] + ``` + }]; + + let arguments = (ins AnyVectorOfAnyRank:$source); + let results = (outs Variadic:$elements); + let assemblyFormat = "$source attr-dict `:` type($source)"; +} + def Vector_FromElementsOp : Vector_Op<"from_elements", [ Pure, - TypesMatchWith<"operand types match result element type", - "result", "elements", "SmallVector(" - "::llvm::cast($_self).getNumElements(), " - "::llvm::cast($_self).getElementType())">]> { + ShapedTypeMatchesElementCountAndTypes<"dest", "elements">]> { let summary = "operation that defines a vector from scalar elements"; let description = [{ This operation defines a vector from one or multiple scalar elements. The - number of elements must match the number of elements in the result type. - All elements must have the same type, which must match the element type of - the result vector type. - - `elements` are a flattened version of the result vector in row-major order. + scalar elements are arranged in row-major within the vector. The number of + elements must match the number of elements in the result type. All elements + must have the same type, which must match the element type of the result + vector type. Scalable vectors are not supported. - Example: + Examples: ```mlir - // %f1 + // Define a 0-D vector. %0 = vector.from_elements %f1 : vector - // [%f1, %f2] + // [%f1] + + // Define a 1-D vector. %1 = vector.from_elements %f1, %f2 : vector<2xf32> - // [[%f1, %f2, %f3], [%f4, %f5, %f6]] + // [%f1, %f2] + + // Define a 2-D vector. %2 = vector.from_elements %f1, %f2, %f3, %f4, %f5, %f6 : vector<2x3xf32> - // [[[%f1, %f2]], [[%f3, %f4]], [[%f5, %f6]]] + // [[%f1, %f2, %f3], [%f4, %f5, %f6]] + + // Define a 3-D vector. 
%3 = vector.from_elements %f1, %f2, %f3, %f4, %f5, %f6 : vector<3x1x2xf32> + // [[[%f1, %f2]], [[%f3, %f4]], [[%f5, %f6]]] ``` - - Note, scalable vectors are not supported. }]; let arguments = (ins Variadic:$elements); - let results = (outs AnyFixedVectorOfAnyRank:$result); - let assemblyFormat = "$elements attr-dict `:` type($result)"; + let results = (outs AnyFixedVectorOfAnyRank:$dest); + let assemblyFormat = "$elements attr-dict `:` type($dest)"; let hasCanonicalizer = 1; } diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 51b60972203e..b3fabe409806 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -556,6 +556,25 @@ class AllShapesMatch names> : class AllTypesMatch names> : AllMatchSameOperatorTrait; +// A type constraint that verifies that a shaped type matches the size and +// element type of a container with element types. More specifically, it denotes +// shapedArg.getType().getNumElements() == elementsArg.size() && +// shapedArg.getType().getElementType() == elementsArg[i].getType(), for i in +// [0, elementsArg.size()). +class ShapedTypeMatchesElementCountAndTypes : + PredOpTrait<"shaped type '" # shapedArg # "' matches '" # elementsArg # "' " + "element count and types", + And<[CPred.result # " == " + "$" # elementsArg # ".getTypes().size()">, + CPred<"::llvm::all_of($" # elementsArg # ".getTypes(), " + "[&](::mlir::Type t) { return t == " + # ElementType.result # "; })">]>> { + + string shaped = shapedArg; + string elements = elementsArg; +} + // A type constraint that denotes `transform(lhs.getType()) == rhs.getType()`. // An optional comparator function may be provided that changes the above form // into: `comparator(transform(lhs.getType()), rhs.getType())`. 
diff --git a/mlir/lib/TableGen/Operator.cpp b/mlir/lib/TableGen/Operator.cpp index 2544f0a1b91b..07520a2f94d7 100644 --- a/mlir/lib/TableGen/Operator.cpp +++ b/mlir/lib/TableGen/Operator.cpp @@ -468,6 +468,37 @@ void Operator::populateTypeInferenceInfo( continue; } + // The `ShapedTypeMatchesElementCountAndTypes` trait represents a 1 -> 1 + // type inference edge where a shaped type matches element count and types + // of variadic elements. + if (def.isSubClassOf("ShapedTypeMatchesElementCountAndTypes")) { + StringRef shapedArg = def.getValueAsString("shaped"); + StringRef elementsArg = def.getValueAsString("elements"); + + int shapedIndex = argumentsAndResultsIndex.lookup(shapedArg); + int elementsIndex = argumentsAndResultsIndex.lookup(elementsArg); + + // Handle result type inference from shaped type to variadic elements. + if (InferredResultType::isResultIndex(elementsIndex) && + InferredResultType::isArgIndex(shapedIndex)) { + int resultIndex = InferredResultType::unmapResultIndex(elementsIndex); + ResultTypeInference &infer = inference[resultIndex]; + if (!infer.inferred) { + infer.sources.emplace_back( + shapedIndex, + "::llvm::SmallVector<::mlir::Type>(::llvm::cast<::mlir::" + "ShapedType>($_self).getNumElements(), " + "::llvm::cast<::mlir::ShapedType>($_self).getElementType())"); + infer.inferred = true; + } + } + + // Type inference in the opposite direction is not possible as the actual + // shaped type can't be inferred from the variadic elements. 
+ + continue; + } + if (!def.isSubClassOf("AllTypesMatch")) continue; diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index 04810ed52584..ec7cee7b2c64 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -1896,7 +1896,24 @@ func.func @deinterleave_scalable_rank_fail(%vec : vector<2x[4]xf32>) { // ----- -func.func @invalid_from_elements(%a: f32) { +func.func @to_elements_wrong_num_results(%a: vector<1x1x2xf32>) { + // expected-error @+1 {{operation defines 2 results but was provided 4 to bind}} + %0:4 = vector.to_elements %a : vector<1x1x2xf32> + return +} + +// ----- + +func.func @to_elements_wrong_result_type(%a: vector<2xf32>) -> i32 { + // expected-error @+3 {{use of value '%0' expects different type than prior uses: 'i32'}} + // expected-note @+1 {{prior use here}} + %0:2 = vector.to_elements %a : vector<2xf32> + return %0#0 : i32 +} + +// ----- + +func.func @from_elements_wrong_num_operands(%a: f32) { // expected-error @+1 {{'vector.from_elements' number of operands and types do not match: got 1 operands and 2 types}} vector.from_elements %a : vector<2xf32> return @@ -1905,16 +1922,15 @@ func.func @invalid_from_elements(%a: f32) { // ----- // expected-note @+1 {{prior use here}} -func.func @invalid_from_elements(%a: f32, %b: i32) { +func.func @from_elements_wrong_operand_type(%a: f32, %b: i32) { // expected-error @+1 {{use of value '%b' expects different type than prior uses: 'f32' vs 'i32'}} vector.from_elements %a, %b : vector<2xf32> return } - // ----- func.func @invalid_from_elements_scalable(%a: f32, %b: i32) { - // expected-error @+1 {{'result' must be fixed-length vector of any type values, but got 'vector<[2]xf32>'}} + // expected-error @+1 {{'dest' must be fixed-length vector of any type values, but got 'vector<[2]xf32>'}} vector.from_elements %a, %b : vector<[2]xf32> return } diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index 
f3220aed4360..c59f7bd00190 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ b/mlir/test/Dialect/Vector/ops.mlir @@ -1175,6 +1175,24 @@ func.func @deinterleave_nd_scalable(%arg:vector<2x3x4x[6]xf32>) -> (vector<2x3x4 return %0, %1 : vector<2x3x4x[3]xf32>, vector<2x3x4x[3]xf32> } +// CHECK-LABEL: func @to_elements( +// CHECK-SAME: %[[A_VEC:.*]]: vector, %[[B_VEC:.*]]: vector<1xf32>, +// CHECK-SAME: %[[C_VEC:.*]]: vector<1x2xf32>, %[[D_VEC:.*]]: vector<2x2xf32>) +func.func @to_elements(%a_vec : vector, %b_vec : vector<1xf32>, + %c_vec : vector<1x2xf32>, %d_vec : vector<2x2xf32>) + -> (f32, f32, f32, f32, f32, f32, f32, f32) { + // CHECK: %[[A_ELEMS:.*]] = vector.to_elements %[[A_VEC]] : vector + %0 = vector.to_elements %a_vec : vector + // CHECK: %[[B_ELEMS:.*]] = vector.to_elements %[[B_VEC]] : vector<1xf32> + %1 = vector.to_elements %b_vec : vector<1xf32> + // CHECK: %[[C_ELEMS:.*]]:2 = vector.to_elements %[[C_VEC]] : vector<1x2xf32> + %2:2 = vector.to_elements %c_vec : vector<1x2xf32> + // CHECK: %[[D_ELEMS:.*]]:4 = vector.to_elements %[[D_VEC]] : vector<2x2xf32> + %3:4 = vector.to_elements %d_vec : vector<2x2xf32> + // CHECK: return %[[A_ELEMS]], %[[B_ELEMS]], %[[C_ELEMS]]#0, %[[C_ELEMS]]#1, %[[D_ELEMS]]#0, %[[D_ELEMS]]#1, %[[D_ELEMS]]#2, %[[D_ELEMS]]#3 + return %0, %1, %2#0, %2#1, %3#0, %3#1, %3#2, %3#3: f32, f32, f32, f32, f32, f32, f32, f32 +} + // CHECK-LABEL: func @from_elements( // CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: f32) func.func @from_elements(%a: f32, %b: f32) -> (vector, vector<1xf32>, vector<1x2xf32>, vector<2x2xf32>) { diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 0a9d14d6603a..ef3a18ba7df2 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -2787,6 +2787,11 @@ private: void handleTypesMatchConstraint( StringMap &variableTyResolver, const Record &def); + /// Check for inferable type resolution based on + /// `ShapedTypeMatchesElementCountAndTypes` 
constraint. + void handleShapedTypeMatchesElementCountAndTypesConstraint( + StringMap &variableTyResolver, const Record &def); + /// Returns an argument or attribute with the given name that has been seen /// within the format. ConstArgument findSeenArg(StringRef name); @@ -2850,6 +2855,9 @@ LogicalResult OpFormatParser::verify(SMLoc loc, handleSameTypesConstraint(variableTyResolver, /*includeResults=*/true); } else if (def.isSubClassOf("TypesMatchWith")) { handleTypesMatchConstraint(variableTyResolver, def); + } else if (def.isSubClassOf("ShapedTypeMatchesElementCountAndTypes")) { + handleShapedTypeMatchesElementCountAndTypesConstraint(variableTyResolver, + def); } else if (!op.allResultTypesKnown()) { // This doesn't check the name directly to handle // DeclareOpInterfaceMethods @@ -3289,6 +3297,24 @@ void OpFormatParser::handleTypesMatchConstraint( variableTyResolver[rhsName] = {arg, transformer}; } +void OpFormatParser::handleShapedTypeMatchesElementCountAndTypesConstraint( + StringMap &variableTyResolver, const Record &def) { + StringRef shapedArg = def.getValueAsString("shaped"); + StringRef elementsArg = def.getValueAsString("elements"); + + // Check if the 'shaped' argument is seen, then we can infer the 'elements' + // types. + if (ConstArgument arg = findSeenArg(shapedArg)) { + variableTyResolver[elementsArg] = { + arg, "::llvm::SmallVector<::mlir::Type>(::llvm::cast<::mlir::" + "ShapedType>($_self).getNumElements(), " + "::llvm::cast<::mlir::ShapedType>($_self).getElementType())"}; + } + + // Type inference in the opposite direction is not possible as the actual + // shaped type can't be inferred from the variadic elements. +} + ConstArgument OpFormatParser::findSeenArg(StringRef name) { if (const NamedTypeConstraint *arg = findArg(op.getOperands(), name)) return seenOperandTypes.test(arg - op.operand_begin()) ? 
arg : nullptr; -- cgit v1.2.3 From 86d1d6b2c0c1f03e82cb8e360f2672c6f0ea39d5 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Wed, 18 Jun 2025 16:50:12 -0400 Subject: [clang] Use TargetInfo to determine device kernel calling convention (#144728) We should abstract this logic away to `TargetInfo`. See https://github.com/llvm/llvm-project/pull/137882 for more information. --------- Signed-off-by: Sarnie, Nick --- clang/lib/CodeGen/CGCall.cpp | 13 ++----------- clang/lib/CodeGen/TargetInfo.cpp | 27 +++++++++++++++------------ clang/lib/CodeGen/TargetInfo.h | 4 ++-- clang/lib/CodeGen/Targets/AMDGPU.cpp | 4 ++-- clang/lib/CodeGen/Targets/NVPTX.cpp | 2 +- clang/lib/CodeGen/Targets/SPIR.cpp | 4 ++-- 6 files changed, 24 insertions(+), 30 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index a06455d25b1e..fd75de42515d 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -83,17 +83,8 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { return llvm::CallingConv::AArch64_SVE_VectorCall; case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC; - case CC_DeviceKernel: { - if (CGM.getLangOpts().OpenCL) - return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); - if (CGM.getTriple().isSPIROrSPIRV()) - return llvm::CallingConv::SPIR_KERNEL; - if (CGM.getTriple().isAMDGPU()) - return llvm::CallingConv::AMDGPU_KERNEL; - if (CGM.getTriple().isNVPTX()) - return llvm::CallingConv::PTX_Kernel; - llvm_unreachable("Unknown kernel calling convention"); - } + case CC_DeviceKernel: + return CGM.getTargetCodeGenInfo().getDeviceKernelCallingConv(); case CC_PreserveMost: return llvm::CallingConv::PreserveMost; case CC_PreserveAll: diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index f3df92c44bb6..277d69daf493 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -103,18 +103,21 @@ 
TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib, Opt += Lib; } -unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const { - // OpenCL kernels are called via an explicit runtime API with arguments - // set with clSetKernelArg(), not as normal sub-functions. - // Return SPIR_KERNEL by default as the kernel calling convention to - // ensure the fingerprint is fixed such way that each OpenCL argument - // gets one matching argument in the produced kernel function argument - // list to enable feasible implementation of clSetKernelArg() with - // aggregates etc. In case we would use the default C calling conv here, - // clSetKernelArg() might break depending on the target-specific - // conventions; different targets might split structs passed as values - // to multiple function arguments etc. - return llvm::CallingConv::SPIR_KERNEL; +unsigned TargetCodeGenInfo::getDeviceKernelCallingConv() const { + if (getABIInfo().getContext().getLangOpts().OpenCL) { + // Device kernels are called via an explicit runtime API with arguments, + // such as set with clSetKernelArg() for OpenCL, not as normal + // sub-functions. Return SPIR_KERNEL by default as the kernel calling + // convention to ensure the fingerprint is fixed such way that each kernel + // argument gets one matching argument in the produced kernel function + // argument list to enable feasible implementation of clSetKernelArg() with + // aggregates etc. In case we would use the default C calling conv here, + // clSetKernelArg() might break depending on the target-specific + // conventions; different targets might split structs passed as values + // to multiple function arguments etc. 
+ return llvm::CallingConv::SPIR_KERNEL; + } + llvm_unreachable("Unknown kernel calling convention"); } void TargetCodeGenInfo::setOCLKernelStubCallingConvention( diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 2783e222eb80..b4057d369f98 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -298,8 +298,8 @@ public: llvm::StringRef Value, llvm::SmallString<32> &Opt) const {} - /// Get LLVM calling convention for OpenCL kernel. - virtual unsigned getOpenCLKernelCallingConv() const; + /// Get LLVM calling convention for device kernels. + virtual unsigned getDeviceKernelCallingConv() const; /// Get target specific null pointer. /// \param T is the LLVM type of the null pointer. diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 8660373c3927..47a552a7bf49 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -304,7 +304,7 @@ public: void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; - unsigned getOpenCLKernelCallingConv() const override; + unsigned getDeviceKernelCallingConv() const override; llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const override; @@ -431,7 +431,7 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( F->addFnAttr("amdgpu-ieee", "false"); } -unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { +unsigned AMDGPUTargetCodeGenInfo::getDeviceKernelCallingConv() const { return llvm::CallingConv::AMDGPU_KERNEL; } diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index ad802c9131de..82bdfe2666b5 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -78,7 +78,7 @@ public: return true; } - unsigned getOpenCLKernelCallingConv() const override { + unsigned getDeviceKernelCallingConv() const override { return 
llvm::CallingConv::PTX_Kernel; } diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 2f1e43cdc8cc..afa23bffcd07 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -51,7 +51,7 @@ public: getABIInfo().getDataLayout().getAllocaAddrSpace()); } - unsigned getOpenCLKernelCallingConv() const override; + unsigned getDeviceKernelCallingConv() const override; llvm::Type *getOpenCLType(CodeGenModule &CGM, const Type *T) const override; llvm::Type * getHLSLType(CodeGenModule &CGM, const Type *Ty, @@ -219,7 +219,7 @@ void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { } } -unsigned CommonSPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { +unsigned CommonSPIRTargetCodeGenInfo::getDeviceKernelCallingConv() const { return llvm::CallingConv::SPIR_KERNEL; } -- cgit v1.2.3 From 03bdc0a1f68adcddef80a4e7931dbfae914e5652 Mon Sep 17 00:00:00 2001 From: Chelsea Cassanova Date: Wed, 18 Jun 2025 13:51:59 -0700 Subject: [lldb][target] Add progress report for wait-attaching to process (#144768) This commit adds a progress report when wait-attaching to a process as well as a test for this. 
--- lldb/source/Target/Target.cpp | 1 + .../progress_reporting/TestProgressReporting.py | 31 ++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 45a9e1196a04..8f8d2ef21cc5 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -3546,6 +3546,7 @@ llvm::Expected Target::GetTraceOrCreate() { } Status Target::Attach(ProcessAttachInfo &attach_info, Stream *stream) { + Progress attach_progress("Waiting to attach to process"); m_stats.SetLaunchOrAttachTime(); auto state = eStateInvalid; auto process_sp = GetProcessSP(); diff --git a/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py b/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py index 9af53845ca1b..8198c50a5ff0 100644 --- a/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py +++ b/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py @@ -2,6 +2,7 @@ Test that we are able to broadcast and receive progress events from lldb """ import lldb +import threading import lldbsuite.test.lldbutil as lldbutil @@ -16,6 +17,36 @@ class TestProgressReporting(TestBase): self.broadcaster, lldb.SBDebugger.eBroadcastBitProgress ) + def test_wait_attach_progress_reporting(self): + """Test that progress reports for wait attaching work as intended.""" + self.build() + target = self.dbg.CreateTarget(None) + + # Wait attach to a process, then check to see that a progress report was created + # and that its message is correct for waiting to attach to a process. 
+ class AttachThread(threading.Thread): + def __init__(self, target): + threading.Thread.__init__(self) + self.target = target + + def run(self): + self.target.AttachToProcessWithName( + lldb.SBListener(), "a.out", True, lldb.SBError() + ) + + thread = AttachThread(target) + thread.start() + + event = lldbutil.fetch_next_event(self, self.listener, self.broadcaster) + progress_data = lldb.SBDebugger.GetProgressDataFromEvent(event) + message = progress_data.GetValueForKey("message").GetStringValue(100) + self.assertEqual(message, "Waiting to attach to process") + + # Interrupt the process attach to keep the test from stalling. + target.process.SendAsyncInterrupt() + + thread.join() + def test_dwarf_symbol_loading_progress_report(self): """Test that we are able to fetch dwarf symbol loading progress events""" self.build() -- cgit v1.2.3 From 4dca4459a328b8d589d81cd1f203b798c36ebf35 Mon Sep 17 00:00:00 2001 From: Amr Hesham Date: Wed, 18 Jun 2025 23:09:48 +0200 Subject: [CIR] Upstream ComplexType builtin_complex (#144225) This change adds support for builtin_complex https://github.com/llvm/llvm-project/issues/141365 --- clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 11 +++++++++- clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp | 33 ++++++++++++++++++++++++++++- clang/lib/CIR/CodeGen/CIRGenValue.h | 1 - clang/test/CIR/CodeGen/complex.cpp | 25 ++++++++++++++++++++++ 4 files changed, 67 insertions(+), 3 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index 83825f0835a1..cff139a7802d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -78,6 +78,8 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, assert(!cir::MissingFeatures::builtinCallMathErrno()); assert(!cir::MissingFeatures::builtinCall()); + mlir::Location loc = getLoc(e->getExprLoc()); + switch (builtinIDIfNoAsmLabel) { default: break; @@ -88,9 +90,16 @@ RValue 
CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, return RValue::get(nullptr); mlir::Value argValue = emitCheckedArgForAssume(e->getArg(0)); - builder.create(getLoc(e->getExprLoc()), argValue); + builder.create(loc, argValue); return RValue::get(nullptr); } + + case Builtin::BI__builtin_complex: { + mlir::Value real = emitScalarExpr(e->getArg(0)); + mlir::Value imag = emitScalarExpr(e->getArg(1)); + mlir::Value complex = builder.createComplexCreate(loc, real, imag); + return RValue::get(complex); + } } cgm.errorNYI(e->getSourceRange(), "unimplemented builtin call"); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp index 26070a6ca307..12e8e27948cf 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp @@ -15,11 +15,25 @@ public: explicit ComplexExprEmitter(CIRGenFunction &cgf) : cgf(cgf), builder(cgf.getBuilder()) {} + //===--------------------------------------------------------------------===// + // Utilities + //===--------------------------------------------------------------------===// + + /// Given an expression with complex type that represents a value l-value, + /// this method emits the address of the l-value, then loads and returns the + /// result. + mlir::Value emitLoadOfLValue(const Expr *e) { + return emitLoadOfLValue(cgf.emitLValue(e), e->getExprLoc()); + } + + mlir::Value emitLoadOfLValue(LValue lv, SourceLocation loc); + /// Store the specified real/imag parts into the /// specified value pointer. 
void emitStoreOfComplex(mlir::Location loc, mlir::Value val, LValue lv, bool isInit); + mlir::Value VisitCallExpr(const CallExpr *e); mlir::Value VisitInitListExpr(InitListExpr *e); mlir::Value VisitImaginaryLiteral(const ImaginaryLiteral *il); @@ -34,11 +48,21 @@ static const ComplexType *getComplexType(QualType type) { return cast(cast(type)->getValueType()); } +mlir::Value ComplexExprEmitter::emitLoadOfLValue(LValue lv, + SourceLocation loc) { + assert(lv.isSimple() && "non-simple complex l-value?"); + if (lv.getType()->isAtomicType()) + cgf.cgm.errorNYI(loc, "emitLoadOfLValue with Atomic LV"); + + const Address srcAddr = lv.getAddress(); + return builder.createLoad(cgf.getLoc(loc), srcAddr); +} + void ComplexExprEmitter::emitStoreOfComplex(mlir::Location loc, mlir::Value val, LValue lv, bool isInit) { if (lv.getType()->isAtomicType() || (!isInit && cgf.isLValueSuitableForInlineAtomic(lv))) { - cgf.cgm.errorNYI("StoreOfComplex with Atomic LV"); + cgf.cgm.errorNYI(loc, "StoreOfComplex with Atomic LV"); return; } @@ -46,6 +70,13 @@ void ComplexExprEmitter::emitStoreOfComplex(mlir::Location loc, mlir::Value val, builder.createStore(loc, val, destAddr); } +mlir::Value ComplexExprEmitter::VisitCallExpr(const CallExpr *e) { + if (e->getCallReturnType(cgf.getContext())->isReferenceType()) + return emitLoadOfLValue(e); + + return cgf.emitCallExpr(e).getValue(); +} + mlir::Value ComplexExprEmitter::VisitInitListExpr(InitListExpr *e) { mlir::Location loc = cgf.getLoc(e->getExprLoc()); if (e->getNumInits() == 2) { diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h index 84972fc7f911..7180d92f8c31 100644 --- a/clang/lib/CIR/CodeGen/CIRGenValue.h +++ b/clang/lib/CIR/CodeGen/CIRGenValue.h @@ -88,7 +88,6 @@ public: return er; } - // FIXME: Aggregate rvalues need to retain information about whether they are // volatile or not. Remove default to find all places that probably get this // wrong. 
diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp index db0b9111ab4f..721db235b37d 100644 --- a/clang/test/CIR/CodeGen/complex.cpp +++ b/clang/test/CIR/CodeGen/complex.cpp @@ -191,6 +191,31 @@ void foo8() { // OGCG: store double 0.000000e+00, ptr %[[C_REAL_PTR]], align 8 // OGCG: store double 2.000000e+00, ptr %[[C_IMAG_PTR]], align 8 +void foo9(double a, double b) { + double _Complex c = __builtin_complex(a, b); +} + +// CIR: %[[INIT:.*]] = cir.alloca !cir.complex, !cir.ptr>, ["c", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr, !cir.double +// CIR: %[[TMP_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr, !cir.double +// CIR: %[[COMPLEX:.*]] = cir.complex.create %[[TMP_A]], %[[TMP_B]] : !cir.double -> !cir.complex +// CIR: cir.store{{.*}} %[[COMPLEX]], %[[INIT]] : !cir.complex, !cir.ptr> + +// LLVM: %[[COMPLEX:.*]] = alloca { double, double }, i64 1, align 8 +// LLVM: %[[TMP_A:.*]] = load double, ptr {{.*}}, align 8 +// LLVM: %[[TMP_B:.*]] = load double, ptr {{.*}}, align 8 +// LLVM: %[[TMP:.*]] = insertvalue { double, double } undef, double %[[TMP_A]], 0 +// LLVM: %[[TMP_2:.*]] = insertvalue { double, double } %[[TMP]], double %[[TMP_B]], 1 +// LLVM: store { double, double } %[[TMP_2]], ptr %[[COMPLEX]], align 8 + +// OGCG: %[[COMPLEX]] = alloca { double, double }, align 8 +// OGCG: %[[TMP_A:.*]] = load double, ptr {{.*}}, align 8 +// OGCG: %[[TMP_B:.*]] = load double, ptr {{.*}}, align 8 +// OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[COMPLEX]], i32 0, i32 0 +// OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[COMPLEX]], i32 0, i32 1 +// OGCG: store double %[[TMP_A]], ptr %[[C_REAL_PTR]], align 8 +// OGCG: store double %[[TMP_B]], ptr %[[C_IMAG_PTR]], align 8 + void foo14() { int _Complex c = 2i; } -- cgit v1.2.3 From ac37a0df949afc31d12de75f85306db32dd50713 Mon Sep 17 00:00:00 2001 From: Diego Caballero Date: Wed, 18 Jun 2025 14:11:21 -0700 Subject: 
[mlir] Fix integer comparison warning (#144794) Introduced by https://github.com/llvm/llvm-project/pull/141457 --- mlir/include/mlir/IR/OpBase.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index b3fabe409806..43ef28624fb1 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -566,7 +566,7 @@ class ShapedTypeMatchesElementCountAndTypes.result # " == " - "$" # elementsArg # ".getTypes().size()">, + "static_cast($" # elementsArg # ".getTypes().size())">, CPred<"::llvm::all_of($" # elementsArg # ".getTypes(), " "[&](::mlir::Type t) { return t == " # ElementType.result # "; })">]>> { -- cgit v1.2.3 From d10079e305acae58b44dc773cb94f7127de197ef Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Wed, 18 Jun 2025 14:23:51 -0700 Subject: [RISCV] Reduce the VL of both operands in VMERGE_VVM (#144759) The `tryToReduceVL` function in RISCVVectorPeephole currently only reduces the VL of the instruction that defines the true operand in VMERGE_VVM. We should be able to reduce VL of both operands. This patch generalizes this function to support multiple operands from a single instruction. --- llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 74 ++++++++++++++------------ llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll | 3 +- llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 4 +- 3 files changed, 43 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index c9c2413d009b..f7acd676461f 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -112,7 +112,7 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const { // // TODO: We can handle a bunch more instructions here, and probably // recurse backwards through operands too. 
- unsigned SrcIdx = 0; + SmallVector SrcIndices = {0}; switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { default: return false; @@ -122,10 +122,10 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const { case RISCV::VSE64_V: break; case RISCV::VMV_V_V: - SrcIdx = 2; + SrcIndices[0] = 2; break; case RISCV::VMERGE_VVM: - SrcIdx = 3; // TODO: We can also handle the false operand. + SrcIndices.assign({2, 3}); break; case RISCV::VREDSUM_VS: case RISCV::VREDMAXU_VS: @@ -143,7 +143,7 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const { case RISCV::VFREDMIN_VS: case RISCV::VFWREDUSUM_VS: case RISCV::VFWREDOSUM_VS: - SrcIdx = 2; + SrcIndices[0] = 2; break; } @@ -151,42 +151,48 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const { if (VL.isImm() && VL.getImm() == RISCV::VLMaxSentinel) return false; - Register SrcReg = MI.getOperand(SrcIdx).getReg(); - // Note: one *use*, not one *user*. - if (!MRI->hasOneUse(SrcReg)) - return false; - - MachineInstr *Src = MRI->getVRegDef(SrcReg); - if (!Src || Src->hasUnmodeledSideEffects() || - Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 || - !RISCVII::hasVLOp(Src->getDesc().TSFlags) || - !RISCVII::hasSEWOp(Src->getDesc().TSFlags)) - return false; - - // Src's dest needs to have the same EEW as MI's input. - if (!hasSameEEW(MI, *Src)) - return false; - - bool ElementsDependOnVL = RISCVII::elementsDependOnVL( - TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags); - if (ElementsDependOnVL || Src->mayRaiseFPException()) - return false; + bool Changed = false; + for (unsigned SrcIdx : SrcIndices) { + Register SrcReg = MI.getOperand(SrcIdx).getReg(); + // Note: one *use*, not one *user*. 
+ if (!MRI->hasOneUse(SrcReg)) + continue; + + MachineInstr *Src = MRI->getVRegDef(SrcReg); + if (!Src || Src->hasUnmodeledSideEffects() || + Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 || + !RISCVII::hasVLOp(Src->getDesc().TSFlags) || + !RISCVII::hasSEWOp(Src->getDesc().TSFlags)) + continue; + + // Src's dest needs to have the same EEW as MI's input. + if (!hasSameEEW(MI, *Src)) + continue; + + bool ElementsDependOnVL = RISCVII::elementsDependOnVL( + TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags); + if (ElementsDependOnVL || Src->mayRaiseFPException()) + continue; + + MachineOperand &SrcVL = + Src->getOperand(RISCVII::getVLOpNum(Src->getDesc())); + if (VL.isIdenticalTo(SrcVL) || !RISCV::isVLKnownLE(VL, SrcVL)) + continue; - MachineOperand &SrcVL = Src->getOperand(RISCVII::getVLOpNum(Src->getDesc())); - if (VL.isIdenticalTo(SrcVL) || !RISCV::isVLKnownLE(VL, SrcVL)) - return false; + if (!ensureDominates(VL, *Src)) + continue; - if (!ensureDominates(VL, *Src)) - return false; + if (VL.isImm()) + SrcVL.ChangeToImmediate(VL.getImm()); + else if (VL.isReg()) + SrcVL.ChangeToRegister(VL.getReg(), false); - if (VL.isImm()) - SrcVL.ChangeToImmediate(VL.getImm()); - else if (VL.isReg()) - SrcVL.ChangeToRegister(VL.getReg(), false); + Changed = true; + } // TODO: For instructions with a passthru, we could clear the passthru // and tail policy since we've just proven the tail is not demanded. - return true; + return Changed; } /// Check if an operand is an immediate or a materialized ADDI $x0, imm. 
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll index 75537406f351..372b07e0137b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll @@ -34,9 +34,8 @@ define @masked_load_passthru_nxv1i8(ptr %a, ; ZVE32: # %bb.0: ; ZVE32-NEXT: csrr a1, vlenb ; ZVE32-NEXT: srli a1, a1, 3 -; ZVE32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; ZVE32-NEXT: vmv.v.i v8, 0 ; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; ZVE32-NEXT: vmv.v.i v8, 0 ; ZVE32-NEXT: vle8.v v8, (a0), v0.t ; ZVE32-NEXT: ret %load = call @llvm.masked.load.nxv1i8(ptr %a, i32 1, %mask, zeroinitializer) diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index 10a92f0188a9..1cbb980aebff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3063,9 +3063,9 @@ define @vmv_v_x( %a, i32 %x, iXLen %vl) { define @vmv_v_v( %a, %b, %c, %m, iXLen %vl) { ; NOVLOPT-LABEL: vmv_v_v: ; NOVLOPT: # %bb.0: -; NOVLOPT-NEXT: vsetvli a1, zero, e8, mf8, tu, ma +; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, tu, ma ; NOVLOPT-NEXT: vmv.v.v v8, v9 -; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; NOVLOPT-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmerge.vvm v8, v8, v10, v0 ; NOVLOPT-NEXT: ret ; -- cgit v1.2.3 From c4d7ea8049688a1d6d6d93129893fd1700a9f7e5 Mon Sep 17 00:00:00 2001 From: Javier Lopez-Gomez Date: Wed, 18 Jun 2025 23:47:30 +0200 Subject: [llvm-debuginfo-analyzer] Apply various memory savings in Core/LVxxx base classes (#144399) This small changelist reduces memory footprint of instances of the Core classes. Specifically, - For `LVProperties`, use underlying type of `uint32_t` if there are at most 32 properties to keep track of. Otherwise, fallback to the generic `std::bitset`. 
The use of `llvm::SmallBitVector` is disregarded in this case, as the upper bound on the size of the bitset can be determined statically (no heap alloc ever needed). - Reorder members in `LVElement` s.t. padding between members is reduced. - `LVScopeCompileUnit`: fix a couple of members which should be `static constexpr` instead. --- .../llvm/DebugInfo/LogicalView/Core/LVElement.h | 10 ++++---- .../llvm/DebugInfo/LogicalView/Core/LVScope.h | 4 +-- .../llvm/DebugInfo/LogicalView/Core/LVSupport.h | 29 ++++++++++++++++++---- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h index b4501db190fe..0e7be45abfef 100644 --- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h +++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h @@ -107,18 +107,18 @@ class LLVM_ABI LVElement : public LVObject { IsAnonymous, LastEntry }; - // Typed bitvector with properties for this element. - LVProperties Properties; static LVElementDispatch Dispatch; - /// RTTI. - const LVSubclassID SubclassID; - // Indexes in the String Pool. size_t NameIndex = 0; size_t QualifiedNameIndex = 0; size_t FilenameIndex = 0; + // Typed bitvector with properties for this element. + LVProperties Properties; + /// RTTI. + const LVSubclassID SubclassID; + uint16_t AccessibilityCode : 2; // DW_AT_accessibility. uint16_t InlineCode : 2; // DW_AT_inline. uint16_t VirtualityCode : 2; // DW_AT_virtuality. diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h index 5715a37185b2..a453923d032e 100644 --- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h +++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h @@ -473,7 +473,7 @@ class LLVM_ABI LVScopeCompileUnit final : public LVScope { // Record scope sizes indexed by lexical level. 
// Setting an initial size that will cover a very deep nested scopes. - const size_t TotalInitialSize = 8; + static constexpr size_t TotalInitialSize = 8; using LVTotalsEntry = std::pair; SmallVector Totals; // Maximum seen lexical level. It is used to control how many entries @@ -510,7 +510,7 @@ public: void addMapping(LVLine *Line, LVSectionIndex SectionIndex); LVLineRange lineRange(LVLocation *Location) const; - LVNameInfo NameNone = {UINT64_MAX, 0}; + static constexpr LVNameInfo NameNone = {UINT64_MAX, 0}; void addPublicName(LVScope *Scope, LVAddress LowPC, LVAddress HighPC) { PublicNames.emplace(std::piecewise_construct, std::forward_as_tuple(Scope), std::forward_as_tuple(LowPC, HighPC - LowPC)); diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSupport.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSupport.h index 01137f80c0f8..058ca2da9a96 100644 --- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSupport.h +++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSupport.h @@ -13,7 +13,6 @@ #ifndef LLVM_DEBUGINFO_LOGICALVIEW_CORE_LVSUPPORT_H #define LLVM_DEBUGINFO_LOGICALVIEW_CORE_LVSUPPORT_H -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/Twine.h" #include "llvm/DebugInfo/LogicalView/Core/LVStringPool.h" #include "llvm/Support/Compiler.h" @@ -21,9 +20,11 @@ #include "llvm/Support/Format.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include +#include namespace llvm { namespace logicalview { @@ -38,14 +39,32 @@ using LVLexicalIndex = // Used to record specific characteristics about the objects. template class LVProperties { - SmallBitVector Bits = SmallBitVector(static_cast(T::LastEntry) + 1); + static constexpr unsigned N_PROPS = static_cast(T::LastEntry); + // Use uint32_t as the underlying type if the `T` enum has at most 32 + // enumerators; otherwise, fallback to the generic `std::bitset` case. 
+ std::conditional_t<(N_PROPS > 32), std::bitset, uint32_t> Bits{}; public: LVProperties() = default; - void set(T Idx) { Bits[static_cast(Idx)] = 1; } - void reset(T Idx) { Bits[static_cast(Idx)] = 0; } - bool get(T Idx) const { return Bits[static_cast(Idx)]; } + void set(T Idx) { + if constexpr (std::is_same_v) + Bits |= 1 << static_cast(Idx); + else + Bits.set(static_cast(Idx)); + } + void reset(T Idx) { + if constexpr (std::is_same_v) + Bits &= ~(1 << static_cast(Idx)); + else + Bits.reset(static_cast(Idx)); + } + bool get(T Idx) const { + if constexpr (std::is_same_v) + return Bits & (1 << static_cast(Idx)); + else + return Bits[static_cast(Idx)]; + } }; // Generate get, set and reset 'bool' functions for LVProperties instances. -- cgit v1.2.3 From 51aa6a4993ea18c968a087352d1cf569c840c41f Mon Sep 17 00:00:00 2001 From: Ebuka Ezike Date: Wed, 18 Jun 2025 22:48:24 +0100 Subject: [lldb-dap] Use protocol types for ReadMemory request (#144552) Read memory from process instead of target. --- .../API/tools/lldb-dap/memory/TestDAP_memory.py | 17 ++- .../lldb-dap/Handler/ReadMemoryRequestHandler.cpp | 143 ++++----------------- lldb/tools/lldb-dap/Handler/RequestHandler.h | 9 +- lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp | 37 ++++++ lldb/tools/lldb-dap/Protocol/ProtocolRequests.h | 36 ++++++ lldb/unittests/DAP/ProtocolTypesTest.cpp | 33 +++++ 6 files changed, 152 insertions(+), 123 deletions(-) diff --git a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py index 74062f3ab216..55fb4a961e78 100644 --- a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py +++ b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py @@ -111,8 +111,17 @@ class TestDAP_memory(lldbdap_testcase.DAPTestCaseBase): # VS Code sends those in order to check if a `memoryReference` can actually be dereferenced. 
mem = self.dap_server.request_readMemory(memref, 0, 0) self.assertEqual(mem["success"], True) - self.assertEqual(mem["body"]["data"], "") + self.assertNotIn( + "data", mem["body"], f"expects no data key in response: {mem!r}" + ) + + # Reads at offset 0x0 return unreadable bytes + bytes_to_read = 6 + mem = self.dap_server.request_readMemory("0x0", 0, bytes_to_read) + self.assertEqual(mem["body"]["unreadableBytes"], bytes_to_read) + + # Reads with invalid address fails. + mem = self.dap_server.request_readMemory("-3204", 0, 10) + self.assertFalse(mem["success"], "expect fail on reading memory.") - # Reads at offset 0x0 fail - mem = self.dap_server.request_readMemory("0x0", 0, 6) - self.assertEqual(mem["success"], False) + self.continue_to_exit() diff --git a/lldb/tools/lldb-dap/Handler/ReadMemoryRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/ReadMemoryRequestHandler.cpp index 891c2af4f2f2..7065b6a24b55 100644 --- a/lldb/tools/lldb-dap/Handler/ReadMemoryRequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/ReadMemoryRequestHandler.cpp @@ -7,136 +7,47 @@ //===----------------------------------------------------------------------===// #include "DAP.h" -#include "EventHelper.h" #include "JSONUtils.h" #include "RequestHandler.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Base64.h" namespace lldb_dap { -// "ReadMemoryRequest": { -// "allOf": [ { "$ref": "#/definitions/Request" }, { -// "type": "object", -// "description": "Reads bytes from memory at the provided location. 
Clients -// should only call this request if the corresponding -// capability `supportsReadMemoryRequest` is true.", -// "properties": { -// "command": { -// "type": "string", -// "enum": [ "readMemory" ] -// }, -// "arguments": { -// "$ref": "#/definitions/ReadMemoryArguments" -// } -// }, -// "required": [ "command", "arguments" ] -// }] -// }, -// "ReadMemoryArguments": { -// "type": "object", -// "description": "Arguments for `readMemory` request.", -// "properties": { -// "memoryReference": { -// "type": "string", -// "description": "Memory reference to the base location from which data -// should be read." -// }, -// "offset": { -// "type": "integer", -// "description": "Offset (in bytes) to be applied to the reference -// location before reading data. Can be negative." -// }, -// "count": { -// "type": "integer", -// "description": "Number of bytes to read at the specified location and -// offset." -// } -// }, -// "required": [ "memoryReference", "count" ] -// }, -// "ReadMemoryResponse": { -// "allOf": [ { "$ref": "#/definitions/Response" }, { -// "type": "object", -// "description": "Response to `readMemory` request.", -// "properties": { -// "body": { -// "type": "object", -// "properties": { -// "address": { -// "type": "string", -// "description": "The address of the first byte of data returned. -// Treated as a hex value if prefixed with `0x`, or -// as a decimal value otherwise." -// }, -// "unreadableBytes": { -// "type": "integer", -// "description": "The number of unreadable bytes encountered after -// the last successfully read byte.\nThis can be -// used to determine the number of bytes that should -// be skipped before a subsequent -// `readMemory` request succeeds." -// }, -// "data": { -// "type": "string", -// "description": "The bytes read from memory, encoded using base64. 
-// If the decoded length of `data` is less than the -// requested `count` in the original `readMemory` -// request, and `unreadableBytes` is zero or -// omitted, then the client should assume it's -// reached the end of readable memory." -// } -// }, -// "required": [ "address" ] -// } -// } -// }] -// }, -void ReadMemoryRequestHandler::operator()( - const llvm::json::Object &request) const { - llvm::json::Object response; - FillResponse(request, response); - auto *arguments = request.getObject("arguments"); +// Reads bytes from memory at the provided location. +// +// Clients should only call this request if the corresponding capability +// `supportsReadMemoryRequest` is true +llvm::Expected +ReadMemoryRequestHandler::Run(const protocol::ReadMemoryArguments &args) const { + const lldb::addr_t raw_address = args.memoryReference + args.offset; - llvm::StringRef memoryReference = - GetString(arguments, "memoryReference").value_or(""); - auto addr_opt = DecodeMemoryReference(memoryReference); - if (!addr_opt.has_value()) { - response["success"] = false; - response["message"] = - "Malformed memory reference: " + memoryReference.str(); - dap.SendJSON(llvm::json::Value(std::move(response))); - return; - } - lldb::addr_t addr_int = *addr_opt; - addr_int += GetInteger(arguments, "offset").value_or(0); - const uint64_t count_requested = - GetInteger(arguments, "count").value_or(0); + lldb::SBProcess process = dap.target.GetProcess(); + if (!lldb::SBDebugger::StateIsStoppedState(process.GetState())) + return llvm::make_error(); + const uint64_t count_read = std::max(args.count, 1); // We also need support reading 0 bytes // VS Code sends those requests to check if a `memoryReference` // can be dereferenced. 
- const uint64_t count_read = std::max(count_requested, 1); - std::vector buf; - buf.resize(count_read); + protocol::ReadMemoryResponseBody response; + std::vector &buffer = response.data; + buffer.resize(count_read); + lldb::SBError error; - lldb::SBAddress addr{addr_int, dap.target}; - size_t count_result = - dap.target.ReadMemory(addr, buf.data(), count_read, error); - if (count_result == 0) { - response["success"] = false; - EmplaceSafeString(response, "message", error.GetCString()); - dap.SendJSON(llvm::json::Value(std::move(response))); - return; + const size_t memory_count = dap.target.GetProcess().ReadMemory( + raw_address, buffer.data(), buffer.size(), error); + + response.address = "0x" + llvm::utohexstr(raw_address); + + // reading memory may fail for multiple reasons. memory not readable, + // reading out of memory range and gaps in memory. return from + // the last readable byte. + if (error.Fail() && (memory_count < count_read)) { + response.unreadableBytes = count_read - memory_count; } - buf.resize(std::min(count_result, count_requested)); - llvm::json::Object body; - std::string formatted_addr = "0x" + llvm::utohexstr(addr_int); - body.try_emplace("address", formatted_addr); - body.try_emplace("data", llvm::encodeBase64(buf)); - response.try_emplace("body", std::move(body)); - dap.SendJSON(llvm::json::Value(std::move(response))); + buffer.resize(std::min(memory_count, args.count)); + return response; } } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.h b/lldb/tools/lldb-dap/Handler/RequestHandler.h index 054cc7a32131..e35b9830ab60 100644 --- a/lldb/tools/lldb-dap/Handler/RequestHandler.h +++ b/lldb/tools/lldb-dap/Handler/RequestHandler.h @@ -564,14 +564,17 @@ public: Run(const protocol::DisassembleArguments &args) const override; }; -class ReadMemoryRequestHandler : public LegacyRequestHandler { +class ReadMemoryRequestHandler final + : public RequestHandler> { public: - using 
LegacyRequestHandler::LegacyRequestHandler; + using RequestHandler::RequestHandler; static llvm::StringLiteral GetCommand() { return "readMemory"; } FeatureSet GetSupportedFeatures() const override { return {protocol::eAdapterFeatureReadMemoryRequest}; } - void operator()(const llvm::json::Object &request) const override; + llvm::Expected + Run(const protocol::ReadMemoryArguments &args) const override; }; class CancelRequestHandler : public RequestHandler @@ -480,4 +482,39 @@ json::Value toJSON(const DisassembleResponseBody &DRB) { return json::Object{{"instructions", DRB.instructions}}; } +bool fromJSON(const json::Value &Params, ReadMemoryArguments &RMA, + json::Path P) { + json::ObjectMapper O(Params, P); + + const json::Object *rma_obj = Params.getAsObject(); + constexpr llvm::StringRef ref_key = "memoryReference"; + const std::optional memory_ref = rma_obj->getString(ref_key); + if (!memory_ref) { + P.field(ref_key).report("missing value"); + return false; + } + + const std::optional addr_opt = + DecodeMemoryReference(*memory_ref); + if (!addr_opt) { + P.field(ref_key).report("Malformed memory reference"); + return false; + } + + RMA.memoryReference = *addr_opt; + + return O && O.map("count", RMA.count) && O.mapOptional("offset", RMA.offset); +} + +json::Value toJSON(const ReadMemoryResponseBody &RMR) { + json::Object result{{"address", RMR.address}}; + + if (RMR.unreadableBytes != 0) + result.insert({"unreadableBytes", RMR.unreadableBytes}); + if (!RMR.data.empty()) + result.insert({"data", llvm::encodeBase64(RMR.data)}); + + return result; +} + } // namespace lldb_dap::protocol diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h index 01b8f2445c9f..7d9a99fdacce 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h @@ -839,6 +839,42 @@ bool fromJSON(const llvm::json::Value &, DisassembleResponseBody &, llvm::json::Path); llvm::json::Value 
toJSON(const DisassembleResponseBody &); +/// Arguments for `readMemory` request. +struct ReadMemoryArguments { + /// Memory reference to the base location from which data should be read. + lldb::addr_t memoryReference; + + /// Offset (in bytes) to be applied to the reference location before reading + /// data. Can be negative. + int64_t offset = 0; + + /// Number of bytes to read at the specified location and offset. + uint64_t count; +}; +bool fromJSON(const llvm::json::Value &, ReadMemoryArguments &, + llvm::json::Path); + +/// Response to `readMemory` request. +struct ReadMemoryResponseBody { + /// The address of the first byte of data returned. + /// Treated as a hex value if prefixed with `0x`, or as a decimal value + /// otherwise. + std::string address; + + /// The number of unreadable bytes encountered after the last successfully + /// read byte. + /// This can be used to determine the number of bytes that should be skipped + /// before a subsequent `readMemory` request succeeds. + uint64_t unreadableBytes = 0; + + /// The bytes read from memory, encoded using base64. If the decoded length + /// of `data` is less than the requested `count` in the original `readMemory` + /// request, and `unreadableBytes` is zero or omitted, then the client should + /// assume it's reached the end of readable memory. 
+ std::vector data; +}; +llvm::json::Value toJSON(const ReadMemoryResponseBody &); + } // namespace lldb_dap::protocol #endif diff --git a/lldb/unittests/DAP/ProtocolTypesTest.cpp b/lldb/unittests/DAP/ProtocolTypesTest.cpp index 46a09f090fea..9c93eb8c94b0 100644 --- a/lldb/unittests/DAP/ProtocolTypesTest.cpp +++ b/lldb/unittests/DAP/ProtocolTypesTest.cpp @@ -765,3 +765,36 @@ TEST(ProtocolTypesTest, StepInTarget) { EXPECT_EQ(target.endLine, deserialized_target->endLine); EXPECT_EQ(target.endColumn, deserialized_target->endColumn); } + +TEST(ProtocolTypesTest, ReadMemoryArguments) { + ReadMemoryArguments args; + args.count = 20; + args.memoryReference = 43962; + args.offset = 0; + + llvm::Expected expected = + parse(R"({"memoryReference":"-4000", "count": 20})"); + ASSERT_THAT_EXPECTED(expected, llvm::Failed()); + expected = parse( + R"({"memoryReference":"0xabba", "count": 20})"); + ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); + + EXPECT_EQ(args.count, expected->count); + EXPECT_EQ(args.memoryReference, expected->memoryReference); + EXPECT_EQ(args.offset, expected->offset); +} + +TEST(ProtocolTypesTest, ReadMemoryResponseBody) { + ReadMemoryResponseBody response; + response.address = "0xdeadbeef"; + const std::string data_str = "hello world!"; + std::transform(data_str.begin(), data_str.end(), + std::back_inserter(response.data), + [](char letter) { return std::byte(letter); }); + response.unreadableBytes = 1; + + Expected expected = json::parse( + R"({ "address": "0xdeadbeef", "data": "aGVsbG8gd29ybGQh", "unreadableBytes": 1})"); + ASSERT_THAT_EXPECTED(expected, llvm::Succeeded()); + EXPECT_EQ(pp(*expected), pp(response)); +} \ No newline at end of file -- cgit v1.2.3 From 118bfcda46c17349575217bc901e8e5942521955 Mon Sep 17 00:00:00 2001 From: Jianhui Li Date: Wed, 18 Jun 2025 14:52:03 -0700 Subject: [MLIR][XEGPU] Add blocking support for scatter ops (#144766) Add blocking support for scatter ops: Create_tdesc, update, prefetch, load and store. 
It also enables the load/store with chunk size. --- .../lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp | 42 ++++++-- mlir/test/Dialect/XeGPU/xegpu-blocking.mlir | 113 +++++++++++++++++++-- .../test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp | 14 ++- 3 files changed, 142 insertions(+), 27 deletions(-) diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp index a3826c56e1f6..3950e8f70d1c 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp @@ -134,11 +134,13 @@ XeGPUBlockingPass::getTileShape(const T &operandOrResult) const { std::optional> XeGPUBlockingPass::getTileShape(Operation *op) const { - if (isa(op)) + if (isa(op)) return getTileShape(op->getOpResult(0)); - if (isa(op)) + if (isa(op)) return getTileShape(op->getOpOperand(0)); - if (isa(op)) + if (isa(op)) return getTileShape(op->getOpOperand(1)); if (isa(op)) { @@ -295,12 +297,36 @@ void XeGPUBlockingPass::runOnOperation() { Type elemTy = type.getElementType(); Type newTy; - if (auto tdescTy = dyn_cast(type)) - newTy = xegpu::TensorDescType::get( - ctx, tileShape, elemTy, tdescTy.getEncoding(), - tdescTy.getLayoutAttr().dropInstData()); - else + if (auto tdescTy = dyn_cast(type)) { + + Attribute encoding = tdescTy.getEncoding(); + // If the encoding is a ScatterTensorDescAttr, we need to + // potentially adjust the chunk size based on the inst_data. 
+ if (tdescTy.isScattered()) { + auto scatterAttr = + llvm::dyn_cast_if_present(encoding); + int64_t chunkSize = scatterAttr.getChunkSize().getInt(); + + if (chunkSize > 1) { + int64_t blockedChunkSize = chunkSize; + auto instData = tdescTy.getLayoutAttr().getInstData(); + if (!instData.empty()) + blockedChunkSize = instData.asArrayRef().back(); + + // To create a new attribute with a different chunk_size: + auto newEncoding = xegpu::ScatterTensorDescAttr::get( + ctx, scatterAttr.getMemorySpace().getValue(), blockedChunkSize); + + encoding = newEncoding; + } + } + + newTy = + xegpu::TensorDescType::get(ctx, tileShape, elemTy, encoding, + tdescTy.getLayoutAttr().dropInstData()); + } else { newTy = type.clone(tileShape, elemTy); + } std::optional> ratio = computeShapeRatio(type.getShape(), tileShape); diff --git a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir index 67d3bd9b393c..f977ba3c11bc 100644 --- a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir +++ b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir @@ -250,8 +250,7 @@ gpu.module @test_kernel { // ----- #l = #xegpu.layout #r = #xegpu.layout - -gpu.module @kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce, api=OpenCL, #spirv.resource_limits<>>} { +gpu.module @test_kernel { gpu.func @reduce_dim_0(%a: memref<16x512xf32>, %b: memref<512xf32>) kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} { %acc = arith.constant dense<0.0> : vector<64xf32> %c64 = arith.constant 64 : index @@ -271,8 +270,7 @@ gpu.module @kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce< // ----- #l = #xegpu.layout #r = #xegpu.layout - -gpu.module @kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce, api=OpenCL, #spirv.resource_limits<>>} { +gpu.module @test_kernel { gpu.func @reduce_dim_1(%a: memref<512x32xf32>, %b: memref<512xf32>) kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = 
#spirv.entry_point_abi<>} { %c1 = arith.constant 1 : index %c32 = arith.constant 32 : index @@ -299,8 +297,7 @@ gpu.module @kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce< // ----- #r = #xegpu.layout #l = #xegpu.layout - -gpu.module @kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce, api=OpenCL, #spirv.resource_limits<>>} { +gpu.module @test_kernel { gpu.func @broadcast_dim_0(%a: memref<512xf32>, %b: memref<16x512xf32>) kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} { %c64 = arith.constant 64 : index @@ -319,8 +316,7 @@ gpu.module @kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce< // ----- #r = #xegpu.layout #l = #xegpu.layout - -gpu.module @kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce, api=OpenCL, #spirv.resource_limits<>>} { +gpu.module @test_kernel { gpu.func @broadcast_dim_1(%a: memref<512xf32>, %b: memref<16x512xf32>) kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} { %c32 = arith.constant 32 : index @@ -340,8 +336,7 @@ gpu.module @kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce< // ----- #l = #xegpu.layout #t = #xegpu.layout - -gpu.module @kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce, api=OpenCL, #spirv.resource_limits<>>} { +gpu.module @test_kernel { gpu.func @transpose(%a: memref<512x8xf32>, %b: memref<8x512xf32>) kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} { %c32 = arith.constant 32 : index @@ -355,4 +350,100 @@ gpu.module @kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce< xegpu.store_nd %2, %3: vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32, #t> gpu.return } -} \ No newline at end of file +} + +// ----- +gpu.module @test_kernel { + // CHECK-LABEL: test_prefetch_load_store_update + // CHECK-SAME: [[arg0:%.+]]: ui64 + // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : 
ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>> + // CHECK-COUNT-2: xegpu.prefetch {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>> + // CHECK-COUNT-2: xegpu.update_offset {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xindex> + // CHECK-COUNT-2: xegpu.load {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1> -> vector<16xf32> + // CHECK-COUNT-2: xegpu.store {{.*}} : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1> + + gpu.func @test_prefetch_load_store_update(%src: ui64) { + + %cst = arith.constant dense<[ + 0, 8, 16, 24, 32, 40, 48, 56, + 64, 72, 80, 88, 96, 104, 112, 120, + 128, 136, 144, 152, 160, 168, 176, 184, + 192, 200, 208, 216, 224, 232, 240, 248 + ]> : vector<32xindex> + + %tdesc = xegpu.create_tdesc %src, %cst : ui64, vector<32xindex> -> !xegpu.tensor_desc<32xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.layout> + xegpu.prefetch %tdesc: !xegpu.tensor_desc<32xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.layout> + + %delta = arith.constant dense<[ + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 64, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 256 + ]> : vector<32xindex> + %new_tdesc = xegpu.update_offset %tdesc, %delta + : !xegpu.tensor_desc<32xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.layout>, vector<32xindex> + + %c17 = arith.constant 17: index + %mask = vector.create_mask %c17: vector<32xi1> + + %ld_vec = xegpu.load %new_tdesc, %mask: !xegpu.tensor_desc<32xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.layout>, vector<32xi1> -> vector<32xf32> + + %st_vec = arith.addf %ld_vec, %ld_vec : vector<32xf32> + xegpu.store %st_vec, %tdesc, %mask: + vector<32xf32>, + !xegpu.tensor_desc<32xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.layout>, + vector<32xi1> + + gpu.return + } + +} + +// ----- + +gpu.module @test_kernel { + // CHECK-LABEL: test_prefetch_load_store_update_chunk + // CHECK-SAME: 
[[arg0:%.+]]: ui64 + // CHECK-COUNT-4: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16x2xf32, #xegpu.scatter_tdesc_attr> + // CHECK-COUNT-4: xegpu.prefetch {{.*}} : !xegpu.tensor_desc<16x2xf32, #xegpu.scatter_tdesc_attr> + // CHECK-COUNT-4: xegpu.update_offset {{.*}} : !xegpu.tensor_desc<16x2xf32, #xegpu.scatter_tdesc_attr>, vector<16xindex> + // CHECK-COUNT-4: xegpu.load {{.*}} : !xegpu.tensor_desc<16x2xf32, #xegpu.scatter_tdesc_attr>, vector<16xi1> -> vector<2x16xf32> + // CHECK-COUNT-4: xegpu.store {{.*}} : vector<2x16xf32>, !xegpu.tensor_desc<16x2xf32, #xegpu.scatter_tdesc_attr>, vector<16xi1> + + gpu.func @test_prefetch_load_store_update_chunk(%src: ui64) { + + %cst = arith.constant dense<[ + 0, 8, 16, 24, 32, 40, 48, 56, + 64, 72, 80, 88, 96, 104, 112, 120, + 128, 136, 144, 152, 160, 168, 176, 184, + 192, 200, 208, 216, 224, 232, 240, 248 + ]> : vector<32xindex> + + %tdesc = xegpu.create_tdesc %src, %cst : ui64, vector<32xindex> -> !xegpu.tensor_desc<32x4xf32, #xegpu.scatter_tdesc_attr, #xegpu.layout> + xegpu.prefetch %tdesc: !xegpu.tensor_desc<32x4xf32, #xegpu.scatter_tdesc_attr, #xegpu.layout> + + %delta = arith.constant dense<[ + 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 64, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 256 + ]> : vector<32xindex> + %new_tdesc = xegpu.update_offset %tdesc, %delta + : !xegpu.tensor_desc<32x4xf32, #xegpu.scatter_tdesc_attr, #xegpu.layout>, vector<32xindex> + + %c17 = arith.constant 17: index + %mask = vector.create_mask %c17: vector<32xi1> + + %ld_vec = xegpu.load %new_tdesc, %mask <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, transpose}>: !xegpu.tensor_desc<32x4xf32, #xegpu.scatter_tdesc_attr, #xegpu.layout>, vector<32xi1> -> vector<4x32xf32> + + %st_vec = arith.addf %ld_vec, %ld_vec : vector<4x32xf32> + xegpu.store %st_vec, %tdesc, %mask <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, transpose}>: + 
vector<4x32xf32>, + !xegpu.tensor_desc<32x4xf32, #xegpu.scatter_tdesc_attr, #xegpu.layout>, + vector<32xi1> + + gpu.return + } +} + + diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp index 4400d6d9625f..c84eb7419854 100644 --- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp +++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp @@ -102,14 +102,14 @@ struct TestXeGPUUnrollingPatterns // attribute if (auto tdescTy = dyn_cast(type)) { Attribute encoding = tdescTy.getEncoding(); - auto layout = llvm::dyn_cast_if_present( - tdescTy.getLayout()); + auto layout = tdescTy.getLayoutAttr(); // If the encoding is a ScatterTensorDescAttr, we need to // potentially adjust the chunk size based on the inst_data. - if (encoding && mlir::isa(encoding)) { + if (tdescTy.isScattered()) { auto scatterAttr = - mlir::dyn_cast(encoding); + llvm::dyn_cast_if_present( + encoding); int64_t chunkSize = scatterAttr.getChunkSize().getInt(); if (chunkSize > 1) { @@ -118,12 +118,10 @@ struct TestXeGPUUnrollingPatterns if (!instData.empty()) blockedChunkSize = instData.asArrayRef().back(); - auto chunkSizeAttr = mlir::IntegerAttr::get( - mlir::IntegerType::get(ctx, 64), blockedChunkSize); - // To create a new attribute with a different chunk_size: auto newEncoding = xegpu::ScatterTensorDescAttr::get( - ctx, scatterAttr.getMemorySpace(), chunkSizeAttr); + ctx, scatterAttr.getMemorySpace().getValue(), + blockedChunkSize); encoding = newEncoding; } -- cgit v1.2.3 From 7b6963ea672f8fedbbaefd15eaca943495709d37 Mon Sep 17 00:00:00 2001 From: Muhammad Omair Javaid Date: Thu, 19 Jun 2025 03:06:46 +0500 Subject: [compiler-rt] [Fuzzer] Fix tests linking buildbot failure (#144495) Fix for #144495 by 6f4add3 broke sanitizer-aarch64-linux buildbot. compiler-rt/lib/fuzzer/tests build failed because the linker was looking gcc_s without '-l' appended. The CMake script was adding the library name without the required '-l' prefix. 
This patch adds the -l prefix changing gcc_s to -lgcc_s and gcc to -lgcc. https://lab.llvm.org/buildbot/#/builders/51/builds/18170 --- compiler-rt/lib/fuzzer/tests/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/fuzzer/tests/CMakeLists.txt b/compiler-rt/lib/fuzzer/tests/CMakeLists.txt index 543f486a9d50..c5885ccccd20 100644 --- a/compiler-rt/lib/fuzzer/tests/CMakeLists.txt +++ b/compiler-rt/lib/fuzzer/tests/CMakeLists.txt @@ -45,10 +45,10 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND list(APPEND FUZZER_UNWINDER_LIBS ${COMPILER_RT_UNWINDER_LINK_LIBS}) elseif(COMPILER_RT_HAS_GCC_S_LIB) # As a fallback, use the shared libgcc_s library. - list(APPEND FUZZER_UNWINDER_LIBS gcc_s) + list(APPEND FUZZER_UNWINDER_LIBS -lgcc_s) elseif(COMPILER_RT_HAS_GCC_LIB) # As a final fallback, use the static libgcc library. - list(APPEND FUZZER_UNWINDER_LIBS gcc) + list(APPEND FUZZER_UNWINDER_LIBS -lgcc) elseif(NOT COMPILER_RT_USE_BUILTINS_LIBRARY) # If no unwinder is found and we aren't using the builtins library message(FATAL_ERROR "Fuzzer tests require a suitable unwinder, but none was found.") -- cgit v1.2.3 From 00ae89a1cbece94412cf832e47fdf449a611ad24 Mon Sep 17 00:00:00 2001 From: zGoldthorpe Date: Wed, 18 Jun 2025 16:35:01 -0600 Subject: Revert "[IPO] Added attributor for identifying invariant loads" (#144808) Reverts llvm/llvm-project#141800 The implementation critically misunderstands the `AAMemoryBehavior` attributor, which it relies on heavily. @shiltian, since I do not have commit permissions. 
--- llvm/include/llvm/Transforms/IPO/Attributor.h | 41 --- llvm/lib/Transforms/IPO/Attributor.cpp | 2 - llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 339 ------------------ .../Attributor/AMDGPU/tag-invariant-loads.ll | 382 --------------------- .../Transforms/Attributor/dereferenceable-1.ll | 1 + .../Attributor/value-simplify-local-remote.ll | 22 +- 6 files changed, 15 insertions(+), 772 deletions(-) delete mode 100644 llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index f19f3292c479..e6eb756df987 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6335,47 +6335,6 @@ struct AAUnderlyingObjects : AbstractAttribute { AA::ValueScope Scope = AA::Interprocedural) const = 0; }; -/// An abstract interface for identifying pointers from which loads can be -/// marked invariant. -struct AAInvariantLoadPointer : public AbstractAttribute { - AAInvariantLoadPointer(const IRPosition &IRP) : AbstractAttribute(IRP) {} - - /// See AbstractAttribute::isValidIRPositionForInit - static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) { - if (!IRP.getAssociatedType()->isPointerTy()) - return false; - - return AbstractAttribute::isValidIRPositionForInit(A, IRP); - } - - /// Create an abstract attribute view for the position \p IRP. - static AAInvariantLoadPointer &createForPosition(const IRPosition &IRP, - Attributor &A); - - /// Return true if the pointer's contents are known to remain invariant. - virtual bool isKnownInvariant() const = 0; - virtual bool isKnownLocallyInvariant() const = 0; - - /// Return true if the pointer's contents are assumed to remain invariant. - virtual bool isAssumedInvariant() const = 0; - virtual bool isAssumedLocallyInvariant() const = 0; - - /// See AbstractAttribute::getName(). 
- StringRef getName() const override { return "AAInvariantLoadPointer"; } - - /// See AbstractAttribute::getIdAddr(). - const char *getIdAddr() const override { return &ID; } - - /// This function should return true if the type of the \p AA is - /// AAInvariantLoadPointer - static bool classof(const AbstractAttribute *AA) { - return (AA->getIdAddr() == &ID); - } - - /// Unique ID (due to the unique address). - static const char ID; -}; - /// An abstract interface for address space information. struct AAAddressSpace : public StateWrapper { AAAddressSpace(const IRPosition &IRP, Attributor &A) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index a2548258ddaf..dac1f7a30c37 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -3612,8 +3612,6 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { if (SimplifyAllLoads) getAssumedSimplified(IRPosition::value(I), nullptr, UsedAssumedInformation, AA::Intraprocedural); - getOrCreateAAFor( - IRPosition::value(*LI->getPointerOperand())); getOrCreateAAFor( IRPosition::value(*LI->getPointerOperand())); } else { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 5cb8f888354b..3799a696f67a 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -191,7 +191,6 @@ PIPE_OPERATOR(AAInterFnReachability) PIPE_OPERATOR(AAPointerInfo) PIPE_OPERATOR(AAAssumptionInfo) PIPE_OPERATOR(AAUnderlyingObjects) -PIPE_OPERATOR(AAInvariantLoadPointer) PIPE_OPERATOR(AAAddressSpace) PIPE_OPERATOR(AAAllocationInfo) PIPE_OPERATOR(AAIndirectCallInfo) @@ -12534,342 +12533,6 @@ private: }; } // namespace -/// --------------------- Invariant Load Pointer ------------------------------- -namespace { - -struct AAInvariantLoadPointerImpl - : public StateWrapper, - AAInvariantLoadPointer> { - - enum { - // pointer does not alias 
within the bounds of the function - IS_NOALIAS = 1 << 0, - // pointer is not involved in any effectful instructions within the bounds - // of the function - IS_NOEFFECT = 1 << 1, - // loads are invariant within the bounds of the function - IS_LOCALLY_INVARIANT = 1 << 2, - // memory lifetime is constrained within the bounds of the function - IS_LOCALLY_CONSTRAINED = 1 << 3, - - IS_BEST_STATE = IS_NOALIAS | IS_NOEFFECT | IS_LOCALLY_INVARIANT | - IS_LOCALLY_CONSTRAINED, - }; - static_assert(getBestState() == IS_BEST_STATE, "Unexpected best state"); - - using Base = - StateWrapper, AAInvariantLoadPointer>; - - // the BitIntegerState is optimistic about IS_NOALIAS and IS_NOEFFECT, but - // pessimistic about IS_KNOWN_INVARIANT - AAInvariantLoadPointerImpl(const IRPosition &IRP, Attributor &A) - : Base(IRP) {} - - bool isKnownInvariant() const final { - return isKnownLocallyInvariant() && isKnown(IS_LOCALLY_CONSTRAINED); - } - - bool isKnownLocallyInvariant() const final { - if (isKnown(IS_LOCALLY_INVARIANT)) - return true; - return isKnown(IS_NOALIAS | IS_NOEFFECT); - } - - bool isAssumedInvariant() const final { - return isAssumedLocallyInvariant() && isAssumed(IS_LOCALLY_CONSTRAINED); - } - - bool isAssumedLocallyInvariant() const final { - if (isAssumed(IS_LOCALLY_INVARIANT)) - return true; - return isAssumed(IS_NOALIAS | IS_NOEFFECT); - } - - ChangeStatus updateImpl(Attributor &A) override { - ChangeStatus Changed = ChangeStatus::UNCHANGED; - - Changed |= updateNoAlias(A); - if (requiresNoAlias() && !isAssumed(IS_NOALIAS)) - return indicatePessimisticFixpoint(); - - Changed |= updateNoEffect(A); - - Changed |= updateLocalInvariance(A); - - return Changed; - } - - ChangeStatus manifest(Attributor &A) override { - if (!isKnownInvariant()) - return ChangeStatus::UNCHANGED; - - ChangeStatus Changed = ChangeStatus::UNCHANGED; - const Value *Ptr = &getAssociatedValue(); - const auto TagInvariantLoads = [&](const Use &U, bool &) { - if (U.get() != Ptr) - return true; - auto 
*I = dyn_cast(U.getUser()); - if (!I) - return true; - - // Ensure that we are only changing uses from the corresponding callgraph - // SSC in the case that the AA isn't run on the entire module - if (!A.isRunOn(I->getFunction())) - return true; - - if (I->hasMetadata(LLVMContext::MD_invariant_load)) - return true; - - if (auto *LI = dyn_cast(I)) { - LI->setMetadata(LLVMContext::MD_invariant_load, - MDNode::get(LI->getContext(), {})); - Changed = ChangeStatus::CHANGED; - } - return true; - }; - - (void)A.checkForAllUses(TagInvariantLoads, *this, *Ptr); - return Changed; - } - - /// See AbstractAttribute::getAsStr(). - const std::string getAsStr(Attributor *) const override { - if (isKnownInvariant()) - return "load-invariant pointer"; - return "non-invariant pointer"; - } - - /// See AbstractAttribute::trackStatistics(). - void trackStatistics() const override {} - -private: - /// Indicate that noalias is required for the pointer to be invariant. - bool requiresNoAlias() const { - switch (getPositionKind()) { - default: - // Conservatively default to require noalias. - return true; - case IRP_FLOAT: - case IRP_RETURNED: - case IRP_CALL_SITE: - return false; - case IRP_CALL_SITE_RETURNED: { - const auto &CB = cast(getAnchorValue()); - return !isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( - &CB, /*MustPreserveNullness=*/false); - } - case IRP_ARGUMENT: { - const Function *F = getAssociatedFunction(); - assert(F && "no associated function for argument"); - return !isCallableCC(F->getCallingConv()); - } - } - } - - bool isExternal() const { - const Function *F = getAssociatedFunction(); - if (!F) - return true; - return isCallableCC(F->getCallingConv()) && - getPositionKind() != IRP_CALL_SITE_RETURNED; - } - - ChangeStatus updateNoAlias(Attributor &A) { - if (isKnown(IS_NOALIAS) || !isAssumed(IS_NOALIAS)) - return ChangeStatus::UNCHANGED; - - // Try to use AANoAlias. 
- if (const auto *ANoAlias = A.getOrCreateAAFor( - getIRPosition(), this, DepClassTy::REQUIRED)) { - if (ANoAlias->isKnownNoAlias()) { - addKnownBits(IS_NOALIAS); - return ChangeStatus::CHANGED; - } - - if (!ANoAlias->isAssumedNoAlias()) { - removeAssumedBits(IS_NOALIAS); - return ChangeStatus::CHANGED; - } - - return ChangeStatus::UNCHANGED; - } - - // Try to infer noalias from argument attribute, since it is applicable for - // the duration of the function. - if (const Argument *Arg = getAssociatedArgument()) { - if (Arg->hasNoAliasAttr()) { - addKnownBits(IS_NOALIAS); - return ChangeStatus::UNCHANGED; - } - - // Noalias information is not provided, and cannot be inferred, - // so we conservatively assume the pointer aliases. - removeAssumedBits(IS_NOALIAS); - return ChangeStatus::CHANGED; - } - - return ChangeStatus::UNCHANGED; - } - - ChangeStatus updateNoEffect(Attributor &A) { - if (isKnown(IS_NOEFFECT) || !isAssumed(IS_NOEFFECT)) - return ChangeStatus::UNCHANGED; - - if (!getAssociatedFunction()) - return indicatePessimisticFixpoint(); - - const auto HasNoEffectLoads = [&](const Use &U, bool &) { - const auto *LI = dyn_cast(U.getUser()); - return !LI || !LI->mayHaveSideEffects(); - }; - if (!A.checkForAllUses(HasNoEffectLoads, *this, getAssociatedValue())) - return indicatePessimisticFixpoint(); - - // Try to use AAMemoryBehavior to infer readonly attribute. - if (const auto *AMemoryBehavior = A.getOrCreateAAFor( - getIRPosition(), this, DepClassTy::REQUIRED)) { - if (!AMemoryBehavior->isAssumedReadOnly()) - return indicatePessimisticFixpoint(); - - if (AMemoryBehavior->isKnownReadOnly()) { - addKnownBits(IS_NOEFFECT); - return ChangeStatus::UNCHANGED; - } - - return ChangeStatus::UNCHANGED; - } - - if (const Argument *Arg = getAssociatedArgument()) { - if (Arg->onlyReadsMemory()) { - addKnownBits(IS_NOEFFECT); - return ChangeStatus::UNCHANGED; - } - - // Readonly information is not provided, and cannot be inferred from - // AAMemoryBehavior. 
- return indicatePessimisticFixpoint(); - } - - return ChangeStatus::UNCHANGED; - } - - ChangeStatus updateLocalInvariance(Attributor &A) { - if (isKnown(IS_LOCALLY_INVARIANT) || !isAssumed(IS_LOCALLY_INVARIANT)) - return ChangeStatus::UNCHANGED; - - // try to infer invariance from underlying objects - const auto *AUO = A.getOrCreateAAFor( - getIRPosition(), this, DepClassTy::REQUIRED); - if (!AUO) - return ChangeStatus::UNCHANGED; - - bool UsedAssumedInformation = false; - const auto IsLocallyInvariantLoadIfPointer = [&](const Value &V) { - if (!V.getType()->isPointerTy()) - return true; - const auto *IsInvariantLoadPointer = - A.getOrCreateAAFor(IRPosition::value(V), this, - DepClassTy::REQUIRED); - // Conservatively fail if invariance cannot be inferred. - if (!IsInvariantLoadPointer) - return false; - - if (IsInvariantLoadPointer->isKnownLocallyInvariant()) - return true; - if (!IsInvariantLoadPointer->isAssumedLocallyInvariant()) - return false; - - UsedAssumedInformation = true; - return true; - }; - if (!AUO->forallUnderlyingObjects(IsLocallyInvariantLoadIfPointer)) - return indicatePessimisticFixpoint(); - - if (const auto *CB = dyn_cast(&getAnchorValue())) { - if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( - CB, /*MustPreserveNullness=*/false)) { - for (const Value *Arg : CB->args()) { - if (!IsLocallyInvariantLoadIfPointer(*Arg)) - return indicatePessimisticFixpoint(); - } - } - } - - if (!UsedAssumedInformation) { - // Pointer is known and not just assumed to be locally invariant. 
- addKnownBits(IS_LOCALLY_INVARIANT); - return ChangeStatus::CHANGED; - } - - return ChangeStatus::UNCHANGED; - } -}; - -struct AAInvariantLoadPointerFloating final : AAInvariantLoadPointerImpl { - AAInvariantLoadPointerFloating(const IRPosition &IRP, Attributor &A) - : AAInvariantLoadPointerImpl(IRP, A) {} -}; - -struct AAInvariantLoadPointerReturned final : AAInvariantLoadPointerImpl { - AAInvariantLoadPointerReturned(const IRPosition &IRP, Attributor &A) - : AAInvariantLoadPointerImpl(IRP, A) {} - - void initialize(Attributor &) override { - removeAssumedBits(IS_LOCALLY_CONSTRAINED); - } -}; - -struct AAInvariantLoadPointerCallSiteReturned final - : AAInvariantLoadPointerImpl { - AAInvariantLoadPointerCallSiteReturned(const IRPosition &IRP, Attributor &A) - : AAInvariantLoadPointerImpl(IRP, A) {} - - void initialize(Attributor &A) override { - const Function *F = getAssociatedFunction(); - assert(F && "no associated function for return from call"); - - if (!F->isDeclaration() && !F->isIntrinsic()) - return AAInvariantLoadPointerImpl::initialize(A); - - const auto &CB = cast(getAnchorValue()); - if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( - &CB, /*MustPreserveNullness=*/false)) - return AAInvariantLoadPointerImpl::initialize(A); - - if (F->onlyReadsMemory() && F->hasNoSync()) - return AAInvariantLoadPointerImpl::initialize(A); - - // At this point, the function is opaque, so we conservatively assume - // non-invariance. 
- indicatePessimisticFixpoint(); - } -}; - -struct AAInvariantLoadPointerArgument final : AAInvariantLoadPointerImpl { - AAInvariantLoadPointerArgument(const IRPosition &IRP, Attributor &A) - : AAInvariantLoadPointerImpl(IRP, A) {} - - void initialize(Attributor &) override { - const Function *F = getAssociatedFunction(); - assert(F && "no associated function for argument"); - - if (!isCallableCC(F->getCallingConv())) { - addKnownBits(IS_LOCALLY_CONSTRAINED); - return; - } - - if (!F->hasLocalLinkage()) - removeAssumedBits(IS_LOCALLY_CONSTRAINED); - } -}; - -struct AAInvariantLoadPointerCallSiteArgument final - : AAInvariantLoadPointerImpl { - AAInvariantLoadPointerCallSiteArgument(const IRPosition &IRP, Attributor &A) - : AAInvariantLoadPointerImpl(IRP, A) {} -}; -} // namespace - /// ------------------------ Address Space ------------------------------------ namespace { @@ -13375,7 +13038,6 @@ const char AAInterFnReachability::ID = 0; const char AAPointerInfo::ID = 0; const char AAAssumptionInfo::ID = 0; const char AAUnderlyingObjects::ID = 0; -const char AAInvariantLoadPointer::ID = 0; const char AAAddressSpace::ID = 0; const char AAAllocationInfo::ID = 0; const char AAIndirectCallInfo::ID = 0; @@ -13510,7 +13172,6 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo) -CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInvariantLoadPointer) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAllocationInfo) diff --git a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll b/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll deleted file mode 100644 index ace68a19bf41..000000000000 --- a/llvm/test/Transforms/Attributor/AMDGPU/tag-invariant-loads.ll +++ /dev/null @@ -1,382 +0,0 @@ -; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=attributor %s -S | FileCheck %s --check-prefix=AMDGCN - -@G = addrspace(1) global i32 zeroinitializer, align 4 -declare void @clobber(i32) #0 -declare ptr addrspace(1) @get_ptr() #0 -declare noalias ptr addrspace(1) @get_noalias_ptr() #0 -declare noalias ptr addrspace(1) @get_untouched_ptr() #1 - -define void @test_nonkernel(ptr addrspace(1) noalias %ptr) { -; AMDGCN-LABEL: define void @test_nonkernel( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6:[0-9]+]] -; AMDGCN-NEXT: ret void -; - %val = load i32, ptr addrspace(1) %ptr, align 4 - ;; may not be !invariant.load, as the caller may modify %ptr - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_plain(ptr addrspace(1) %ptr) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_plain( -; AMDGCN-SAME: ptr addrspace(1) nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %val = load i32, ptr addrspace(1) %ptr, align 4 - ;; may not be !invariant.load, as %ptr may alias a pointer in @clobber - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_noalias_ptr(ptr addrspace(1) noalias %ptr) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_ptr( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0:![0-9]+]] -; AMDGCN-NEXT: call void 
@clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %val = load i32, ptr addrspace(1) %ptr, align 4 - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_gep(ptr addrspace(1) %ptr) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_gep( -; AMDGCN-SAME: ptr addrspace(1) nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4 -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 - %val = load i32, ptr addrspace(1) %gep, align 4 - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_noalias_gep(ptr addrspace(1) noalias %ptr) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_noalias_gep( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly align 4 captures(none) [[PTR:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[PTR]], i32 4 -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 - %val = load i32, ptr addrspace(1) %gep, align 4 - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_swap(ptr addrspace(1) noalias %ptr, i32 inreg %swap) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_swap( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]], i32 inreg [[SWAP:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: store i32 [[SWAP]], ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %val = load i32, ptr 
addrspace(1) %ptr, align 4 - ;; cannot be !invariant.load due to the write to %ptr - store i32 %swap, ptr addrspace(1) %ptr, align 4 - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_volatile(ptr addrspace(1) noalias %ptr) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_volatile( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef align 4 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { -; AMDGCN-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %val = load volatile i32, ptr addrspace(1) %ptr, align 4 - ;; volatiles loads cannot be !invariant.load - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_unordered(ptr addrspace(1) noalias %ptr) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_unordered( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] unordered, align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %val = load atomic i32, ptr addrspace(1) %ptr unordered, align 4 - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_monotonic(ptr addrspace(1) noalias %ptr) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_monotonic( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[PTR]] monotonic, align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %val = load atomic i32, ptr addrspace(1) %ptr monotonic, align 4 - ;; atomic loads with ordering guarantees may have side effects - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void 
@test_global() { -; AMDGCN-LABEL: define amdgpu_kernel void @test_global( -; AMDGCN-SAME: ) #[[ATTR2]] { -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) @G, align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %val = load i32, ptr addrspace(1) @G, align 4 - ;; is not an !invariant.load as global variables may change - call void @clobber(i32 %val) - ret void -} - -define internal i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) { -; AMDGCN-LABEL: define internal i32 @test_internal_noalias_load( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: ret i32 [[VAL]] -; - %val = load i32, ptr addrspace(1) %ptr, align 4 - ;; is an !invariant.load due to its only caller @test_call_internal_noalias - ret i32 %val -} - -define amdgpu_kernel void @test_call_internal_noalias(ptr addrspace(1) noalias %ptr) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_noalias( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_noalias_load(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR7:[0-9]+]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %val = call i32 @test_internal_noalias_load(ptr addrspace(1) %ptr) - call void @clobber(i32 %val) - ret void -} - -define internal i32 @test_internal_load(ptr addrspace(1) noalias %ptr) { -; AMDGCN-LABEL: define internal i32 @test_internal_load( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4]] { -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: ret i32 [[VAL]] -; 
- %val = load i32, ptr addrspace(1) %ptr, align 4 - ;; may not be an !invariant.load since the pointer in @test_call_internal may alias - ret i32 %val -} - -define amdgpu_kernel void @test_call_internal(ptr addrspace(1) %ptr) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal( -; AMDGCN-SAME: ptr addrspace(1) nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_load(ptr addrspace(1) nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR7]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %val = call i32 @test_internal_load(ptr addrspace(1) %ptr) - call void @clobber(i32 %val) - ret void -} - -define internal i32 @test_internal_written(ptr addrspace(1) %ptr) { -; AMDGCN-LABEL: define internal i32 @test_internal_written( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) dereferenceable_or_null(4) [[PTR:%.*]]) #[[ATTR4]] { -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: ret i32 [[VAL]] -; - %val = load i32, ptr addrspace(1) %ptr, align 4 - ;; cannot be an !invariant.load because of the write in caller @test_call_internal_written - ret i32 %val -} - -define amdgpu_kernel void @test_call_internal_written(ptr addrspace(1) noalias %ptr, i32 inreg %x) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_call_internal_written( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree captures(none) [[PTR:%.*]], i32 inreg [[X:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[VAL:%.*]] = call i32 @test_internal_written(ptr addrspace(1) noalias nofree noundef readonly align 4 captures(none) [[PTR]]) #[[ATTR7]] -; AMDGCN-NEXT: store i32 [[X]], ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %val = call i32 @test_internal_written(ptr addrspace(1) %ptr) - store i32 %x, ptr addrspace(1) %ptr - call void @clobber(i32 %val) - ret void 
-} - -define amdgpu_kernel void @test_call_ptr() { -; AMDGCN-LABEL: define amdgpu_kernel void @test_call_ptr( -; AMDGCN-SAME: ) #[[ATTR2]] { -; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_ptr() #[[ATTR6]] -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %ptr = call ptr addrspace(1) @get_ptr() - %val = load i32, ptr addrspace(1) %ptr, align 4 - ;; may not be an !invariant.load since %ptr may alias - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_call_noalias_ptr() { -; AMDGCN-LABEL: define amdgpu_kernel void @test_call_noalias_ptr( -; AMDGCN-SAME: ) #[[ATTR2]] { -; AMDGCN-NEXT: [[PTR:%.*]] = call align 4 ptr addrspace(1) @get_noalias_ptr() #[[ATTR6]] -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %ptr = call ptr addrspace(1) @get_noalias_ptr() - %val = load i32, ptr addrspace(1) %ptr, align 4 - ;; may not be an !invariant.load since %ptr may have been written to before returning - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_call_untouched_ptr() { -; AMDGCN-LABEL: define amdgpu_kernel void @test_call_untouched_ptr( -; AMDGCN-SAME: ) #[[ATTR2]] { -; AMDGCN-NEXT: [[PTR:%.*]] = call noalias align 4 ptr addrspace(1) @get_untouched_ptr() #[[ATTR8:[0-9]+]] -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %ptr = call ptr addrspace(1) @get_untouched_ptr() - %val = load i32, ptr addrspace(1) %ptr, align 4 - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_make_buffer(ptr addrspace(1) %ptr) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer( -; AMDGCN-SAME: ptr addrspace(1) nofree readonly 
captures(none) [[PTR:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i32 noundef 0, i32 noundef 0) #[[ATTR9:[0-9]+]] -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %rsrc = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 0, i32 0) - %val = load i32, ptr addrspace(7) %rsrc, align 4 - ;; original %ptr may alias - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_make_buffer_noalias(ptr addrspace(1) noalias %ptr) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_make_buffer_noalias( -; AMDGCN-SAME: ptr addrspace(1) noalias nofree readonly captures(none) [[PTR:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[RSRC:%.*]] = call align 4 ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[PTR]], i16 noundef 0, i32 noundef 0, i32 noundef 0) #[[ATTR9]] -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(7) [[RSRC]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %rsrc = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %ptr, i16 0, i32 0, i32 0) - %val = load i32, ptr addrspace(7) %rsrc, align 4 - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_selected_load(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load( -; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]] -; AMDGCN-NEXT: 
[[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false - %val = load i32, ptr addrspace(1) %ptr, align 4 - ;; either pointer yields an !invariant.load - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_selected_load_partial_noalias(i1 inreg %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_selected_load_partial_noalias( -; AMDGCN-SAME: i1 inreg [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[PTR:%.*]] = select i1 [[COND]], ptr addrspace(1) [[PTR_TRUE]], ptr addrspace(1) [[PTR_FALSE]] -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; - %ptr = select i1 %cond, ptr addrspace(1) %ptr.true, ptr addrspace(1) %ptr.false - %val = load i32, ptr addrspace(1) %ptr, align 4 - ;; %ptr.false may alias, so no !invariant.load - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_branch_load(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) noalias %ptr.false) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load( -; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[ENTRY:.*:]] -; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] -; AMDGCN: [[TRUE]]: -; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR6]] -; AMDGCN-NEXT: br label %[[FINISH:.*]] -; AMDGCN: [[FALSE]]: -; AMDGCN-NEXT: br label %[[FINISH]] -; 
AMDGCN: [[FINISH]]: -; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4, !invariant.load [[META0]] -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; -entry: - br i1 %cond, label %true, label %false -true: - call void @clobber(i32 1) - br label %finish -false: - br label %finish -finish: - %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ] - %val = load i32, ptr addrspace(1) %ptr, align 4 - ;; either pointer yields an !invariant.load - call void @clobber(i32 %val) - ret void -} - -define amdgpu_kernel void @test_branch_load_partial_noalias(i1 %cond, ptr addrspace(1) noalias %ptr.true, ptr addrspace(1) %ptr.false) { -; AMDGCN-LABEL: define amdgpu_kernel void @test_branch_load_partial_noalias( -; AMDGCN-SAME: i1 noundef [[COND:%.*]], ptr addrspace(1) noalias nofree readonly captures(none) [[PTR_TRUE:%.*]], ptr addrspace(1) nofree readonly captures(none) [[PTR_FALSE:%.*]]) #[[ATTR2]] { -; AMDGCN-NEXT: [[ENTRY:.*:]] -; AMDGCN-NEXT: br i1 [[COND]], label %[[TRUE:.*]], label %[[FALSE:.*]] -; AMDGCN: [[TRUE]]: -; AMDGCN-NEXT: call void @clobber(i32 noundef 1) #[[ATTR6]] -; AMDGCN-NEXT: br label %[[FINISH:.*]] -; AMDGCN: [[FALSE]]: -; AMDGCN-NEXT: br label %[[FINISH]] -; AMDGCN: [[FINISH]]: -; AMDGCN-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[PTR_TRUE]], %[[TRUE]] ], [ [[PTR_FALSE]], %[[FALSE]] ] -; AMDGCN-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[PTR]], align 4 -; AMDGCN-NEXT: call void @clobber(i32 [[VAL]]) #[[ATTR6]] -; AMDGCN-NEXT: ret void -; -entry: - br i1 %cond, label %true, label %false -true: - call void @clobber(i32 1) - br label %finish -false: - br label %finish -finish: - %ptr = phi ptr addrspace(1) [ %ptr.true, %true ], [ %ptr.false, %false ] - %val = load i32, ptr addrspace(1) %ptr, align 4 - ;; ptr.false may alias, so no !invariant.load - call void @clobber(i32 
%val) - ret void -} - -attributes #0 = { nofree norecurse nosync nounwind willreturn } -attributes #1 = { nofree norecurse nosync nounwind willreturn readonly } -;. -; AMDGCN: [[META0]] = !{} -;. diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll index 5bff2a2e6b20..07e2d5ea1575 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -207,6 +207,7 @@ define void @f7_1(ptr %ptr, i1 %cnd) { ; CHECK-LABEL: define {{[^@]+}}@f7_1 ; CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[PTR:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[A:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]] +; CHECK-NEXT: [[PTR_0:%.*]] = load i32, ptr [[PTR]], align 4 ; CHECK-NEXT: [[B:%.*]] = tail call i32 @unkown_f(ptr noundef nonnull align 4 dereferenceable(4) [[PTR]]) #[[ATTR1]] ; CHECK-NEXT: br i1 [[CND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK: if.true: diff --git a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll index 4767244800d2..374d5ba7ff52 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll @@ -135,7 +135,7 @@ define internal %S @foo.1(ptr %foo.this) { ; TUNIT-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; TUNIT-NEXT: store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8 ; TUNIT-NEXT: call void @bar.2(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR5:[0-9]+]] -; TUNIT-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8:![0-9]+]] +; TUNIT-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; TUNIT-NEXT: ret [[S]] [[FOO_RET]] ; 
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) @@ -145,7 +145,7 @@ define internal %S @foo.1(ptr %foo.this) { ; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; CGSCC-NEXT: store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8 ; CGSCC-NEXT: call void @bar.2(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR6]] -; CGSCC-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8:![0-9]+]] +; CGSCC-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; CGSCC-NEXT: ret [[S]] [[FOO_RET]] ; entry: @@ -234,7 +234,7 @@ define internal %S @bar.5(ptr %this) { ; TUNIT-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; TUNIT-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 ; TUNIT-NEXT: call void @baz.6(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR4]] -; TUNIT-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]] +; TUNIT-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; TUNIT-NEXT: ret [[S]] [[BAR_RET]] ; ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) @@ -244,7 +244,7 @@ define internal %S @bar.5(ptr %this) { ; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 ; CGSCC-NEXT: call void @baz.6(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR9:[0-9]+]] -; CGSCC-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]] +; CGSCC-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; CGSCC-NEXT: ret [[S]] [[BAR_RET]] ; entry: @@ 
-286,7 +286,7 @@ define internal void @boom(ptr %this, ptr %data) { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[DATA_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; TUNIT-NEXT: store ptr [[DATA]], ptr [[DATA_ADDR]], align 8 -; TUNIT-NEXT: [[V:%.*]] = load ptr, ptr [[DATA_ADDR]], align 8, !invariant.load [[META8]] +; TUNIT-NEXT: [[V:%.*]] = load ptr, ptr [[DATA_ADDR]], align 8 ; TUNIT-NEXT: store ptr [[V]], ptr [[THIS]], align 8 ; TUNIT-NEXT: ret void ; @@ -342,6 +342,14 @@ define %S.2 @t3.helper() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RETVAL:%.*]] = alloca [[S_2:%.*]], align 8 ; CHECK-NEXT: call void @ext1(ptr noundef nonnull align 8 dereferenceable(24) [[RETVAL]]) +; CHECK-NEXT: [[DOTFCA_0_LOAD:%.*]] = load ptr, ptr [[RETVAL]], align 8 +; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[S_2]] poison, ptr [[DOTFCA_0_LOAD]], 0 +; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 1 +; CHECK-NEXT: [[DOTFCA_1_LOAD:%.*]] = load i64, ptr [[DOTFCA_1_GEP]], align 8 +; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_0_INSERT]], i64 [[DOTFCA_1_LOAD]], 1 +; CHECK-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [[S_2]], ptr [[RETVAL]], i32 0, i32 2 +; CHECK-NEXT: [[DOTFCA_2_LOAD:%.*]] = load i64, ptr [[DOTFCA_2_GEP]], align 8 +; CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[S_2]] [[DOTFCA_1_INSERT]], i64 [[DOTFCA_2_LOAD]], 2 ; CHECK-NEXT: ret [[S_2]] zeroinitializer ; entry: @@ -500,7 +508,7 @@ define internal %S @t4a(ptr %this) { ; CGSCC-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 ; CGSCC-NEXT: call void @t4b(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]] -; CGSCC-NEXT: [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8, !invariant.load [[META8]] +; CGSCC-NEXT: [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; 
CGSCC-NEXT: ret [[S]] [[TMP0]] ; entry: @@ -615,7 +623,6 @@ entry: ; TUNIT: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; TUNIT: [[META6:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 2} ; TUNIT: [[META7:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} -; TUNIT: [[META8]] = !{} ;. ; CGSCC: [[META0:![0-9]+]] = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 5]} ; CGSCC: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} @@ -625,5 +632,4 @@ entry: ; CGSCC: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} ; CGSCC: [[META6:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 2} ; CGSCC: [[META7:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} -; CGSCC: [[META8]] = !{} ;. -- cgit v1.2.3 From e0933ab5ae4856c4aa188a5ea16716b3a8d0840b Mon Sep 17 00:00:00 2001 From: Chelsea Cassanova Date: Wed, 18 Jun 2025 15:39:25 -0700 Subject: Revert "[lldb][target] Add progress report for wait-attaching to process" (#144810) This is breaking TestCreateAfterAttach.py on Ubuntu: ``` ====================================================================== FAIL: test_create_after_attach_dwo (TestCreateAfterAttach.CreateAfterAttachTestCase.test_create_after_attach_dwo) Test thread creation after process attach. 
---------------------------------------------------------------------- Traceback (most recent call last): File "/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/llvm-project/lldb/packages/Python/lldbsuite/test/lldbtest.py", line 1804, in test_method return attrvalue(self) ^^^^^^^^^^^^^^^ File "/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/llvm-project/lldb/packages/Python/lldbsuite/test/decorators.py", line 149, in wrapper return func(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^ File "/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/llvm-project/lldb/test/API/functionalities/thread/create_after_attach/TestCreateAfterAttach.py", line 36, in test_create_after_attach self.runCmd("process attach -p " + str(pid)) File "/home/buildbot/worker/as-builder-9/lldb-remote-linux-ubuntu/llvm-project/lldb/packages/Python/lldbsuite/test/lldbtest.py", line 1005, in runCmd self.assertTrue(self.res.Succeeded(), msg + output) AssertionError: False is not true : Command 'process attach -p 1474309' did not return successfully Error output: error: attach failed: lost connection ``` on the buildbots for lldb-remote-linux-ubuntu, lldb-arm-ubuntu, lldb-aarch64-ubuntu, lldb-arm-ubuntu. 
--- lldb/source/Target/Target.cpp | 1 - .../progress_reporting/TestProgressReporting.py | 31 ---------------------- 2 files changed, 32 deletions(-) diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 8f8d2ef21cc5..45a9e1196a04 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -3546,7 +3546,6 @@ llvm::Expected Target::GetTraceOrCreate() { } Status Target::Attach(ProcessAttachInfo &attach_info, Stream *stream) { - Progress attach_progress("Waiting to attach to process"); m_stats.SetLaunchOrAttachTime(); auto state = eStateInvalid; auto process_sp = GetProcessSP(); diff --git a/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py b/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py index 8198c50a5ff0..9af53845ca1b 100644 --- a/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py +++ b/lldb/test/API/functionalities/progress_reporting/TestProgressReporting.py @@ -2,7 +2,6 @@ Test that we are able to broadcast and receive progress events from lldb """ import lldb -import threading import lldbsuite.test.lldbutil as lldbutil @@ -17,36 +16,6 @@ class TestProgressReporting(TestBase): self.broadcaster, lldb.SBDebugger.eBroadcastBitProgress ) - def test_wait_attach_progress_reporting(self): - """Test that progress reports for wait attaching work as intended.""" - self.build() - target = self.dbg.CreateTarget(None) - - # Wait attach to a process, then check to see that a progress report was created - # and that its message is correct for waiting to attach to a process. 
- class AttachThread(threading.Thread): - def __init__(self, target): - threading.Thread.__init__(self) - self.target = target - - def run(self): - self.target.AttachToProcessWithName( - lldb.SBListener(), "a.out", True, lldb.SBError() - ) - - thread = AttachThread(target) - thread.start() - - event = lldbutil.fetch_next_event(self, self.listener, self.broadcaster) - progress_data = lldb.SBDebugger.GetProgressDataFromEvent(event) - message = progress_data.GetValueForKey("message").GetStringValue(100) - self.assertEqual(message, "Waiting to attach to process") - - # Interrupt the process attach to keep the test from stalling. - target.process.SendAsyncInterrupt() - - thread.join() - def test_dwarf_symbol_loading_progress_report(self): """Test that we are able to fetch dwarf symbol loading progress events""" self.build() -- cgit v1.2.3 From 780c0ef7fb97027aa21c2ee6b02282693f908a20 Mon Sep 17 00:00:00 2001 From: Andrew Rogers Date: Wed, 18 Jun 2025 15:41:13 -0700 Subject: [llvm] explicitly link llvm-jitlink-executor with ExecutionEngine (#144778) ## Overview Explicitly link `llvm-jitlink-executor` with `ExecutionEngine` to avoid link failures when building LLVM as a Windows DLL. This link dependency should probably have always been declared here, but it didn't matter when building against an LLVM static library because it was (presumably) picked up as a transitive dependency. This change is required to enable the Windows DLL build because `llvm-jitlink-executor` is declared using `add_llvm_utility` which invokes `add_llvm_executable` with [`DISABLE_LLVM_LINK_LLVM_DYLIB`](https://github.com/llvm/llvm-project/blob/main/llvm/cmake/modules/AddLLVM.cmake#L500-L502) so it links statically against its dependencies instead of against the main LLVM library. ## Background The effort to support building LLVM as a Windows DLL is tracked in #109483. Additional context is provided in [this discourse](https://discourse.llvm.org/t/psa-annotating-llvm-public-interface/85307).
--- llvm/tools/llvm-jitlink/llvm-jitlink-executor/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink-executor/CMakeLists.txt b/llvm/tools/llvm-jitlink/llvm-jitlink-executor/CMakeLists.txt index f2daa294eec0..792ecf544f61 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink-executor/CMakeLists.txt +++ b/llvm/tools/llvm-jitlink/llvm-jitlink-executor/CMakeLists.txt @@ -1,4 +1,5 @@ set(LLVM_LINK_COMPONENTS + ExecutionEngine OrcShared OrcTargetProcess Support -- cgit v1.2.3 From bb1f5c3189c4d8d30e3b1273e0b774a7ccdbd86a Mon Sep 17 00:00:00 2001 From: Guy David <49722543+guy-david@users.noreply.github.com> Date: Thu, 19 Jun 2025 01:53:36 +0300 Subject: [AArch64] Lower jump table cases threshold to 10 (#143632) Previous stabs at this setting (https://github.com/llvm/llvm-project/pull/71166) hypertuned it for SPEC2017, but Clang's own compilation can benefit from a slightly lower threshold, yielding a 0.3% improvement in compile time, while still not regressing SPEC. 
Most notable beneficiaries of this change are: - `llvm::Instruction::getNumSuccessors` (11 cases) - `llvm::Instruction::getSuccessor` (11 cases) Test Suite with a bootstrapped build: ``` Tests: 4316 Metric: compile_time Program compile_time lhs rhs diff SingleSour...ce/UnitTests/SignlessTypes/div 0.02 0.02 3.0% SingleSour.../UnitTests/SignlessTypes/cast2 0.02 0.02 2.8% SingleSource/Benchmarks/Misc/flops-4 0.02 0.02 1.9% SingleSour...ebra/solvers/cholesky/cholesky 0.05 0.05 1.8% SingleSour...tTests/2020-01-06-coverage-006 0.02 0.02 1.7% SingleSour...ce/Benchmarks/Stanford/FloatMM 0.03 0.03 1.7% SingleSour...9-04-16-BitfieldInitialization 0.02 0.02 1.7% SingleSour...nitTests/2003-07-08-BitOpsTest 0.02 0.02 1.7% MultiSourc...marks/Prolangs-C++/vcirc/vcirc 0.02 0.02 1.6% MultiSourc...Prolangs-C/fixoutput/fixoutput 0.05 0.05 1.5% SingleSour...h/stencils/jacobi-1d/jacobi-1d 0.04 0.04 1.4% MultiSourc...rks/Prolangs-C++/office/office 0.28 0.28 1.4% SingleSour...arks/Adobe-C++/functionobjects 0.39 0.40 1.3% SingleSour...Tests/2003-10-29-ScalarReplBug 0.02 0.02 1.2% SingleSour...arks/Adobe-C++/stepanov_vector 0.41 0.42 1.2% Geomean difference -0.3% compile_time l/r lhs rhs diff count 4316.000000 4316.000000 469.000000 mean 0.057747 0.057595 -0.003034 std 0.544528 0.543139 0.007625 min 0.000000 0.000000 -0.035294 25% 0.000000 0.000000 -0.007006 50% 0.000000 0.000000 -0.003257 75% 0.000000 0.000000 0.000000 max 18.295300 18.252500 0.030151 ``` --- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 2 +- llvm/test/CodeGen/AArch64/min-jump-table.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index a28e6bad0dca..68ed10570a52 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -73,7 +73,7 @@ static cl::opt cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR)); static cl::opt AArch64MinimumJumpTableEntries( - 
"aarch64-min-jump-table-entries", cl::init(13), cl::Hidden, + "aarch64-min-jump-table-entries", cl::init(10), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on AArch64")); static cl::opt AArch64StreamingHazardSize( diff --git a/llvm/test/CodeGen/AArch64/min-jump-table.ll b/llvm/test/CodeGen/AArch64/min-jump-table.ll index 98b89210f5a0..7cdff6e435f7 100644 --- a/llvm/test/CodeGen/AArch64/min-jump-table.ll +++ b/llvm/test/CodeGen/AArch64/min-jump-table.ll @@ -105,7 +105,7 @@ entry: ; CHECK4-NEXT: Jump Tables: ; CHECK8-NEXT: Jump Tables: ; CHECK12-NEXT: Jump Tables: -; CHECK-DEFAULT-NOT: {{^}}Jump Tables: +; CHECK-DEFAULT: {{^}}Jump Tables: bb1: tail call void @ext(i32 1, i32 0) br label %return bb2: tail call void @ext(i32 2, i32 2) br label %return -- cgit v1.2.3 From 5f69d680e2cc94dcb30a7f29e8144725530a6da4 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 18 Jun 2025 19:30:43 -0400 Subject: Revert "[HLSL][SPIRV] Add vk::constant_id attribute." (#144812) Reverts llvm/llvm-project#143544 --- clang/include/clang/Basic/Attr.td | 8 - clang/include/clang/Basic/AttrDocs.td | 15 -- clang/include/clang/Basic/Builtins.td | 13 -- clang/include/clang/Basic/DiagnosticSemaKinds.td | 4 - clang/include/clang/Sema/SemaHLSL.h | 5 +- clang/lib/CodeGen/CGHLSLBuiltins.cpp | 74 -------- clang/lib/CodeGen/CodeGenFunction.h | 6 - clang/lib/Sema/SemaDecl.cpp | 13 -- clang/lib/Sema/SemaDeclAttr.cpp | 3 - clang/lib/Sema/SemaHLSL.cpp | 120 +----------- clang/test/AST/HLSL/vk.spec-constant.usage.hlsl | 130 ------------- .../inline-spirv/SpirvType.alignment.hlsl | 16 ++ clang/test/CodeGenHLSL/inline-spirv/SpirvType.hlsl | 68 +++++++ .../vk-features/SpirvType.alignment.hlsl | 16 -- clang/test/CodeGenHLSL/vk-features/SpirvType.hlsl | 68 ------- .../CodeGenHLSL/vk-features/vk.spec-constant.hlsl | 210 --------------------- clang/test/SemaHLSL/vk.spec-constant.error.hlsl | 37 ---- 17 files changed, 86 insertions(+), 720 deletions(-) delete mode 100644 
clang/test/AST/HLSL/vk.spec-constant.usage.hlsl create mode 100644 clang/test/CodeGenHLSL/inline-spirv/SpirvType.alignment.hlsl create mode 100644 clang/test/CodeGenHLSL/inline-spirv/SpirvType.hlsl delete mode 100644 clang/test/CodeGenHLSL/vk-features/SpirvType.alignment.hlsl delete mode 100644 clang/test/CodeGenHLSL/vk-features/SpirvType.hlsl delete mode 100644 clang/test/CodeGenHLSL/vk-features/vk.spec-constant.hlsl delete mode 100644 clang/test/SemaHLSL/vk.spec-constant.error.hlsl diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 27fea7dea0a5..f113cd2ba2fb 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -5023,14 +5023,6 @@ def HLSLVkExtBuiltinInput : InheritableAttr { let Documentation = [HLSLVkExtBuiltinInputDocs]; } -def HLSLVkConstantId : InheritableAttr { - let Spellings = [CXX11<"vk", "constant_id">]; - let Args = [IntArgument<"Id">]; - let Subjects = SubjectList<[ExternalGlobalVar]>; - let LangOpts = [HLSL]; - let Documentation = [VkConstantIdDocs]; -} - def RandomizeLayout : InheritableAttr { let Spellings = [GCC<"randomize_layout">]; let Subjects = SubjectList<[Record]>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 43442f177ab7..6051e1fc4511 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -8252,21 +8252,6 @@ and https://microsoft.github.io/hlsl-specs/proposals/0013-wave-size-range.html }]; } -def VkConstantIdDocs : Documentation { - let Category = DocCatFunction; - let Content = [{ -The ``vk::constant_id`` attribute specifies the id for a SPIR-V specialization -constant. The attribute applies to const global scalar variables. The variable must be initialized with a C++11 constexpr. -In SPIR-V, the -variable will be replaced with an `OpSpecConstant` with the given id. -The syntax is: - -.. 
code-block:: text - - ``[[vk::constant_id()]] const T Name = `` -}]; -} - def RootSignatureDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index d65b3a5d2f44..68cd3d790e78 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -5065,19 +5065,6 @@ def HLSLGroupMemoryBarrierWithGroupSync: LangBuiltin<"HLSL_LANG"> { let Prototype = "void()"; } -class HLSLScalarTemplate - : Template<["bool", "char", "short", "int", "long long int", - "unsigned short", "unsigned int", "unsigned long long int", - "__fp16", "float", "double"], - ["_bool", "_char", "_short", "_int", "_longlong", "_ushort", - "_uint", "_ulonglong", "_half", "_float", "_double"]>; - -def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate { - let Spellings = ["__builtin_get_spirv_spec_constant"]; - let Attributes = [NoThrow, Const, Pure]; - let Prototype = "T(unsigned int, T)"; -} - // Builtins for XRay. 
def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 34b798a09c21..979ff60b73b7 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12927,10 +12927,6 @@ def err_spirv_enum_not_int : Error< def err_spirv_enum_not_valid : Error< "invalid value for %select{storage class}0 argument">; -def err_specialization_const - : Error<"variable with 'vk::constant_id' attribute must be a const " - "int/float/enum/bool and be initialized with a literal">; - // errors of expect.with.probability def err_probability_not_constant_float : Error< "probability argument to __builtin_expect_with_probability must be constant " diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 97091792ba23..33c4b8d1568b 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -98,8 +98,6 @@ public: HLSLWaveSizeAttr *mergeWaveSizeAttr(Decl *D, const AttributeCommonInfo &AL, int Min, int Max, int Preferred, int SpelledArgsCount); - HLSLVkConstantIdAttr * - mergeVkConstantIdAttr(Decl *D, const AttributeCommonInfo &AL, int Id); HLSLShaderAttr *mergeShaderAttr(Decl *D, const AttributeCommonInfo &AL, llvm::Triple::EnvironmentType ShaderType); HLSLParamModifierAttr * @@ -137,7 +135,6 @@ public: void handleRootSignatureAttr(Decl *D, const ParsedAttr &AL); void handleNumThreadsAttr(Decl *D, const ParsedAttr &AL); void handleWaveSizeAttr(Decl *D, const ParsedAttr &AL); - void handleVkConstantIdAttr(Decl *D, const ParsedAttr &AL); void handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL); void handleSV_GroupThreadIDAttr(Decl *D, const ParsedAttr &AL); void handleSV_GroupIDAttr(Decl *D, const ParsedAttr &AL); @@ -174,7 +171,7 @@ public: QualType getInoutParameterType(QualType Ty); bool transformInitList(const InitializedEntity 
&Entity, InitListExpr *Init); - bool handleInitialization(VarDecl *VDecl, Expr *&Init); + void deduceAddressSpace(VarDecl *Decl); private: diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index cbc5ef9cb0d5..ccf45c0c6ff1 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -12,7 +12,6 @@ #include "CGBuiltin.h" #include "CGHLSLRuntime.h" -#include "CodeGenFunction.h" using namespace clang; using namespace CodeGen; @@ -215,43 +214,6 @@ static Intrinsic::ID getWaveActiveMaxIntrinsic(llvm::Triple::ArchType Arch, } } -// Returns the mangled name for a builtin function that the SPIR-V backend -// will expand into a spec Constant. -static std::string getSpecConstantFunctionName(clang::QualType SpecConstantType, - ASTContext &Context) { - // The parameter types for our conceptual intrinsic function. - QualType ClangParamTypes[] = {Context.IntTy, SpecConstantType}; - - // Create a temporary FunctionDecl for the builtin fuction. It won't be - // added to the AST. - FunctionProtoType::ExtProtoInfo EPI; - QualType FnType = - Context.getFunctionType(SpecConstantType, ClangParamTypes, EPI); - DeclarationName FuncName = &Context.Idents.get("__spirv_SpecConstant"); - FunctionDecl *FnDeclForMangling = FunctionDecl::Create( - Context, Context.getTranslationUnitDecl(), SourceLocation(), - SourceLocation(), FuncName, FnType, /*TSI=*/nullptr, SC_Extern); - - // Attach the created parameter declarations to the function declaration. - SmallVector ParamDecls; - for (QualType ParamType : ClangParamTypes) { - ParmVarDecl *PD = ParmVarDecl::Create( - Context, FnDeclForMangling, SourceLocation(), SourceLocation(), - /*IdentifierInfo*/ nullptr, ParamType, /*TSI*/ nullptr, SC_None, - /*DefaultArg*/ nullptr); - ParamDecls.push_back(PD); - } - FnDeclForMangling->setParams(ParamDecls); - - // Get the mangled name. 
- std::string Name; - llvm::raw_string_ostream MangledNameStream(Name); - MangleContext *Mangler = Context.createMangleContext(); - Mangler->mangleName(FnDeclForMangling, MangledNameStream); - MangledNameStream.flush(); - return Name; -} - Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -811,42 +773,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return EmitRuntimeCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } - case Builtin::BI__builtin_get_spirv_spec_constant_bool: - case Builtin::BI__builtin_get_spirv_spec_constant_short: - case Builtin::BI__builtin_get_spirv_spec_constant_ushort: - case Builtin::BI__builtin_get_spirv_spec_constant_int: - case Builtin::BI__builtin_get_spirv_spec_constant_uint: - case Builtin::BI__builtin_get_spirv_spec_constant_longlong: - case Builtin::BI__builtin_get_spirv_spec_constant_ulonglong: - case Builtin::BI__builtin_get_spirv_spec_constant_half: - case Builtin::BI__builtin_get_spirv_spec_constant_float: - case Builtin::BI__builtin_get_spirv_spec_constant_double: { - llvm::Function *SpecConstantFn = getSpecConstantFunction(E->getType()); - llvm::Value *SpecId = EmitScalarExpr(E->getArg(0)); - llvm::Value *DefaultVal = EmitScalarExpr(E->getArg(1)); - llvm::Value *Args[] = {SpecId, DefaultVal}; - return Builder.CreateCall(SpecConstantFn, Args); - } } return nullptr; } - -llvm::Function *clang::CodeGen::CodeGenFunction::getSpecConstantFunction( - const clang::QualType &SpecConstantType) { - - // Find or create the declaration for the function. 
- llvm::Module *M = &CGM.getModule(); - std::string MangledName = - getSpecConstantFunctionName(SpecConstantType, getContext()); - llvm::Function *SpecConstantFn = M->getFunction(MangledName); - - if (!SpecConstantFn) { - llvm::Type *IntType = ConvertType(getContext().IntTy); - llvm::Type *RetTy = ConvertType(SpecConstantType); - llvm::Type *ArgTypes[] = {IntType, RetTy}; - llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, ArgTypes, false); - SpecConstantFn = llvm::Function::Create( - FnTy, llvm::GlobalValue::ExternalLinkage, MangledName, M); - } - return SpecConstantFn; -} diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 59f14b3e35fd..a5ab9df01dba 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4850,12 +4850,6 @@ public: llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); - - // Returns a builtin function that the SPIR-V backend will expand into a spec - // constant. 
- llvm::Function * - getSpecConstantFunction(const clang::QualType &SpecConstantType); - llvm::Value *EmitDirectXBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitSPIRVBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index e1cccf068b5a..1bf72e5bb7b9 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2890,8 +2890,6 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D, NewAttr = S.HLSL().mergeWaveSizeAttr(D, *WS, WS->getMin(), WS->getMax(), WS->getPreferred(), WS->getSpelledArgsCount()); - else if (const auto *CI = dyn_cast(Attr)) - NewAttr = S.HLSL().mergeVkConstantIdAttr(D, *CI, CI->getId()); else if (const auto *SA = dyn_cast(Attr)) NewAttr = S.HLSL().mergeShaderAttr(D, *SA, SA->getType()); else if (isa(Attr)) @@ -13759,10 +13757,6 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { return; } - if (getLangOpts().HLSL) - if (!HLSL().handleInitialization(VDecl, Init)) - return; - // Get the decls type and save a reference for later, since // CheckInitializerTypes may change it. QualType DclT = VDecl->getType(), SavT = DclT; @@ -14185,13 +14179,6 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) { } } - // HLSL variable with the `vk::constant_id` attribute must be initialized. 
- if (!Var->isInvalidDecl() && Var->hasAttr()) { - Diag(Var->getLocation(), diag::err_specialization_const); - Var->setInvalidDecl(); - return; - } - if (!Var->isInvalidDecl() && RealDecl->hasAttr()) { if (Var->getStorageClass() == SC_Extern) { Diag(Var->getLocation(), diag::err_loader_uninitialized_extern_decl) diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index eba29e609cb0..1c2fa80e782d 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7590,9 +7590,6 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_HLSLVkExtBuiltinInput: S.HLSL().handleVkExtBuiltinInputAttr(D, AL); break; - case ParsedAttr::AT_HLSLVkConstantId: - S.HLSL().handleVkConstantIdAttr(D, AL); - break; case ParsedAttr::AT_HLSLSV_GroupThreadID: S.HLSL().handleSV_GroupThreadIDAttr(D, AL); break; diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 9b43ee00810b..b55f4fd786b5 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -119,40 +119,6 @@ static ResourceClass getResourceClass(RegisterType RT) { llvm_unreachable("unexpected RegisterType value"); } -static Builtin::ID getSpecConstBuiltinId(QualType Type) { - const auto *BT = dyn_cast(Type); - if (!BT) { - if (!Type->isEnumeralType()) - return Builtin::NotBuiltin; - return Builtin::BI__builtin_get_spirv_spec_constant_int; - } - - switch (BT->getKind()) { - case BuiltinType::Bool: - return Builtin::BI__builtin_get_spirv_spec_constant_bool; - case BuiltinType::Short: - return Builtin::BI__builtin_get_spirv_spec_constant_short; - case BuiltinType::Int: - return Builtin::BI__builtin_get_spirv_spec_constant_int; - case BuiltinType::LongLong: - return Builtin::BI__builtin_get_spirv_spec_constant_longlong; - case BuiltinType::UShort: - return Builtin::BI__builtin_get_spirv_spec_constant_ushort; - case BuiltinType::UInt: - return Builtin::BI__builtin_get_spirv_spec_constant_uint; - case 
BuiltinType::ULongLong: - return Builtin::BI__builtin_get_spirv_spec_constant_ulonglong; - case BuiltinType::Half: - return Builtin::BI__builtin_get_spirv_spec_constant_half; - case BuiltinType::Float: - return Builtin::BI__builtin_get_spirv_spec_constant_float; - case BuiltinType::Double: - return Builtin::BI__builtin_get_spirv_spec_constant_double; - default: - return Builtin::NotBuiltin; - } -} - DeclBindingInfo *ResourceBindings::addDeclBindingInfo(const VarDecl *VD, ResourceClass ResClass) { assert(getDeclBindingInfo(VD, ResClass) == nullptr && @@ -641,41 +607,6 @@ HLSLWaveSizeAttr *SemaHLSL::mergeWaveSizeAttr(Decl *D, return Result; } -HLSLVkConstantIdAttr * -SemaHLSL::mergeVkConstantIdAttr(Decl *D, const AttributeCommonInfo &AL, - int Id) { - - auto &TargetInfo = getASTContext().getTargetInfo(); - if (TargetInfo.getTriple().getArch() != llvm::Triple::spirv) { - Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL; - return nullptr; - } - - auto *VD = cast(D); - - if (getSpecConstBuiltinId(VD->getType()) == Builtin::NotBuiltin) { - Diag(VD->getLocation(), diag::err_specialization_const); - return nullptr; - } - - if (!VD->getType().isConstQualified()) { - Diag(VD->getLocation(), diag::err_specialization_const); - return nullptr; - } - - if (HLSLVkConstantIdAttr *CI = D->getAttr()) { - if (CI->getId() != Id) { - Diag(CI->getLocation(), diag::err_hlsl_attribute_param_mismatch) << AL; - Diag(AL.getLoc(), diag::note_conflicting_attribute); - } - return nullptr; - } - - HLSLVkConstantIdAttr *Result = - ::new (getASTContext()) HLSLVkConstantIdAttr(getASTContext(), AL, Id); - return Result; -} - HLSLShaderAttr * SemaHLSL::mergeShaderAttr(Decl *D, const AttributeCommonInfo &AL, llvm::Triple::EnvironmentType ShaderType) { @@ -1226,15 +1157,6 @@ void SemaHLSL::handleVkExtBuiltinInputAttr(Decl *D, const ParsedAttr &AL) { HLSLVkExtBuiltinInputAttr(getASTContext(), AL, ID)); } -void SemaHLSL::handleVkConstantIdAttr(Decl *D, const ParsedAttr &AL) { - uint32_t Id; - if 
(!SemaRef.checkUInt32Argument(AL, AL.getArgAsExpr(0), Id)) - return; - HLSLVkConstantIdAttr *NewAttr = mergeVkConstantIdAttr(D, AL, Id); - if (NewAttr) - D->addAttr(NewAttr); -} - bool SemaHLSL::diagnoseInputIDType(QualType T, const ParsedAttr &AL) { const auto *VT = T->getAs(); @@ -3284,7 +3206,6 @@ static bool IsDefaultBufferConstantDecl(VarDecl *VD) { return VD->getDeclContext()->isTranslationUnit() && QT.getAddressSpace() == LangAS::Default && VD->getStorageClass() != SC_Static && - !VD->hasAttr() && !isInvalidConstantBufferLeafElementType(QT.getTypePtr()); } @@ -3352,8 +3273,7 @@ void SemaHLSL::ActOnVariableDeclarator(VarDecl *VD) { const Type *VarType = VD->getType().getTypePtr(); while (VarType->isArrayType()) VarType = VarType->getArrayElementTypeNoTypeQual(); - if (VarType->isHLSLResourceRecord() || - VD->hasAttr()) { + if (VarType->isHLSLResourceRecord()) { // Make the variable for resources static. The global externally visible // storage is accessed through the handle, which is a member. The variable // itself is not externally visible. 
@@ -3776,41 +3696,3 @@ bool SemaHLSL::transformInitList(const InitializedEntity &Entity, Init->updateInit(Ctx, I, NewInit->getInit(I)); return true; } - -bool SemaHLSL::handleInitialization(VarDecl *VDecl, Expr *&Init) { - const HLSLVkConstantIdAttr *ConstIdAttr = - VDecl->getAttr(); - if (!ConstIdAttr) - return true; - - ASTContext &Context = SemaRef.getASTContext(); - - APValue InitValue; - if (!Init->isCXX11ConstantExpr(Context, &InitValue)) { - Diag(VDecl->getLocation(), diag::err_specialization_const); - VDecl->setInvalidDecl(); - return false; - } - - Builtin::ID BID = getSpecConstBuiltinId(VDecl->getType()); - - // Argument 1: The ID from the attribute - int ConstantID = ConstIdAttr->getId(); - llvm::APInt IDVal(Context.getIntWidth(Context.IntTy), ConstantID); - Expr *IdExpr = IntegerLiteral::Create(Context, IDVal, Context.IntTy, - ConstIdAttr->getLocation()); - - SmallVector Args = {IdExpr, Init}; - Expr *C = SemaRef.BuildBuiltinCallExpr(Init->getExprLoc(), BID, Args); - if (C->getType()->getCanonicalTypeUnqualified() != - VDecl->getType()->getCanonicalTypeUnqualified()) { - C = SemaRef - .BuildCStyleCastExpr(SourceLocation(), - Context.getTrivialTypeSourceInfo( - Init->getType(), Init->getExprLoc()), - SourceLocation(), C) - .get(); - } - Init = C; - return true; -} diff --git a/clang/test/AST/HLSL/vk.spec-constant.usage.hlsl b/clang/test/AST/HLSL/vk.spec-constant.usage.hlsl deleted file mode 100644 index c0955c1ea7b4..000000000000 --- a/clang/test/AST/HLSL/vk.spec-constant.usage.hlsl +++ /dev/null @@ -1,130 +0,0 @@ -// RUN: %clang_cc1 -finclude-default-header -triple spirv-unknown-vulkan-compute -x hlsl -ast-dump -o - %s | FileCheck %s - -// CHECK: VarDecl {{.*}} bool_const 'const hlsl_private bool' static cinit -// CHECK-NEXT: CallExpr {{.*}} 'bool' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'bool (*)(unsigned int, bool) noexcept' -// CHECK-NEXT: DeclRefExpr {{.*}} 'bool (unsigned int, bool) noexcept' lvalue Function {{.*}} 
'__builtin_get_spirv_spec_constant_bool' 'bool (unsigned int, bool) noexcept' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: CXXBoolLiteralExpr {{.*}} 'bool' true -[[vk::constant_id(1)]] -const bool bool_const = true; - -// CHECK: VarDecl {{.*}} short_const 'const hlsl_private short' static cinit -// CHECK-NEXT: CallExpr {{.*}} 'short' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'short (*)(unsigned int, short) noexcept' -// CHECK-NEXT: DeclRefExpr {{.*}} 'short (unsigned int, short) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_short' 'short (unsigned int, short) noexcept' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'short' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 4 -[[vk::constant_id(2)]] -const short short_const = 4; - -// CHECK: VarDecl {{.*}} int_const 'const hlsl_private int' static cinit -// CHECK-NEXT: CallExpr {{.*}} 'int' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int (*)(unsigned int, int) noexcept' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int (unsigned int, int) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_int' 'int (unsigned int, int) noexcept' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 3 -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 5 -[[vk::constant_id(3)]] -const int int_const = 5; - -// CHECK: VarDecl {{.*}} long_const 'const hlsl_private long long' static cinit -// CHECK-NEXT: CallExpr {{.*}} 'long long' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'long long (*)(unsigned int, long long) noexcept' -// CHECK-NEXT: DeclRefExpr {{.*}} 'long long (unsigned int, long long) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_longlong' 'long long (unsigned int, long long) noexcept' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 4 -// 
CHECK-NEXT: ImplicitCastExpr {{.*}} 'long long' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8 -[[vk::constant_id(4)]] -const long long long_const = 8; - -// CHECK: VarDecl {{.*}} ushort_const 'const hlsl_private unsigned short' static cinit -// CHECK-NEXT: CallExpr {{.*}} 'unsigned short' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned short (*)(unsigned int, unsigned short) noexcept' -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned short (unsigned int, unsigned short) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_ushort' 'unsigned short (unsigned int, unsigned short) noexcept' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 5 -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned short' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 10 -[[vk::constant_id(5)]] -const unsigned short ushort_const = 10; - -// CHECK: VarDecl {{.*}} uint_const 'const hlsl_private unsigned int' static cinit -// CHECK-NEXT: CallExpr {{.*}} 'unsigned int' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int (*)(unsigned int, unsigned int) noexcept' -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int (unsigned int, unsigned int) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_uint' 'unsigned int (unsigned int, unsigned int) noexcept' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 6 -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 12 -[[vk::constant_id(6)]] -const unsigned int uint_const = 12; - - -// CHECK: VarDecl {{.*}} ulong_const 'const hlsl_private unsigned long long' static cinit -// CHECK-NEXT: CallExpr {{.*}} 'unsigned long long' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned long long (*)(unsigned int, unsigned long long) noexcept' -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned long long (unsigned int, unsigned long long) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_ulonglong' 
'unsigned long long (unsigned int, unsigned long long) noexcept' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 7 -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned long long' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 25 -[[vk::constant_id(7)]] -const unsigned long long ulong_const = 25; - -// CHECK: VarDecl {{.*}} half_const 'const hlsl_private half' static cinit -// CHECK-NEXT: CallExpr {{.*}} 'half' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'half (*)(unsigned int, half) noexcept' -// CHECK-NEXT: DeclRefExpr {{.*}} 'half (unsigned int, half) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_half' 'half (unsigned int, half) noexcept' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8 -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'half' -// CHECK-NEXT: FloatingLiteral {{.*}} 'float' 4.040000e+01 -[[vk::constant_id(8)]] -const half half_const = 40.4; - -// CHECK: VarDecl {{.*}} float_const 'const hlsl_private float' static cinit -// CHECK-NEXT: CallExpr {{.*}} 'float' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float (*)(unsigned int, float) noexcept' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float (unsigned int, float) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_float' 'float (unsigned int, float) noexcept' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8 -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 50 -[[vk::constant_id(8)]] -const float float_const = 50; - -// CHECK: VarDecl {{.*}} double_const 'const hlsl_private double' static cinit -// CHECK-NEXT: CallExpr {{.*}} 'double' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'double (*)(unsigned int, double) noexcept' -// CHECK-NEXT: DeclRefExpr {{.*}} 'double (unsigned int, double) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_double' 'double (unsigned int, double) noexcept' -// 
CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 9 -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'double' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 100 -[[vk::constant_id(9)]] -const double double_const = 100; - -// CHECK: VarDecl {{.*}} enum_const 'const hlsl_private E' static cinit -// CHECK-NEXT: CStyleCastExpr {{.*}} 'E' -// CHECK-NEXT: CallExpr {{.*}} 'int' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int (*)(unsigned int, int) noexcept' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int (unsigned int, int) noexcept' lvalue Function {{.*}} '__builtin_get_spirv_spec_constant_int' 'int (unsigned int, int) noexcept' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int' -// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 10 -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: DeclRefExpr {{.*}} 'E' EnumConstant {{.*}} 'e2' 'E' -enum E { - e0 = 10, - e1 = 20, - e2 = 30 -}; - -[[vk::constant_id(10)]] -const E enum_const = e2; - -// CHECK-NOT: CXXRecordDecl {{.*}} implicit struct __cblayout_$Globals definition diff --git a/clang/test/CodeGenHLSL/inline-spirv/SpirvType.alignment.hlsl b/clang/test/CodeGenHLSL/inline-spirv/SpirvType.alignment.hlsl new file mode 100644 index 000000000000..41cdd7d21bcb --- /dev/null +++ b/clang/test/CodeGenHLSL/inline-spirv/SpirvType.alignment.hlsl @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s + +using Int = vk::SpirvType>, vk::Literal>>; + +// CHECK: %struct.S = type <{ i32, target("spirv.Type", target("spirv.Literal", 8), target("spirv.Literal", 0), 21, 4, 64), [4 x i8] }> +struct S { + int a; + Int b; +}; + +[numthreads(1,1,1)] +void main() { + S value; +} diff --git a/clang/test/CodeGenHLSL/inline-spirv/SpirvType.hlsl b/clang/test/CodeGenHLSL/inline-spirv/SpirvType.hlsl new file mode 100644 index 000000000000..7149be0122f4 --- /dev/null +++ 
b/clang/test/CodeGenHLSL/inline-spirv/SpirvType.hlsl @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s + +template +using Array = vk::SpirvOpaqueType>; + +template +using ArrayBuffer = Array, Size>; + +typedef vk::SpirvType>, vk::Literal>> Int; + +typedef Array ArrayInt; + +// CHECK: %struct.S = type { target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) } +struct S { + ArrayBuffer<4> b; + Int i; +}; + +// CHECK: define hidden spir_func target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) @_Z14getArrayBufferu17spirv_type_28_0_0U5_TypeN4hlsl8RWBufferIfEEU6_ConstLm4E(target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) %v) #0 +ArrayBuffer<4> getArrayBuffer(ArrayBuffer<4> v) { + return v; +} + +// CHECK: define hidden spir_func target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) @_Z6getIntu18spirv_type_21_4_32U4_LitLi32EU4_LitLi0E(target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) %v) #0 +Int getInt(Int v) { + return v; +} + +// TODO: uncomment and test once CBuffer handles are implemented for SPIR-V +// ArrayBuffer<4> g_buffers; +// Int g_word; + +[numthreads(1, 1, 1)] +void main() { + // CHECK: [[buffers:%.*]] = alloca target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), align 4 + ArrayBuffer<4> buffers; + + // CHECK: [[longBuffers:%.*]] = alloca target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 591751049, 1), 28, 0, 0), align 4 + 
ArrayBuffer<0x123456789> longBuffers; + + // CHECK: [[word:%.*]] = alloca target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), align 4 + Int word; + + // CHECK: [[words:%.*]] = alloca [4 x target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32)], align 4 + Int words[4]; + + // CHECK: [[words2:%.*]] = alloca target("spirv.Type", target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), target("spirv.IntegralConstant", i64, 5), 28, 0, 0), align 4 + ArrayInt words2; + + // CHECK: [[value:%.*]] = alloca %struct.S, align 1 + S value; + + // CHECK: [[buffers2:%.*]] = alloca target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), align 4 + // CHECK: [[word2:%.*]] = alloca target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), align 4 + + + // CHECK: [[loaded:%[0-9]+]] = load target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), ptr [[buffers]], align 4 + // CHECK: %call1 = call spir_func target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) @_Z14getArrayBufferu17spirv_type_28_0_0U5_TypeN4hlsl8RWBufferIfEEU6_ConstLm4E(target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) [[loaded]]) + // CHECK: store target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) %call1, ptr [[buffers2]], align 4 + ArrayBuffer<4> buffers2 = getArrayBuffer(buffers); + + // CHECK: [[loaded:%[0-9]+]] = load target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), ptr [[word]], align 4 + // CHECK: %call2 = call spir_func target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) 
@_Z6getIntu18spirv_type_21_4_32U4_LitLi32EU4_LitLi0E(target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) [[loaded]]) + // CHECK: store target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) %call2, ptr [[word2]], align 4 + Int word2 = getInt(word); +} diff --git a/clang/test/CodeGenHLSL/vk-features/SpirvType.alignment.hlsl b/clang/test/CodeGenHLSL/vk-features/SpirvType.alignment.hlsl deleted file mode 100644 index 41cdd7d21bcb..000000000000 --- a/clang/test/CodeGenHLSL/vk-features/SpirvType.alignment.hlsl +++ /dev/null @@ -1,16 +0,0 @@ -// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s - -using Int = vk::SpirvType>, vk::Literal>>; - -// CHECK: %struct.S = type <{ i32, target("spirv.Type", target("spirv.Literal", 8), target("spirv.Literal", 0), 21, 4, 64), [4 x i8] }> -struct S { - int a; - Int b; -}; - -[numthreads(1,1,1)] -void main() { - S value; -} diff --git a/clang/test/CodeGenHLSL/vk-features/SpirvType.hlsl b/clang/test/CodeGenHLSL/vk-features/SpirvType.hlsl deleted file mode 100644 index 7149be0122f4..000000000000 --- a/clang/test/CodeGenHLSL/vk-features/SpirvType.hlsl +++ /dev/null @@ -1,68 +0,0 @@ -// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s - -template -using Array = vk::SpirvOpaqueType>; - -template -using ArrayBuffer = Array, Size>; - -typedef vk::SpirvType>, vk::Literal>> Int; - -typedef Array ArrayInt; - -// CHECK: %struct.S = type { target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) } -struct S { - ArrayBuffer<4> b; - Int i; -}; - -// CHECK: define hidden spir_func target("spirv.Type", 
target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) @_Z14getArrayBufferu17spirv_type_28_0_0U5_TypeN4hlsl8RWBufferIfEEU6_ConstLm4E(target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) %v) #0 -ArrayBuffer<4> getArrayBuffer(ArrayBuffer<4> v) { - return v; -} - -// CHECK: define hidden spir_func target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) @_Z6getIntu18spirv_type_21_4_32U4_LitLi32EU4_LitLi0E(target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) %v) #0 -Int getInt(Int v) { - return v; -} - -// TODO: uncomment and test once CBuffer handles are implemented for SPIR-V -// ArrayBuffer<4> g_buffers; -// Int g_word; - -[numthreads(1, 1, 1)] -void main() { - // CHECK: [[buffers:%.*]] = alloca target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), align 4 - ArrayBuffer<4> buffers; - - // CHECK: [[longBuffers:%.*]] = alloca target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 591751049, 1), 28, 0, 0), align 4 - ArrayBuffer<0x123456789> longBuffers; - - // CHECK: [[word:%.*]] = alloca target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), align 4 - Int word; - - // CHECK: [[words:%.*]] = alloca [4 x target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32)], align 4 - Int words[4]; - - // CHECK: [[words2:%.*]] = alloca target("spirv.Type", target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), target("spirv.IntegralConstant", i64, 5), 28, 0, 0), align 4 - ArrayInt words2; - - // CHECK: [[value:%.*]] = alloca %struct.S, align 1 - S value; - - // CHECK: [[buffers2:%.*]] = alloca target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), 
target("spirv.IntegralConstant", i64, 4), 28, 0, 0), align 4 - // CHECK: [[word2:%.*]] = alloca target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), align 4 - - - // CHECK: [[loaded:%[0-9]+]] = load target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0), ptr [[buffers]], align 4 - // CHECK: %call1 = call spir_func target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) @_Z14getArrayBufferu17spirv_type_28_0_0U5_TypeN4hlsl8RWBufferIfEEU6_ConstLm4E(target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) [[loaded]]) - // CHECK: store target("spirv.Type", target("spirv.Image", float, 5, 2, 0, 0, 2, 0), target("spirv.IntegralConstant", i64, 4), 28, 0, 0) %call1, ptr [[buffers2]], align 4 - ArrayBuffer<4> buffers2 = getArrayBuffer(buffers); - - // CHECK: [[loaded:%[0-9]+]] = load target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32), ptr [[word]], align 4 - // CHECK: %call2 = call spir_func target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) @_Z6getIntu18spirv_type_21_4_32U4_LitLi32EU4_LitLi0E(target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) [[loaded]]) - // CHECK: store target("spirv.Type", target("spirv.Literal", 32), target("spirv.Literal", 0), 21, 4, 32) %call2, ptr [[word2]], align 4 - Int word2 = getInt(word); -} diff --git a/clang/test/CodeGenHLSL/vk-features/vk.spec-constant.hlsl b/clang/test/CodeGenHLSL/vk-features/vk.spec-constant.hlsl deleted file mode 100644 index cbc1fa61eae2..000000000000 --- a/clang/test/CodeGenHLSL/vk-features/vk.spec-constant.hlsl +++ /dev/null @@ -1,210 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --include-generated-funcs 
--version 5 -// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ -// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ -// RUN: -o - | FileCheck %s - -[[vk::constant_id(1)]] -const bool bool_const = true; - -[[vk::constant_id(1)]] -const short short_const = 4; - -[[vk::constant_id(3)]] -const int int_const = 5; - -[[vk::constant_id(4)]] -const long long long_const = 8; - -[[vk::constant_id(5)]] -const unsigned short ushort_const = 10; - -[[vk::constant_id(6)]] -const unsigned int uint_const = 12; - -[[vk::constant_id(7)]] -const unsigned long long ulong_const = 25; - -[[vk::constant_id(8)]] -const half half_const = 40.4; - -[[vk::constant_id(8)]] -const float float_const = 50.5; - -[[vk::constant_id(9)]] -const double double_const = 100.2; - -enum E { - e0 = 10, - e1 = 20, - e2 = 30 -}; - -[[vk::constant_id(10)]] -const E enum_const = e2; - -[numthreads(1,1,1)] -void main() { - bool b = bool_const; - short s = short_const; - int i = int_const; - long long l = long_const; - unsigned short us = ushort_const; - unsigned int ui = uint_const; - unsigned long long ul = ulong_const; - half h = half_const; - float f = float_const; - double d = double_const; - E e = enum_const; -} -//. 
-// CHECK: @_ZL10bool_const = internal addrspace(10) global i32 0, align 4 -// CHECK: @_ZL11short_const = internal addrspace(10) global i16 0, align 2 -// CHECK: @_ZL9int_const = internal addrspace(10) global i32 0, align 4 -// CHECK: @_ZL10long_const = internal addrspace(10) global i64 0, align 8 -// CHECK: @_ZL12ushort_const = internal addrspace(10) global i16 0, align 2 -// CHECK: @_ZL10uint_const = internal addrspace(10) global i32 0, align 4 -// CHECK: @_ZL11ulong_const = internal addrspace(10) global i64 0, align 8 -// CHECK: @_ZL10half_const = internal addrspace(10) global float 0.000000e+00, align 4 -// CHECK: @_ZL11float_const = internal addrspace(10) global float 0.000000e+00, align 4 -// CHECK: @_ZL12double_const = internal addrspace(10) global double 0.000000e+00, align 8 -// CHECK: @_ZL10enum_const = internal addrspace(10) global i32 0, align 4 -//. -// CHECK-LABEL: define internal spir_func void @_Z4mainv( -// CHECK-SAME: ) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() -// CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[S:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[L:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[US:%.*]] = alloca i16, align 2 -// CHECK-NEXT: [[UI:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[UL:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[H:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[F:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[D:%.*]] = alloca double, align 8 -// CHECK-NEXT: [[E:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(10) @_ZL10bool_const, align 4 -// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i32 [[TMP1]] to i1 -// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i32 -// CHECK-NEXT: store i32 [[STOREDV]], ptr [[B]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(10) @_ZL11short_const, align 2 -// CHECK-NEXT: store i16 
[[TMP2]], ptr [[S]], align 2 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(10) @_ZL9int_const, align 4 -// CHECK-NEXT: store i32 [[TMP3]], ptr [[I]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(10) @_ZL10long_const, align 8 -// CHECK-NEXT: store i64 [[TMP4]], ptr [[L]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(10) @_ZL12ushort_const, align 2 -// CHECK-NEXT: store i16 [[TMP5]], ptr [[US]], align 2 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(10) @_ZL10uint_const, align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr [[UI]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr addrspace(10) @_ZL11ulong_const, align 8 -// CHECK-NEXT: store i64 [[TMP7]], ptr [[UL]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr addrspace(10) @_ZL10half_const, align 4 -// CHECK-NEXT: store float [[TMP8]], ptr [[H]], align 4 -// CHECK-NEXT: [[TMP9:%.*]] = load float, ptr addrspace(10) @_ZL11float_const, align 4 -// CHECK-NEXT: store float [[TMP9]], ptr [[F]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = load double, ptr addrspace(10) @_ZL12double_const, align 8 -// CHECK-NEXT: store double [[TMP10]], ptr [[D]], align 8 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(10) @_ZL10enum_const, align 4 -// CHECK-NEXT: store i32 [[TMP11]], ptr [[E]], align 4 -// CHECK-NEXT: ret void -// -// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init( -// CHECK-SAME: ) #[[ATTR3:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() -// CHECK-NEXT: [[TMP1:%.*]] = call i1 @_Z20__spirv_SpecConstantib(i32 1, i1 true) -// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP1]] to i32 -// CHECK-NEXT: store i32 [[STOREDV]], ptr addrspace(10) @_ZL10bool_const, align 4 -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.1( -// CHECK-SAME: ) #[[ATTR3]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = call token 
@llvm.experimental.convergence.entry() -// CHECK-NEXT: [[TMP1:%.*]] = call i16 @_Z20__spirv_SpecConstantis(i32 1, i16 4) -// CHECK-NEXT: store i16 [[TMP1]], ptr addrspace(10) @_ZL11short_const, align 2 -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.2( -// CHECK-SAME: ) #[[ATTR3]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @_Z20__spirv_SpecConstantii(i32 3, i32 5) -// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(10) @_ZL9int_const, align 4 -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.3( -// CHECK-SAME: ) #[[ATTR3]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() -// CHECK-NEXT: [[TMP1:%.*]] = call i64 @_Z20__spirv_SpecConstantix(i32 4, i64 8) -// CHECK-NEXT: store i64 [[TMP1]], ptr addrspace(10) @_ZL10long_const, align 8 -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.4( -// CHECK-SAME: ) #[[ATTR3]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() -// CHECK-NEXT: [[TMP1:%.*]] = call i16 @_Z20__spirv_SpecConstantit(i32 5, i16 10) -// CHECK-NEXT: store i16 [[TMP1]], ptr addrspace(10) @_ZL12ushort_const, align 2 -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.5( -// CHECK-SAME: ) #[[ATTR3]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @_Z20__spirv_SpecConstantij(i32 6, i32 12) -// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(10) @_ZL10uint_const, align 4 -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.6( -// CHECK-SAME: ) #[[ATTR3]] { -// 
CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() -// CHECK-NEXT: [[TMP1:%.*]] = call i64 @_Z20__spirv_SpecConstantiy(i32 7, i64 25) -// CHECK-NEXT: store i64 [[TMP1]], ptr addrspace(10) @_ZL11ulong_const, align 8 -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.7( -// CHECK-SAME: ) #[[ATTR3]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() -// CHECK-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn float @_Z20__spirv_SpecConstantiDh(i32 8, float 0x4044333340000000) -// CHECK-NEXT: store float [[TMP1]], ptr addrspace(10) @_ZL10half_const, align 4 -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.8( -// CHECK-SAME: ) #[[ATTR3]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() -// CHECK-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn float @_Z20__spirv_SpecConstantif(i32 8, float 5.050000e+01) -// CHECK-NEXT: store float [[TMP1]], ptr addrspace(10) @_ZL11float_const, align 4 -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.9( -// CHECK-SAME: ) #[[ATTR3]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() -// CHECK-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz arcp afn double @_Z20__spirv_SpecConstantid(i32 9, double 0x40590CCCC0000000) -// CHECK-NEXT: store double [[TMP1]], ptr addrspace(10) @_ZL12double_const, align 8 -// CHECK-NEXT: ret void -// -// -// CHECK-LABEL: define internal spir_func void @__cxx_global_var_init.10( -// CHECK-SAME: ) #[[ATTR3]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @_Z20__spirv_SpecConstantii(i32 10, i32 
30) -// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(10) @_ZL10enum_const, align 4 -// CHECK-NEXT: ret void diff --git a/clang/test/SemaHLSL/vk.spec-constant.error.hlsl b/clang/test/SemaHLSL/vk.spec-constant.error.hlsl deleted file mode 100644 index 24873d272a54..000000000000 --- a/clang/test/SemaHLSL/vk.spec-constant.error.hlsl +++ /dev/null @@ -1,37 +0,0 @@ -// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan1.3-compute -verify %s -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.8-compute -verify %s - -#ifndef __spirv__ -// expected-warning@+2{{'constant_id' attribute ignored}} -#endif -[[vk::constant_id(0)]] -const bool sc0 = true; - -#ifdef __spirv__ -// expected-error@+2{{variable with 'vk::constant_id' attribute must be a const int/float/enum/bool and be initialized with a literal}} -[[vk::constant_id(1)]] -const bool sc1 = sc0; // error - -// expected-warning@+1{{'constant_id' attribute only applies to external global variables}} -[[vk::constant_id(2)]] -static const bool sc2 = false; // error - -// expected-error@+2{{variable with 'vk::constant_id' attribute must be a const int/float/enum/bool and be initialized with a literal}} -[[vk::constant_id(3)]] -const bool sc3; // error - -// expected-error@+2{{variable with 'vk::constant_id' attribute must be a const int/float/enum/bool and be initialized with a literal}} -[[vk::constant_id(4)]] -bool sc4 = false; // error - -// expected-error@+2{{variable with 'vk::constant_id' attribute must be a const int/float/enum/bool and be initialized with a literal}} -[[vk::constant_id(5)]] -const int2 sc5 = {0,0}; // error - -[numthreads(1,1,1)] -void main() { - // expected-warning@+1{{'constant_id' attribute only applies to external global variables}} - [[vk::constant_id(6)]] - const bool sc6 = false; // error -} -#endif -- cgit v1.2.3 From d265105b8f50718a684d792d3ca957231d668533 Mon Sep 17 00:00:00 2001 From: David Justo Date: Wed, 18 Jun 2025 17:13:25 -0700 Subject: Augment 
`uncaught-exception.test` fuzzer test to be msvc-compatible (#125924) Today, the `uncaught-exception.test` fuzzer test checks for the string "libFuzzer: deadly signal" in the program output as the result of an uncaught exception. Although this is correct for `clang`, `msvc` reports a different error message: "libFuzzer: uncaught C++ exception". Since `msvc` reuses the `libFuzzer` infrastructure for ASan regression testing, it would help us greatly if the test handled the `msvc` divergence more gracefully. **This PR:** augments this test so check for a different string (namely "libFuzzer: uncaught C++ exception") if the compiler target matches the `msvc` naming scheme. I understand if this is outside the scope of support for LLVM as well, and I'm also open for different approaches here. Thanks! --- compiler-rt/test/fuzzer/uncaught-exception.test | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/compiler-rt/test/fuzzer/uncaught-exception.test b/compiler-rt/test/fuzzer/uncaught-exception.test index b055c88f6d90..d1b98cfb7c74 100644 --- a/compiler-rt/test/fuzzer/uncaught-exception.test +++ b/compiler-rt/test/fuzzer/uncaught-exception.test @@ -4,7 +4,10 @@ REQUIRES: windows RUN: %cpp_compiler %S/UncaughtException.cpp -o %t-UncaughtException -RUN: not %run %t-UncaughtException 2>&1 | FileCheck %s +# Clang will fail the test with 'deadly signal', but other compilers may fail with different error messages. +# For example, msvc fails with 'uncaught C++ exception'. So the error we check depends on the compiler target. 
+RUN: not %run %t-UncaughtException 2>&1 | FileCheck %s --check-prefixes=CHECK-CRASH,%if target={{.*-windows-msvc.*}} %{CHECK-MSVC%} %else %{CHECK-ERROR%} -CHECK: ERROR: libFuzzer: deadly signal -CHECK: Test unit written to ./crash +CHECK-ERROR: ERROR: libFuzzer: deadly signal +CHECK-MSVC: ERROR: libFuzzer: uncaught C++ exception +CHECK-CRASH: Test unit written to ./crash -- cgit v1.2.3 From bc8908a4e93b0641e1c17f408885c8aebb308bbe Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 19 Jun 2025 09:36:10 +0900 Subject: ARM: Move declaration of supportSplitCSR to be public (#144679) This is an implementation of a public method from the base class, so it should also be public. Avoids unrelated diff in a future patch. --- llvm/lib/Target/ARM/ARMISelLowering.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 87710ee29a24..357ca9ea5d20 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -716,6 +716,11 @@ class VectorType; return true; } + bool supportSplitCSR(MachineFunction *MF) const override { + return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction().hasFnAttribute(Attribute::NoUnwind); + } + bool hasStandaloneRem(EVT VT) const override { return HasStandaloneRem; } @@ -914,11 +919,6 @@ class VectorType; SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal, bool isCmseNSCall) const; - bool supportSplitCSR(MachineFunction *MF) const override { - return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getFunction().hasFnAttribute(Attribute::NoUnwind); - } - void initializeSplitCSR(MachineBasicBlock *Entry) const override; void insertCopiesSplitCSR( MachineBasicBlock *Entry, -- cgit v1.2.3 From 874a02f05b6ebb4b5dbe0ab09beb9c3d5b36e237 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 19 Jun 2025 09:38:22 +0900 Subject: ARM: Move ABI 
helpers from Subtarget to TargetMachine (#144680) These are module level concepts, and attaching them to the function level subtarget is confusing. Similarly these other helpers that only operate on the triple should also be removed from the subtarget. --- llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 6 +++++- llvm/lib/Target/ARM/ARMAsmPrinter.h | 2 ++ llvm/lib/Target/ARM/ARMFastISel.cpp | 15 ++++++++------- llvm/lib/Target/ARM/ARMISelLowering.cpp | 27 +++++++++++++++++---------- llvm/lib/Target/ARM/ARMISelLowering.h | 3 +++ llvm/lib/Target/ARM/ARMSubtarget.cpp | 20 ++------------------ llvm/lib/Target/ARM/ARMSubtarget.h | 6 ------ llvm/lib/Target/ARM/ARMTargetMachine.cpp | 16 ++++++++++++++++ llvm/lib/Target/ARM/ARMTargetMachine.h | 4 ++++ 9 files changed, 57 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 1443747709b7..2fd784373f4a 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -54,6 +54,10 @@ ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM, : AsmPrinter(TM, std::move(Streamer), ID), Subtarget(nullptr), AFI(nullptr), MCP(nullptr), InConstantPool(false), OptimizationGoals(-1) {} +const ARMBaseTargetMachine &ARMAsmPrinter::getTM() const { + return static_cast(TM); +} + void ARMAsmPrinter::emitFunctionBodyEnd() { // Make sure to terminate any constant pools that were at the end // of the function. @@ -750,7 +754,7 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1); // Hard float. Use both S and D registers and conform to AAPCS-VFP. 
- if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) + if (getTM().isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS); // FIXME: To support emitting this build attribute as GCC does, the diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.h b/llvm/lib/Target/ARM/ARMAsmPrinter.h index 8a7ec4e2bcf2..2b067c753264 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.h +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.h @@ -76,6 +76,8 @@ public: return "ARM Assembly Printer"; } + const ARMBaseTargetMachine &getTM() const; + void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &O) override; diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp index 765c65c5fcb2..06499a3945ee 100644 --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -20,6 +20,7 @@ #include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" +#include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "Utils/ARMBaseInfo.h" @@ -134,9 +135,9 @@ class ARMFastISel final : public FastISel { /// make the right decision when generating code for different targets. const ARMSubtarget *Subtarget; Module &M; - const TargetMachine &TM; - const TargetInstrInfo &TII; - const TargetLowering &TLI; + const ARMBaseInstrInfo &TII; + const ARMTargetLowering &TLI; + const ARMBaseTargetMachine &TM; ARMFunctionInfo *AFI; // Convenience variables to avoid some queries. 
@@ -149,8 +150,8 @@ class ARMFastISel final : public FastISel { : FastISel(funcInfo, libInfo), Subtarget(&funcInfo.MF->getSubtarget()), M(const_cast(*funcInfo.Fn->getParent())), - TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()), - TLI(*Subtarget->getTargetLowering()) { + TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()), + TM(TLI.getTM()) { AFI = funcInfo.MF->getInfo(); isThumb2 = AFI->isThumbFunction(); Context = &funcInfo.Fn->getContext(); @@ -1893,7 +1894,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, report_fatal_error("Unsupported calling convention"); case CallingConv::Fast: if (Subtarget->hasVFP2Base() && !isVarArg) { - if (!Subtarget->isAAPCS_ABI()) + if (!TM.isAAPCS_ABI()) return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); // For AAPCS ABI targets, just use VFP variant of the calling convention. return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); @@ -1902,7 +1903,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::C: case CallingConv::CXX_FAST_TLS: // Use target triple & subtarget features to do actual dispatch. - if (Subtarget->isAAPCS_ABI()) { + if (TM.isAAPCS_ABI()) { if (Subtarget->hasFPRegs() && TM.Options.FloatABIType == FloatABI::Hard && !isVarArg) return (Return ? 
RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 05d8a1190ada..6e653687dbcb 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -499,9 +499,16 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); } -ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, +const ARMBaseTargetMachine &ARMTargetLowering::getTM() const { + return static_cast(getTargetMachine()); +} + +ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, const ARMSubtarget &STI) - : TargetLowering(TM), Subtarget(&STI) { + : TargetLowering(TM_), Subtarget(&STI) { + + const auto &TM = static_cast(TM_); + RegInfo = Subtarget->getRegisterInfo(); Itins = Subtarget->getInstrItineraryData(); @@ -591,7 +598,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } // RTLIB - if (Subtarget->isAAPCS_ABI() && + if (TM.isAAPCS_ABI() && (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) { // clang-format off @@ -716,7 +723,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // non-watchos platforms, but are needed for some targets which use a // hard-float calling convention by default. if (!Subtarget->isTargetWatchABI()) { - if (Subtarget->isAAPCS_ABI()) { + if (TM.isAAPCS_ABI()) { setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); @@ -2070,7 +2077,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, return isVarArg ? 
CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; case CallingConv::C: case CallingConv::Tail: - if (!Subtarget->isAAPCS_ABI()) + if (!getTM().isAAPCS_ABI()) return CallingConv::ARM_APCS; else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && @@ -2080,12 +2087,12 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, return CallingConv::ARM_AAPCS; case CallingConv::Fast: case CallingConv::CXX_FAST_TLS: - if (!Subtarget->isAAPCS_ABI()) { + if (!getTM().isAAPCS_ABI()) { if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg) return CallingConv::Fast; return CallingConv::ARM_APCS; - } else if (Subtarget->hasVFP2Base() && - !Subtarget->isThumb1Only() && !isVarArg) + } else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && + !isVarArg) return CallingConv::ARM_AAPCS_VFP; else return CallingConv::ARM_AAPCS; @@ -3273,7 +3280,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SDValue Arg = OutVals[realRVLocIdx]; bool ReturnF16 = false; - if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) { + if (Subtarget->hasFullFP16() && getTM().isTargetHardFloat()) { // Half-precision return values can be returned like this: // // t11 f16 = fadd ... @@ -9937,7 +9944,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { auto &DL = DAG.getDataLayout(); ArgListTy Args; - bool ShouldUseSRet = Subtarget->isAPCS_ABI(); + bool ShouldUseSRet = getTM().isAPCS_ABI(); SDValue SRet; if (ShouldUseSRet) { // Create stack object for sret. 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 357ca9ea5d20..9c330e60a7d5 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -34,6 +34,7 @@ namespace llvm { +class ARMBaseTargetMachine; class ARMSubtarget; class DataLayout; class FastISel; @@ -414,6 +415,8 @@ class VectorType; explicit ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI); + const ARMBaseTargetMachine &getTM() const; + unsigned getJumpTableEncoding() const override; bool useSoftFloat() const override; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 91d385a0b595..abca4bb947bc 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -201,9 +201,9 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isTargetWindows()) NoARM = true; - if (isAAPCS_ABI()) + if (TM.isAAPCS_ABI()) stackAlignment = Align(8); - if (isTargetNaCl() || isAAPCS16_ABI()) + if (isTargetNaCl() || TM.isAAPCS16_ABI()) stackAlignment = Align(16); // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo:: @@ -320,22 +320,6 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { } } -bool ARMSubtarget::isTargetHardFloat() const { return TM.isTargetHardFloat(); } - -bool ARMSubtarget::isAPCS_ABI() const { - assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); - return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_APCS; -} -bool ARMSubtarget::isAAPCS_ABI() const { - assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); - return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS || - TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; -} -bool ARMSubtarget::isAAPCS16_ABI() const { - assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); - return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; -} - bool ARMSubtarget::isROPI() const { return 
TM.getRelocationModel() == Reloc::ROPI || TM.getRelocationModel() == Reloc::ROPI_RWPI; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 890a22f574a6..7893796e313b 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -360,8 +360,6 @@ public: return TargetTriple.isTargetEHABICompatible(); } - bool isTargetHardFloat() const; - bool isReadTPSoft() const { return !(isReadTPTPIDRURW() || isReadTPTPIDRURO() || isReadTPTPIDRPRW()); } @@ -370,10 +368,6 @@ public: bool isXRaySupported() const override; - bool isAPCS_ABI() const; - bool isAAPCS_ABI() const; - bool isAAPCS16_ABI() const; - bool isROPI() const; bool isRWPI() const; diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index fee77a44e5e8..0d947d924eb6 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -271,6 +271,22 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; +bool ARMBaseTargetMachine::isAPCS_ABI() const { + assert(TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); + return TargetABI == ARMBaseTargetMachine::ARM_ABI_APCS; +} + +bool ARMBaseTargetMachine::isAAPCS_ABI() const { + assert(TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); + return TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS || + TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; +} + +bool ARMBaseTargetMachine::isAAPCS16_ABI() const { + assert(TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); + return TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; +} + MachineFunctionInfo *ARMBaseTargetMachine::createMachineFunctionInfo( BumpPtrAllocator &Allocator, const Function &F, const TargetSubtargetInfo *STI) const { diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h index 99fd817c81f8..513fe713c0bc 100644 --- 
a/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -66,6 +66,10 @@ public: return TLOF.get(); } + bool isAPCS_ABI() const; + bool isAAPCS_ABI() const; + bool isAAPCS16_ABI() const; + bool isTargetHardFloat() const { return TargetTriple.getEnvironment() == Triple::GNUEABIHF || TargetTriple.getEnvironment() == Triple::GNUEABIHFT64 || -- cgit v1.2.3 From 6e5ee4aa98f1dc16e6a75a7fd298a59f1edd1c6e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 18 Jun 2025 17:48:10 -0700 Subject: [RISCV] Save vector registers in interrupt handler. (#143808) Corresponding gcc bug report https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110665 The generated code is pretty awful. --- llvm/lib/Target/RISCV/RISCVCallingConv.td | 30 +- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 10 + llvm/test/CodeGen/RISCV/interrupt-attr.ll | 6820 ++++++++++++++++++++ .../CodeGen/RISCV/rvv/interrupt-attr-nocall.ll | 502 ++ 4 files changed, 7360 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td index 98e05b7f8eca..cbf039edec27 100644 --- a/llvm/lib/Target/RISCV/RISCVCallingConv.td +++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td @@ -56,14 +56,40 @@ def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, (sequence "F%u_D", 0, 31))>; +// Same as CSR_Interrupt, but including all vector registers. +def CSR_XLEN_V_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, + (sequence "V%u", 0, 31))>; + +// Same as CSR_Interrupt, but including all 32-bit FP registers and all vector +// registers. +def CSR_XLEN_F32_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F32_Interrupt, + (sequence "V%u", 0, 31))>; + +// Same as CSR_Interrupt, but including all 64-bit FP registers and all vector +// registers. 
+def CSR_XLEN_F64_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F64_Interrupt, + (sequence "V%u", 0, 31))>; + // Same as CSR_Interrupt, but excluding X16-X31. def CSR_Interrupt_RVE : CalleeSavedRegs<(sub CSR_Interrupt, (sequence "X%u", 16, 31))>; // Same as CSR_XLEN_F32_Interrupt, but excluding X16-X31. def CSR_XLEN_F32_Interrupt_RVE: CalleeSavedRegs<(sub CSR_XLEN_F32_Interrupt, - (sequence "X%u", 16, 31))>; + (sequence "X%u", 16, 31))>; // Same as CSR_XLEN_F64_Interrupt, but excluding X16-X31. def CSR_XLEN_F64_Interrupt_RVE: CalleeSavedRegs<(sub CSR_XLEN_F64_Interrupt, - (sequence "X%u", 16, 31))>; + (sequence "X%u", 16, 31))>; + +// Same as CSR_XLEN_V_Interrupt, but excluding X16-X31. +def CSR_XLEN_V_Interrupt_RVE: CalleeSavedRegs<(sub CSR_XLEN_V_Interrupt, + (sequence "X%u", 16, 31))>; + +// Same as CSR_XLEN_F32_V_Interrupt, but excluding X16-X31. +def CSR_XLEN_F32_V_Interrupt_RVE: CalleeSavedRegs<(sub CSR_XLEN_F32_V_Interrupt, + (sequence "X%u", 16, 31))>; + +// Same as CSR_XLEN_F64_V_Interrupt, but excluding X16-X31. +def CSR_XLEN_F64_V_Interrupt_RVE: CalleeSavedRegs<(sub CSR_XLEN_F64_V_Interrupt, + (sequence "X%u", 16, 31))>; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 112142e1ef2f..7fdbf4be1ed1 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -69,6 +69,16 @@ RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (MF->getFunction().getCallingConv() == CallingConv::GHC) return CSR_NoRegs_SaveList; if (MF->getFunction().hasFnAttribute("interrupt")) { + if (Subtarget.hasVInstructions()) { + if (Subtarget.hasStdExtD()) + return Subtarget.hasStdExtE() ? CSR_XLEN_F64_V_Interrupt_RVE_SaveList + : CSR_XLEN_F64_V_Interrupt_SaveList; + if (Subtarget.hasStdExtF()) + return Subtarget.hasStdExtE() ? CSR_XLEN_F32_V_Interrupt_RVE_SaveList + : CSR_XLEN_F32_V_Interrupt_SaveList; + return Subtarget.hasStdExtE() ? 
CSR_XLEN_V_Interrupt_RVE_SaveList + : CSR_XLEN_V_Interrupt_SaveList; + } if (Subtarget.hasStdExtD()) return Subtarget.hasStdExtE() ? CSR_XLEN_F64_Interrupt_RVE_SaveList : CSR_XLEN_F64_Interrupt_SaveList; diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr.ll b/llvm/test/CodeGen/RISCV/interrupt-attr.ll index ba20ba77e6b2..e278b8d0b53b 100644 --- a/llvm/test/CodeGen/RISCV/interrupt-attr.ll +++ b/llvm/test/CodeGen/RISCV/interrupt-attr.ll @@ -19,6 +19,13 @@ ; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV32E ; RUN: llc -mtriple riscv32-unknown-elf -mattr=+e,+f -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV32E-F + +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+zve32x -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV32-V +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+zve32x,+f -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV32-FV +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+zve32x,+f,+d -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV32-FDV ; ; RUN: llc -mtriple riscv64-unknown-elf -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64 @@ -42,6 +49,13 @@ ; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV64E-F ; RUN: llc -mtriple riscv64-unknown-elf -mattr=+e,+f,+d -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV64E-FD +; +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+zve32x -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-V +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+zve32x,+f -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-FV +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+zve32x,+f,+d -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-FDV ; ; Checking for special return instructions (sret, mret). 
@@ -757,6 +771,1697 @@ define void @foo_with_call() #1 { ; CHECK-RV32E-F-NEXT: addi sp, sp, 168 ; CHECK-RV32E-F-NEXT: mret ; +; CHECK-RV32-V-LABEL: foo_with_call: +; CHECK-RV32-V: # %bb.0: +; CHECK-RV32-V-NEXT: addi sp, sp, -80 +; CHECK-RV32-V-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t0, 72(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t1, 68(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t2, 64(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a0, 60(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a1, 56(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a2, 52(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a3, 48(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a4, 44(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a5, 40(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a6, 36(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a7, 32(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t3, 28(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t4, 24(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t5, 20(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t6, 16(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 5 +; CHECK-RV32-V-NEXT: sub sp, sp, a0 +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 5 +; CHECK-RV32-V-NEXT: sub a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; 
CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v6, (a0) # 
vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; 
CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 4 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 4 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 4 +; CHECK-RV32-V-NEXT: sub a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 
+; CHECK-RV32-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 3 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 3 +; CHECK-RV32-V-NEXT: sub a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; 
CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: addi a0, sp, 16 +; CHECK-RV32-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: call otherfoo +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 5 +; CHECK-RV32-V-NEXT: sub a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, 
a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 
+; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v11, (a0) 
# vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 4 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 4 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 4 +; CHECK-RV32-V-NEXT: sub a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 
+; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 3 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 3 +; CHECK-RV32-V-NEXT: sub a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, 
vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: add a0, sp, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, 16 +; CHECK-RV32-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: addi a0, sp, 16 +; CHECK-RV32-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 5 +; CHECK-RV32-V-NEXT: add sp, sp, a0 +; CHECK-RV32-V-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t0, 72(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t1, 68(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t2, 64(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a1, 56(sp) # 4-byte Folded 
Reload +; CHECK-RV32-V-NEXT: lw a2, 52(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a3, 48(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a4, 44(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a5, 40(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a6, 36(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a7, 32(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t3, 28(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t4, 24(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t5, 20(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t6, 16(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: addi sp, sp, 80 +; CHECK-RV32-V-NEXT: mret +; +; CHECK-RV32-FV-LABEL: foo_with_call: +; CHECK-RV32-FV: # %bb.0: +; CHECK-RV32-FV-NEXT: addi sp, sp, -160 +; CHECK-RV32-FV-NEXT: sw ra, 156(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t0, 152(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t1, 148(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t2, 144(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a0, 140(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a1, 136(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a2, 132(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a3, 128(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a4, 124(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a5, 120(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a6, 116(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a7, 112(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t3, 108(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t4, 104(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t5, 100(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t6, 96(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft0, 92(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft1, 88(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft2, 84(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft3, 80(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: 
fsw ft4, 76(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft5, 72(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft6, 68(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft7, 64(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa0, 60(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa1, 56(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa2, 52(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa3, 48(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa4, 44(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa5, 40(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa6, 36(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa7, 32(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft8, 28(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft9, 24(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft10, 20(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft11, 16(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 5 +; CHECK-RV32-FV-NEXT: sub sp, sp, a0 +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 5 +; CHECK-RV32-FV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 
+; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: mv a1, 
a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, 
a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 4 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v18, (a0) # vscale x 
8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; 
CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: addi a0, sp, 16 +; CHECK-RV32-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: call otherfoo +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 5 +; CHECK-RV32-FV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: 
slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; 
CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, 
a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 4 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; 
CHECK-RV32-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr 
a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: add a0, sp, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: addi a0, sp, 16 +; CHECK-RV32-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 5 +; CHECK-RV32-FV-NEXT: add sp, sp, a0 +; CHECK-RV32-FV-NEXT: lw ra, 156(sp) # 
4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t0, 152(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t1, 148(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t2, 144(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a0, 140(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a1, 136(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a2, 132(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a3, 128(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a4, 124(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a5, 120(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a6, 116(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a7, 112(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t3, 108(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t4, 104(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t5, 100(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t6, 96(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft0, 92(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft1, 88(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft2, 84(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft3, 80(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft4, 76(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft5, 72(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft6, 68(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft7, 64(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa0, 60(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa1, 56(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa2, 52(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa3, 48(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa4, 44(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa5, 40(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa6, 36(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa7, 32(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft8, 28(sp) # 4-byte Folded Reload +; 
CHECK-RV32-FV-NEXT: flw ft9, 24(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft10, 20(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft11, 16(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: addi sp, sp, 160 +; CHECK-RV32-FV-NEXT: mret +; +; CHECK-RV32-FDV-LABEL: foo_with_call: +; CHECK-RV32-FDV: # %bb.0: +; CHECK-RV32-FDV-NEXT: addi sp, sp, -240 +; CHECK-RV32-FDV-NEXT: sw ra, 236(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t0, 232(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t1, 228(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t2, 224(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a0, 220(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a1, 216(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a2, 212(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a3, 208(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a4, 204(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a5, 200(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a6, 196(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a7, 192(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t3, 188(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t4, 184(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t5, 180(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t6, 176(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft0, 168(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft1, 160(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft2, 152(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft3, 144(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft4, 136(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft5, 128(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft6, 120(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft7, 112(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa0, 104(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa1, 96(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd 
fa2, 88(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa3, 80(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa4, 72(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa5, 64(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa6, 56(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa7, 48(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft8, 40(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft9, 32(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft10, 24(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft11, 16(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 5 +; CHECK-RV32-FDV-NEXT: sub sp, sp, a0 +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 5 +; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli 
a0, a0, 2 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; 
CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: 
slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 4 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; 
CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v25, (a0) # vscale x 
8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: addi a0, sp, 16 +; CHECK-RV32-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: call otherfoo +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 5 +; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, 
sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; 
CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: 
slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 4 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, 
sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; 
CHECK-RV32-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: add a0, sp, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: addi a0, sp, 16 +; CHECK-RV32-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; 
CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 5 +; CHECK-RV32-FDV-NEXT: add sp, sp, a0 +; CHECK-RV32-FDV-NEXT: lw ra, 236(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t0, 232(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t1, 228(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t2, 224(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a0, 220(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a1, 216(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a2, 212(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a3, 208(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a4, 204(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a5, 200(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a6, 196(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a7, 192(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t3, 188(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t4, 184(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t5, 180(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t6, 176(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft0, 168(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft1, 160(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft2, 152(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft3, 144(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft4, 136(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft5, 128(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft6, 120(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft7, 112(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa0, 104(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa1, 96(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa2, 88(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa3, 80(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa4, 72(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa5, 64(sp) # 8-byte Folded Reload +; 
CHECK-RV32-FDV-NEXT: fld fa6, 56(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa7, 48(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft8, 40(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft9, 32(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft10, 24(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft11, 16(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: addi sp, sp, 240 +; CHECK-RV32-FDV-NEXT: mret +; ; CHECK-RV64-LABEL: foo_with_call: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: addi sp, sp, -128 @@ -1530,6 +3235,1697 @@ define void @foo_with_call() #1 { ; CHECK-RV64E-FD-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload ; CHECK-RV64E-FD-NEXT: addi sp, sp, 336 ; CHECK-RV64E-FD-NEXT: mret +; +; CHECK-RV64-V-LABEL: foo_with_call: +; CHECK-RV64-V: # %bb.0: +; CHECK-RV64-V-NEXT: addi sp, sp, -144 +; CHECK-RV64-V-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t0, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t1, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t2, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a1, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a2, 88(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a3, 80(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a4, 72(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a5, 64(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a6, 56(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a7, 48(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t3, 40(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t4, 32(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t5, 24(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t6, 16(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 5 +; CHECK-RV64-V-NEXT: sub sp, sp, a0 +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 5 +; CHECK-RV64-V-NEXT: sub a0, a1, a0 +; 
CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; 
CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; 
CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 4 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 4 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 4 +; CHECK-RV64-V-NEXT: sub a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; 
CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 3 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; 
CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 3 +; CHECK-RV64-V-NEXT: sub a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: addi a0, sp, 16 +; CHECK-RV64-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill 
+; CHECK-RV64-V-NEXT: call otherfoo +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 5 +; CHECK-RV64-V-NEXT: sub a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v4, 
(a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; 
CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 4 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 4 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 4 +; CHECK-RV64-V-NEXT: sub a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 
16 +; CHECK-RV64-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 3 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 
+; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 3 +; CHECK-RV64-V-NEXT: sub a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: add a0, sp, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, 16 +; CHECK-RV64-V-NEXT: 
vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: addi a0, sp, 16 +; CHECK-RV64-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 5 +; CHECK-RV64-V-NEXT: add sp, sp, a0 +; CHECK-RV64-V-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t0, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t1, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t2, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a0, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a1, 96(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a2, 88(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a3, 80(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a4, 72(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a5, 64(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a6, 56(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a7, 48(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t3, 40(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t4, 32(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t5, 24(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t6, 16(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: addi sp, sp, 144 +; CHECK-RV64-V-NEXT: mret +; +; CHECK-RV64-FV-LABEL: foo_with_call: +; CHECK-RV64-FV: # %bb.0: +; CHECK-RV64-FV-NEXT: addi sp, sp, -224 +; CHECK-RV64-FV-NEXT: sd ra, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t0, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t1, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t2, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a0, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a1, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a2, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a3, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a4, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a5, 144(sp) # 8-byte Folded Spill +; 
CHECK-RV64-FV-NEXT: sd a6, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a7, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t3, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t4, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t5, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t6, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft0, 92(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft1, 88(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft2, 84(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft3, 80(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft4, 76(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft5, 72(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft6, 68(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft7, 64(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa0, 60(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa1, 56(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa2, 52(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa3, 48(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa4, 44(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa5, 40(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa6, 36(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa7, 32(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft8, 28(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft9, 24(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft10, 20(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft11, 16(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 5 +; CHECK-RV64-FV-NEXT: sub sp, sp, a0 +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 5 +; CHECK-RV64-FV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; 
CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, 
a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi 
a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 4 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: 
slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; 
CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: addi a0, sp, 16 +; CHECK-RV64-FV-NEXT: 
vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: call otherfoo +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 5 +; CHECK-RV64-FV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; 
CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, 
sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 4 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v15, (a0) # vscale x 
8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; 
CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; 
CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: add a0, sp, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: addi a0, sp, 16 +; CHECK-RV64-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 5 +; CHECK-RV64-FV-NEXT: add sp, sp, a0 +; CHECK-RV64-FV-NEXT: ld ra, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t0, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t1, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t2, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a1, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a2, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a3, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a4, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a5, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a6, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a7, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t3, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t4, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t5, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t6, 96(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft0, 92(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft1, 88(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft2, 84(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft3, 80(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft4, 76(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft5, 72(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft6, 68(sp) # 4-byte Folded 
Reload +; CHECK-RV64-FV-NEXT: flw ft7, 64(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa0, 60(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa1, 56(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa2, 52(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa3, 48(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa4, 44(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa5, 40(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa6, 36(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa7, 32(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft8, 28(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft9, 24(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft10, 20(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft11, 16(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: addi sp, sp, 224 +; CHECK-RV64-FV-NEXT: mret +; +; CHECK-RV64-FDV-LABEL: foo_with_call: +; CHECK-RV64-FDV: # %bb.0: +; CHECK-RV64-FDV-NEXT: addi sp, sp, -304 +; CHECK-RV64-FDV-NEXT: sd ra, 296(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t0, 288(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t1, 280(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t2, 272(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a0, 264(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a1, 256(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a2, 248(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a3, 240(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a4, 232(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a5, 224(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a6, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a7, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t3, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t4, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t5, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t6, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd 
ft0, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft1, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft2, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft3, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft4, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft5, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft6, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft7, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa0, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa1, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa2, 88(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa3, 80(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa4, 72(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa5, 64(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa6, 56(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa7, 48(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft8, 40(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft9, 32(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft10, 24(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft11, 16(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 5 +; CHECK-RV64-FDV-NEXT: sub sp, sp, a0 +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 5 +; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; 
CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add 
a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; 
CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 4 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; 
CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: 
vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: addi a0, sp, 16 +; CHECK-RV64-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: call otherfoo +; 
CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 5 +; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; 
CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: 
add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 4 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 
+; CHECK-RV64-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; 
CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; 
CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: add a0, sp, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: addi a0, sp, 16 +; CHECK-RV64-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 5 +; CHECK-RV64-FDV-NEXT: add sp, sp, a0 +; CHECK-RV64-FDV-NEXT: ld ra, 296(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t0, 288(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t1, 280(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t2, 272(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a0, 264(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a1, 256(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a2, 248(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a3, 240(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a4, 232(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a5, 224(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a6, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a7, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t3, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t4, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t5, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t6, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft0, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft1, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft2, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft3, 144(sp) # 8-byte 
Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft4, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft5, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft6, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft7, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa0, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa1, 96(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa2, 88(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa3, 80(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa4, 72(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa5, 64(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa6, 56(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa7, 48(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft8, 40(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft9, 32(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft10, 24(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft11, 16(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: addi sp, sp, 304 +; CHECK-RV64-FDV-NEXT: mret %call = call i32 @otherfoo() ret void } @@ -2249,6 +5645,1718 @@ define void @foo_fp_with_call() #2 { ; CHECK-RV32E-F-NEXT: addi sp, sp, 172 ; CHECK-RV32E-F-NEXT: mret ; +; CHECK-RV32-V-LABEL: foo_fp_with_call: +; CHECK-RV32-V: # %bb.0: +; CHECK-RV32-V-NEXT: addi sp, sp, -80 +; CHECK-RV32-V-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t0, 72(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t1, 68(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t2, 64(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw s0, 60(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a0, 56(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a1, 52(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a2, 48(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a3, 44(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a4, 40(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a5, 36(sp) # 4-byte 
Folded Spill +; CHECK-RV32-V-NEXT: sw a6, 32(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw a7, 28(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t3, 24(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t4, 20(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t5, 16(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: sw t6, 12(sp) # 4-byte Folded Spill +; CHECK-RV32-V-NEXT: addi s0, sp, 80 +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 5 +; CHECK-RV32-V-NEXT: sub sp, sp, a0 +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, 
vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 3 +; CHECK-RV32-V-NEXT: sub a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 3 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, 
a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 4 +; CHECK-RV32-V-NEXT: sub a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 4 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 4 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: mv 
a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, 
s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli 
a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 5 +; CHECK-RV32-V-NEXT: sub a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 5 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-V-NEXT: call otherfoo +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; 
CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 3 +; CHECK-RV32-V-NEXT: sub a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 3 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload 
+; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 4 +; CHECK-RV32-V-NEXT: sub a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 4 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 4 +; CHECK-RV32-V-NEXT: add a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli 
a0, a0, 3 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; 
CHECK-RV32-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 3 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 2 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 
+; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: mv a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a1, a1, a0 +; CHECK-RV32-V-NEXT: slli a0, a0, 1 +; CHECK-RV32-V-NEXT: add a0, a0, a1 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a1, a0, 5 +; CHECK-RV32-V-NEXT: sub a0, a1, a0 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: csrr a0, vlenb +; CHECK-RV32-V-NEXT: slli a0, a0, 5 +; CHECK-RV32-V-NEXT: sub a0, s0, a0 +; CHECK-RV32-V-NEXT: addi a0, a0, -80 +; CHECK-RV32-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-V-NEXT: addi sp, s0, -80 +; CHECK-RV32-V-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t0, 72(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t1, 68(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t2, 64(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw s0, 60(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a1, 52(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a2, 48(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a3, 44(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a4, 40(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a5, 36(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a6, 32(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw a7, 28(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t3, 24(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t4, 20(sp) # 4-byte Folded Reload +; 
CHECK-RV32-V-NEXT: lw t5, 16(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: lw t6, 12(sp) # 4-byte Folded Reload +; CHECK-RV32-V-NEXT: addi sp, sp, 80 +; CHECK-RV32-V-NEXT: mret +; +; CHECK-RV32-FV-LABEL: foo_fp_with_call: +; CHECK-RV32-FV: # %bb.0: +; CHECK-RV32-FV-NEXT: addi sp, sp, -160 +; CHECK-RV32-FV-NEXT: sw ra, 156(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t0, 152(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t1, 148(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t2, 144(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw s0, 140(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a0, 136(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a1, 132(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a2, 128(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a3, 124(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a4, 120(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a5, 116(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a6, 112(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw a7, 108(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t3, 104(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t4, 100(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t5, 96(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: sw t6, 92(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft0, 88(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft1, 84(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft2, 80(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft3, 76(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft4, 72(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft5, 68(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft6, 64(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft7, 60(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa0, 56(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa1, 52(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa2, 48(sp) # 4-byte Folded Spill +; 
CHECK-RV32-FV-NEXT: fsw fa3, 44(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa4, 40(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa5, 36(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa6, 32(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw fa7, 28(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft8, 24(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft9, 20(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft10, 16(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: fsw ft11, 12(sp) # 4-byte Folded Spill +; CHECK-RV32-FV-NEXT: addi s0, sp, 160 +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 5 +; CHECK-RV32-FV-NEXT: sub sp, sp, a0 +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: 
slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; 
CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 4 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; 
CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: 
add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; 
CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 5 +; CHECK-RV32-FV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 5 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FV-NEXT: call otherfoo +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; 
CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; 
CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 4 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FV-NEXT: add a0, a1, a0 +; 
CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; 
CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v26, (a0) # vscale x 
8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: mv a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a1, a1, a0 +; CHECK-RV32-FV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FV-NEXT: add a0, a0, a1 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a1, a0, 5 +; CHECK-RV32-FV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: csrr a0, vlenb +; CHECK-RV32-FV-NEXT: slli a0, a0, 5 +; CHECK-RV32-FV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FV-NEXT: addi a0, a0, -160 +; CHECK-RV32-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FV-NEXT: addi sp, s0, -160 +; CHECK-RV32-FV-NEXT: lw ra, 156(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t0, 
152(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t1, 148(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t2, 144(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw s0, 140(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a0, 136(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a1, 132(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a2, 128(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a3, 124(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a4, 120(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a5, 116(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a6, 112(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw a7, 108(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t3, 104(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t4, 100(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t5, 96(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: lw t6, 92(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft0, 88(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft1, 84(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft2, 80(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft3, 76(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft4, 72(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft5, 68(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft6, 64(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft7, 60(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa0, 56(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa1, 52(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa2, 48(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa3, 44(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa4, 40(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa5, 36(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa6, 32(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw fa7, 28(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft8, 24(sp) # 4-byte Folded Reload 
+; CHECK-RV32-FV-NEXT: flw ft9, 20(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft10, 16(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: flw ft11, 12(sp) # 4-byte Folded Reload +; CHECK-RV32-FV-NEXT: addi sp, sp, 160 +; CHECK-RV32-FV-NEXT: mret +; +; CHECK-RV32-FDV-LABEL: foo_fp_with_call: +; CHECK-RV32-FDV: # %bb.0: +; CHECK-RV32-FDV-NEXT: addi sp, sp, -240 +; CHECK-RV32-FDV-NEXT: sw ra, 236(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t0, 232(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t1, 228(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t2, 224(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw s0, 220(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a0, 216(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a1, 212(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a2, 208(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a3, 204(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a4, 200(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a5, 196(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a6, 192(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw a7, 188(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t3, 184(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t4, 180(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t5, 176(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: sw t6, 172(sp) # 4-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft0, 160(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft1, 152(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft2, 144(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft3, 136(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft4, 128(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft5, 120(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft6, 112(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft7, 104(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa0, 96(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: 
fsd fa1, 88(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa2, 80(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa3, 72(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa4, 64(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa5, 56(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa6, 48(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd fa7, 40(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft8, 32(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft9, 24(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft10, 16(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: fsd ft11, 8(sp) # 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: addi s0, sp, 240 +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 5 +; CHECK-RV32-FDV-NEXT: sub sp, sp, a0 +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded 
Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; 
CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 4 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; 
CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; 
CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; 
CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 5 +; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 5 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-FDV-NEXT: call otherfoo +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 
+; CHECK-RV32-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; 
CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; 
CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 4 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV32-FDV-NEXT: add a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; 
CHECK-RV32-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; 
CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: mv a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a1, a1, a0 +; CHECK-RV32-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV32-FDV-NEXT: add a0, a0, a1 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; 
CHECK-RV32-FDV-NEXT: slli a1, a0, 5 +; CHECK-RV32-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: csrr a0, vlenb +; CHECK-RV32-FDV-NEXT: slli a0, a0, 5 +; CHECK-RV32-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV32-FDV-NEXT: addi a0, a0, -240 +; CHECK-RV32-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: addi sp, s0, -240 +; CHECK-RV32-FDV-NEXT: lw ra, 236(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t0, 232(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t1, 228(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t2, 224(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw s0, 220(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a0, 216(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a1, 212(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a2, 208(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a3, 204(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a4, 200(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a5, 196(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a6, 192(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw a7, 188(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t3, 184(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t4, 180(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t5, 176(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: lw t6, 172(sp) # 4-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft0, 160(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft1, 152(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft2, 144(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft3, 136(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft4, 128(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft5, 120(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft6, 112(sp) # 8-byte Folded 
Reload +; CHECK-RV32-FDV-NEXT: fld ft7, 104(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa0, 96(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa1, 88(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa2, 80(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa3, 72(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa4, 64(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa5, 56(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa6, 48(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld fa7, 40(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft8, 32(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft9, 24(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft10, 16(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: fld ft11, 8(sp) # 8-byte Folded Reload +; CHECK-RV32-FDV-NEXT: addi sp, sp, 240 +; CHECK-RV32-FDV-NEXT: mret +; ; CHECK-RV64-LABEL: foo_fp_with_call: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: addi sp, sp, -144 @@ -3052,6 +8160,1718 @@ define void @foo_fp_with_call() #2 { ; CHECK-RV64E-FD-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload ; CHECK-RV64E-FD-NEXT: addi sp, sp, 344 ; CHECK-RV64E-FD-NEXT: mret +; +; CHECK-RV64-V-LABEL: foo_fp_with_call: +; CHECK-RV64-V: # %bb.0: +; CHECK-RV64-V-NEXT: addi sp, sp, -160 +; CHECK-RV64-V-NEXT: sd ra, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t0, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t1, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t2, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd s0, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a1, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a2, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a3, 88(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a4, 80(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a5, 72(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd a6, 64(sp) # 8-byte Folded 
Spill +; CHECK-RV64-V-NEXT: sd a7, 56(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t3, 48(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t4, 40(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t5, 32(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: sd t6, 24(sp) # 8-byte Folded Spill +; CHECK-RV64-V-NEXT: addi s0, sp, 160 +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 5 +; CHECK-RV64-V-NEXT: sub sp, sp, a0 +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 3 +; 
CHECK-RV64-V-NEXT: sub a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 3 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v 
v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 4 +; CHECK-RV64-V-NEXT: sub a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 4 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 4 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: 
slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; 
CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 
1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 5 +; CHECK-RV64-V-NEXT: sub a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 5 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-V-NEXT: call otherfoo +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; 
CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 3 +; CHECK-RV64-V-NEXT: sub a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 3 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded 
Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 4 +; CHECK-RV64-V-NEXT: sub a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 4 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 4 +; CHECK-RV64-V-NEXT: add a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; 
CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi 
a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 3 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 2 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; 
CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: mv a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a1, a1, a0 +; CHECK-RV64-V-NEXT: slli a0, a0, 1 +; CHECK-RV64-V-NEXT: add a0, a0, a1 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a1, a0, 5 +; CHECK-RV64-V-NEXT: sub a0, a1, a0 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: csrr a0, vlenb +; CHECK-RV64-V-NEXT: slli a0, a0, 5 +; CHECK-RV64-V-NEXT: sub a0, s0, a0 +; CHECK-RV64-V-NEXT: addi a0, a0, -160 +; CHECK-RV64-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-V-NEXT: addi sp, s0, -160 +; CHECK-RV64-V-NEXT: ld ra, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t0, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t1, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t2, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld s0, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a0, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a1, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a2, 96(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a3, 88(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a4, 80(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a5, 72(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a6, 64(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld a7, 56(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t3, 48(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld 
t4, 40(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t5, 32(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: ld t6, 24(sp) # 8-byte Folded Reload +; CHECK-RV64-V-NEXT: addi sp, sp, 160 +; CHECK-RV64-V-NEXT: mret +; +; CHECK-RV64-FV-LABEL: foo_fp_with_call: +; CHECK-RV64-FV: # %bb.0: +; CHECK-RV64-FV-NEXT: addi sp, sp, -240 +; CHECK-RV64-FV-NEXT: sd ra, 232(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t0, 224(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t1, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t2, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd s0, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a0, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a1, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a2, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a3, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a4, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a5, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a6, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd a7, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t3, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t4, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t5, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: sd t6, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft0, 100(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft1, 96(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft2, 92(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft3, 88(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft4, 84(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft5, 80(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft6, 76(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft7, 72(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa0, 68(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa1, 64(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa2, 
60(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa3, 56(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa4, 52(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa5, 48(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa6, 44(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw fa7, 40(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft8, 36(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft9, 32(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft10, 28(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: fsw ft11, 24(sp) # 4-byte Folded Spill +; CHECK-RV64-FV-NEXT: addi s0, sp, 240 +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 5 +; CHECK-RV64-FV-NEXT: sub sp, sp, a0 +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv 
a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v11, (a0) # 
vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 4 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: 
slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; 
CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: 
slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 5 +; CHECK-RV64-FV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 5 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FV-NEXT: call otherfoo +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr 
a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; 
CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 4 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FV-NEXT: add a0, a1, a0 +; 
CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; 
CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v26, (a0) # vscale x 
8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: mv a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a1, a1, a0 +; CHECK-RV64-FV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FV-NEXT: add a0, a0, a1 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a1, a0, 5 +; CHECK-RV64-FV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: csrr a0, vlenb +; CHECK-RV64-FV-NEXT: slli a0, a0, 5 +; CHECK-RV64-FV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FV-NEXT: addi a0, a0, -240 +; CHECK-RV64-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: addi sp, s0, -240 +; CHECK-RV64-FV-NEXT: ld ra, 232(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t0, 
224(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t1, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t2, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld s0, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a0, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a1, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a2, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a3, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a4, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a5, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a6, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld a7, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t3, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t4, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t5, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: ld t6, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft0, 100(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft1, 96(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft2, 92(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft3, 88(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft4, 84(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft5, 80(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft6, 76(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft7, 72(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa0, 68(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa1, 64(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa2, 60(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa3, 56(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa4, 52(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa5, 48(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa6, 44(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw fa7, 40(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft8, 36(sp) # 4-byte Folded 
Reload +; CHECK-RV64-FV-NEXT: flw ft9, 32(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft10, 28(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: flw ft11, 24(sp) # 4-byte Folded Reload +; CHECK-RV64-FV-NEXT: addi sp, sp, 240 +; CHECK-RV64-FV-NEXT: mret +; +; CHECK-RV64-FDV-LABEL: foo_fp_with_call: +; CHECK-RV64-FDV: # %bb.0: +; CHECK-RV64-FDV-NEXT: addi sp, sp, -320 +; CHECK-RV64-FDV-NEXT: sd ra, 312(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t0, 304(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t1, 296(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t2, 288(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd s0, 280(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a0, 272(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a1, 264(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a2, 256(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a3, 248(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a4, 240(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a5, 232(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a6, 224(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd a7, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t3, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t4, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t5, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: sd t6, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft0, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft1, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft2, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft3, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft4, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft5, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft6, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft7, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa0, 112(sp) # 8-byte Folded Spill +; 
CHECK-RV64-FDV-NEXT: fsd fa1, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa2, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa3, 88(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa4, 80(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa5, 72(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa6, 64(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd fa7, 56(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft8, 48(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft9, 40(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft10, 32(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: fsd ft11, 24(sp) # 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: addi s0, sp, 320 +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 5 +; CHECK-RV64-FDV-NEXT: sub sp, sp, a0 +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v4, (a0) # 
vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: 
slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 4 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub 
a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 
+; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; 
CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 5 +; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 5 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV64-FDV-NEXT: call otherfoo +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 
+; CHECK-RV64-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; 
CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; 
CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 4 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a1, a0, 4 +; CHECK-RV64-FDV-NEXT: add a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; 
CHECK-RV64-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 3 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; 
CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 2 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: mv a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a1, a1, a0 +; CHECK-RV64-FDV-NEXT: slli a0, a0, 1 +; CHECK-RV64-FDV-NEXT: add a0, a0, a1 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; 
CHECK-RV64-FDV-NEXT: slli a1, a0, 5 +; CHECK-RV64-FDV-NEXT: sub a0, a1, a0 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: csrr a0, vlenb +; CHECK-RV64-FDV-NEXT: slli a0, a0, 5 +; CHECK-RV64-FDV-NEXT: sub a0, s0, a0 +; CHECK-RV64-FDV-NEXT: addi a0, a0, -320 +; CHECK-RV64-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: addi sp, s0, -320 +; CHECK-RV64-FDV-NEXT: ld ra, 312(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t0, 304(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t1, 296(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t2, 288(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld s0, 280(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a0, 272(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a1, 264(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a2, 256(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a3, 248(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a4, 240(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a5, 232(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a6, 224(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld a7, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t3, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t4, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t5, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: ld t6, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft0, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft1, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft2, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft3, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft4, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft5, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft6, 128(sp) # 8-byte Folded 
Reload +; CHECK-RV64-FDV-NEXT: fld ft7, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa0, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa1, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa2, 96(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa3, 88(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa4, 80(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa5, 72(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa6, 64(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld fa7, 56(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft8, 48(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft9, 40(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft10, 32(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: fld ft11, 24(sp) # 8-byte Folded Reload +; CHECK-RV64-FDV-NEXT: addi sp, sp, 320 +; CHECK-RV64-FDV-NEXT: mret %call = call i32 @otherfoo() ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll b/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll new file mode 100644 index 000000000000..af2e8d384a44 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll @@ -0,0 +1,502 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+zve32x,+zvl128b -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK-RV32 + +@a = external global <4 x i32> +@b = external global <4 x i32> +@c = external global <4 x i32> + +define void @foo_lmul1() nounwind #0 { +; CHECK-RV32-LABEL: foo_lmul1: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -32 +; CHECK-RV32-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: sub sp, sp, a0 +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; 
CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: lui a0, %hi(a) +; CHECK-RV32-NEXT: addi a0, a0, %lo(a) +; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-RV32-NEXT: vle32.v v8, (a0) +; CHECK-RV32-NEXT: lui a0, %hi(b) +; CHECK-RV32-NEXT: addi a0, a0, %lo(b) +; CHECK-RV32-NEXT: vle32.v v9, (a0) +; CHECK-RV32-NEXT: vadd.vv v8, v9, v8 +; CHECK-RV32-NEXT: lui a0, %hi(c) +; CHECK-RV32-NEXT: addi a0, a0, %lo(c) +; CHECK-RV32-NEXT: vse32.v v8, (a0) +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add sp, sp, a0 +; CHECK-RV32-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT: addi sp, sp, 32 +; CHECK-RV32-NEXT: mret + %1 = load <4 x i32>, ptr @a + %2 = load <4 x i32>, ptr @b + %add = add nsw <4 x i32> %2, %1 + store <4 x i32> %add, ptr @c + ret void +} + +@d = external global <8 x i32> +@e = external global <8 x i32> +@f = external global <8 x i32> + +define void @foo_lmul2() nounwind #0 { +; CHECK-RV32-LABEL: foo_lmul2: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -32 +; CHECK-RV32-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: sub sp, sp, a0 +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; CHECK-RV32-NEXT: slli a1, a0, 1 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; 
CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: lui a0, %hi(d) +; CHECK-RV32-NEXT: addi a0, a0, %lo(d) +; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-RV32-NEXT: vle32.v v8, (a0) +; CHECK-RV32-NEXT: lui a0, %hi(e) +; CHECK-RV32-NEXT: addi a0, a0, %lo(e) +; CHECK-RV32-NEXT: vle32.v v10, (a0) +; CHECK-RV32-NEXT: vadd.vv v8, v10, v8 +; CHECK-RV32-NEXT: lui a0, %hi(f) +; CHECK-RV32-NEXT: addi a0, a0, %lo(f) +; CHECK-RV32-NEXT: vse32.v v8, (a0) +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; CHECK-RV32-NEXT: slli a1, a0, 1 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add sp, sp, a0 +; CHECK-RV32-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT: addi sp, sp, 32 +; CHECK-RV32-NEXT: mret + %1 = load <8 x i32>, ptr @d + %2 = load <8 x i32>, ptr @e + %add = add nsw <8 x i32> 
%2, %1 + store <8 x i32> %add, ptr @f + ret void +} + +@g = external global <16 x i32> +@h = external global <16 x i32> +@i = external global <16 x i32> + +define void @foo_lmul4() nounwind #0 { +; CHECK-RV32-LABEL: foo_lmul4: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -32 +; CHECK-RV32-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 3 +; CHECK-RV32-NEXT: sub sp, sp, a0 +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; CHECK-RV32-NEXT: slli a1, a0, 2 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; CHECK-RV32-NEXT: slli a1, a0, 1 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded 
Reload +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: lui a0, %hi(g) +; CHECK-RV32-NEXT: addi a0, a0, %lo(g) +; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-RV32-NEXT: vle32.v v8, (a0) +; CHECK-RV32-NEXT: lui a0, %hi(h) +; CHECK-RV32-NEXT: addi a0, a0, %lo(h) +; CHECK-RV32-NEXT: vle32.v v12, (a0) +; CHECK-RV32-NEXT: vadd.vv v8, v12, v8 +; CHECK-RV32-NEXT: lui a0, %hi(i) +; CHECK-RV32-NEXT: addi a0, a0, %lo(i) +; CHECK-RV32-NEXT: vse32.v v8, (a0) +; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 2 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; 
CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 1 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 3 +; CHECK-RV32-NEXT: add sp, sp, a0 +; CHECK-RV32-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT: addi sp, sp, 32 +; CHECK-RV32-NEXT: mret + %1 = load <16 x i32>, ptr @g + %2 = load <16 x i32>, ptr @h + %add = add nsw <16 x i32> %2, %1 + store <16 x i32> %add, ptr @i + ret void +} + +@j = external global <32 x i32> +@k = external global <32 x i32> +@l = external global <32 x i32> + +define void @foo_lmul8() nounwind #0 { +; CHECK-RV32-LABEL: foo_lmul8: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -32 +; CHECK-RV32-NEXT: sw a0, 28(sp) # 4-byte Folded Spill +; CHECK-RV32-NEXT: sw a1, 24(sp) # 4-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 4 +; CHECK-RV32-NEXT: sub sp, sp, a0 +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 4 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; 
CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a1, a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a1, a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a1, a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 3 +; 
CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 2 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 1 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill +; CHECK-RV32-NEXT: lui a0, %hi(j) +; CHECK-RV32-NEXT: addi a0, a0, %lo(j) +; CHECK-RV32-NEXT: li a1, 32 +; CHECK-RV32-NEXT: 
vsetvli zero, a1, e32, m8, ta, ma +; CHECK-RV32-NEXT: vle32.v v8, (a0) +; CHECK-RV32-NEXT: lui a0, %hi(k) +; CHECK-RV32-NEXT: addi a0, a0, %lo(k) +; CHECK-RV32-NEXT: vle32.v v16, (a0) +; CHECK-RV32-NEXT: vadd.vv v8, v16, v8 +; CHECK-RV32-NEXT: lui a0, %hi(l) +; CHECK-RV32-NEXT: addi a0, a0, %lo(l) +; CHECK-RV32-NEXT: vse32.v v8, (a0) +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 4 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a1, a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a1, a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a1, a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; 
CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 3 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: mv a1, a0 +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 2 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 1 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload +; 
CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 4 +; CHECK-RV32-NEXT: add sp, sp, a0 +; CHECK-RV32-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT: lw a1, 24(sp) # 4-byte Folded Reload +; CHECK-RV32-NEXT: addi sp, sp, 32 +; CHECK-RV32-NEXT: mret + %1 = load <32 x i32>, ptr @j + %2 = load <32 x i32>, ptr @k + %add = add nsw <32 x i32> %2, %1 + store <32 x i32> %add, ptr @l + ret void +} + +attributes #0 = { "interrupt"="machine" } -- cgit v1.2.3 From 2bcdfa198aa511479c46144c5cf95c7c685384ef Mon Sep 17 00:00:00 2001 From: Sirui Mu Date: Thu, 19 Jun 2025 09:58:19 +0800 Subject: [CIR] Add side effect attribute to call operations (#144201) This patch adds `side_effect` attribute to `cir.call` operation. Other function call attributes will be added in later patches. 
--- .../clang/CIR/Dialect/Builder/CIRBaseBuilder.h | 16 ++++-- clang/include/clang/CIR/Dialect/IR/CIRAttrs.td | 5 ++ clang/include/clang/CIR/Dialect/IR/CIROps.td | 43 +++++++++++++- .../clang/CIR/Interfaces/CIROpInterfaces.td | 2 + clang/include/clang/CIR/MissingFeatures.h | 1 - clang/lib/CIR/CodeGen/CIRGenCall.cpp | 46 ++++++++++++--- clang/lib/CIR/CodeGen/CIRGenCall.h | 6 ++ clang/lib/CIR/CodeGen/CIRGenModule.h | 10 ++++ clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 66 +++++++++++++++++++++- .../lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 51 +++++++++++++++-- clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 5 ++ clang/test/CIR/CodeGen/call.c | 26 +++++++++ clang/test/CIR/IR/call.cir | 4 ++ 13 files changed, 257 insertions(+), 24 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 502d58d7db8b..3e052c564112 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -227,22 +227,26 @@ public: //===--------------------------------------------------------------------===// cir::CallOp createCallOp(mlir::Location loc, mlir::SymbolRefAttr callee, - mlir::Type returnType, mlir::ValueRange operands) { - return create(loc, callee, returnType, operands); + mlir::Type returnType, mlir::ValueRange operands, + cir::SideEffect sideEffect = cir::SideEffect::All) { + return create(loc, callee, returnType, operands, sideEffect); } cir::CallOp createCallOp(mlir::Location loc, cir::FuncOp callee, - mlir::ValueRange operands) { + mlir::ValueRange operands, + cir::SideEffect sideEffect = cir::SideEffect::All) { return createCallOp(loc, mlir::SymbolRefAttr::get(callee), - callee.getFunctionType().getReturnType(), operands); + callee.getFunctionType().getReturnType(), operands, + sideEffect); } cir::CallOp createIndirectCallOp(mlir::Location loc, mlir::Value indirectTarget, cir::FuncType funcType, - 
mlir::ValueRange operands) { + mlir::ValueRange operands, + cir::SideEffect sideEffect) { return create(loc, indirectTarget, funcType.getReturnType(), - operands); + operands, sideEffect); } //===--------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td index b48f4ed461cc..9e01dde379d7 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td @@ -42,6 +42,11 @@ class CIR_TypedAttr traits = []> let assemblyFormat = [{}]; } +class CIR_I32EnumAttr cases> + : I32EnumAttr { + let cppNamespace = "::cir"; +} + class CIRUnitAttr traits = []> : CIR_Attr { let returnType = "bool"; diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 4655cebc82ee..852d3aa13114 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -1858,6 +1858,36 @@ def FuncOp : CIR_Op<"func", [ // CallOp //===----------------------------------------------------------------------===// +def CIR_SideEffect : CIR_I32EnumAttr< + "SideEffect", "allowed side effects of a function", [ + I32EnumAttrCase<"All", 1, "all">, + I32EnumAttrCase<"Pure", 2, "pure">, + I32EnumAttrCase<"Const", 3, "const"> + ]> { + let description = [{ + The side effect attribute specifies the possible side effects of the callee + of a call operation. This is an enumeration attribute and all possible + enumerators are: + + - all: The callee can have any side effects. This is the default if no side + effects are explicitly listed. + - pure: The callee may read data from memory, but it cannot write data to + memory. This has the same effect as the GNU C/C++ attribute + `__attribute__((pure))`. + - const: The callee may not read or write data from memory. This has the + same effect as the GNU C/C++ attribute `__attribute__((const))`. 
+ + Examples: + + ```mlir + %2 = cir.call @add(%0, %1) : (!s32i, !s32i) -> !s32i + %2 = cir.call @add(%0, %1) : (!s32i, !s32i) -> !s32i side_effect(pure) + %2 = cir.call @add(%0, %1) : (!s32i, !s32i) -> !s32i side_effect(const) + ``` + }]; + let cppNamespace = "::cir"; +} + class CIR_CallOpBase extra_traits = []> : Op extra_traits = []> // will add in the future. dag commonArgs = (ins OptionalAttr:$callee, - Variadic:$args); + Variadic:$args, + DefaultValuedAttr:$side_effect); } def CallOp : CIR_CallOpBase<"call", [NoRegionArguments]> { @@ -1942,20 +1973,26 @@ def CallOp : CIR_CallOpBase<"call", [NoRegionArguments]> { let builders = [ // Build a call op for a direct call OpBuilder<(ins "mlir::SymbolRefAttr":$callee, "mlir::Type":$resType, - "mlir::ValueRange":$operands), [{ + "mlir::ValueRange":$operands, + CArg<"SideEffect", "SideEffect::All">:$sideEffect), [{ assert(callee && "callee attribute is required for direct call"); $_state.addOperands(operands); $_state.addAttribute("callee", callee); + $_state.addAttribute("side_effect", + SideEffectAttr::get($_builder.getContext(), sideEffect)); if (resType && !isa(resType)) $_state.addTypes(resType); }]>, // Build a call op for an indirect call OpBuilder<(ins "mlir::Value":$calleePtr, "mlir::Type":$resType, - "mlir::ValueRange":$operands), [{ + "mlir::ValueRange":$operands, + CArg<"SideEffect", "SideEffect::All">:$sideEffect), [{ $_state.addOperands(calleePtr); $_state.addOperands(operands); if (resType && !isa(resType)) $_state.addTypes(resType); + $_state.addAttribute("side_effect", + SideEffectAttr::get($_builder.getContext(), sideEffect)); }]>, ]; } diff --git a/clang/include/clang/CIR/Interfaces/CIROpInterfaces.td b/clang/include/clang/CIR/Interfaces/CIROpInterfaces.td index 80d78b11c2ba..203e42f7c575 100644 --- a/clang/include/clang/CIR/Interfaces/CIROpInterfaces.td +++ b/clang/include/clang/CIR/Interfaces/CIROpInterfaces.td @@ -34,6 +34,8 @@ let cppNamespace = "::cir" in { "Return the number of operands, 
accounts for indirect call or " "exception info", "unsigned", "getNumArgOperands", (ins)>, + InterfaceMethod<"Return the side effects of the call operation", + "cir::SideEffect", "getSideEffect", (ins)>, ]; } diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 3d120903dea1..45452c5929a3 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -95,7 +95,6 @@ struct MissingFeatures { static bool opCallReturn() { return false; } static bool opCallArgEvaluationOrder() { return false; } static bool opCallCallConv() { return false; } - static bool opCallSideEffect() { return false; } static bool opCallNoPrototypeFunc() { return false; } static bool opCallMustTail() { return false; } static bool opCallVirtual() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp index af0e6ca822b8..9c9c96604c16 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -77,6 +77,35 @@ void CIRGenFunction::emitAggregateStore(mlir::Value value, Address dest) { builder.createStore(*currSrcLoc, value, dest); } +/// Construct the CIR attribute list of a function or call. +void CIRGenModule::constructAttributeList(CIRGenCalleeInfo calleeInfo, + cir::SideEffect &sideEffect) { + assert(!cir::MissingFeatures::opCallCallConv()); + sideEffect = cir::SideEffect::All; + + assert(!cir::MissingFeatures::opCallAttrs()); + + const Decl *targetDecl = calleeInfo.getCalleeDecl().getDecl(); + + if (targetDecl) { + assert(!cir::MissingFeatures::opCallAttrs()); + + // 'const', 'pure' and 'noalias' attributed functions are also nounwind. + if (targetDecl->hasAttr()) { + // gcc specifies that 'const' functions have greater restrictions than + // 'pure' functions, so they also cannot have infinite loops. 
+ sideEffect = cir::SideEffect::Const; + } else if (targetDecl->hasAttr()) { + // gcc specifies that 'pure' functions cannot have infinite loops. + sideEffect = cir::SideEffect::Pure; + } + + assert(!cir::MissingFeatures::opCallAttrs()); + } + + assert(!cir::MissingFeatures::opCallAttrs()); +} + /// Returns the canonical formal type of the given C++ method. static CanQual getFormalType(const CXXMethodDecl *md) { return md->getType() @@ -386,7 +415,8 @@ static cir::CIRCallOpInterface emitCallLikeOp(CIRGenFunction &cgf, mlir::Location callLoc, cir::FuncType indirectFuncTy, mlir::Value indirectFuncVal, cir::FuncOp directFuncOp, - const SmallVectorImpl &cirCallArgs) { + const SmallVectorImpl &cirCallArgs, + cir::SideEffect sideEffect) { CIRGenBuilderTy &builder = cgf.getBuilder(); assert(!cir::MissingFeatures::opCallSurroundingTry()); @@ -397,11 +427,11 @@ emitCallLikeOp(CIRGenFunction &cgf, mlir::Location callLoc, if (indirectFuncTy) { // TODO(cir): Set calling convention for indirect calls. 
assert(!cir::MissingFeatures::opCallCallConv()); - return builder.createIndirectCallOp(callLoc, indirectFuncVal, - indirectFuncTy, cirCallArgs); + return builder.createIndirectCallOp( + callLoc, indirectFuncVal, indirectFuncTy, cirCallArgs, sideEffect); } - return builder.createCallOp(callLoc, directFuncOp, cirCallArgs); + return builder.createCallOp(callLoc, directFuncOp, cirCallArgs, sideEffect); } const CIRGenFunctionInfo & @@ -513,8 +543,9 @@ RValue CIRGenFunction::emitCall(const CIRGenFunctionInfo &funcInfo, funcName = calleeFuncOp.getName(); assert(!cir::MissingFeatures::opCallCallConv()); - assert(!cir::MissingFeatures::opCallSideEffect()); assert(!cir::MissingFeatures::opCallAttrs()); + cir::SideEffect sideEffect; + cgm.constructAttributeList(callee.getAbstractInfo(), sideEffect); assert(!cir::MissingFeatures::invokeOp()); @@ -538,8 +569,9 @@ RValue CIRGenFunction::emitCall(const CIRGenFunctionInfo &funcInfo, assert(!cir::MissingFeatures::opCallAttrs()); mlir::Location callLoc = loc; - cir::CIRCallOpInterface theCall = emitCallLikeOp( - *this, loc, indirectFuncTy, indirectFuncVal, directFuncOp, cirCallArgs); + cir::CIRCallOpInterface theCall = + emitCallLikeOp(*this, loc, indirectFuncTy, indirectFuncVal, directFuncOp, + cirCallArgs, sideEffect); if (callOp) *callOp = theCall; diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.h b/clang/lib/CIR/CodeGen/CIRGenCall.h index 0353848f3ec0..56c76c51a46d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.h +++ b/clang/lib/CIR/CodeGen/CIRGenCall.h @@ -105,6 +105,12 @@ public: /// callee CIRGenCallee prepareConcreteCallee(CIRGenFunction &cgf) const; + CIRGenCalleeInfo getAbstractInfo() const { + assert(!cir::MissingFeatures::opCallVirtual()); + assert(isOrdinary()); + return abstractInfo; + } + mlir::Operation *getFunctionPointer() const { assert(isOrdinary()); return reinterpret_cast(kindOrFunctionPtr); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 0ea2d9f9c822..71806e3c5de2 
100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -14,6 +14,7 @@ #define LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENMODULE_H #include "CIRGenBuilder.h" +#include "CIRGenCall.h" #include "CIRGenTypeCache.h" #include "CIRGenTypes.h" #include "CIRGenValue.h" @@ -158,6 +159,15 @@ public: const CXXRecordDecl *derivedClass, llvm::iterator_range path); + /// Get the CIR attributes and calling convention to use for a particular + /// function type. + /// + /// \param calleeInfo - The callee information these attributes are being + /// constructed for. If valid, the attributes applied to this decl may + /// contribute to the function attributes and calling convention. + void constructAttributeList(CIRGenCalleeInfo calleeInfo, + cir::SideEffect &sideEffect); + /// Return a constant array for the given string. mlir::Attribute getConstantArrayFromStringLiteral(const StringLiteral *e); diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 3fcb0213b219..16248059c497 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -92,6 +92,46 @@ Operation *cir::CIRDialect::materializeConstant(mlir::OpBuilder &builder, // Helpers //===----------------------------------------------------------------------===// +// Parses one of the keywords provided in the list `keywords` and returns the +// position of the parsed keyword in the list. If none of the keywords from the +// list is parsed, returns -1. 
+static int parseOptionalKeywordAlternative(AsmParser &parser, + ArrayRef keywords) { + for (auto en : llvm::enumerate(keywords)) { + if (succeeded(parser.parseOptionalKeyword(en.value()))) + return en.index(); + } + return -1; +} + +namespace { +template struct EnumTraits {}; + +#define REGISTER_ENUM_TYPE(Ty) \ + template <> struct EnumTraits { \ + static llvm::StringRef stringify(cir::Ty value) { \ + return stringify##Ty(value); \ + } \ + static unsigned getMaxEnumVal() { return cir::getMaxEnumValFor##Ty(); } \ + } + +REGISTER_ENUM_TYPE(SideEffect); +} // namespace + +/// Parse an enum from the keyword, return failure if the keyword is not found. +template +static ParseResult parseCIRKeyword(AsmParser &parser, RetTy &result) { + llvm::SmallVector names; + for (unsigned i = 0, e = EnumTraits::getMaxEnumVal(); i <= e; ++i) + names.push_back(EnumTraits::stringify(static_cast(i))); + + int index = parseOptionalKeywordAlternative(parser, names); + if (index == -1) + return failure(); + result = static_cast(index); + return success(); +} + // Check if a region's termination omission is valid and, if so, creates and // inserts the omitted terminator into the region. 
static LogicalResult ensureRegionTerm(OpAsmParser &parser, Region ®ion, @@ -534,6 +574,18 @@ static mlir::ParseResult parseCallCommon(mlir::OpAsmParser &parser, if (parser.parseRParen()) return mlir::failure(); + if (parser.parseOptionalKeyword("side_effect").succeeded()) { + if (parser.parseLParen().failed()) + return failure(); + cir::SideEffect sideEffect; + if (parseCIRKeyword(parser, sideEffect).failed()) + return failure(); + if (parser.parseRParen().failed()) + return failure(); + auto attr = cir::SideEffectAttr::get(parser.getContext(), sideEffect); + result.addAttribute("side_effect", attr); + } + if (parser.parseOptionalAttrDict(result.attributes)) return ::mlir::failure(); @@ -556,7 +608,8 @@ static mlir::ParseResult parseCallCommon(mlir::OpAsmParser &parser, static void printCallCommon(mlir::Operation *op, mlir::FlatSymbolRefAttr calleeSym, mlir::Value indirectCallee, - mlir::OpAsmPrinter &printer) { + mlir::OpAsmPrinter &printer, + cir::SideEffect sideEffect) { printer << ' '; auto callLikeOp = mlir::cast(op); @@ -572,7 +625,13 @@ static void printCallCommon(mlir::Operation *op, } printer << "(" << ops << ")"; - printer.printOptionalAttrDict(op->getAttrs(), {"callee"}); + if (sideEffect != cir::SideEffect::All) { + printer << " side_effect("; + printer << stringifySideEffect(sideEffect); + printer << ")"; + } + + printer.printOptionalAttrDict(op->getAttrs(), {"callee", "side_effect"}); printer << " : "; printer.printFunctionalType(op->getOperands().getTypes(), @@ -586,7 +645,8 @@ mlir::ParseResult cir::CallOp::parse(mlir::OpAsmParser &parser, void cir::CallOp::print(mlir::OpAsmPrinter &p) { mlir::Value indirectCallee = isIndirect() ? 
getIndirectCall() : nullptr; - printCallCommon(*this, getCalleeAttr(), indirectCallee, p); + cir::SideEffect sideEffect = getSideEffect(); + printCallCommon(*this, getCalleeAttr(), indirectCallee, p, sideEffect); } static LogicalResult diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index a96501ab2c38..b73cb839828e 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -220,6 +220,39 @@ mlir::Value lowerCirAttrAsValue(mlir::Operation *parentOp, return value; } +void convertSideEffectForCall(mlir::Operation *callOp, + cir::SideEffect sideEffect, + mlir::LLVM::MemoryEffectsAttr &memoryEffect, + bool &noUnwind, bool &willReturn) { + using mlir::LLVM::ModRefInfo; + + switch (sideEffect) { + case cir::SideEffect::All: + memoryEffect = {}; + noUnwind = false; + willReturn = false; + break; + + case cir::SideEffect::Pure: + memoryEffect = mlir::LLVM::MemoryEffectsAttr::get( + callOp->getContext(), /*other=*/ModRefInfo::Ref, + /*argMem=*/ModRefInfo::Ref, + /*inaccessibleMem=*/ModRefInfo::Ref); + noUnwind = true; + willReturn = true; + break; + + case cir::SideEffect::Const: + memoryEffect = mlir::LLVM::MemoryEffectsAttr::get( + callOp->getContext(), /*other=*/ModRefInfo::NoModRef, + /*argMem=*/ModRefInfo::NoModRef, + /*inaccessibleMem=*/ModRefInfo::NoModRef); + noUnwind = true; + willReturn = true; + break; + } +} + /// IntAttr visitor. 
mlir::Value CIRAttrToValue::visitCirAttr(cir::IntAttr intAttr) { mlir::Location loc = parentOp->getLoc(); @@ -745,12 +778,18 @@ rewriteCallOrInvoke(mlir::Operation *op, mlir::ValueRange callOperands, mlir::FlatSymbolRefAttr calleeAttr) { llvm::SmallVector llvmResults; mlir::ValueTypeRange cirResults = op->getResultTypes(); + auto call = cast(op); if (converter->convertTypes(cirResults, llvmResults).failed()) return mlir::failure(); assert(!cir::MissingFeatures::opCallCallConv()); - assert(!cir::MissingFeatures::opCallSideEffect()); + + mlir::LLVM::MemoryEffectsAttr memoryEffects; + bool noUnwind = false; + bool willReturn = false; + convertSideEffectForCall(op, call.getSideEffect(), memoryEffects, noUnwind, + willReturn); mlir::LLVM::LLVMFunctionType llvmFnTy; if (calleeAttr) { // direct call @@ -775,10 +814,14 @@ rewriteCallOrInvoke(mlir::Operation *op, mlir::ValueRange callOperands, assert(!cir::MissingFeatures::opCallLandingPad()); assert(!cir::MissingFeatures::opCallContinueBlock()); assert(!cir::MissingFeatures::opCallCallConv()); - assert(!cir::MissingFeatures::opCallSideEffect()); - rewriter.replaceOpWithNewOp(op, llvmFnTy, calleeAttr, - callOperands); + auto newOp = rewriter.replaceOpWithNewOp( + op, llvmFnTy, calleeAttr, callOperands); + if (memoryEffects) + newOp.setMemoryEffectsAttr(memoryEffects); + newOp.setNoUnwind(noUnwind); + newOp.setWillReturn(willReturn); + return mlir::success(); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index a80c66ac1abf..ae7247332c66 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -29,6 +29,11 @@ mlir::Value lowerCirAttrAsValue(mlir::Operation *parentOp, mlir::Attribute attr, mlir::LLVM::Linkage convertLinkage(cir::GlobalLinkageKind linkage); +void convertSideEffectForCall(mlir::Operation *callOp, + cir::SideEffect sideEffect, + mlir::LLVM::MemoryEffectsAttr &memoryEffect, + bool 
&noUnwind, bool &willReturn); + class CIRToLLVMAssumeOpLowering : public mlir::OpConversionPattern { public: diff --git a/clang/test/CIR/CodeGen/call.c b/clang/test/CIR/CodeGen/call.c index 13f3c5a21ceb..f6aa41df7439 100644 --- a/clang/test/CIR/CodeGen/call.c +++ b/clang/test/CIR/CodeGen/call.c @@ -109,3 +109,29 @@ void f9() { // OGCG-NEXT: store i64 %[[RET]], ptr %[[SLOT]], align 4 // OGCG-NEXT: %[[ARG:.+]] = load i64, ptr %[[SLOT]], align 4 // OGCG-NEXT: call void @f1(i64 %[[ARG]]) + +__attribute__((pure)) int f10(int); +__attribute__((const)) int f11(int); +int f12(void) { + return f10(1) + f11(2); +} + +// CIR-LABEL: cir.func @f12() -> !s32i +// CIR: %[[A:.+]] = cir.const #cir.int<1> : !s32i +// CIR-NEXT: %{{.+}} = cir.call @f10(%[[A]]) side_effect(pure) : (!s32i) -> !s32i +// CIR-NEXT: %[[B:.+]] = cir.const #cir.int<2> : !s32i +// CIR-NEXT: %{{.+}} = cir.call @f11(%[[B]]) side_effect(const) : (!s32i) -> !s32i + +// LLVM-LABEL: define i32 @f12() +// LLVM: %{{.+}} = call i32 @f10(i32 1) #[[ATTR0:.+]] +// LLVM-NEXT: %{{.+}} = call i32 @f11(i32 2) #[[ATTR1:.+]] + +// OGCG-LABEL: define dso_local i32 @f12() +// OGCG: %{{.+}} = call i32 @f10(i32 noundef 1) #[[ATTR0:.+]] +// OGCG-NEXT: %{{.+}} = call i32 @f11(i32 noundef 2) #[[ATTR1:.+]] + +// LLVM: attributes #[[ATTR0]] = { nounwind willreturn memory(read, errnomem: none) } +// LLVM: attributes #[[ATTR1]] = { nounwind willreturn memory(none) } + +// OGCG: attributes #[[ATTR0]] = { nounwind willreturn memory(read) } +// OGCG: attributes #[[ATTR1]] = { nounwind willreturn memory(none) } diff --git a/clang/test/CIR/IR/call.cir b/clang/test/CIR/IR/call.cir index e35c201b6ed4..5f0916775479 100644 --- a/clang/test/CIR/IR/call.cir +++ b/clang/test/CIR/IR/call.cir @@ -8,11 +8,15 @@ cir.func @f1() cir.func @f2() { cir.call @f1() : () -> () + cir.call @f1() side_effect(pure) : () -> () + cir.call @f1() side_effect(const) : () -> () cir.return } // CHECK: cir.func @f2() { // CHECK-NEXT: cir.call @f1() : () -> () +// 
CHECK-NEXT: cir.call @f1() side_effect(pure) : () -> () +// CHECK-NEXT: cir.call @f1() side_effect(const) : () -> () // CHECK-NEXT: cir.return // CHECK-NEXT: } -- cgit v1.2.3 From faf9295f4e3a23a972d29e2be85052beef409d08 Mon Sep 17 00:00:00 2001 From: MingYan <99472920+NexMing@users.noreply.github.com> Date: Thu, 19 Jun 2025 10:15:31 +0800 Subject: [RISCV] Fix a bug where AVL is the last MI in MBB. (#144668) When `AVL` is the last MI, `std::next(II)` equals `MBB.end()`, and calling `II->getParent()` at that point will cause an error. --------- Co-authored-by: yanming --- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 15 +++++----- .../CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir | 32 ++++++++++++++++++++++ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index 9a513891b765..78d64ea67324 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1119,25 +1119,26 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, LIS->InsertMachineInstrInMaps(*MI); LiveInterval &LI = LIS->getInterval(AVLReg); SlotIndex SI = LIS->getInstructionIndex(*MI).getRegSlot(); + const VNInfo *CurVNI = Info.getAVLVNInfo(); // If the AVL value isn't live at MI, do a quick check to see if it's easily // extendable. Otherwise, we need to copy it. 
- if (LI.getVNInfoBefore(SI) != Info.getAVLVNInfo()) { + if (LI.getVNInfoBefore(SI) != CurVNI) { if (!LI.liveAt(SI) && LI.containsOneValue()) LIS->extendToIndices(LI, SI); else { Register AVLCopyReg = MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass); + MachineBasicBlock *MBB = LIS->getMBBFromIndex(CurVNI->def); MachineBasicBlock::iterator II; - if (Info.getAVLVNInfo()->isPHIDef()) - II = LIS->getMBBFromIndex(Info.getAVLVNInfo()->def)->getFirstNonPHI(); + if (CurVNI->isPHIDef()) + II = MBB->getFirstNonPHI(); else { - II = LIS->getInstructionFromIndex(Info.getAVLVNInfo()->def); + II = LIS->getInstructionFromIndex(CurVNI->def); II = std::next(II); } assert(II.isValid()); - auto AVLCopy = - BuildMI(*II->getParent(), II, DL, TII->get(RISCV::COPY), AVLCopyReg) - .addReg(AVLReg); + auto AVLCopy = BuildMI(*MBB, II, DL, TII->get(RISCV::COPY), AVLCopyReg) + .addReg(AVLReg); LIS->InsertMachineInstrInMaps(*AVLCopy); MI->getOperand(1).setReg(AVLCopyReg); LIS->createAndComputeVirtRegInterval(AVLCopyReg); diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir index 140875c4b24a..e09fc1828fec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir @@ -142,6 +142,10 @@ ret void } + define void @avl_is_last_instr() { + ret void + } + declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) declare @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(, , , i64) #1 @@ -1099,3 +1103,31 @@ body: | renamable $v10m2 = PseudoVADD_VV_M2 undef renamable $v10m2, %v, %v, -1, 5, 0 renamable $v8m2 = PseudoVADD_VV_M2 undef renamable $v8m2, killed renamable $v10m2, killed %v, %outvl:gprnox0, 5, 0 PseudoRET implicit $v8m2 +... 
+--- +name: avl_is_last_instr +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: avl_is_last_instr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY %avl + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: dead %avl:gprnox0 = ADDI %avl, -1 + ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 192 /* e8, m1, ta, ma */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: $v8 = PseudoVMV_S_X undef renamable $v8, $x0, 1, 3 /* e8 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 192 /* e8, m1, ta, ma */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: $v8 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, $noreg, 3 /* e8 */, 3 /* ta, ma */, implicit $vl, implicit $vtype + bb.0: + liveins: $x10 + %avl:gprnox0 = COPY $x10 + + bb.1: + %vl:gprnox0 = PseudoVSETVLI %avl:gprnox0, 192, implicit-def dead $vl, implicit-def dead $vtype + %avl:gprnox0 = ADDI %avl:gprnox0, -1 + $v8 = PseudoVMV_S_X undef renamable $v8, $x0, 1, 3 + $v8 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, %vl:gprnox0, 3, 3 -- cgit v1.2.3 From bfee625821c07d9a05b48e4a8b0f3d73c1233107 Mon Sep 17 00:00:00 2001 From: Durgadoss R Date: Thu, 19 Jun 2025 07:49:08 +0530 Subject: [NVPTX] Attach Range attr to setmaxnreg and fence intrinsics (#144120) This patch attaches the range attribute to the setmaxnreg and fence.proxy.tensormap.* intrinsics. The range checks are now handled generically in the Verifier. So, this patch removes the per-intrinsic error-handling for range-checks from the Verifier. This patch also adds more coverage tests for these cases. 
Signed-off-by: Durgadoss R --- llvm/include/llvm/IR/IntrinsicsNVVM.td | 8 ++++++-- llvm/lib/IR/Verifier.cpp | 10 ---------- llvm/test/Verifier/NVPTX/fence-proxy.tensormap.ll | 17 +++++++++++++++++ llvm/test/Verifier/NVPTX/setmaxnreg.ll | 4 +++- 4 files changed, 26 insertions(+), 13 deletions(-) create mode 100644 llvm/test/Verifier/NVPTX/fence-proxy.tensormap.ll diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 4efdff71c016..410a0dea2bf5 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -1341,9 +1341,11 @@ foreach scope = ["cta", "cluster", "gpu", "sys"] in { Intrinsic<[], [], [IntrNoCallback], "llvm.nvvm.fence.proxy.tensormap_generic.release." # scope>; + // The imm-arg 'size' can only be 128. def int_nvvm_fence_proxy_tensormap_generic_acquire_ # scope : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], - [IntrNoCallback, IntrArgMemOnly, ImmArg>], + [IntrNoCallback, IntrArgMemOnly, ImmArg>, + Range, 128, 129>], "llvm.nvvm.fence.proxy.tensormap_generic.acquire." 
# scope>; } @@ -1989,10 +1991,12 @@ def int_nvvm_is_explicit_cluster "llvm.nvvm.is_explicit_cluster">; // Setmaxnreg inc/dec intrinsics +// The imm-arg should be in the range: 24 <= val <= 256 foreach op = ["dec", "inc"] in def int_nvvm_setmaxnreg_ # op # _sync_aligned_u32 : DefaultAttrsIntrinsic<[], [llvm_i32_ty], - [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg>]>; + [IntrConvergent, IntrNoMem, IntrHasSideEffects, + ImmArg>, Range, 24, 257>]>; // Exit def int_nvvm_exit : NVVMBuiltin, diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 1f1041b25973..f0a4d7b6a4c1 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6557,8 +6557,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { unsigned RegCount = cast(V)->getZExtValue(); Check(RegCount % 8 == 0, "reg_count argument to nvvm.setmaxnreg must be in multiples of 8"); - Check((RegCount >= 24 && RegCount <= 256), - "reg_count argument to nvvm.setmaxnreg must be within [24, 256]"); break; } case Intrinsic::experimental_convergence_entry: @@ -6605,14 +6603,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "llvm.threadlocal.address operand isThreadLocal() must be true"); break; } - case Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_cta: - case Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_cluster: - case Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_gpu: - case Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_sys: { - unsigned size = cast(Call.getArgOperand(1))->getZExtValue(); - Check(size == 128, " The only supported value for size operand is 128"); - break; - } }; // Verify that there aren't any unmediated control transfers between funclets. 
diff --git a/llvm/test/Verifier/NVPTX/fence-proxy.tensormap.ll b/llvm/test/Verifier/NVPTX/fence-proxy.tensormap.ll new file mode 100644 index 000000000000..4fa7a7ae7100 --- /dev/null +++ b/llvm/test/Verifier/NVPTX/fence-proxy.tensormap.ll @@ -0,0 +1,17 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +define void @test_fence_proxy_tensormap_generic_acquire(ptr addrspace(0) %addr) { + ; CHECK: immarg value 127 out of range [128, 129) + call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cta(ptr addrspace(0) %addr, i32 127); + + ; CHECK: immarg value 129 out of range [128, 129) + call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cluster(ptr addrspace(0) %addr, i32 129); + + ; CHECK: immarg value 127 out of range [128, 129) + call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.gpu(ptr addrspace(0) %addr, i32 127); + + ; CHECK: immarg value 129 out of range [128, 129) + call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.sys(ptr addrspace(0) %addr, i32 129); + + ret void +} diff --git a/llvm/test/Verifier/NVPTX/setmaxnreg.ll b/llvm/test/Verifier/NVPTX/setmaxnreg.ll index 8999e4ffa667..1afebeab4742 100644 --- a/llvm/test/Verifier/NVPTX/setmaxnreg.ll +++ b/llvm/test/Verifier/NVPTX/setmaxnreg.ll @@ -7,8 +7,10 @@ define void @test_set_maxn_reg() { ; CHECK: reg_count argument to nvvm.setmaxnreg must be in multiples of 8 call void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 95) - ; CHECK: reg_count argument to nvvm.setmaxnreg must be within [24, 256] + ; CHECK: immarg value 16 out of range [24, 257) call void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 16) + ; CHECK: immarg value 264 out of range [24, 257) + call void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 264) ret void } -- cgit v1.2.3 From 5875fafdc547889fb089c943a881a9ab6d8a23c0 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Thu, 19 Jun 2025 10:30:47 +0800 Subject: [X86] Remove CLDEMOTE from Alderlake and later hybrid processors (#144662) SDM doesn't list any 
hybrid processors in this feature. Besides, physical machine also reports not supported. --- clang/test/Preprocessor/predefined-arch-macros.c | 4 ++-- llvm/lib/Target/X86/X86.td | 2 +- llvm/lib/TargetParser/X86TargetParser.cpp | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 2d17891071aa..9dfeddbd4d5a 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -2102,7 +2102,7 @@ // CHECK_ADL_M32: #define __AVX__ 1 // CHECK_ADL_M32: #define __BMI2__ 1 // CHECK_ADL_M32: #define __BMI__ 1 -// CHECK_ADL_M32: #define __CLDEMOTE__ 1 +// CHECK_ADL_M32-NOT: #define __CLDEMOTE__ 1 // CHECK_ADL_M32: #define __CLFLUSHOPT__ 1 // CHECK_ADL_M32: #define __CLWB__ 1 // CHECK_ADL_M32: #define __F16C__ 1 @@ -2173,7 +2173,7 @@ // CHECK_ADL_M64: #define __AVX__ 1 // CHECK_ADL_M64: #define __BMI2__ 1 // CHECK_ADL_M64: #define __BMI__ 1 -// CHECK_ADL_M64: #define __CLDEMOTE__ 1 +// CHECK_ADL_M64-NOT: #define __CLDEMOTE__ 1 // CHECK_ADL_M64: #define __CLFLUSHOPT__ 1 // CHECK_ADL_M64: #define __CLWB__ 1 // CHECK_ADL_M64: #define __F16C__ 1 diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 2d635835e3ff..b09891652ad9 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1284,7 +1284,6 @@ def ProcessorFeatures { FeatureAVXVNNI, FeaturePKU, FeatureHRESET, - FeatureCLDEMOTE, FeatureMOVDIRI, FeatureMOVDIR64B, FeatureWAITPKG]; @@ -1311,6 +1310,7 @@ def ProcessorFeatures { FeatureAVXNECONVERT, FeatureENQCMD, FeatureUINTR, + FeatureCLDEMOTE, FeatureAVXVNNIINT8]; list SRFFeatures = !listconcat(ADLFeatures, SRFAdditionalFeatures); diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 21d05ee389e6..4947b05cd037 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ 
-165,11 +165,11 @@ constexpr FeatureBitset FeaturesAlderlake = FeaturesTremont | FeatureADX | FeatureBMI | FeatureBMI2 | FeatureF16C | FeatureFMA | FeatureINVPCID | FeatureLZCNT | FeaturePCONFIG | FeaturePKU | FeatureSERIALIZE | FeatureSHSTK | FeatureVAES | FeatureVPCLMULQDQ | - FeatureCLDEMOTE | FeatureMOVDIR64B | FeatureMOVDIRI | FeatureWAITPKG | - FeatureAVXVNNI | FeatureHRESET | FeatureWIDEKL; + FeatureMOVDIR64B | FeatureMOVDIRI | FeatureWAITPKG | FeatureAVXVNNI | + FeatureHRESET | FeatureWIDEKL; constexpr FeatureBitset FeaturesSierraforest = FeaturesAlderlake | FeatureCMPCCXADD | FeatureAVXIFMA | FeatureUINTR | - FeatureENQCMD | FeatureAVXNECONVERT | FeatureAVXVNNIINT8; + FeatureCLDEMOTE | FeatureENQCMD | FeatureAVXNECONVERT | FeatureAVXVNNIINT8; constexpr FeatureBitset FeaturesArrowlakeS = FeaturesSierraforest | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4; constexpr FeatureBitset FeaturesPantherlake = -- cgit v1.2.3 From 351303c28e8feb85c93d8e9480f534653b032735 Mon Sep 17 00:00:00 2001 From: Han-Chung Wang Date: Wed, 18 Jun 2025 20:07:43 -0700 Subject: [mlir][docs] Fix broken links to Traits documentation. (#144820) --- mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td | 2 +- mlir/include/mlir/IR/BuiltinOps.td | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td index 77e3074661ab..481b14cdb462 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -339,7 +339,7 @@ def MemRef_AllocaOp : AllocLikeOp<"alloca", AutomaticAllocationScopeResource,[ The `alloca` operation allocates memory on the stack, to be automatically released when control transfers back from the region of its closest surrounding operation with an - [`AutomaticAllocationScope`](../Traits.md/#automaticallocationscope) trait. + [`AutomaticAllocationScope`](../Traits/#automaticallocationscope) trait. 
The amount of memory allocated is specified by its memref and additional operands. For example: diff --git a/mlir/include/mlir/IR/BuiltinOps.td b/mlir/include/mlir/IR/BuiltinOps.td index 56edd7519cd6..cdc09afe0b67 100644 --- a/mlir/include/mlir/IR/BuiltinOps.td +++ b/mlir/include/mlir/IR/BuiltinOps.td @@ -40,7 +40,7 @@ def ModuleOp : Builtin_Op<"module", [ [graph region](../LangRef.md#control-flow-and-ssacfg-regions) containing a single block which can contain any operations and does not have a terminator. Operations within this region cannot implicitly capture values defined outside the module, - i.e. Modules are [IsolatedFromAbove](../Traits.md#isolatedfromabove). Modules have + i.e. Modules are [IsolatedFromAbove](../Traits#isolatedfromabove). Modules have an optional [symbol name](../SymbolsAndSymbolTables.md) which can be used to refer to them in operations. -- cgit v1.2.3 From 7b989ade35a43357f9152198ee2c76899df9a56d Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Wed, 18 Jun 2025 22:49:21 -0700 Subject: [lldb/crashlog] Make interactive mode the new default (#144839) This patch makes interactive mode the default when using the crashlog command. It replaces the existing `-i|--interactive` flag with a new `-m|--mode` option that can be either `interactive` or `batch`. By default, when the option is not explicitly set by the user, the interactive mode is selected; however, lldb will fall back to batch mode if the command interpreter is not interactive or if stdout is not a tty. This also adds some guardrails to prevent users from using interactive-only options with the batch mode and updates the tests accordingly. 
rdar://97801509 Differential Revision: https://reviews.llvm.org/D141658 Signed-off-by: Med Ismail Bennani --- lldb/examples/python/crashlog.py | 126 +++++++++++++-------- .../Python/Crashlog/altered_threadState.test | 2 +- .../ScriptInterpreter/Python/Crashlog/json.test | 6 +- .../Python/Crashlog/no_threadState.test | 2 +- .../skipped_status_interactive_crashlog.test | 2 +- .../ScriptInterpreter/Python/Crashlog/text.test | 2 +- 6 files changed, 85 insertions(+), 55 deletions(-) diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index 6615c3353ffe..5f07cda2892a 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -31,6 +31,7 @@ import argparse import concurrent.futures import contextlib import datetime +import enum import json import os import platform @@ -45,7 +46,6 @@ import threading import time import uuid - print_lock = threading.RLock() try: @@ -1582,9 +1582,12 @@ def load_crashlog_in_scripted_process(debugger, crashlog_path, options, result): debugger.RunCommandInterpreter(True, False, run_options, 0, False, True) -def CreateSymbolicateCrashLogOptions( - command_name, description, add_interactive_options -): +class CrashLogLoadingMode(str, enum.Enum): + batch = "batch" + interactive = "interactive" + + +def CreateSymbolicateCrashLogOptions(command_name, description): usage = "crashlog [options] [FILE ...]" arg_parser = argparse.ArgumentParser( description=description, @@ -1600,6 +1603,12 @@ def CreateSymbolicateCrashLogOptions( help="crash report(s) to symbolicate", ) + arg_parser.add_argument( + "-m", + "--mode", + choices=[mode.value for mode in CrashLogLoadingMode], + help="change how the symbolicated process and threads are displayed to the user (default: interactive)", + ) arg_parser.add_argument( "--version", "-V", @@ -1736,36 +1745,35 @@ def CreateSymbolicateCrashLogOptions( help=argparse.SUPPRESS, default=False, ) - if add_interactive_options: - arg_parser.add_argument( - "-i", - 
"--interactive", - action="store_true", - help="parse a crash log and load it in a ScriptedProcess", - default=False, - ) - arg_parser.add_argument( - "-b", - "--batch", - action="store_true", - help="dump symbolicated stackframes without creating a debug session", - default=True, - ) - arg_parser.add_argument( - "--target", - "-t", - dest="target_path", - help="the target binary path that should be used for interactive crashlog (optional)", - default=None, - ) - arg_parser.add_argument( - "--skip-status", - "-s", - dest="skip_status", - action="store_true", - help="prevent the interactive crashlog to dump the process status and thread backtrace at launch", - default=False, - ) + arg_parser.add_argument( + "--target", + "-t", + dest="target_path", + help="the target binary path that should be used for interactive crashlog (optional)", + default=None, + ) + arg_parser.add_argument( + "--skip-status", + "-s", + dest="skip_status", + action="store_true", + help="prevent the interactive crashlog to dump the process status and thread backtrace at launch", + default=False, + ) + legacy_group = arg_parser.add_mutually_exclusive_group() + legacy_group.add_argument( + "-i", + "--interactive", + action="store_true", + help=argparse.SUPPRESS, + ) + legacy_group.add_argument( + "-b", + "--batch", + action="store_true", + help=argparse.SUPPRESS, + ) + return arg_parser @@ -1778,7 +1786,7 @@ for use at the LLDB command line. After a crash log has been parsed and symbolic created that has all of the shared libraries loaded at the load addresses found in the crash log file. 
This allows you to explore the program as if it were stopped at the locations described in the crash log and functions can be disassembled and lookups can be performed using the addresses found in the crash log.""" - return CreateSymbolicateCrashLogOptions("crashlog", description, True) + return CreateSymbolicateCrashLogOptions("crashlog", description) def SymbolicateCrashLogs(debugger, command_args, result, is_command): @@ -1794,8 +1802,35 @@ def SymbolicateCrashLogs(debugger, command_args, result, is_command): result.SetError(str(e)) return + # To avoid breaking existing users, we should keep supporting legacy flags + # even if we don't use them / advertise them anymore. + if not options.mode: + if options.batch: + options.mode = CrashLogLoadingMode.batch + else: + options.mode = CrashLogLoadingMode.interactive + + if options.mode != CrashLogLoadingMode.interactive and ( + options.target_path or options.skip_status + ): + print( + "Target path (-t) and skipping process status (-s) options can only used in interactive mode (-m=interactive)." + ) + print("Aborting symbolication.") + arg_parser.print_help() + return + + if options.version: + print(debugger.GetVersionString()) + return + + if options.debug: + print("command_args = %s" % command_args) + print("options", options) + print("args", options.reports) + # Interactive mode requires running the crashlog command from inside lldb. - if options.interactive and not is_command: + if options.mode == CrashLogLoadingMode.interactive and not is_command: lldb_exec = ( subprocess.check_output(["/usr/bin/xcrun", "-f", "lldb"]) .decode("utf-8") @@ -1821,31 +1856,26 @@ def SymbolicateCrashLogs(debugger, command_args, result, is_command): print(debugger.GetVersionString()) return - if options.debug: - print("command_args = %s" % command_args) - print("options", options) - print("args", options.reports) - if options.debug_delay > 0: print("Waiting %u seconds for debugger to attach..." 
% options.debug_delay) time.sleep(options.debug_delay) error = lldb.SBError() def should_run_in_interactive_mode(options, ci): - if options.interactive: - return True - elif options.batch: + if options.mode == CrashLogLoadingMode.batch: return False - # elif ci and ci.IsInteractive(): - # return True + elif options.mode == CrashLogLoadingMode.interactive or ( + ci and ci.IsInteractive() + ): + return True else: - return False + return sys.stdout.isatty() ci = debugger.GetCommandInterpreter() if options.reports: for crashlog_file in options.reports: - crashlog_path = os.path.expanduser(crashlog_file) + crashlog_path = os.path.normpath(os.path.expanduser(crashlog_file)) if not os.path.exists(crashlog_path): raise FileNotFoundError( "crashlog file %s does not exist" % crashlog_path diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/altered_threadState.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/altered_threadState.test index 5a946a38b195..d925324822de 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/altered_threadState.test +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/altered_threadState.test @@ -1,7 +1,7 @@ # RUN: %clang_host -g %S/Inputs/test.c -o %t.out # RUN: cp %S/Inputs/altered_threadState.crash %t.crash # RUN: %python %S/patch-crashlog.py --binary %t.out --crashlog %t.crash --offsets '{"main":20, "bar":9, "foo":16}' -# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog %t.crash' 2>&1 | FileCheck %s +# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog -b %t.crash' 2>&1 | FileCheck %s # CHECK: "crashlog" {{.*}} commands have been installed, use the "--help" options on these commands diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/json.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/json.test index c2e23e82be7f..d5c6d915316e 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/json.test +++ 
b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/json.test @@ -2,12 +2,12 @@ # RUN: cp %S/Inputs/a.out.ips %t.crash # RUN: %python %S/patch-crashlog.py --binary %t.out --crashlog %t.crash --offsets '{"main":20, "bar":9, "foo":16}' --json -# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog %t.crash' 2>&1 | FileCheck %s -# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog -c %t.crash' 2>&1 | FileCheck %s +# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog --mode batch %t.crash' 2>&1 | FileCheck %s +# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog --mode batch -c %t.crash' 2>&1 | FileCheck %s # RUN: cp %S/Inputs/a.out.ips %t.nometadata.crash # RUN: %python %S/patch-crashlog.py --binary %t.out --crashlog %t.nometadata.crash --offsets '{"main":20, "bar":9, "foo":16}' --json --no-metadata -# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog %t.nometadata.crash' 2>&1 | FileCheck %s +# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog --mode batch %t.nometadata.crash' 2>&1 | FileCheck %s # CHECK: "crashlog" {{.*}} commands have been installed, use the "--help" options on these commands diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/no_threadState.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/no_threadState.test index 5b5cef40716c..2e4b46c8c240 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/no_threadState.test +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/no_threadState.test @@ -2,7 +2,7 @@ # RUN: cp %S/Inputs/no_threadState.ips %t.crash # RUN: %python %S/patch-crashlog.py --binary %t.out --crashlog %t.crash --offsets '{"main":20, "bar":9, "foo":16}' --json -# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog %t.crash' 2>&1 | FileCheck %s +# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 
'crashlog --mode batch %t.crash' 2>&1 | FileCheck %s # CHECK: "crashlog" {{.*}} commands have been installed, use the "--help" options on these commands diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test index 64cd0904371a..52a185b8cf76 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test @@ -3,7 +3,7 @@ # RUN: mkdir -p %t.dir # RUN: yaml2obj %S/Inputs/interactive_crashlog/multithread-test.yaml > %t.dir/multithread-test # RUN: %lldb -b -o 'command script import lldb.macosx.crashlog' \ -# RUN: -o 'crashlog -a -i -s -t %t.dir/multithread-test %S/Inputs/interactive_crashlog/multithread-test.ips' \ +# RUN: -o 'crashlog -a -s -t %t.dir/multithread-test %S/Inputs/interactive_crashlog/multithread-test.ips' \ # RUN: -o 'command source -s 0 %s' 2>&1 | FileCheck %s # CHECK: "crashlog" {{.*}} commands have been installed, use the "--help" options on these commands diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/text.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/text.test index e9d1c5e98fb3..eec30a1da64c 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/text.test +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/text.test @@ -1,7 +1,7 @@ # RUN: %clang_host -g %S/Inputs/test.c -o %t.out # RUN: cp %S/Inputs/a.out.crash %t.crash # RUN: %python %S/patch-crashlog.py --binary %t.out --crashlog %t.crash --offsets '{"main":20, "bar":9, "foo":16}' -# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog %t.crash' 2>&1 | FileCheck %s +# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog -b %t.crash' 2>&1 | FileCheck %s # CHECK: "crashlog" {{.*}} commands have been installed, use the "--help" options on 
these commands -- cgit v1.2.3 From 590066bee70db37636311881c5b232464d6d4aec Mon Sep 17 00:00:00 2001 From: Rajat Bajpai Date: Thu, 19 Jun 2025 12:18:17 +0530 Subject: [NVPTX] Add family-specific architectures support (#141899) This change adds family-specific architecture variants support added in [PTX ISA 8.8](https://docs.nvidia.com/cuda/parallel-thread-execution/#ptx-isa-version-8-8). These architecture variants have "f" suffix. For example, sm_100f. This change doesn't promote existing features to family-specific architecture. --- llvm/docs/NVPTXUsage.rst | 52 +++++++++++++++++++++- llvm/lib/Target/NVPTX/NVPTX.td | 76 ++++++++++++++++++++++++++++----- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 8 ++-- llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 42 ++++++++++++------ llvm/test/CodeGen/NVPTX/sm-version.ll | 20 +++++++++ 5 files changed, 169 insertions(+), 29 deletions(-) diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index abd7ca545364..11017fe4e01b 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -147,7 +147,57 @@ Example: 32-bit PTX for CUDA Driver API: ``nvptx-nvidia-cuda`` Example: 64-bit PTX for CUDA Driver API: ``nvptx64-nvidia-cuda`` - +.. _nvptx_arch_hierarchy: + +NVPTX Architecture Hierarchy and Ordering +========================================= + +GPU architectures: sm_2Y/sm_3Y/sm_5Y/sm_6Y/sm_7Y/sm_8Y/sm_9Y/sm_10Y/sm_12Y +('Y' represents version within the architecture) +The architectures have name of form ``sm_XYz`` where ``X`` represent the generation +number, ``Y`` represents the version within the architecture, and ``z`` represents +the optional feature suffix. +If ``X1Y1 <= X2Y2``, then GPU capabilities of ``sm_X1Y1`` are included in ``sm_X2Y2``. +For example, take ``sm_90`` (9 represents ``X``, 0 represents ``Y``, and no feature +suffix) and ``sm_103`` architectures (10 represents ``X``, 3 represents ``Y``, and no +feature suffix). Since 90 <= 103, ``sm_90`` is compatible with ``sm_103``. 
+ +The family-specific variants have the ``f`` feature suffix and they follow the +following order: +``sm_X{Y2}f > sm_X{Y1}f`` iff ``Y2 > Y1`` +``sm_XY{f} > sm_{XY}{}`` + +For example, take ``sm_100f`` (10 represents ``X``, 0 represents ``Y``, and ``f`` +represents ``z``) and ``sm_103f`` (10 represents ``X``, 3 represents ``Y``, and ``f`` +represents ``z``) architecture variants. Since ``Y1 < Y2``, ``sm_100f`` is compatible with +``sm_103f``. Similarly, based on the second rule, ``sm_90`` is compatible with ``sm_103f``. + +As a counterexample, take ``sm_100f`` and ``sm_120f`` (12 represents ``X``, 0 +represents ``Y``, and ``f`` represents ``z``) architecture variants. Since both +belong to different families, i.e. ``X1 != X2``, ``sm_100f`` is not compatible with +``sm_120f``. + +The architecture-specific variants have the ``a`` feature suffix and they follow the +following order: +``sm_XY{a} > sm_XY{f} > sm_{XY}{}`` + +For example, take ``sm_103a`` (10 represents ``X``, 3 represents ``Y``, and ``a`` +represents ``z``), ``sm_103f``, and ``sm_103`` architecture variants. The ``sm_103`` is +compatible with ``sm_103a`` and ``sm_103f``, and ``sm_103f`` is compatible with ``sm_103a``. + +Encoding := Arch * 10 + 2 (for 'f') + 1 (for 'a') +Arch := X * 10 + Y + +For example, ``sm_103f`` is encoded as 1032 (103 * 10 + 2) and ``sm_103a`` is +encoded as 1033 (103 * 10 + 2 + 1). + +This encoding allows simple partial ordering of the architectures. + +* Compare Family and Arch by dividing FullSMVersion by 100 and 10 + respectively before the comparison. +* Compare within the family by comparing FullSMVersion, given both belong to + the same family. +* Detect ``a`` variants by checking FullSMVersion & 1. .. 
_nvptx_intrinsics: diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td index ff9a187ecf72..83992606bc41 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -33,20 +33,69 @@ class FeaturePTX: SubtargetFeature<"ptx"# version, "PTXVersion", "" # version, "Use PTX version " # version>; - +// NVPTX Architecture Hierarchy and Ordering: +// +// GPU architectures: sm_2Y/sm_3Y/sm_5Y/sm_6Y/sm_7Y/sm_8Y/sm_9Y/sm_10Y/sm_12Y +// ('Y' represents version within the architecture) +// The architectures have name of form sm_XYz where 'X' represent the generation +// number, 'Y' represents the version within the architecture, and 'z' represents +// the optional feature suffix. +// If X1Y1 <= X2Y2, then GPU capabilities of sm_X1Y1 are included in sm_X2Y2. +// For example, take sm_90 (9 represents 'X', 0 represents 'Y', and no feature +// suffix) and sm_103 architectures (10 represents 'X', 3 represents 'Y', and no +// feature suffix). Since 90 <= 103, sm_90 is compatible with sm_103. +// +// The family-specific variants have 'f' feature suffix and they follow +// following order: +// sm_X{Y2}f > sm_X{Y1}f iff Y2 > Y1 +// sm_XY{f} > sm_{XY}{} +// +// For example, take sm_100f (10 represents 'X', 0 represents 'Y', and 'f' +// represents 'z') and sm_103f (10 represents 'X', 3 represents 'Y', and 'f' +// represents 'z') architecture variants. Since Y1 < Y2, sm_100f is compatible with +// sm_103f. Similarly based on the second rule, sm_90 is compatible with sm_103f. +// +// Some counter examples, take sm_100f and sm_120f (12 represents 'X', 0 +// represents 'Y', and 'f' represents 'z') architecture variants. Since both +// belongs to different family i.e. X1 != X2, sm_100f is not compatible with +// sm_120f. 
+// +// The architecture-specific variants have 'a' feature suffix and they follow +// following order: +// sm_XY{a} > sm_XY{f} > sm_{XY}{} +// +// For example, take sm_103a (10 represents 'X', 3 represents 'Y', and 'a' +// represents 'z'), sm_103f, and sm_103 architecture variants. The sm_103 is +// compatible with sm_103a and sm_103f, and sm_103f is compatible with sm_103a. +// +// Encoding := Arch * 10 + 2 (for 'f') + 1 (for 'a') +// Arch := X * 10 + Y +// +// For example, sm_103a is encoded as 1033 (103 * 10 + 2 + 1) and sm_103f is +// encoded as 1032 (103 * 10 + 2). +// +// This encoding allows simple partial ordering of the architectures. +// + Compare Family and Arch by dividing FullSMVersion by 100 and 10 +// respectively before the comparison. +// + Compare within the family by comparing FullSMVersion, given both belongs to +// the same family. +// + Detect 'a' variants by checking FullSMVersion & 1. foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53, 60, 61, 62, 70, 72, 75, 80, 86, 87, - 89, 90, 100, 101, 103, 120, 121] in - def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>; + 89, 90, 100, 101, 103, 120, 121] in { + // Base SM version (e.g. FullSMVersion for sm_100 is 1000) + def SM#sm : FeatureSM<""#sm, !mul(sm, 10)>; + + // Family-specific targets which are compatible within same family + // (e.g. FullSMVersion for sm_100f is 1002) + if !ge(sm, 100) then + def SM#sm#f : FeatureSM<""#sm#"f", !add(!mul(sm, 10), 2)>; -// Arch-specific targets. PTX for these is not compatible with any other -// architectures. -def SM90a : FeatureSM<"90a", 901>; -def SM100a: FeatureSM<"100a", 1001>; -def SM101a: FeatureSM<"101a", 1011>; -def SM103a: FeatureSM<"103a", 1031>; -def SM120a: FeatureSM<"120a", 1201>; -def SM121a: FeatureSM<"121a", 1211>; + // Architecture-specific targets which are incompatible across architectures + // (e.g. 
FullSMVersion for sm_100a is 1003) + if !ge(sm, 90) then + def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 10), 3)>; +} foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, 70, 71, 72, 73, 74, 75, 76, 77, 78, @@ -83,14 +132,19 @@ def : Proc<"sm_90", [SM90, PTX78]>; def : Proc<"sm_90a", [SM90a, PTX80]>; def : Proc<"sm_100", [SM100, PTX86]>; def : Proc<"sm_100a", [SM100a, PTX86]>; +def : Proc<"sm_100f", [SM100f, PTX88]>; def : Proc<"sm_101", [SM101, PTX86]>; def : Proc<"sm_101a", [SM101a, PTX86]>; +def : Proc<"sm_101f", [SM101f, PTX88]>; def : Proc<"sm_103", [SM103, PTX88]>; def : Proc<"sm_103a", [SM103a, PTX88]>; +def : Proc<"sm_103f", [SM103f, PTX88]>; def : Proc<"sm_120", [SM120, PTX87]>; def : Proc<"sm_120a", [SM120a, PTX87]>; +def : Proc<"sm_120f", [SM120f, PTX88]>; def : Proc<"sm_121", [SM121, PTX88]>; def : Proc<"sm_121a", [SM121a, PTX88]>; +def : Proc<"sm_121f", [SM121f, PTX88]>; def NVPTXInstrInfo : InstrInfo { } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 5dbdce52f055..bbe99dec5c44 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -158,10 +158,10 @@ class hasPTX: Predicate<"Subtarget->getPTXVersion() >= " # version> class hasSM: Predicate<"Subtarget->getSmVersion() >= " # version>; // Explicit records for arch-accelerated SM versions -def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">; -def hasSM100a : Predicate<"Subtarget->getFullSmVersion() == 1001">; -def hasSM101a : Predicate<"Subtarget->getFullSmVersion() == 1011">; -def hasSM120a : Predicate<"Subtarget->getFullSmVersion() == 1201">; +def hasSM90a : Predicate<"Subtarget->getSmVersion() == 90 && Subtarget->hasArchAccelFeatures()">; +def hasSM100a : Predicate<"Subtarget->getSmVersion() == 100 && Subtarget->hasArchAccelFeatures()">; +def hasSM101a : Predicate<"Subtarget->getSmVersion() == 101 && Subtarget->hasArchAccelFeatures()">; +def hasSM120a : 
Predicate<"Subtarget->getSmVersion() == 120 && Subtarget->hasArchAccelFeatures()">; // non-sync shfl instructions are not available on sm_70+ in PTX6.4+ def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index d2eae4882682..8810feaee297 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -108,8 +108,8 @@ public: switch (FullSmVersion) { default: break; - case 1001: // sm_100a - case 1011: // sm_101a + case 1003: // sm_100a + case 1013: // sm_101a HasTcgen05 = true; break; } @@ -120,9 +120,15 @@ public: // TMA G2S copy with cta_group::1/2 support bool hasCpAsyncBulkTensorCTAGroupSupport() const { // TODO: Update/tidy-up after the family-conditional support arrives - return ((FullSmVersion == 1001 || FullSmVersion == 1011) && - PTXVersion >= 86) || - (FullSmVersion == 1031 && PTXVersion >= 88); + switch (FullSmVersion) { + case 1003: + case 1013: + return PTXVersion >= 86; + case 1033: + return PTXVersion >= 88; + default: + return false; + } } // Prior to CUDA 12.3 ptxas did not recognize that the trap instruction @@ -136,14 +142,24 @@ public: bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; } unsigned int getFullSmVersion() const { return FullSmVersion; } unsigned int getSmVersion() const { return getFullSmVersion() / 10; } - // GPUs with "a" suffix have include architecture-accelerated features that - // are supported on the specified architecture only, hence such targets do not - // follow the onion layer model. hasArchAccelFeatures() allows - // distinguishing such GPU variants from the base GPU architecture. - // - 0 represents base GPU model, - // - non-zero value identifies particular architecture-accelerated variant. 
- bool hasArchAccelFeatures() const { return getFullSmVersion() % 10; } - + // GPUs with "a" suffix have architecture-accelerated features that are + // supported on the specified architecture only, hence such targets do not + // follow the onion layer model. hasArchAccelFeatures() allows distinguishing + // such GPU variants from the base GPU architecture. + // - false represents non-accelerated architecture. + // - true represents architecture-accelerated variant. + bool hasArchAccelFeatures() const { + return (getFullSmVersion() & 1) && PTXVersion >= 80; + } + // GPUs with 'f' suffix have architecture-accelerated features which are + // portable across all future architectures under same SM major. For example, + // sm_100f features will work for sm_10X*f*/sm_10X*a* future architectures. + // - false represents non-family-specific architecture. + // - true represents family-specific variant. + bool hasFamilySpecificFeatures() const { + return getFullSmVersion() % 10 == 2 ? PTXVersion >= 88 + : hasArchAccelFeatures(); + } // If the user did not provide a target we default to the `sm_30` target. std::string getTargetName() const { return TargetName.empty() ? 
"sm_30" : TargetName; diff --git a/llvm/test/CodeGen/NVPTX/sm-version.ll b/llvm/test/CodeGen/NVPTX/sm-version.ll index 9705a2f3ba73..3a154a1b9ac9 100644 --- a/llvm/test/CodeGen/NVPTX/sm-version.ll +++ b/llvm/test/CodeGen/NVPTX/sm-version.ll @@ -18,14 +18,19 @@ ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_90a | FileCheck %s --check-prefix=SM90a ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_100 | FileCheck %s --check-prefix=SM100 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_100a | FileCheck %s --check-prefix=SM100a +; RUN: llc < %s -mtriple=nvptx -mcpu=sm_100f | FileCheck %s --check-prefix=SM100f ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101 | FileCheck %s --check-prefix=SM101 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101a | FileCheck %s --check-prefix=SM101a +; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101f | FileCheck %s --check-prefix=SM101f ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103 | FileCheck %s --check-prefix=SM103 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103a | FileCheck %s --check-prefix=SM103a +; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103f | FileCheck %s --check-prefix=SM103f ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120 | FileCheck %s --check-prefix=SM120 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120a | FileCheck %s --check-prefix=SM120a +; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120f | FileCheck %s --check-prefix=SM120f ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121 | FileCheck %s --check-prefix=SM121 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121a | FileCheck %s --check-prefix=SM121a +; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121f | FileCheck %s --check-prefix=SM121f ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=SM20 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_21 | FileCheck %s --check-prefix=SM21 @@ -47,14 +52,19 @@ ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90a | FileCheck %s --check-prefix=SM90a ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 | FileCheck %s --check-prefix=SM100 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a | FileCheck %s --check-prefix=SM100a +; 
RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f | FileCheck %s --check-prefix=SM100f ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101 | FileCheck %s --check-prefix=SM101 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101a | FileCheck %s --check-prefix=SM101a +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101f | FileCheck %s --check-prefix=SM101f ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103 | FileCheck %s --check-prefix=SM103 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103a | FileCheck %s --check-prefix=SM103a +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103f | FileCheck %s --check-prefix=SM103f ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120 | FileCheck %s --check-prefix=SM120 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120a | FileCheck %s --check-prefix=SM120a +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120f | FileCheck %s --check-prefix=SM120f ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121 | FileCheck %s --check-prefix=SM121 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121a | FileCheck %s --check-prefix=SM121a +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121f | FileCheck %s --check-prefix=SM121f ; SM20: .version 3.2 ; SM21: .version 3.2 @@ -76,14 +86,19 @@ ; SM90a: .version 8.0 ; SM100: .version 8.6 ; SM100a: .version 8.6 +; SM100f: .version 8.8 ; SM101: .version 8.6 ; SM101a: .version 8.6 +; SM101f: .version 8.8 ; SM103: .version 8.8 ; SM103a: .version 8.8 +; SM103f: .version 8.8 ; SM120: .version 8.7 ; SM120a: .version 8.7 +; SM120f: .version 8.8 ; SM121: .version 8.8 ; SM121a: .version 8.8 +; SM121f: .version 8.8 ; SM20: .target sm_20 ; SM21: .target sm_21 @@ -105,11 +120,16 @@ ; SM90a: .target sm_90a ; SM100: .target sm_100 ; SM100a: .target sm_100a +; SM100f: .target sm_100f ; SM101: .target sm_101 ; SM101a: .target sm_101a +; SM101f: .target sm_101f ; SM103: .target sm_103 ; SM103a: .target sm_103a +; SM103f: .target sm_103f ; SM120: .target sm_120 ; SM120a: .target sm_120a +; SM120f: .target sm_120f ; SM121: .target sm_121 ; SM121a: .target sm_121a +; SM121f: .target sm_121f -- cgit 
v1.2.3 From 03461c9c6e21e43a6e1c699bfb254ddb3d575c93 Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Thu, 19 Jun 2025 07:56:30 +0100 Subject: [mlir][gpu][spirv] Remove rotation semantics of gpu.shuffle up/down (#139105) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From the description of the gpu.shuffle operation, shuffle up/down rotates values in the subgroup because it applies modulo on the shifted value to calculate the result lane ID. It is inconsistent with the definition of SPIR-V shuffle up/down and NVVM data movement definitions within a subgroup. In NVVM, it says "If the computed source lane index j is in range, the returned i32 value will be the value of %a from lane j; otherwise, it will be the value of %a from the current thread." It will keep the original value if the result lane ID is out of range. In SPIR-V OpGroupNonUniformShuffleUp and OpGroupNonUniformShuffleDown, it says "The resulting value is undefined if Delta is greater than the current invocation’s id within the scope or if the identified invocation is not in scope restricted tangle." It's an undefined value if the result lane ID is out of range. Anyway, there is no circular movement in shuffle up/down from these 2 specifications. This patch removes the circular movement in gpu.shuffle up/down and lowers gpu.shuffle up/down to SPIR-V OpGroupNonUniformShuffleUp and OpGroupNonUniformShuffleDown directly. 
Reference: https://docs.nvidia.com/cuda/archive/12.2.1/nvvm-ir-spec/index.html#data-movement https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpGroupNonUniformShuffleUp https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpGroupNonUniformShuffleDown --- mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 6 ++- mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp | 45 ++++++++++++++--- mlir/test/Conversion/GPUToSPIRV/shuffle.mlir | 71 ++++++++++++++++++++++++++- 3 files changed, 111 insertions(+), 11 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td index 15b14c767b66..a81b2e83ddef 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -1332,7 +1332,8 @@ def GPU_ShuffleOp : GPU_Op< %3, %4 = gpu.shuffle down %0, %cst1, %width : f32 ``` - For lane `k`, returns the value from lane `(k + 1) % width`. + For lane `k`, returns the value from lane `(k + cst1)`. If `(k + cst1)` is + bigger than or equal to `width`, the value is poison and `valid` is `false`. `up` example: @@ -1341,7 +1342,8 @@ def GPU_ShuffleOp : GPU_Op< %5, %6 = gpu.shuffle up %0, %cst1, %width : f32 ``` - For lane `k`, returns the value from lane `(k - 1) % width`. + For lane `k`, returns the value from lane `(k - cst1)`. If `(k - cst1)` is + smaller than `0`, the value is poison and `valid` is `false`. 
`idx` example: diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp index 46db5d3fdca3..93c76d267c51 100644 --- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp @@ -435,26 +435,57 @@ LogicalResult GPUShuffleConversion::matchAndRewrite( return rewriter.notifyMatchFailure( shuffleOp, "shuffle width and target subgroup size mismatch"); + assert(!adaptor.getOffset().getType().isSignedInteger() && + "shuffle offset must be a signless/unsigned integer"); + Location loc = shuffleOp.getLoc(); - Value trueVal = spirv::ConstantOp::getOne(rewriter.getI1Type(), - shuffleOp.getLoc(), rewriter); auto scope = rewriter.getAttr(spirv::Scope::Subgroup); Value result; + Value validVal; switch (shuffleOp.getMode()) { - case gpu::ShuffleMode::XOR: + case gpu::ShuffleMode::XOR: { result = rewriter.create( loc, scope, adaptor.getValue(), adaptor.getOffset()); + validVal = spirv::ConstantOp::getOne(rewriter.getI1Type(), + shuffleOp.getLoc(), rewriter); break; - case gpu::ShuffleMode::IDX: + } + case gpu::ShuffleMode::IDX: { result = rewriter.create( loc, scope, adaptor.getValue(), adaptor.getOffset()); + validVal = spirv::ConstantOp::getOne(rewriter.getI1Type(), + shuffleOp.getLoc(), rewriter); + break; + } + case gpu::ShuffleMode::DOWN: { + result = rewriter.create( + loc, scope, adaptor.getValue(), adaptor.getOffset()); + + Value laneId = rewriter.create(loc, widthAttr); + Value resultLaneId = + rewriter.create(loc, laneId, adaptor.getOffset()); + validVal = rewriter.create(loc, arith::CmpIPredicate::ult, + resultLaneId, adaptor.getWidth()); break; - default: - return rewriter.notifyMatchFailure(shuffleOp, "unimplemented shuffle mode"); + } + case gpu::ShuffleMode::UP: { + result = rewriter.create( + loc, scope, adaptor.getValue(), adaptor.getOffset()); + + Value laneId = rewriter.create(loc, widthAttr); + Value resultLaneId = + rewriter.create(loc, laneId, adaptor.getOffset()); + 
auto i32Type = rewriter.getIntegerType(32); + validVal = rewriter.create( + loc, arith::CmpIPredicate::sge, resultLaneId, + rewriter.create( + loc, i32Type, rewriter.getIntegerAttr(i32Type, 0))); + break; + } } - rewriter.replaceOp(shuffleOp, {result, trueVal}); + rewriter.replaceOp(shuffleOp, {result, validVal}); return success(); } diff --git a/mlir/test/Conversion/GPUToSPIRV/shuffle.mlir b/mlir/test/Conversion/GPUToSPIRV/shuffle.mlir index d3d8ec0dab40..e93f69704f25 100644 --- a/mlir/test/Conversion/GPUToSPIRV/shuffle.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/shuffle.mlir @@ -15,8 +15,8 @@ gpu.module @kernels { // CHECK: %[[MASK:.+]] = spirv.Constant 8 : i32 // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32 - // CHECK: %{{.+}} = spirv.Constant true // CHECK: %{{.+}} = spirv.GroupNonUniformShuffleXor %[[VAL]], %[[MASK]] : f32, i32 + // CHECK: %{{.+}} = spirv.Constant true %result, %valid = gpu.shuffle xor %val, %mask, %width : f32 gpu.return } @@ -64,11 +64,78 @@ gpu.module @kernels { // CHECK: %[[MASK:.+]] = spirv.Constant 8 : i32 // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32 - // CHECK: %{{.+}} = spirv.Constant true // CHECK: %{{.+}} = spirv.GroupNonUniformShuffle %[[VAL]], %[[MASK]] : f32, i32 + // CHECK: %{{.+}} = spirv.Constant true %result, %valid = gpu.shuffle idx %val, %mask, %width : f32 gpu.return } } } + +// ----- + +module attributes { + gpu.container_module, + spirv.target_env = #spirv.target_env<#spirv.vce, + #spirv.resource_limits> +} { + +gpu.module @kernels { + // CHECK-LABEL: spirv.func @shuffle_down() + gpu.func @shuffle_down() kernel + attributes {spirv.entry_point_abi = #spirv.entry_point_abi} { + %offset = arith.constant 4 : i32 + %width = arith.constant 16 : i32 + %val = arith.constant 42.0 : f32 + + // CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32 + // CHECK: %[[WIDTH:.+]] = spirv.Constant 16 : i32 + // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32 + // CHECK: %{{.+}} = spirv.GroupNonUniformShuffleDown 
%[[VAL]], %[[OFFSET]] : f32, i32 + + // CHECK: %[[INVOCATION_ID_ADDR:.+]] = spirv.mlir.addressof @__builtin__SubgroupLocalInvocationId__ : !spirv.ptr + // CHECK: %[[LANE_ID:.+]] = spirv.Load "Input" %[[INVOCATION_ID_ADDR]] : i32 + // CHECK: %[[VAL_LANE_ID:.+]] = spirv.IAdd %[[LANE_ID]], %[[OFFSET]] : i32 + // CHECK: %[[VALID:.+]] = spirv.ULessThan %[[VAL_LANE_ID]], %[[WIDTH]] : i32 + + %result, %valid = gpu.shuffle down %val, %offset, %width : f32 + gpu.return + } +} + +} + +// ----- + +module attributes { + gpu.container_module, + spirv.target_env = #spirv.target_env<#spirv.vce, + #spirv.resource_limits> +} { + +gpu.module @kernels { + // CHECK-LABEL: spirv.func @shuffle_up() + gpu.func @shuffle_up() kernel + attributes {spirv.entry_point_abi = #spirv.entry_point_abi} { + %offset = arith.constant 4 : i32 + %width = arith.constant 16 : i32 + %val = arith.constant 42.0 : f32 + + // CHECK: %[[OFFSET:.+]] = spirv.Constant 4 : i32 + // CHECK: %[[WIDTH:.+]] = spirv.Constant 16 : i32 + // CHECK: %[[VAL:.+]] = spirv.Constant 4.200000e+01 : f32 + // CHECK: %{{.+}} = spirv.GroupNonUniformShuffleUp %[[VAL]], %[[OFFSET]] : f32, i32 + + // CHECK: %[[INVOCATION_ID_ADDR:.+]] = spirv.mlir.addressof @__builtin__SubgroupLocalInvocationId__ : !spirv.ptr + // CHECK: %[[LANE_ID:.+]] = spirv.Load "Input" %[[INVOCATION_ID_ADDR]] : i32 + // CHECK: %[[VAL_LANE_ID:.+]] = spirv.ISub %[[LANE_ID]], %[[OFFSET]] : i32 + // CHECK: %[[CST0:.+]] = spirv.Constant 0 : i32 + // CHECK: %[[VALID:.+]] = spirv.SGreaterThanEqual %[[VAL_LANE_ID]], %[[CST0]] : i32 + + %result, %valid = gpu.shuffle up %val, %offset, %width : f32 + gpu.return + } +} + +} -- cgit v1.2.3 From 2c2ad9a096e78e9129f8cb2d4ee260eb7e67473f Mon Sep 17 00:00:00 2001 From: Younan Zhang Date: Thu, 19 Jun 2025 14:59:32 +0800 Subject: Reapply "[Clang] Profile singly-resolved UnresolvedLookupExpr with the declaration" (#140680) For a dependent variable template specialization, we don't build a dependent Decl node or a DeclRefExpr to 
represent it. Instead, we preserve the UnresolvedLookupExpr until instantiation. However, this approach isn't ideal for constraint normalization. We consider the qualifier during profiling, but since that's based on the written code, it can introduce confusing differences, even when the expressions resolve to the same declaration. This change profiles the underlying VarTemplateDecl if UnresolvedLookupExpr is used to model a dependent use of it. Fixes https://github.com/llvm/llvm-project/issues/139476 --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/AST/StmtProfile.cpp | 10 ++++++++-- clang/test/SemaCXX/exception-spec.cpp | 21 +++++++++++++++++++++ .../test/SemaTemplate/concepts-out-of-line-def.cpp | 15 +++++++++++++++ 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 18234188101f..dd748ab06873 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -852,6 +852,7 @@ Bug Fixes to C++ Support - Fixed the handling of pack indexing types in the constraints of a member function redeclaration. (#GH138255) - Clang now correctly parses arbitrary order of ``[[]]``, ``__attribute__`` and ``alignas`` attributes for declarations (#GH133107) - Fixed a crash when forming an invalid function type in a dependent context. (#GH138657) (#GH115725) (#GH68852) +- Fixed a function declaration mismatch that caused inconsistencies between concepts and variable template declarations. (#GH139476) - Clang no longer segfaults when there is a configuration mismatch between modules and their users (http://crbug.com/400353616). - Fix an incorrect deduction when calling an explicit object member function template through an overload set address. 
- Fixed bug in constant evaluation that would allow using the value of a diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index c666d966a6e5..c61450e19f1b 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2189,8 +2189,14 @@ StmtProfiler::VisitCXXPseudoDestructorExpr(const CXXPseudoDestructorExpr *S) { void StmtProfiler::VisitOverloadExpr(const OverloadExpr *S) { VisitExpr(S); - VisitNestedNameSpecifier(S->getQualifier()); - VisitName(S->getName(), /*TreatAsDecl*/ true); + bool DescribingDependentVarTemplate = + S->getNumDecls() == 1 && isa(*S->decls_begin()); + if (DescribingDependentVarTemplate) { + VisitDecl(*S->decls_begin()); + } else { + VisitNestedNameSpecifier(S->getQualifier()); + VisitName(S->getName(), /*TreatAsDecl*/ true); + } ID.AddBoolean(S->hasExplicitTemplateArgs()); if (S->hasExplicitTemplateArgs()) VisitTemplateArguments(S->getTemplateArgs(), S->getNumTemplateArgs()); diff --git a/clang/test/SemaCXX/exception-spec.cpp b/clang/test/SemaCXX/exception-spec.cpp index 6ad19aab397b..31c691b28da4 100644 --- a/clang/test/SemaCXX/exception-spec.cpp +++ b/clang/test/SemaCXX/exception-spec.cpp @@ -52,3 +52,24 @@ namespace AssignmentOp { D2 &operator=(const D2&); // expected-error {{more lax}} }; } + +namespace OverloadedFunctions { + +template +void f(T&) noexcept; + +template +void f(T (&arr)[N]) noexcept(noexcept(f(*arr))); + +template +inline void f(T&) noexcept {} + +template +inline void f(T (&arr)[N]) noexcept(noexcept(f(*arr))) {} + +void g() { + int x[1]; + f(x); +} + +} diff --git a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp index e5d00491d3fb..bf505dec0ca1 100644 --- a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp +++ b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp @@ -853,3 +853,18 @@ template requires C auto TplClass::buggy() -> void {} } + +namespace GH139476 { + +namespace moo { + template + constexpr bool 
baa = true; + + template requires baa + void caw(); +} + +template requires moo::baa +void moo::caw() {} + +} -- cgit v1.2.3 From 9ee55e717308757b580dff182fc23b40d1c18a56 Mon Sep 17 00:00:00 2001 From: Amr Hesham Date: Thu, 19 Jun 2025 09:01:51 +0200 Subject: [CIR] Implement folder for VecSplatOp (#143771) This change adds a folder for the VecSplatOp Issue https://github.com/llvm/llvm-project/issues/136487 --- clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp | 30 ++++++++++++++++++++-- .../lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 4 +-- clang/test/CIR/Transforms/vector-splat.cir | 16 ++++++++++++ 3 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 clang/test/CIR/Transforms/vector-splat.cir diff --git a/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp b/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp index 67ed4124f26c..3b7f08c44140 100644 --- a/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp +++ b/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp @@ -260,6 +260,31 @@ struct SimplifySwitch : public OpRewritePattern { } }; +struct SimplifyVecSplat : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(VecSplatOp op, + PatternRewriter &rewriter) const override { + mlir::Value splatValue = op.getValue(); + auto constant = + mlir::dyn_cast_if_present(splatValue.getDefiningOp()); + if (!constant) + return mlir::failure(); + + auto value = constant.getValue(); + if (!mlir::isa_and_nonnull(value) && + !mlir::isa_and_nonnull(value)) + return mlir::failure(); + + cir::VectorType resultType = op.getResult().getType(); + SmallVector elements(resultType.getSize(), value); + auto constVecAttr = cir::ConstVectorAttr::get( + resultType, mlir::ArrayAttr::get(getContext(), elements)); + + rewriter.replaceOpWithNewOp(op, constVecAttr); + return mlir::success(); + } +}; + //===----------------------------------------------------------------------===// // CIRSimplifyPass 
//===----------------------------------------------------------------------===// @@ -275,7 +300,8 @@ void populateMergeCleanupPatterns(RewritePatternSet &patterns) { patterns.add< SimplifyTernary, SimplifySelect, - SimplifySwitch + SimplifySwitch, + SimplifyVecSplat >(patterns.getContext()); // clang-format on } @@ -288,7 +314,7 @@ void CIRSimplifyPass::runOnOperation() { // Collect operations to apply patterns. llvm::SmallVector ops; getOperation()->walk([&](Operation *op) { - if (isa(op)) + if (isa(op)) ops.push_back(op); }); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index b73cb839828e..5f41e340e247 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -979,9 +979,7 @@ mlir::LogicalResult CIRToLLVMConstantOpLowering::matchAndRewrite( } attr = rewriter.getArrayAttr(components); - } - - else { + } else { return op.emitError() << "unsupported constant type " << op.getType(); } diff --git a/clang/test/CIR/Transforms/vector-splat.cir b/clang/test/CIR/Transforms/vector-splat.cir new file mode 100644 index 000000000000..e2274b8627b1 --- /dev/null +++ b/clang/test/CIR/Transforms/vector-splat.cir @@ -0,0 +1,16 @@ +// RUN: cir-opt %s -cir-simplify -o - | FileCheck %s + +!s32i = !cir.int + +module { + cir.func @fold_shuffle_vector_op_test() -> !cir.vector<4 x !s32i> { + %v = cir.const #cir.int<3> : !s32i + %vec = cir.vec.splat %v : !s32i, !cir.vector<4 x !s32i> + cir.return %vec : !cir.vector<4 x !s32i> + } + + // CHECK: cir.func @fold_shuffle_vector_op_test() -> !cir.vector<4 x !s32i> { + // CHECK-NEXT: %0 = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i, + // CHECK-SAME: #cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<4 x !s32i> + // CHECK-NEXT: cir.return %0 : !cir.vector<4 x !s32i> +} -- cgit v1.2.3 From 408e55098d7d8f7064d7a288b5e3fe6fdbbc2ad4 Mon Sep 17 00:00:00 2001 From: quic_hchandel 
Date: Thu, 19 Jun 2025 12:36:20 +0530 Subject: [RISCV] Add support for handling one tied operand in the source instruction for compress patterns (#143660) This update enables compress patterns to handle one tied operand in source instructions, which was previously unsupported. Qualcomm's uC extension Xqci includes several instructions with tied operands that can be compressed into smaller forms. This change adds the necessary support to enable such compression. Additionally, a compress pattern for the qc.muliadd instruction has been implemented. --- llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 5 +++ llvm/test/MC/RISCV/xqciac-valid.s | 21 ++++++++--- llvm/utils/TableGen/CompressInstEmitter.cpp | 58 +++++++++++++++++++++-------- 3 files changed, 63 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 09852c6fd596..2856df47f704 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1570,6 +1570,11 @@ def : CompressPat<(QC_E_ADDI X2, X2, simm10_lsb0000nonzero:$imm), (C_ADDI16SP X2, simm10_lsb0000nonzero:$imm)>; } // let isCompressOnly = true, Predicates = [HasVendorXqcilia, IsRV32] +let Predicates = [HasVendorXqciac, IsRV32] in { +def : CompressPat<(QC_MULIADD GPRC:$rd, GPRC:$rs1, uimm5:$imm5), + (QC_C_MULIADD GPRC:$rd, GPRC:$rs1, uimm5:$imm5)>; +} + let isCompressOnly = true, Predicates = [HasVendorXqcibi, IsRV32] in { def : CompressPat<(QC_E_BEQI GPRNoX0:$rs1, simm5nonzero:$imm5, bare_simm13_lsb0:$imm12), (QC_BEQI GPRNoX0:$rs1, simm5nonzero:$imm5, bare_simm13_lsb0:$imm12)>; diff --git a/llvm/test/MC/RISCV/xqciac-valid.s b/llvm/test/MC/RISCV/xqciac-valid.s index 438c4cafe0df..1afebc75cb45 100644 --- a/llvm/test/MC/RISCV/xqciac-valid.s +++ b/llvm/test/MC/RISCV/xqciac-valid.s @@ -1,24 +1,27 @@ # Xqciac - Qualcomm uC Load-Store Address Calculation Extension # RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-xqciac -M 
no-aliases -show-encoding \ -# RUN: | FileCheck -check-prefixes=CHECK-ENC,CHECK-INST %s +# RUN: | FileCheck -check-prefixes=CHECK-ENC,CHECK-INST,CHECK-NOALIAS %s # RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+experimental-xqciac < %s \ # RUN: | llvm-objdump --mattr=+experimental-xqciac -M no-aliases --no-print-imm-hex -d - \ # RUN: | FileCheck -check-prefix=CHECK-INST %s # RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-xqciac -show-encoding \ -# RUN: | FileCheck -check-prefixes=CHECK-ENC,CHECK-INST %s +# RUN: | FileCheck -check-prefixes=CHECK-ENC,CHECK-INST,CHECK-ALIAS %s # RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+experimental-xqciac < %s \ # RUN: | llvm-objdump --mattr=+experimental-xqciac --no-print-imm-hex -d - \ # RUN: | FileCheck -check-prefix=CHECK-INST %s -# CHECK-INST: qc.c.muliadd a0, a1, 0 +# CHECK-NOALIAS: qc.c.muliadd a0, a1, 0 +# CHECK-ALIAS: qc.muliadd a0, a1, 0 # CHECK-ENC: encoding: [0x8a,0x21] qc.c.muliadd x10, x11, 0 -# CHECK-INST: qc.c.muliadd a0, a1, 31 +# CHECK-NOALIAS: qc.c.muliadd a0, a1, 31 +# CHECK-ALIAS: qc.muliadd a0, a1, 31 # CHECK-ENC: encoding: [0xea,0x3d] qc.c.muliadd x10, x11, 31 -# CHECK-INST: qc.c.muliadd a0, a1, 16 +# CHECK-NOALIAS: qc.c.muliadd a0, a1, 16 +# CHECK-ALIAS: qc.muliadd a0, a1, 16 # CHECK-ENC: encoding: [0xaa,0x21] qc.c.muliadd x10, x11, 16 @@ -47,3 +50,11 @@ qc.shladd x10, x11, x12, 4 # CHECK-INST: qc.shladd a0, a1, a2, 31 # CHECK-ENC: encoding: [0x0b,0xb5,0xc5,0x7e] qc.shladd x10, x11, x12, 31 + +# Check that compress pattern for qc.muliadd works + +# CHECK-NOALIAS: qc.c.muliadd a0, a1, 16 +# CHECK-ALIAS: qc.muliadd a0, a1, 16 +# CHECK-ENC: encoding: [0xaa,0x21] +qc.muliadd x10, x11, 16 + diff --git a/llvm/utils/TableGen/CompressInstEmitter.cpp b/llvm/utils/TableGen/CompressInstEmitter.cpp index 4a0b6d79c53d..2dfeea36e213 100644 --- a/llvm/utils/TableGen/CompressInstEmitter.cpp +++ b/llvm/utils/TableGen/CompressInstEmitter.cpp @@ -75,6 +75,7 @@ #include "llvm/TableGen/Error.h" #include 
"llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" +#include #include #include using namespace llvm; @@ -123,10 +124,10 @@ class CompressInstEmitter { const RecordKeeper &Records; const CodeGenTarget Target; std::vector CompressPatterns; - void addDagOperandMapping(const Record *Rec, const DagInit *Dag, const CodeGenInstruction &Inst, - IndexedMap &OperandMap, bool IsSourceInst); + IndexedMap &OperandMap, bool IsSourceInst, + unsigned *SourceLastTiedOpPtr); void evaluateCompressPat(const Record *Compress); void emitCompressInstEmitter(raw_ostream &OS, EmitterType EType); bool validateTypes(const Record *DagOpType, const Record *InstOpType, @@ -143,7 +144,8 @@ class CompressInstEmitter { IndexedMap &SourceOperandMap, IndexedMap &DestOperandMap, StringMap &SourceOperands, - const CodeGenInstruction &DestInst); + const CodeGenInstruction &DestInst, + unsigned SourceLastTiedOp); public: CompressInstEmitter(const RecordKeeper &R) : Records(R), Target(R) {} @@ -206,7 +208,8 @@ void CompressInstEmitter::addDagOperandMapping(const Record *Rec, const DagInit *Dag, const CodeGenInstruction &Inst, IndexedMap &OperandMap, - bool IsSourceInst) { + bool IsSourceInst, + unsigned *SourceLastTiedOpPtr) { unsigned NumMIOperands = 0; for (const auto &Op : Inst.Operands) NumMIOperands += Op.MINumOperands; @@ -219,12 +222,16 @@ void CompressInstEmitter::addDagOperandMapping(const Record *Rec, // are represented. unsigned TiedCount = 0; unsigned OpNo = 0; + if (IsSourceInst) + *SourceLastTiedOpPtr = std::numeric_limits::max(); for (const auto &Opnd : Inst.Operands) { int TiedOpIdx = Opnd.getTiedRegister(); if (-1 != TiedOpIdx) { // Set the entry in OperandMap for the tied operand we're skipping. 
OperandMap[OpNo].Kind = OperandMap[TiedOpIdx].Kind; OperandMap[OpNo].Data = OperandMap[TiedOpIdx].Data; + if (IsSourceInst) + *SourceLastTiedOpPtr = OpNo; ++OpNo; ++TiedCount; continue; @@ -289,15 +296,23 @@ void CompressInstEmitter::addDagOperandMapping(const Record *Rec, static bool verifyDagOpCount(const CodeGenInstruction &Inst, const DagInit *Dag, bool IsSource) { unsigned NumMIOperands = 0; - for (const auto &Op : Inst.Operands) + + // Use this to count number of tied Operands in Source Inst in this function. + // This counter is required here to error out when there is a Source + // Inst with two or more tied operands. + unsigned SourceInstTiedOpCount = 0; + for (const auto &Op : Inst.Operands) { NumMIOperands += Op.MINumOperands; + if (Op.getTiedRegister() != -1) + SourceInstTiedOpCount++; + } if (Dag->getNumArgs() == NumMIOperands) return true; - // Source instructions are non compressed instructions and don't have tied - // operands. - if (IsSource) + // Source instructions are non compressed instructions and have at most one + // tied operand. + if (IsSource && (SourceInstTiedOpCount >= 2)) PrintFatalError(Inst.TheDef->getLoc(), "Input operands for Inst '" + Inst.TheDef->getName() + "' and input Dag operand count mismatch"); @@ -381,7 +396,8 @@ void CompressInstEmitter::createDagOperandMapping( void CompressInstEmitter::createInstOperandMapping( const Record *Rec, const DagInit *SourceDag, const DagInit *DestDag, IndexedMap &SourceOperandMap, IndexedMap &DestOperandMap, - StringMap &SourceOperands, const CodeGenInstruction &DestInst) { + StringMap &SourceOperands, const CodeGenInstruction &DestInst, + unsigned SourceLastTiedOp) { // TiedCount keeps track of the number of operands skipped in Inst // operands list to get to the corresponding Dag operand. 
unsigned TiedCount = 0; @@ -422,10 +438,18 @@ void CompressInstEmitter::createInstOperandMapping( assert(DestDag->getArgNameStr(DagArgIdx) == SourceDag->getArgNameStr(SourceOp->getValue()) && "Incorrect operand mapping detected!\n"); - DestOperandMap[OpNo].Data.Operand = SourceOp->getValue(); - SourceOperandMap[SourceOp->getValue()].Data.Operand = OpNo; - LLVM_DEBUG(dbgs() << " " << SourceOp->getValue() << " ====> " << OpNo - << "\n"); + + // Following four lines ensure the correct handling of a single tied + // operand in the Source Inst. SourceDagOp points to the position of + // appropriate Dag argument which is not correct in presence of tied + // operand in the Source Inst and must be incremented by 1 to reflect + // correct position of the operand in Source Inst + unsigned SourceDagOp = SourceOp->getValue(); + if (SourceDagOp >= SourceLastTiedOp) + SourceDagOp++; + DestOperandMap[OpNo].Data.Operand = SourceDagOp; + SourceOperandMap[SourceDagOp].Data.Operand = OpNo; + LLVM_DEBUG(dbgs() << " " << SourceDagOp << " ====> " << OpNo << "\n"); } } } @@ -484,15 +508,16 @@ void CompressInstEmitter::evaluateCompressPat(const Record *Rec) { // Fill the mapping from the source to destination instructions. IndexedMap SourceOperandMap; + unsigned SourceLastTiedOp; // postion of the last tied operand in Source Inst // Create a mapping between source Dag operands and source Inst operands. addDagOperandMapping(Rec, SourceDag, SourceInst, SourceOperandMap, - /*IsSourceInst*/ true); + /*IsSourceInst*/ true, &SourceLastTiedOp); IndexedMap DestOperandMap; // Create a mapping between destination Dag operands and destination Inst // operands. 
addDagOperandMapping(Rec, DestDag, DestInst, DestOperandMap, - /*IsSourceInst*/ false); + /*IsSourceInst*/ false, nullptr); StringMap SourceOperands; StringMap DestOperands; @@ -500,7 +525,8 @@ void CompressInstEmitter::evaluateCompressPat(const Record *Rec) { SourceOperandMap); // Create operand mapping between the source and destination instructions. createInstOperandMapping(Rec, SourceDag, DestDag, SourceOperandMap, - DestOperandMap, SourceOperands, DestInst); + DestOperandMap, SourceOperands, DestInst, + SourceLastTiedOp); // Get the target features for the CompressPat. std::vector PatReqFeatures; -- cgit v1.2.3 From 3e795c60c73e990fbbf254715cb47855c32bcfae Mon Sep 17 00:00:00 2001 From: Dmitry Vasilyev Date: Thu, 19 Jun 2025 11:12:34 +0400 Subject: [lldb] Disable TestTargetWatchAddress on Windows x86_64 (#144779) See #144777 for details. --- .../watchpoint/watchlocation/TestTargetWatchAddress.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lldb/test/API/python_api/watchpoint/watchlocation/TestTargetWatchAddress.py b/lldb/test/API/python_api/watchpoint/watchlocation/TestTargetWatchAddress.py index 37fa911b3714..f1c7a60300df 100644 --- a/lldb/test/API/python_api/watchpoint/watchlocation/TestTargetWatchAddress.py +++ b/lldb/test/API/python_api/watchpoint/watchlocation/TestTargetWatchAddress.py @@ -21,6 +21,11 @@ class TargetWatchpointCreateByAddressPITestCase(TestBase): # This is for verifying that watch location works. self.violating_func = "do_bad_thing_with_location" + @skipIf( + oslist=["windows"], + archs=["x86_64"], + bugnumber="github.com/llvm/llvm-project/issues/144777", + ) def test_watch_create_by_address(self): """Exercise SBTarget.WatchpointCreateByAddress() API to set a watchpoint.""" self.build() @@ -88,6 +93,11 @@ class TargetWatchpointCreateByAddressPITestCase(TestBase): # This finishes our test. 
+ @skipIf( + oslist=["windows"], + archs=["x86_64"], + bugnumber="github.com/llvm/llvm-project/issues/144777", + ) def test_watch_address(self): """Exercise SBTarget.WatchAddress() API to set a watchpoint. Same as test_watch_create_by_address, but uses the simpler API. -- cgit v1.2.3 From a9a71b6d311892d6add6aab3790b20fe945cca38 Mon Sep 17 00:00:00 2001 From: "S. B. Tam" Date: Thu, 19 Jun 2025 15:21:28 +0800 Subject: [libc++][test] Don't pass ill-formed UTF-8 to MAKE_STRING_VIEW (#136403) --- .../escaped_output.unicode.pass.cpp | 2 +- .../format/format.functions/fill.unicode.pass.cpp | 36 ++++++++++++++-------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp index c4adf601c40a..eb27c7095466 100644 --- a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp @@ -337,7 +337,7 @@ void test_string() { // Ill-formed if constexpr (sizeof(CharT) == 1) - test_format(SV(R"("\x{80}")"), SV("{:?}"), SV("\x80")); + test_format(SV(R"("\x{80}")"), SV("{:?}"), "\x80"); // *** P2713R1 examples *** test_format(SV(R"(["\u{301}"])"), SV("[{:?}]"), SV("\u0301")); diff --git a/libcxx/test/std/utilities/format/format.functions/fill.unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/fill.unicode.pass.cpp index cd555e1ab9ce..76f756ae9148 100644 --- a/libcxx/test/std/utilities/format/format.functions/fill.unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/fill.unicode.pass.cpp @@ -75,30 +75,40 @@ void test() { // Invalid Unicode Scalar Values if constexpr (std::same_as) { - check_exception("The format specifier contains malformed Unicode characters", SV("{:\xed\xa0\x80^}"), 42); // U+D800 - check_exception("The format specifier contains malformed Unicode 
characters", SV("{:\xed\xa0\xbf^}"), 42); // U+DBFF - check_exception("The format specifier contains malformed Unicode characters", SV("{:\xed\xbf\x80^}"), 42); // U+DC00 - check_exception("The format specifier contains malformed Unicode characters", SV("{:\xed\xbf\xbf^}"), 42); // U+DFFF + check_exception("The format specifier contains malformed Unicode characters", + std::string_view{"{:\xed\xa0\x80^}"}, + 42); // U+D800 + check_exception("The format specifier contains malformed Unicode characters", + std::string_view{"{:\xed\xa0\xbf^}"}, + 42); // U+DBFF + check_exception("The format specifier contains malformed Unicode characters", + std::string_view{"{:\xed\xbf\x80^}"}, + 42); // U+DC00 + check_exception("The format specifier contains malformed Unicode characters", + std::string_view{"{:\xed\xbf\xbf^}"}, + 42); // U+DFFF - check_exception( - "The format specifier contains malformed Unicode characters", SV("{:\xf4\x90\x80\x80^}"), 42); // U+110000 - check_exception( - "The format specifier contains malformed Unicode characters", SV("{:\xf4\x90\xbf\xbf^}"), 42); // U+11FFFF + check_exception("The format specifier contains malformed Unicode characters", + std::string_view{"{:\xf4\x90\x80\x80^}"}, + 42); // U+110000 + check_exception("The format specifier contains malformed Unicode characters", + std::string_view{"{:\xf4\x90\xbf\xbf^}"}, + 42); // U+11FFFF check_exception("The format specifier contains malformed Unicode characters", - SV("{:\x80^}"), + std::string_view{"{:\x80^}"}, 42); // Trailing code unit with no leading one. check_exception("The format specifier contains malformed Unicode characters", - SV("{:\xc0^}"), + std::string_view{"{:\xc0^}"}, 42); // Missing trailing code unit. check_exception("The format specifier contains malformed Unicode characters", - SV("{:\xe0\x80^}"), + std::string_view{"{:\xe0\x80^}"}, 42); // Missing trailing code unit. 
check_exception("The format specifier contains malformed Unicode characters", - SV("{:\xf0\x80^}"), + std::string_view{"{:\xf0\x80^}"}, 42); // Missing two trailing code units. check_exception("The format specifier contains malformed Unicode characters", - SV("{:\xf0\x80\x80^}"), + std::string_view{"{:\xf0\x80\x80^}"}, 42); // Missing trailing code unit. #ifndef TEST_HAS_NO_WIDE_CHARACTERS -- cgit v1.2.3 From 50a7511138a42d2c7a69b68237ce88cc027b91bc Mon Sep 17 00:00:00 2001 From: Paschalis Mpeis Date: Thu, 19 Jun 2025 08:51:08 +0100 Subject: [BOLT][AArch64] Fix PREL Relocs on RHEL8 (#144505) --- bolt/test/AArch64/r_aarch64_prelxx.s | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bolt/test/AArch64/r_aarch64_prelxx.s b/bolt/test/AArch64/r_aarch64_prelxx.s index 5cbe2c50b294..39f74301cedf 100644 --- a/bolt/test/AArch64/r_aarch64_prelxx.s +++ b/bolt/test/AArch64/r_aarch64_prelxx.s @@ -5,7 +5,7 @@ // REQUIRES: system-linux // RUN: %clang %cflags -nostartfiles -nostdlib %s -o %t.exe -mlittle-endian \ -// RUN: -Wl,-q -Wl,-z,max-page-size=4 +// RUN: -Wl,-q -Wl,-z,max-page-size=4 -Wl,--no-relax // RUN: llvm-readelf -Wa %t.exe | FileCheck %s -check-prefix=CHECKPREL // CHECKPREL: R_AARCH64_PREL16 {{.*}} .dummy + 0 @@ -36,9 +36,9 @@ .type _start, %function _start: adrp x0, datatable - add x0, x0, :lo12:datable + add x0, x0, :lo12:datatable mov x0, #0 - ret + ret .section .dummy, "a", @progbits dummy: -- cgit v1.2.3 From e73bff89ef8e3c8cdd8895cdc3d021fc4dcabd76 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Thu, 19 Jun 2025 09:56:07 +0200 Subject: [AMDGPU] New RegBankSelect: Handle all 32/64 bit pointer types for B32/B64 rule (#142560) The previous system explicitly enumerated the types. P0 was missing and thus we couldn't handle a select of P0s for example. 
Generalize the logic to simply check the width of the pointer for 32/64 bit pointers, this should handle all common address spaces --- .../Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 8 +- .../AMDGPU/GlobalISel/regbankselect-select.mir | 196 +++++++++++++++++++++ 2 files changed, 199 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 89056b0271f1..5e21f44f7d54 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -199,13 +199,11 @@ UniformityLLTOpPredicateID LLTToId(LLT Ty) { UniformityLLTOpPredicateID LLTToBId(LLT Ty) { if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) || - Ty == LLT::pointer(3, 32) || Ty == LLT::pointer(5, 32) || - Ty == LLT::pointer(6, 32)) + (Ty.isPointer() && Ty.getSizeInBits() == 32)) return B32; if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || - Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(1, 64) || - Ty == LLT::pointer(4, 64) || - (Ty.isPointer() && Ty.getAddressSpace() > AMDGPUAS::MAX_AMDGPU_ADDRESS)) + Ty == LLT::fixed_vector(4, 16) || + (Ty.isPointer() && Ty.getSizeInBits() == 64)) return B64; if (Ty == LLT::fixed_vector(3, 32)) return B96; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir index 762f7b950036..2fd2e03471f1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-select.mir @@ -896,6 +896,31 @@ body: | %5:_(<4 x s16>) = G_SELECT %4, %2, %3 ... 
+--- +name: select_p0_scc_ss +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 + ; CHECK-LABEL: name: select_p0_scc_ss + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p0) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(p0) = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP]], [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(p0) = G_SELECT [[AND]](s32), [[COPY2]], [[COPY3]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(p0) = COPY $sgpr2_sgpr3 + %3:_(p0) = COPY $sgpr4_sgpr5 + %4:_(s1) = G_ICMP intpred(ne), %0, %1 + %5:_(p0) = G_SELECT %4, %2, %3 +... + --- name: select_p1_scc_ss legalized: true @@ -946,6 +971,36 @@ body: | %5:_(p999) = G_SELECT %4, %2, %3 ... 
+--- +name: select_p0_scc_sv +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 + ; CHECK-LABEL: name: select_p0_scc_sv + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p0) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p0) = COPY [[COPY2]](p0) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](p0) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p0) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p0) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(p0) = COPY $sgpr2_sgpr3 + %3:_(p0) = COPY $vgpr0_vgpr1 + %4:_(s1) = G_ICMP intpred(ne), %0, %1 + %5:_(p0) = G_SELECT %4, %2, %3 + +... + --- name: select_p1_scc_sv legalized: true @@ -976,6 +1031,35 @@ body: | ... 
+--- +name: select_p0_scc_vs +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 + ; CHECK-LABEL: name: select_p0_scc_vs + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p0) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[ICMP]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p0) = COPY [[COPY2]](p0) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p0) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](p0) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[AMDGPU_COPY_VCC_SCC]](s1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p0) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(p0) = COPY $sgpr2_sgpr3 + %3:_(p0) = COPY $vgpr0_vgpr1 + %4:_(s1) = G_ICMP intpred(ne), %0, %1 + %5:_(p0) = G_SELECT %4, %3, %2 +... + --- name: select_p1_scc_vs legalized: true @@ -1033,6 +1117,35 @@ body: | %5:_(p1) = G_SELECT %4, %2, %3 ... 
+--- +name: select_p0_vcc_ss +legalized: true +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0, $vgpr1 + ; CHECK-LABEL: name: select_p0_vcc_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p0) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[COPY3]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(p0) = COPY [[COPY1]](p0) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](p0) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY5]](p0) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p0) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(p0) = COPY $sgpr2_sgpr3 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(s1) = G_ICMP intpred(ne), %2, %3 + %5:_(p0) = G_SELECT %4, %0, %1 +... + --- name: select_p1_vcc_ss legalized: true @@ -1062,6 +1175,34 @@ body: | %5:_(p1) = G_SELECT %4, %0, %1 ... 
+--- +name: select_p0_vcc_sv +legalized: true +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: select_p0_vcc_sv + ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](p0) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p0) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p0) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(p0) = COPY $vgpr2_vgpr3 + %4:_(s1) = G_ICMP intpred(ne), %1, %2 + %5:_(p0) = G_SELECT %4, %0, %3 +... + --- name: select_p1_vcc_sv legalized: true @@ -1090,6 +1231,34 @@ body: | %5:_(p1) = G_SELECT %4, %0, %3 ... 
+--- +name: select_p0_vcc_vs +legalized: true +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: select_p0_vcc_vs + ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0, $vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[COPY2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p0) = COPY [[COPY]](p0) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p0) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY4]](p0) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p0) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + %0:_(p0) = COPY $sgpr0_sgpr1 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(p0) = COPY $vgpr2_vgpr3 + %4:_(s1) = G_ICMP intpred(ne), %1, %2 + %5:_(p0) = G_SELECT %4, %3, %0 +... + --- name: select_p1_vcc_vs legalized: true @@ -1118,6 +1287,33 @@ body: | %5:_(p1) = G_SELECT %4, %3, %0 ... 
+--- +name: select_p0_vcc_vv +legalized: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-LABEL: name: select_p0_vcc_vv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(p0) = COPY $vgpr4_vgpr5 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY2]](p0) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY3]](p0) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV]], [[UV2]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(p0) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(p0) = COPY $vgpr2_vgpr3 + %3:_(p0) = COPY $vgpr4_vgpr5 + %4:_(s1) = G_ICMP intpred(ne), %0, %1 + %5:_(p0) = G_SELECT %4, %2, %3 +... 
+ --- name: select_p1_vcc_vv legalized: true -- cgit v1.2.3 From db8e6fc64534e986f5bf96cceaa76cc5007ac1c7 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Thu, 19 Jun 2025 09:58:57 +0200 Subject: [AMDGPU] New RegBanKSelect: Add S128 types (#142601) --- llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 9 +++++++++ llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 6 ++++++ llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h | 5 +++++ 3 files changed, 20 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 7ff822c6f658..89af98263659 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -556,6 +556,9 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) { case Sgpr64: case Vgpr64: return LLT::scalar(64); + case Sgpr128: + case Vgpr128: + return LLT::scalar(128); case VgprP0: return LLT::pointer(0, 64); case SgprP1: @@ -646,6 +649,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) { case Sgpr16: case Sgpr32: case Sgpr64: + case Sgpr128: case SgprP1: case SgprP3: case SgprP4: @@ -678,6 +682,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) { case Vgpr16: case Vgpr32: case Vgpr64: + case Vgpr128: case VgprP0: case VgprP1: case VgprP3: @@ -718,6 +723,7 @@ void RegBankLegalizeHelper::applyMappingDst( case Sgpr16: case Sgpr32: case Sgpr64: + case Sgpr128: case SgprP1: case SgprP3: case SgprP4: @@ -728,6 +734,7 @@ void RegBankLegalizeHelper::applyMappingDst( case Vgpr16: case Vgpr32: case Vgpr64: + case Vgpr128: case VgprP0: case VgprP1: case VgprP3: @@ -839,6 +846,7 @@ void RegBankLegalizeHelper::applyMappingSrc( case Sgpr16: case Sgpr32: case Sgpr64: + case Sgpr128: case SgprP1: case SgprP3: case SgprP4: @@ -865,6 +873,7 @@ void RegBankLegalizeHelper::applyMappingSrc( case Vgpr16: case Vgpr32: case Vgpr64: + case Vgpr128: 
case VgprP0: case VgprP1: case VgprP3: diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 5e21f44f7d54..672fc5b79abc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -50,6 +50,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, return MRI.getType(Reg) == LLT::scalar(32); case S64: return MRI.getType(Reg) == LLT::scalar(64); + case S128: + return MRI.getType(Reg) == LLT::scalar(128); case P0: return MRI.getType(Reg) == LLT::pointer(0, 64); case P1: @@ -84,6 +86,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg); case UniS64: return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg); + case UniS128: + return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg); case UniP0: return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg); case UniP1: @@ -116,6 +120,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg); case DivS64: return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg); + case DivS128: + return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg); case DivP0: return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg); case DivP1: diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index bddfb8dd1913..30b900d871f3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -39,16 +39,19 @@ enum UniformityLLTOpPredicateID { S16, S32, S64, + S128, UniS1, UniS16, UniS32, UniS64, + UniS128, DivS1, DivS16, DivS32, DivS64, + DivS128, // pointers P0, @@ -117,6 +120,7 @@ enum RegBankLLTMappingApplyID { Sgpr16, 
Sgpr32, Sgpr64, + Sgpr128, SgprP1, SgprP3, SgprP4, @@ -135,6 +139,7 @@ enum RegBankLLTMappingApplyID { Vgpr16, Vgpr32, Vgpr64, + Vgpr128, VgprP0, VgprP1, VgprP3, -- cgit v1.2.3 From 26d4b3cb4ca2f882384d940f3dad28f8d79451eb Mon Sep 17 00:00:00 2001 From: Sudharsan Veeravalli Date: Thu, 19 Jun 2025 13:29:57 +0530 Subject: [RISCV] Don't prefer QC_EXT for SEXT_INREG patterns when Zbb is enabled (#144837) `Zbb` has the `sext.b` and `sext.h` instructions that are compressible. --- llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 8 +- llvm/test/CodeGen/RISCV/xqcibm-extract.ll | 141 ++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 2856df47f704..3d0cad7884fd 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1439,8 +1439,6 @@ def : SelectQCbi; -def : Pat<(sext_inreg (i32 GPR:$rs1), i8), (QC_EXT GPR:$rs1, 8, 0)>; def : Pat<(sext_inreg (i32 GPR:$rs1), i1), (QC_EXT GPR:$rs1, 1, 0)>; // Prefer qc.extu to andi for the following cases since the former can be compressed @@ -1452,6 +1450,12 @@ def : Pat<(i32 (and GPRNoX0:$rs, 1023)), (QC_EXTU GPRNoX0:$rs, 10, 0)>; def : Pat<(i32 (and GPRNoX0:$rs, 2047)), (QC_EXTU GPRNoX0:$rs, 11, 0)>; } // Predicates = [HasVendorXqcibm, IsRV32] +// If Zbb is enabled sext.b/h is preferred since they are compressible +let Predicates = [HasVendorXqcibm, NoStdExtZbb, IsRV32] in { +def : Pat<(sext_inreg (i32 GPR:$rs1), i16), (QC_EXT GPR:$rs1, 16, 0)>; +def : Pat<(sext_inreg (i32 GPR:$rs1), i8), (QC_EXT GPR:$rs1, 8, 0)>; +} // Predicates = [HasVendorXqcibm, NoStdExtZbb, IsRV32] + let Predicates = [HasVendorXqcibm, HasStdExtZbb, IsRV32] in { def: Pat<(i32 (cttz (not (i32 GPR:$rs1)))), (QC_CTO GPR:$rs1)>; def: Pat<(i32 (ctlz (not (i32 GPR:$rs1)))), (QC_CLO GPR:$rs1)>; diff --git a/llvm/test/CodeGen/RISCV/xqcibm-extract.ll 
b/llvm/test/CodeGen/RISCV/xqcibm-extract.ll index edf6e9a2d501..481bfdd66643 100644 --- a/llvm/test/CodeGen/RISCV/xqcibm-extract.ll +++ b/llvm/test/CodeGen/RISCV/xqcibm-extract.ll @@ -3,6 +3,8 @@ ; RUN: | FileCheck %s -check-prefixes=RV32I ; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcibm -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefixes=RV32XQCIBM +; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcibm,+zbb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32XQCIBMZBB define i32 @sexti1_i32(i1 %a) nounwind { ; RV32I-LABEL: sexti1_i32: @@ -15,6 +17,11 @@ define i32 @sexti1_i32(i1 %a) nounwind { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.ext a0, a0, 1, 0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti1_i32: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.ext a0, a0, 1, 0 +; RV32XQCIBMZBB-NEXT: ret %sext = sext i1 %a to i32 ret i32 %sext } @@ -30,6 +37,11 @@ define i32 @sexti1_i32_2(i32 %a) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.ext a0, a0, 1, 0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti1_i32_2: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.ext a0, a0, 1, 0 +; RV32XQCIBMZBB-NEXT: ret %shl = shl i32 %a, 31 %shr = ashr exact i32 %shl, 31 ret i32 %shr @@ -47,6 +59,11 @@ define i32 @sexti8_i32(i8 %a) nounwind { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.ext a0, a0, 8, 0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti8_i32: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: sext.b a0, a0 +; RV32XQCIBMZBB-NEXT: ret %sext = sext i8 %a to i32 ret i32 %sext } @@ -62,6 +79,11 @@ define i32 @sexti8_i32_2(i32 %a) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.ext a0, a0, 8, 0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti8_i32_2: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: sext.b a0, a0 +; RV32XQCIBMZBB-NEXT: ret %shl = shl i32 %a, 24 %shr = ashr exact i32 %shl, 24 ret i32 %shr @@ -78,6 +100,11 @@ define i32 @sexti16_i32(i16 %a) nounwind { ; RV32XQCIBM: # %bb.0: ; 
RV32XQCIBM-NEXT: qc.ext a0, a0, 16, 0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti16_i32: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: sext.h a0, a0 +; RV32XQCIBMZBB-NEXT: ret %sext = sext i16 %a to i32 ret i32 %sext } @@ -93,6 +120,11 @@ define i32 @sexti16_i32_2(i32 %a) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.ext a0, a0, 16, 0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti16_i32_2: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: sext.h a0, a0 +; RV32XQCIBMZBB-NEXT: ret %shl = shl i32 %a, 16 %shr = ashr exact i32 %shl, 16 ret i32 %shr @@ -111,6 +143,12 @@ define i64 @sexti1_i64(i64 %a) { ; RV32XQCIBM-NEXT: qc.ext a0, a0, 1, 0 ; RV32XQCIBM-NEXT: mv a1, a0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti1_i64: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.ext a0, a0, 1, 0 +; RV32XQCIBMZBB-NEXT: mv a1, a0 +; RV32XQCIBMZBB-NEXT: ret %shl = shl i64 %a, 63 %shr = ashr exact i64 %shl, 63 ret i64 %shr @@ -129,6 +167,12 @@ define i64 @sexti1_i64_2(i1 %a) { ; RV32XQCIBM-NEXT: qc.ext a0, a0, 1, 0 ; RV32XQCIBM-NEXT: mv a1, a0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti1_i64_2: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.ext a0, a0, 1, 0 +; RV32XQCIBMZBB-NEXT: mv a1, a0 +; RV32XQCIBMZBB-NEXT: ret %1 = sext i1 %a to i64 ret i64 %1 } @@ -146,6 +190,12 @@ define i64 @sexti8_i64(i64 %a) { ; RV32XQCIBM-NEXT: qc.ext a0, a0, 8, 0 ; RV32XQCIBM-NEXT: srai a1, a0, 31 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti8_i64: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: sext.b a0, a0 +; RV32XQCIBMZBB-NEXT: srai a1, a0, 31 +; RV32XQCIBMZBB-NEXT: ret %shl = shl i64 %a, 56 %shr = ashr exact i64 %shl, 56 ret i64 %shr @@ -164,6 +214,12 @@ define i64 @sexti8_i64_2(i8 %a) { ; RV32XQCIBM-NEXT: qc.ext a0, a0, 8, 0 ; RV32XQCIBM-NEXT: srai a1, a0, 31 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti8_i64_2: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: sext.b a0, a0 +; RV32XQCIBMZBB-NEXT: srai a1, a0, 31 +; 
RV32XQCIBMZBB-NEXT: ret %1 = sext i8 %a to i64 ret i64 %1 } @@ -181,6 +237,12 @@ define i64 @sexti16_i64(i64 %a) { ; RV32XQCIBM-NEXT: qc.ext a0, a0, 16, 0 ; RV32XQCIBM-NEXT: srai a1, a0, 31 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti16_i64: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: sext.h a0, a0 +; RV32XQCIBMZBB-NEXT: srai a1, a0, 31 +; RV32XQCIBMZBB-NEXT: ret %shl = shl i64 %a, 48 %shr = ashr exact i64 %shl, 48 ret i64 %shr @@ -199,6 +261,12 @@ define i64 @sexti16_i64_2(i16 %a) { ; RV32XQCIBM-NEXT: qc.ext a0, a0, 16, 0 ; RV32XQCIBM-NEXT: srai a1, a0, 31 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti16_i64_2: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: sext.h a0, a0 +; RV32XQCIBMZBB-NEXT: srai a1, a0, 31 +; RV32XQCIBMZBB-NEXT: ret %1 = sext i16 %a to i64 ret i64 %1 } @@ -213,6 +281,11 @@ define i64 @sexti32_i64(i64 %a) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: srai a1, a0, 31 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti32_i64: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: srai a1, a0, 31 +; RV32XQCIBMZBB-NEXT: ret %shl = shl i64 %a, 32 %shr = ashr exact i64 %shl, 32 ret i64 %shr @@ -228,6 +301,11 @@ define i64 @sexti32_i64_2(i32 %a) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: srai a1, a0, 31 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: sexti32_i64_2: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: srai a1, a0, 31 +; RV32XQCIBMZBB-NEXT: ret %1 = sext i32 %a to i64 ret i64 %1 } @@ -243,6 +321,11 @@ define i32 @extu_from_and_i32(i32 %x) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.extu a0, a0, 12, 0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: extu_from_and_i32: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.extu a0, a0, 12, 0 +; RV32XQCIBMZBB-NEXT: ret %a = and i32 %x, 4095 ret i32 %a } @@ -257,6 +340,11 @@ define i32 @no_extu_from_and_i32(i32 %x) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: andi a0, a0, 31 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: no_extu_from_and_i32: +; RV32XQCIBMZBB: # 
%bb.0: +; RV32XQCIBMZBB-NEXT: andi a0, a0, 31 +; RV32XQCIBMZBB-NEXT: ret %a = and i32 %x, 31 ret i32 %a } @@ -271,6 +359,11 @@ define i32 @extu_from_and_i32_simm12_lb(i32 %x) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.extu a0, a0, 6, 0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: extu_from_and_i32_simm12_lb: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.extu a0, a0, 6, 0 +; RV32XQCIBMZBB-NEXT: ret %a = and i32 %x, 63 ret i32 %a } @@ -285,6 +378,11 @@ define i32 @extu_from_and_i32_simm12_ub(i32 %x) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.extu a0, a0, 11, 0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: extu_from_and_i32_simm12_ub: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.extu a0, a0, 11, 0 +; RV32XQCIBMZBB-NEXT: ret %a = and i32 %x, 2047 ret i32 %a } @@ -302,6 +400,12 @@ define i64 @extu_from_and_i64(i64 %x) { ; RV32XQCIBM-NEXT: qc.extu a0, a0, 12, 0 ; RV32XQCIBM-NEXT: li a1, 0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: extu_from_and_i64: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.extu a0, a0, 12, 0 +; RV32XQCIBMZBB-NEXT: li a1, 0 +; RV32XQCIBMZBB-NEXT: ret %a = and i64 %x, 4095 ret i64 %a } @@ -317,6 +421,11 @@ define i32 @extu_from_and_lshr_i32(i32 %x) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.extu a0, a0, 3, 23 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: extu_from_and_lshr_i32: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.extu a0, a0, 3, 23 +; RV32XQCIBMZBB-NEXT: ret %shifted = lshr i32 %x, 23 %masked = and i32 %shifted, 7 ret i32 %masked @@ -335,6 +444,12 @@ define i64 @extu_from_and_lshr_i64(i64 %x) { ; RV32XQCIBM-NEXT: qc.extu a0, a1, 12, 14 ; RV32XQCIBM-NEXT: li a1, 0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: extu_from_and_lshr_i64: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.extu a0, a1, 12, 14 +; RV32XQCIBMZBB-NEXT: li a1, 0 +; RV32XQCIBMZBB-NEXT: ret %shifted = lshr i64 %x, 46 %masked = and i64 %shifted, 4095 ret i64 %masked @@ -351,6 +466,11 @@ define i32 
@extu_from_lshr_and_i32(i32 %x) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.extu a0, a0, 12, 12 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: extu_from_lshr_and_i32: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.extu a0, a0, 12, 12 +; RV32XQCIBMZBB-NEXT: ret %masked = and i32 %x, 16773120 %shifted = lshr i32 %masked, 12 ret i32 %shifted @@ -369,6 +489,12 @@ define i64 @extu_from_lshr_and_i64(i64 %x) { ; RV32XQCIBM-NEXT: qc.extu a0, a0, 12, 12 ; RV32XQCIBM-NEXT: li a1, 0 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: extu_from_lshr_and_i64: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.extu a0, a0, 12, 12 +; RV32XQCIBMZBB-NEXT: li a1, 0 +; RV32XQCIBMZBB-NEXT: ret %masked = and i64 %x, 16773120 %shifted = lshr i64 %masked, 12 ret i64 %shifted @@ -385,6 +511,11 @@ define i32 @ext_from_ashr_shl_i32(i32 %x) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.ext a0, a0, 8, 16 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: ext_from_ashr_shl_i32: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.ext a0, a0, 8, 16 +; RV32XQCIBMZBB-NEXT: ret %shl = shl i32 %x, 8 %ashr = ashr i32 %shl, 24 ret i32 %ashr @@ -401,6 +532,11 @@ define i32 @ext_from_ashr_sexti8_i32(i8 %x) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.ext a0, a0, 3, 5 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: ext_from_ashr_sexti8_i32: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.ext a0, a0, 3, 5 +; RV32XQCIBMZBB-NEXT: ret %sext = sext i8 %x to i32 %ashr = ashr i32 %sext, 5 ret i32 %ashr @@ -417,6 +553,11 @@ define i32 @ext_from_ashr_sexti16_i32(i16 %x) { ; RV32XQCIBM: # %bb.0: ; RV32XQCIBM-NEXT: qc.ext a0, a0, 1, 15 ; RV32XQCIBM-NEXT: ret +; +; RV32XQCIBMZBB-LABEL: ext_from_ashr_sexti16_i32: +; RV32XQCIBMZBB: # %bb.0: +; RV32XQCIBMZBB-NEXT: qc.ext a0, a0, 1, 15 +; RV32XQCIBMZBB-NEXT: ret %sext = sext i16 %x to i32 %ashr = ashr i32 %sext, 24 ret i32 %ashr -- cgit v1.2.3 From 7ceea22a7adad5d21328839facbc6a6d0151e056 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: 
Thu, 19 Jun 2025 10:06:38 +0200 Subject: [AMDGPU] New RegBankSelect: Add Ptr32/Ptr64/Ptr128 (#142602) There's quite a few opcodes that do not care about the exact AS of the pointer, just its size. Adding generic types for these will help reduce duplication in the rule definitions. I also moved the usual B types to use the new `isAnyPtr` helper I added to make sure they're supersets of the `Ptr` cases --- .../Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 42 +++++++++++++++++----- .../Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 29 ++++++++++++--- .../lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h | 19 ++++++++++ 3 files changed, 77 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 89af98263659..b2ddc6e88966 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -595,17 +595,23 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) { case VgprB32: case UniInVgprB32: if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) || - Ty == LLT::pointer(3, 32) || Ty == LLT::pointer(5, 32) || - Ty == LLT::pointer(6, 32)) + isAnyPtr(Ty, 32)) return Ty; return LLT(); + case SgprPtr32: + case VgprPtr32: + return isAnyPtr(Ty, 32) ? Ty : LLT(); + case SgprPtr64: + case VgprPtr64: + return isAnyPtr(Ty, 64) ? Ty : LLT(); + case SgprPtr128: + case VgprPtr128: + return isAnyPtr(Ty, 128) ? 
Ty : LLT(); case SgprB64: case VgprB64: case UniInVgprB64: if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || - Ty == LLT::fixed_vector(4, 16) || Ty == LLT::pointer(0, 64) || - Ty == LLT::pointer(1, 64) || Ty == LLT::pointer(4, 64) || - (Ty.isPointer() && Ty.getAddressSpace() > AMDGPUAS::MAX_AMDGPU_ADDRESS)) + Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64)) return Ty; return LLT(); case SgprB96: @@ -619,7 +625,7 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) { case VgprB128: case UniInVgprB128: if (Ty == LLT::scalar(128) || Ty == LLT::fixed_vector(4, 32) || - Ty == LLT::fixed_vector(2, 64)) + Ty == LLT::fixed_vector(2, 64) || isAnyPtr(Ty, 128)) return Ty; return LLT(); case SgprB256: @@ -654,6 +660,9 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) { case SgprP3: case SgprP4: case SgprP5: + case SgprPtr32: + case SgprPtr64: + case SgprPtr128: case SgprV2S16: case SgprV2S32: case SgprV4S32: @@ -688,6 +697,9 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) { case VgprP3: case VgprP4: case VgprP5: + case VgprPtr32: + case VgprPtr64: + case VgprPtr128: case VgprV2S16: case VgprV2S32: case VgprV4S32: @@ -754,12 +766,18 @@ void RegBankLegalizeHelper::applyMappingDst( case SgprB128: case SgprB256: case SgprB512: + case SgprPtr32: + case SgprPtr64: + case SgprPtr128: case VgprB32: case VgprB64: case VgprB96: case VgprB128: case VgprB256: - case VgprB512: { + case VgprB512: + case VgprPtr32: + case VgprPtr64: + case VgprPtr128: { assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty)); assert(RB == getRegBankFromID(MethodIDs[OpIdx])); break; @@ -864,7 +882,10 @@ void RegBankLegalizeHelper::applyMappingSrc( case SgprB96: case SgprB128: case SgprB256: - case SgprB512: { + case SgprB512: + case SgprPtr32: + case SgprPtr64: + case SgprPtr128: { assert(Ty == getBTyFromID(MethodIDs[i], Ty)); assert(RB == getRegBankFromID(MethodIDs[i])); break; @@ -895,7 +916,10 @@ void 
RegBankLegalizeHelper::applyMappingSrc( case VgprB96: case VgprB128: case VgprB256: - case VgprB512: { + case VgprB512: + case VgprPtr32: + case VgprPtr64: + case VgprPtr128: { assert(Ty == getBTyFromID(MethodIDs[i], Ty)); if (RB != VgprRB) { auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 672fc5b79abc..5402129e4188 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -26,6 +26,10 @@ using namespace llvm; using namespace AMDGPU; +bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) { + return Ty.isPointer() && Ty.getSizeInBits() == Width; +} + RegBankLLTMapping::RegBankLLTMapping( std::initializer_list DstOpMappingList, std::initializer_list SrcOpMappingList, @@ -62,6 +66,12 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, return MRI.getType(Reg) == LLT::pointer(4, 64); case P5: return MRI.getType(Reg) == LLT::pointer(5, 32); + case Ptr32: + return isAnyPtr(MRI.getType(Reg), 32); + case Ptr64: + return isAnyPtr(MRI.getType(Reg), 64); + case Ptr128: + return isAnyPtr(MRI.getType(Reg), 128); case V2S32: return MRI.getType(Reg) == LLT::fixed_vector(2, 32); case V4S32: @@ -98,6 +108,12 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg); case UniP5: return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg); + case UniPtr32: + return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg); + case UniPtr64: + return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg); + case UniPtr128: + return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg); case UniV2S16: return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg); case UniB32: @@ -132,6 +148,12 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID, 
return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg); case DivP5: return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg); + case DivPtr32: + return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg); + case DivPtr64: + return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg); + case DivPtr128: + return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg); case DivV2S16: return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg); case DivB32: @@ -205,15 +227,14 @@ UniformityLLTOpPredicateID LLTToId(LLT Ty) { UniformityLLTOpPredicateID LLTToBId(LLT Ty) { if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) || - (Ty.isPointer() && Ty.getSizeInBits() == 32)) + isAnyPtr(Ty, 32)) return B32; if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || - Ty == LLT::fixed_vector(4, 16) || - (Ty.isPointer() && Ty.getSizeInBits() == 64)) + Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64)) return B64; if (Ty == LLT::fixed_vector(3, 32)) return B96; - if (Ty == LLT::fixed_vector(4, 32)) + if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128)) return B128; return _; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index 30b900d871f3..7243d75aa830 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -15,6 +15,7 @@ namespace llvm { +class LLT; class MachineRegisterInfo; class MachineInstr; class GCNSubtarget; @@ -26,6 +27,9 @@ using MachineUniformityInfo = GenericUniformityInfo; namespace AMDGPU { +/// \returns true if \p Ty is a pointer type with size \p Width. +bool isAnyPtr(LLT Ty, unsigned Width); + // IDs used to build predicate for RegBankLegalizeRule. Predicate can have one // or more IDs and each represents a check for 'uniform or divergent' + LLT or // just LLT on register operand. 
@@ -59,18 +63,27 @@ enum UniformityLLTOpPredicateID { P3, P4, P5, + Ptr32, + Ptr64, + Ptr128, UniP0, UniP1, UniP3, UniP4, UniP5, + UniPtr32, + UniPtr64, + UniPtr128, DivP0, DivP1, DivP3, DivP4, DivP5, + DivPtr32, + DivPtr64, + DivPtr128, // vectors V2S16, @@ -125,6 +138,9 @@ enum RegBankLLTMappingApplyID { SgprP3, SgprP4, SgprP5, + SgprPtr32, + SgprPtr64, + SgprPtr128, SgprV2S16, SgprV4S32, SgprV2S32, @@ -145,6 +161,9 @@ enum RegBankLLTMappingApplyID { VgprP3, VgprP4, VgprP5, + VgprPtr32, + VgprPtr64, + VgprPtr128, VgprV2S16, VgprV2S32, VgprB32, -- cgit v1.2.3 From 52ff58c3300338876ae63126ce0d33331000f1ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= Date: Thu, 19 Jun 2025 10:06:50 +0200 Subject: Revert "[CUDA][HIP] Add a __device__ version of std::__glibcxx_assert_fail()" (#144850) Reverts llvm/llvm-project#136133 --- clang/lib/Headers/CMakeLists.txt | 1 - clang/lib/Headers/cuda_wrappers/bits/c++config.h | 51 ------------------------ 2 files changed, 52 deletions(-) delete mode 100644 clang/lib/Headers/cuda_wrappers/bits/c++config.h diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index c96d209c1fc0..c1c9d2e8c7b7 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -341,7 +341,6 @@ set(cuda_wrapper_files ) set(cuda_wrapper_bits_files - cuda_wrappers/bits/c++config.h cuda_wrappers/bits/shared_ptr_base.h cuda_wrappers/bits/basic_string.h cuda_wrappers/bits/basic_string.tcc diff --git a/clang/lib/Headers/cuda_wrappers/bits/c++config.h b/clang/lib/Headers/cuda_wrappers/bits/c++config.h deleted file mode 100644 index eafa13a9cc64..000000000000 --- a/clang/lib/Headers/cuda_wrappers/bits/c++config.h +++ /dev/null @@ -1,51 +0,0 @@ -// libstdc++ uses the non-constexpr function std::__glibcxx_assert_fail() -// to trigger compilation errors when the __glibcxx_assert(cond) macro -// is used in a constexpr context. 
-// Compilation fails when using code from the libstdc++ (such as std::array) on -// device code, since these assertions invoke a non-constexpr host function from -// device code. -// -// To work around this issue, we declare our own device version of the function - -#ifndef __CLANG_CUDA_WRAPPERS_BITS_CPP_CONFIG -#define __CLANG_CUDA_WRAPPERS_BITS_CPP_CONFIG - -#include_next - -#ifdef _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_BEGIN_NAMESPACE_STD -#else -namespace std { -#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION -_GLIBCXX_BEGIN_NAMESPACE_VERSION -#endif - -#ifdef _GLIBCXX_VERBOSE_ASSERT -__attribute__((device, noreturn)) inline void -__glibcxx_assert_fail(const char *file, int line, const char *function, - const char *condition) noexcept { - if (file && function && condition) - __builtin_printf("%s:%d: %s: Assertion '%s' failed.\n", file, line, - function, condition); - else if (function) - __builtin_printf("%s: Undefined behavior detected.\n", function); - __builtin_abort(); -} -#endif - -#endif -__attribute__((device, noreturn, __always_inline__, - __visibility__("default"))) inline void -__glibcxx_assert_fail(...) 
noexcept { - __builtin_abort(); -} -#ifdef _LIBCPP_END_NAMESPACE_STD -_LIBCPP_END_NAMESPACE_STD -#else -#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION -_GLIBCXX_END_NAMESPACE_VERSION -#endif -} // namespace std -#endif - -#endif -- cgit v1.2.3 From 650b451d0065c8ea6a1f87e7fdc6d07648729549 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 19 Jun 2025 10:06:59 +0200 Subject: [libc++] Simplify the implementation of pointer_traits a bit (#142260) --- libcxx/include/__memory/pointer_traits.h | 116 +++++++++---------------------- 1 file changed, 33 insertions(+), 83 deletions(-) diff --git a/libcxx/include/__memory/pointer_traits.h b/libcxx/include/__memory/pointer_traits.h index 879b387b9ad1..8c7f8dff1b76 100644 --- a/libcxx/include/__memory/pointer_traits.h +++ b/libcxx/include/__memory/pointer_traits.h @@ -16,11 +16,13 @@ #include <__type_traits/conditional.h> #include <__type_traits/conjunction.h> #include <__type_traits/decay.h> +#include <__type_traits/detected_or.h> #include <__type_traits/enable_if.h> #include <__type_traits/integral_constant.h> #include <__type_traits/is_class.h> #include <__type_traits/is_function.h> #include <__type_traits/is_void.h> +#include <__type_traits/nat.h> #include <__type_traits/void_t.h> #include <__utility/declval.h> #include <__utility/forward.h> @@ -34,67 +36,37 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -// clang-format off -#define _LIBCPP_CLASS_TRAITS_HAS_XXX(NAME, PROPERTY) \ - template \ - struct NAME : false_type {}; \ - template \ - struct NAME<_Tp, __void_t > : true_type {} -// clang-format on - -_LIBCPP_CLASS_TRAITS_HAS_XXX(__has_pointer, pointer); -_LIBCPP_CLASS_TRAITS_HAS_XXX(__has_element_type, element_type); - -template ::value> -struct __pointer_traits_element_type {}; - template -struct __pointer_traits_element_type<_Ptr, true> { - using type _LIBCPP_NODEBUG = typename _Ptr::element_type; -}; +struct __pointer_traits_element_type_impl {}; template