summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/WebAssembly
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/WebAssembly')
-rw-r--r--llvm/test/CodeGen/WebAssembly/f16-intrinsics.ll (renamed from llvm/test/CodeGen/WebAssembly/half-precision.ll)2
-rw-r--r--llvm/test/CodeGen/WebAssembly/f16.ll712
-rw-r--r--llvm/test/CodeGen/WebAssembly/global-set.ll16
-rw-r--r--llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll8
-rw-r--r--llvm/test/CodeGen/WebAssembly/memory-interleave.ll465
-rw-r--r--llvm/test/CodeGen/WebAssembly/narrow-simd-mul.ll68
6 files changed, 966 insertions, 305 deletions
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/f16-intrinsics.ll
index 4e8ff5955c63..8033ec5d310f 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/f16-intrinsics.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128 | FileCheck %s
+; Tests for `llvm.wasm.*.*f16` intrinsics
+
declare float @llvm.wasm.loadf32.f16(ptr)
declare void @llvm.wasm.storef16.f32(float, ptr)
diff --git a/llvm/test/CodeGen/WebAssembly/f16.ll b/llvm/test/CodeGen/WebAssembly/f16.ll
index b67c0c16d465..0486975f6cba 100644
--- a/llvm/test/CodeGen/WebAssembly/f16.ll
+++ b/llvm/test/CodeGen/WebAssembly/f16.ll
@@ -1,69 +1,653 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -fast-isel | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; Test that f16 is expanded.
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s --check-prefixes=ALL,DEFISEL
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes=ALL,FASTISEL
+
+; Tests for various operations on half-precision floats. Much of the test is
+; copied from test/CodeGen/X86/half.ll.
target triple = "wasm32-unknown-unknown"
-; CHECK-LABEL: demote.f32:
-; CHECK-NEXT: .functype demote.f32 (f32) -> (f32){{$}}
-; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: call $push[[L1:[0-9]+]]=, __truncsfhf2, $pop[[L0]]{{$}}
-; CHECK-NEXT: call $push[[L2:[0-9]+]]=, __extendhfsf2, $pop[[L1]]{{$}}
-; CHECK-NEXT: return $pop[[L2]]{{$}}
-define half @demote.f32(float %f) {
- %t = fptrunc float %f to half
- ret half %t
-}
-
-; CHECK-LABEL: promote.f32:
-; CHECK-NEXT: .functype promote.f32 (f32) -> (f32){{$}}
-; CHECK-NEXT: local.get $push0=, 0{{$}}
-; CHECK-NEXT: return $pop0{{$}}
-define float @promote.f32(half %f) {
- %t = fpext half %f to float
- ret float %t
-}
-
-; CHECK-LABEL: demote.f64:
-; CHECK-NEXT: .functype demote.f64 (f64) -> (f32){{$}}
-; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: call $push[[L1:[0-9]+]]=, __truncdfhf2, $pop[[L0]]{{$}}
-; CHECK-NEXT: call $push[[L2:[0-9]+]]=, __extendhfsf2, $pop[[L1]]{{$}}
-; CHECK-NEXT: return $pop[[L2]]{{$}}
-define half @demote.f64(double %f) {
- %t = fptrunc double %f to half
- ret half %t
-}
-
-; CHECK-LABEL: promote.f64:
-; CHECK-NEXT: .functype promote.f64 (f32) -> (f64){{$}}
-; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: f64.promote_f32 $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
-; CHECK-NEXT: return $pop[[L1]]{{$}}
-define double @promote.f64(half %f) {
- %t = fpext half %f to double
- ret double %t
-}
-
-; CHECK-LABEL: demote.f128:
-; CHECK-NEXT: .functype demote.f128 (i64, i64) -> (f32){{$}}
-; CHECK-NEXT: local.get $push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: local.get $push[[L1:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: call $push[[L2:[0-9]+]]=, __trunctfhf2, $pop[[L0]], $pop[[L1]]{{$}}
-; CHECK-NEXT: call $push[[L3:[0-9]+]]=, __extendhfsf2, $pop[[L2]]{{$}}
-; CHECK-NEXT: return $pop[[L3]]{{$}}
-define half @demote.f128(fp128 %f) {
- %t = fptrunc fp128 %f to half
- ret half %t
-}
-
-; CHECK-LABEL: promote.f128:
-; CHECK-NEXT: .functype promote.f128 (i32, f32) -> (){{$}}
-; CHECK: call __extendsftf2
-; CHECK: i64.store
-; CHECK: i64.store
-define fp128 @promote.f128(half %f) {
- %t = fpext half %f to fp128
- ret fp128 %t
+define void @store(half %x, ptr %p) nounwind {
+; ALL-LABEL: store:
+; ALL: .functype store (f32, i32) -> ()
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push2=, 1
+; ALL-NEXT: local.get $push1=, 0
+; ALL-NEXT: call $push0=, __truncsfhf2, $pop1
+; ALL-NEXT: i32.store16 0($pop2), $pop0
+; ALL-NEXT: return
+ store half %x, ptr %p
+ ret void
+}
+
+define half @return(ptr %p) nounwind {
+; ALL-LABEL: return:
+; ALL: .functype return (i32) -> (f32)
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push2=, 0
+; ALL-NEXT: i32.load16_u $push0=, 0($pop2)
+; ALL-NEXT: call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT: return $pop1
+ %r = load half, ptr %p
+ ret half %r
+}
+
+define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind {
+; DEFISEL-LABEL: loadd:
+; DEFISEL: .functype loadd (i32) -> (f64)
+; DEFISEL-NEXT: # %bb.0:
+; DEFISEL-NEXT: local.get $push3=, 0
+; DEFISEL-NEXT: i32.load16_u $push0=, 2($pop3)
+; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT: f64.promote_f32 $push2=, $pop1
+; DEFISEL-NEXT: return $pop2
+;
+; FASTISEL-LABEL: loadd:
+; FASTISEL: .functype loadd (i32) -> (f64)
+; FASTISEL-NEXT: # %bb.0:
+; FASTISEL-NEXT: local.get $push3=, 0
+; FASTISEL-NEXT: i32.load16_u $push2=, 2($pop3)
+; FASTISEL-NEXT: call $push1=, __extendhfsf2, $pop2
+; FASTISEL-NEXT: f64.promote_f32 $push0=, $pop1
+; FASTISEL-NEXT: return $pop0
+ %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
+ %x = load i16, ptr %arrayidx, align 2
+ %ret = tail call double @llvm.convert.from.fp16.f64(i16 %x)
+ ret double %ret
+}
+
+define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind {
+; DEFISEL-LABEL: loadf:
+; DEFISEL: .functype loadf (i32) -> (f32)
+; DEFISEL-NEXT: # %bb.0:
+; DEFISEL-NEXT: local.get $push2=, 0
+; DEFISEL-NEXT: i32.load16_u $push0=, 2($pop2)
+; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT: return $pop1
+;
+; FASTISEL-LABEL: loadf:
+; FASTISEL: .functype loadf (i32) -> (f32)
+; FASTISEL-NEXT: # %bb.0:
+; FASTISEL-NEXT: local.get $push2=, 0
+; FASTISEL-NEXT: i32.load16_u $push1=, 2($pop2)
+; FASTISEL-NEXT: call $push0=, __extendhfsf2, $pop1
+; FASTISEL-NEXT: return $pop0
+ %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
+ %x = load i16, ptr %arrayidx, align 2
+ %ret = tail call float @llvm.convert.from.fp16.f32(i16 %x)
+ ret float %ret
+}
+
+define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind {
+; DEFISEL-LABEL: stored:
+; DEFISEL: .functype stored (i32, f64) -> ()
+; DEFISEL-NEXT: # %bb.0:
+; DEFISEL-NEXT: local.get $push2=, 0
+; DEFISEL-NEXT: local.get $push1=, 1
+; DEFISEL-NEXT: call $push0=, __truncdfhf2, $pop1
+; DEFISEL-NEXT: i32.store16 0($pop2), $pop0
+; DEFISEL-NEXT: return
+;
+; FASTISEL-LABEL: stored:
+; FASTISEL: .functype stored (i32, f64) -> ()
+; FASTISEL-NEXT: # %bb.0:
+; FASTISEL-NEXT: local.get $push4=, 0
+; FASTISEL-NEXT: local.get $push3=, 1
+; FASTISEL-NEXT: call $push2=, __truncdfhf2, $pop3
+; FASTISEL-NEXT: i32.const $push1=, 65535
+; FASTISEL-NEXT: i32.and $push0=, $pop2, $pop1
+; FASTISEL-NEXT: i32.store16 0($pop4), $pop0
+; FASTISEL-NEXT: return
+ %x = tail call i16 @llvm.convert.to.fp16.f64(double %b)
+ store i16 %x, ptr %a, align 2
+ ret void
+}
+
+define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind {
+; DEFISEL-LABEL: storef:
+; DEFISEL: .functype storef (i32, f32) -> ()
+; DEFISEL-NEXT: # %bb.0:
+; DEFISEL-NEXT: local.get $push2=, 0
+; DEFISEL-NEXT: local.get $push1=, 1
+; DEFISEL-NEXT: call $push0=, __truncsfhf2, $pop1
+; DEFISEL-NEXT: i32.store16 0($pop2), $pop0
+; DEFISEL-NEXT: return
+;
+; FASTISEL-LABEL: storef:
+; FASTISEL: .functype storef (i32, f32) -> ()
+; FASTISEL-NEXT: # %bb.0:
+; FASTISEL-NEXT: local.get $push4=, 0
+; FASTISEL-NEXT: local.get $push3=, 1
+; FASTISEL-NEXT: call $push2=, __truncsfhf2, $pop3
+; FASTISEL-NEXT: i32.const $push1=, 65535
+; FASTISEL-NEXT: i32.and $push0=, $pop2, $pop1
+; FASTISEL-NEXT: i32.store16 0($pop4), $pop0
+; FASTISEL-NEXT: return
+ %x = tail call i16 @llvm.convert.to.fp16.f32(float %b)
+ store i16 %x, ptr %a, align 2
+ ret void
+}
+
+define void @test_load_store(ptr %in, ptr %out) nounwind {
+; ALL-LABEL: test_load_store:
+; ALL: .functype test_load_store (i32, i32) -> ()
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push2=, 1
+; ALL-NEXT: local.get $push1=, 0
+; ALL-NEXT: i32.load16_u $push0=, 0($pop1)
+; ALL-NEXT: i32.store16 0($pop2), $pop0
+; ALL-NEXT: return
+ %val = load half, ptr %in
+ store half %val, ptr %out
+ ret void
+}
+
+define i16 @test_bitcast_from_half(ptr %addr) nounwind {
+; ALL-LABEL: test_bitcast_from_half:
+; ALL: .functype test_bitcast_from_half (i32) -> (i32)
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push1=, 0
+; ALL-NEXT: i32.load16_u $push0=, 0($pop1)
+; ALL-NEXT: return $pop0
+ %val = load half, ptr %addr
+ %val_int = bitcast half %val to i16
+ ret i16 %val_int
+}
+
+define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind {
+; ALL-LABEL: test_bitcast_to_half:
+; ALL: .functype test_bitcast_to_half (i32, i32) -> ()
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push1=, 0
+; ALL-NEXT: local.get $push0=, 1
+; ALL-NEXT: i32.store16 0($pop1), $pop0
+; ALL-NEXT: return
+ %val_fp = bitcast i16 %in to half
+ store half %val_fp, ptr %addr
+ ret void
+}
+
+define half @from_bits(i16 %x) nounwind {
+; ALL-LABEL: from_bits:
+; ALL: .functype from_bits (i32) -> (f32)
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push1=, 0
+; ALL-NEXT: call $push0=, __extendhfsf2, $pop1
+; ALL-NEXT: return $pop0
+ %res = bitcast i16 %x to half
+ ret half %res
+}
+
+define i16 @to_bits(half %x) nounwind {
+; DEFISEL-LABEL: to_bits:
+; DEFISEL: .functype to_bits (f32) -> (i32)
+; DEFISEL-NEXT: # %bb.0:
+; DEFISEL-NEXT: local.get $push3=, 0
+; DEFISEL-NEXT: call $push1=, __truncsfhf2, $pop3
+; DEFISEL-NEXT: i32.const $push0=, 65535
+; DEFISEL-NEXT: i32.and $push2=, $pop1, $pop0
+; DEFISEL-NEXT: return $pop2
+;
+; FASTISEL-LABEL: to_bits:
+; FASTISEL: .functype to_bits (f32) -> (i32)
+; FASTISEL-NEXT: # %bb.0:
+; FASTISEL-NEXT: local.get $push3=, 0
+; FASTISEL-NEXT: call $push2=, __truncsfhf2, $pop3
+; FASTISEL-NEXT: i32.const $push1=, 65535
+; FASTISEL-NEXT: i32.and $push0=, $pop2, $pop1
+; FASTISEL-NEXT: return $pop0
+ %res = bitcast half %x to i16
+ ret i16 %res
+}
+
+define float @test_extend32(ptr %addr) nounwind {
+; DEFISEL-LABEL: test_extend32:
+; DEFISEL: .functype test_extend32 (i32) -> (f32)
+; DEFISEL-NEXT: # %bb.0:
+; DEFISEL-NEXT: local.get $push2=, 0
+; DEFISEL-NEXT: i32.load16_u $push0=, 0($pop2)
+; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT: return $pop1
+;
+; FASTISEL-LABEL: test_extend32:
+; FASTISEL: .functype test_extend32 (i32) -> (f32)
+; FASTISEL-NEXT: # %bb.0:
+; FASTISEL-NEXT: local.get $push2=, 0
+; FASTISEL-NEXT: i32.load16_u $push1=, 0($pop2)
+; FASTISEL-NEXT: call $push0=, __extendhfsf2, $pop1
+; FASTISEL-NEXT: return $pop0
+ %val16 = load half, ptr %addr
+ %val32 = fpext half %val16 to float
+ ret float %val32
+}
+
+define double @test_extend64(ptr %addr) nounwind {
+; DEFISEL-LABEL: test_extend64:
+; DEFISEL: .functype test_extend64 (i32) -> (f64)
+; DEFISEL-NEXT: # %bb.0:
+; DEFISEL-NEXT: local.get $push3=, 0
+; DEFISEL-NEXT: i32.load16_u $push0=, 0($pop3)
+; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT: f64.promote_f32 $push2=, $pop1
+; DEFISEL-NEXT: return $pop2
+;
+; FASTISEL-LABEL: test_extend64:
+; FASTISEL: .functype test_extend64 (i32) -> (f64)
+; FASTISEL-NEXT: # %bb.0:
+; FASTISEL-NEXT: local.get $push3=, 0
+; FASTISEL-NEXT: i32.load16_u $push1=, 0($pop3)
+; FASTISEL-NEXT: call $push2=, __extendhfsf2, $pop1
+; FASTISEL-NEXT: f64.promote_f32 $push0=, $pop2
+; FASTISEL-NEXT: return $pop0
+ %val16 = load half, ptr %addr
+ %val32 = fpext half %val16 to double
+ ret double %val32
+}
+
+define fp128 @test_extend128(ptr %addr) nounwind {
+; ALL-LABEL: test_extend128:
+; ALL: .functype test_extend128 (i32, i32) -> ()
+; ALL-NEXT: .local i32
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: global.get $push4=, __stack_pointer
+; ALL-NEXT: i32.const $push5=, 16
+; ALL-NEXT: i32.sub $push9=, $pop4, $pop5
+; ALL-NEXT: local.tee $push8=, 2, $pop9
+; ALL-NEXT: global.set __stack_pointer, $pop8
+; ALL-NEXT: local.get $push11=, 2
+; ALL-NEXT: local.get $push10=, 1
+; ALL-NEXT: i32.load16_u $push0=, 0($pop10)
+; ALL-NEXT: call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT: call __extendsftf2, $pop11, $pop1
+; ALL-NEXT: local.get $push13=, 0
+; ALL-NEXT: local.get $push12=, 2
+; ALL-NEXT: i64.load $push2=, 8($pop12)
+; ALL-NEXT: i64.store 8($pop13), $pop2
+; ALL-NEXT: local.get $push15=, 0
+; ALL-NEXT: local.get $push14=, 2
+; ALL-NEXT: i64.load $push3=, 0($pop14)
+; ALL-NEXT: i64.store 0($pop15), $pop3
+; ALL-NEXT: local.get $push16=, 2
+; ALL-NEXT: i32.const $push6=, 16
+; ALL-NEXT: i32.add $push7=, $pop16, $pop6
+; ALL-NEXT: global.set __stack_pointer, $pop7
+; ALL-NEXT: return
+ %val16 = load half, ptr %addr
+ %val32 = fpext half %val16 to fp128
+ ret fp128 %val32
+}
+
+define void @test_trunc32(float %in, ptr %addr) nounwind {
+; ALL-LABEL: test_trunc32:
+; ALL: .functype test_trunc32 (f32, i32) -> ()
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push2=, 1
+; ALL-NEXT: local.get $push1=, 0
+; ALL-NEXT: call $push0=, __truncsfhf2, $pop1
+; ALL-NEXT: i32.store16 0($pop2), $pop0
+; ALL-NEXT: return
+ %val16 = fptrunc float %in to half
+ store half %val16, ptr %addr
+ ret void
+}
+
+define void @test_trunc64(double %in, ptr %addr) nounwind {
+; ALL-LABEL: test_trunc64:
+; ALL: .functype test_trunc64 (f64, i32) -> ()
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push2=, 1
+; ALL-NEXT: local.get $push1=, 0
+; ALL-NEXT: call $push0=, __truncdfhf2, $pop1
+; ALL-NEXT: i32.store16 0($pop2), $pop0
+; ALL-NEXT: return
+ %val16 = fptrunc double %in to half
+ store half %val16, ptr %addr
+ ret void
+}
+
+define void @test_trunc128(fp128 %in, ptr %addr) nounwind {
+; ALL-LABEL: test_trunc128:
+; ALL: .functype test_trunc128 (i64, i64, i32) -> ()
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push3=, 2
+; ALL-NEXT: local.get $push2=, 0
+; ALL-NEXT: local.get $push1=, 1
+; ALL-NEXT: call $push0=, __trunctfhf2, $pop2, $pop1
+; ALL-NEXT: i32.store16 0($pop3), $pop0
+; ALL-NEXT: return
+ %val16 = fptrunc fp128 %in to half
+ store half %val16, ptr %addr
+ ret void
+}
+
+define i64 @test_fptosi_i64(ptr %p) nounwind {
+; DEFISEL-LABEL: test_fptosi_i64:
+; DEFISEL: .functype test_fptosi_i64 (i32) -> (i64)
+; DEFISEL-NEXT: # %bb.0:
+; DEFISEL-NEXT: local.get $push3=, 0
+; DEFISEL-NEXT: i32.load16_u $push0=, 0($pop3)
+; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT: i64.trunc_sat_f32_s $push2=, $pop1
+; DEFISEL-NEXT: return $pop2
+;
+; FASTISEL-LABEL: test_fptosi_i64:
+; FASTISEL: .functype test_fptosi_i64 (i32) -> (i64)
+; FASTISEL-NEXT: # %bb.0:
+; FASTISEL-NEXT: local.get $push3=, 0
+; FASTISEL-NEXT: i32.load16_u $push1=, 0($pop3)
+; FASTISEL-NEXT: call $push2=, __extendhfsf2, $pop1
+; FASTISEL-NEXT: i64.trunc_sat_f32_s $push0=, $pop2
+; FASTISEL-NEXT: return $pop0
+ %a = load half, ptr %p, align 2
+ %r = fptosi half %a to i64
+ ret i64 %r
+}
+
+define void @test_sitofp_i64(i64 %a, ptr %p) nounwind {
+; ALL-LABEL: test_sitofp_i64:
+; ALL: .functype test_sitofp_i64 (i64, i32) -> ()
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push3=, 1
+; ALL-NEXT: local.get $push2=, 0
+; ALL-NEXT: f32.convert_i64_s $push0=, $pop2
+; ALL-NEXT: call $push1=, __truncsfhf2, $pop0
+; ALL-NEXT: i32.store16 0($pop3), $pop1
+; ALL-NEXT: return
+ %r = sitofp i64 %a to half
+ store half %r, ptr %p
+ ret void
+}
+
+define i64 @test_fptoui_i64(ptr %p) nounwind {
+; DEFISEL-LABEL: test_fptoui_i64:
+; DEFISEL: .functype test_fptoui_i64 (i32) -> (i64)
+; DEFISEL-NEXT: # %bb.0:
+; DEFISEL-NEXT: local.get $push3=, 0
+; DEFISEL-NEXT: i32.load16_u $push0=, 0($pop3)
+; DEFISEL-NEXT: call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT: i64.trunc_sat_f32_u $push2=, $pop1
+; DEFISEL-NEXT: return $pop2
+;
+; FASTISEL-LABEL: test_fptoui_i64:
+; FASTISEL: .functype test_fptoui_i64 (i32) -> (i64)
+; FASTISEL-NEXT: # %bb.0:
+; FASTISEL-NEXT: local.get $push3=, 0
+; FASTISEL-NEXT: i32.load16_u $push1=, 0($pop3)
+; FASTISEL-NEXT: call $push2=, __extendhfsf2, $pop1
+; FASTISEL-NEXT: i64.trunc_sat_f32_u $push0=, $pop2
+; FASTISEL-NEXT: return $pop0
+ %a = load half, ptr %p, align 2
+ %r = fptoui half %a to i64
+ ret i64 %r
+}
+
+define void @test_uitofp_i64(i64 %a, ptr %p) nounwind {
+; ALL-LABEL: test_uitofp_i64:
+; ALL: .functype test_uitofp_i64 (i64, i32) -> ()
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push3=, 1
+; ALL-NEXT: local.get $push2=, 0
+; ALL-NEXT: f32.convert_i64_u $push0=, $pop2
+; ALL-NEXT: call $push1=, __truncsfhf2, $pop0
+; ALL-NEXT: i32.store16 0($pop3), $pop1
+; ALL-NEXT: return
+ %r = uitofp i64 %a to half
+ store half %r, ptr %p
+ ret void
+}
+
+define <4 x float> @test_extend32_vec4(ptr %p) nounwind {
+; ALL-LABEL: test_extend32_vec4:
+; ALL: .functype test_extend32_vec4 (i32, i32) -> ()
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push9=, 0
+; ALL-NEXT: local.get $push8=, 1
+; ALL-NEXT: i32.load16_u $push0=, 6($pop8)
+; ALL-NEXT: call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT: f32.store 12($pop9), $pop1
+; ALL-NEXT: local.get $push11=, 0
+; ALL-NEXT: local.get $push10=, 1
+; ALL-NEXT: i32.load16_u $push2=, 4($pop10)
+; ALL-NEXT: call $push3=, __extendhfsf2, $pop2
+; ALL-NEXT: f32.store 8($pop11), $pop3
+; ALL-NEXT: local.get $push13=, 0
+; ALL-NEXT: local.get $push12=, 1
+; ALL-NEXT: i32.load16_u $push4=, 2($pop12)
+; ALL-NEXT: call $push5=, __extendhfsf2, $pop4
+; ALL-NEXT: f32.store 4($pop13), $pop5
+; ALL-NEXT: local.get $push15=, 0
+; ALL-NEXT: local.get $push14=, 1
+; ALL-NEXT: i32.load16_u $push6=, 0($pop14)
+; ALL-NEXT: call $push7=, __extendhfsf2, $pop6
+; ALL-NEXT: f32.store 0($pop15), $pop7
+; ALL-NEXT: return
+ %a = load <4 x half>, ptr %p, align 8
+ %b = fpext <4 x half> %a to <4 x float>
+ ret <4 x float> %b
+}
+
+define <4 x double> @test_extend64_vec4(ptr %p) nounwind {
+; ALL-LABEL: test_extend64_vec4:
+; ALL: .functype test_extend64_vec4 (i32, i32) -> ()
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push13=, 0
+; ALL-NEXT: local.get $push12=, 1
+; ALL-NEXT: i64.load16_u $push0=, 6($pop12)
+; ALL-NEXT: call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT: f64.promote_f32 $push2=, $pop1
+; ALL-NEXT: f64.store 24($pop13), $pop2
+; ALL-NEXT: local.get $push15=, 0
+; ALL-NEXT: local.get $push14=, 1
+; ALL-NEXT: i64.load16_u $push3=, 4($pop14)
+; ALL-NEXT: call $push4=, __extendhfsf2, $pop3
+; ALL-NEXT: f64.promote_f32 $push5=, $pop4
+; ALL-NEXT: f64.store 16($pop15), $pop5
+; ALL-NEXT: local.get $push17=, 0
+; ALL-NEXT: local.get $push16=, 1
+; ALL-NEXT: i64.load16_u $push6=, 2($pop16)
+; ALL-NEXT: call $push7=, __extendhfsf2, $pop6
+; ALL-NEXT: f64.promote_f32 $push8=, $pop7
+; ALL-NEXT: f64.store 8($pop17), $pop8
+; ALL-NEXT: local.get $push19=, 0
+; ALL-NEXT: local.get $push18=, 1
+; ALL-NEXT: i64.load16_u $push9=, 0($pop18)
+; ALL-NEXT: call $push10=, __extendhfsf2, $pop9
+; ALL-NEXT: f64.promote_f32 $push11=, $pop10
+; ALL-NEXT: f64.store 0($pop19), $pop11
+; ALL-NEXT: return
+ %a = load <4 x half>, ptr %p, align 8
+ %b = fpext <4 x half> %a to <4 x double>
+ ret <4 x double> %b
+}
+
+define void @test_trunc32_vec4(<4 x float> %a, ptr %p) nounwind {
+; DEFISEL-LABEL: test_trunc32_vec4:
+; DEFISEL: .functype test_trunc32_vec4 (f32, f32, f32, f32, i32) -> ()
+; DEFISEL-NEXT: # %bb.0:
+; DEFISEL-NEXT: local.get $push5=, 4
+; DEFISEL-NEXT: local.get $push4=, 3
+; DEFISEL-NEXT: call $push0=, __truncsfhf2, $pop4
+; DEFISEL-NEXT: i32.store16 6($pop5), $pop0
+; DEFISEL-NEXT: local.get $push7=, 4
+; DEFISEL-NEXT: local.get $push6=, 2
+; DEFISEL-NEXT: call $push1=, __truncsfhf2, $pop6
+; DEFISEL-NEXT: i32.store16 4($pop7), $pop1
+; DEFISEL-NEXT: local.get $push9=, 4
+; DEFISEL-NEXT: local.get $push8=, 1
+; DEFISEL-NEXT: call $push2=, __truncsfhf2, $pop8
+; DEFISEL-NEXT: i32.store16 2($pop9), $pop2
+; DEFISEL-NEXT: local.get $push11=, 4
+; DEFISEL-NEXT: local.get $push10=, 0
+; DEFISEL-NEXT: call $push3=, __truncsfhf2, $pop10
+; DEFISEL-NEXT: i32.store16 0($pop11), $pop3
+; DEFISEL-NEXT: return
+;
+; FASTISEL-LABEL: test_trunc32_vec4:
+; FASTISEL: .functype test_trunc32_vec4 (f32, f32, f32, f32, i32) -> ()
+; FASTISEL-NEXT: # %bb.0:
+; FASTISEL-NEXT: local.get $push5=, 4
+; FASTISEL-NEXT: local.get $push4=, 0
+; FASTISEL-NEXT: call $push0=, __truncsfhf2, $pop4
+; FASTISEL-NEXT: i32.store16 0($pop5), $pop0
+; FASTISEL-NEXT: local.get $push7=, 4
+; FASTISEL-NEXT: local.get $push6=, 1
+; FASTISEL-NEXT: call $push1=, __truncsfhf2, $pop6
+; FASTISEL-NEXT: i32.store16 2($pop7), $pop1
+; FASTISEL-NEXT: local.get $push9=, 4
+; FASTISEL-NEXT: local.get $push8=, 2
+; FASTISEL-NEXT: call $push2=, __truncsfhf2, $pop8
+; FASTISEL-NEXT: i32.store16 4($pop9), $pop2
+; FASTISEL-NEXT: local.get $push11=, 4
+; FASTISEL-NEXT: local.get $push10=, 3
+; FASTISEL-NEXT: call $push3=, __truncsfhf2, $pop10
+; FASTISEL-NEXT: i32.store16 6($pop11), $pop3
+; FASTISEL-NEXT: return
+ %v = fptrunc <4 x float> %a to <4 x half>
+ store <4 x half> %v, ptr %p
+ ret void
+}
+
+define void @test_trunc64_vec4(<4 x double> %a, ptr %p) nounwind {
+; DEFISEL-LABEL: test_trunc64_vec4:
+; DEFISEL: .functype test_trunc64_vec4 (f64, f64, f64, f64, i32) -> ()
+; DEFISEL-NEXT: # %bb.0:
+; DEFISEL-NEXT: local.get $push5=, 4
+; DEFISEL-NEXT: local.get $push4=, 3
+; DEFISEL-NEXT: call $push0=, __truncdfhf2, $pop4
+; DEFISEL-NEXT: i32.store16 6($pop5), $pop0
+; DEFISEL-NEXT: local.get $push7=, 4
+; DEFISEL-NEXT: local.get $push6=, 2
+; DEFISEL-NEXT: call $push1=, __truncdfhf2, $pop6
+; DEFISEL-NEXT: i32.store16 4($pop7), $pop1
+; DEFISEL-NEXT: local.get $push9=, 4
+; DEFISEL-NEXT: local.get $push8=, 1
+; DEFISEL-NEXT: call $push2=, __truncdfhf2, $pop8
+; DEFISEL-NEXT: i32.store16 2($pop9), $pop2
+; DEFISEL-NEXT: local.get $push11=, 4
+; DEFISEL-NEXT: local.get $push10=, 0
+; DEFISEL-NEXT: call $push3=, __truncdfhf2, $pop10
+; DEFISEL-NEXT: i32.store16 0($pop11), $pop3
+; DEFISEL-NEXT: return
+;
+; FASTISEL-LABEL: test_trunc64_vec4:
+; FASTISEL: .functype test_trunc64_vec4 (f64, f64, f64, f64, i32) -> ()
+; FASTISEL-NEXT: # %bb.0:
+; FASTISEL-NEXT: local.get $push5=, 4
+; FASTISEL-NEXT: local.get $push4=, 0
+; FASTISEL-NEXT: call $push0=, __truncdfhf2, $pop4
+; FASTISEL-NEXT: i32.store16 0($pop5), $pop0
+; FASTISEL-NEXT: local.get $push7=, 4
+; FASTISEL-NEXT: local.get $push6=, 1
+; FASTISEL-NEXT: call $push1=, __truncdfhf2, $pop6
+; FASTISEL-NEXT: i32.store16 2($pop7), $pop1
+; FASTISEL-NEXT: local.get $push9=, 4
+; FASTISEL-NEXT: local.get $push8=, 2
+; FASTISEL-NEXT: call $push2=, __truncdfhf2, $pop8
+; FASTISEL-NEXT: i32.store16 4($pop9), $pop2
+; FASTISEL-NEXT: local.get $push11=, 4
+; FASTISEL-NEXT: local.get $push10=, 3
+; FASTISEL-NEXT: call $push3=, __truncdfhf2, $pop10
+; FASTISEL-NEXT: i32.store16 6($pop11), $pop3
+; FASTISEL-NEXT: return
+ %v = fptrunc <4 x double> %a to <4 x half>
+ store <4 x half> %v, ptr %p
+ ret void
+}
+
+define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
+; DEFISEL-LABEL: test_sitofp_fadd_i32:
+; DEFISEL: .functype test_sitofp_fadd_i32 (i32, i32) -> (f32)
+; DEFISEL-NEXT: # %bb.0:
+; DEFISEL-NEXT: local.get $push6=, 1
+; DEFISEL-NEXT: i32.load16_u $push1=, 0($pop6)
+; DEFISEL-NEXT: call $push2=, __extendhfsf2, $pop1
+; DEFISEL-NEXT: local.get $push7=, 0
+; DEFISEL-NEXT: f32.convert_i32_s $push0=, $pop7
+; DEFISEL-NEXT: call $push3=, __truncsfhf2, $pop0
+; DEFISEL-NEXT: call $push4=, __extendhfsf2, $pop3
+; DEFISEL-NEXT: f32.add $push5=, $pop2, $pop4
+; DEFISEL-NEXT: return $pop5
+;
+; FASTISEL-LABEL: test_sitofp_fadd_i32:
+; FASTISEL: .functype test_sitofp_fadd_i32 (i32, i32) -> (f32)
+; FASTISEL-NEXT: # %bb.0:
+; FASTISEL-NEXT: local.get $push6=, 1
+; FASTISEL-NEXT: i32.load16_u $push2=, 0($pop6)
+; FASTISEL-NEXT: call $push3=, __extendhfsf2, $pop2
+; FASTISEL-NEXT: local.get $push7=, 0
+; FASTISEL-NEXT: f32.convert_i32_s $push1=, $pop7
+; FASTISEL-NEXT: call $push4=, __truncsfhf2, $pop1
+; FASTISEL-NEXT: call $push5=, __extendhfsf2, $pop4
+; FASTISEL-NEXT: f32.add $push0=, $pop3, $pop5
+; FASTISEL-NEXT: return $pop0
+ %tmp0 = load half, ptr %b
+ %tmp1 = sitofp i32 %a to half
+ %tmp2 = fadd half %tmp0, %tmp1
+ %tmp3 = fpext half %tmp2 to float
+ ret float %tmp3
+}
+
+define half @chained_fp_ops(half %x) {
+; ALL-LABEL: chained_fp_ops:
+; ALL: .functype chained_fp_ops (f32) -> (f32)
+; ALL-NEXT: # %bb.0: # %start
+; ALL-NEXT: local.get $push6=, 0
+; ALL-NEXT: call $push0=, __truncsfhf2, $pop6
+; ALL-NEXT: call $push5=, __extendhfsf2, $pop0
+; ALL-NEXT: local.tee $push4=, 0, $pop5
+; ALL-NEXT: local.get $push7=, 0
+; ALL-NEXT: f32.add $push1=, $pop4, $pop7
+; ALL-NEXT: f32.const $push2=, 0x1p-1
+; ALL-NEXT: f32.mul $push3=, $pop1, $pop2
+; ALL-NEXT: return $pop3
+start:
+ %y = fmul half %x, 0xH4000
+ %z = fdiv half %y, 0xH4000
+ ret half %z
+}
+
+define half @test_select_cc(half) nounwind {
+; ALL-LABEL: test_select_cc:
+; ALL: .functype test_select_cc (f32) -> (f32)
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: f32.const $push4=, 0x1p0
+; ALL-NEXT: f32.const $push0=, 0x0p0
+; ALL-NEXT: local.get $push7=, 0
+; ALL-NEXT: call $push1=, __truncsfhf2, $pop7
+; ALL-NEXT: call $push2=, __extendhfsf2, $pop1
+; ALL-NEXT: f32.const $push6=, 0x0p0
+; ALL-NEXT: f32.ne $push3=, $pop2, $pop6
+; ALL-NEXT: f32.select $push5=, $pop4, $pop0, $pop3
+; ALL-NEXT: return $pop5
+ %2 = fcmp une half %0, 0xH0000
+ %3 = uitofp i1 %2 to half
+ ret half %3
+}
+
+define half @fabs(half %x) nounwind {
+; ALL-LABEL: fabs:
+; ALL: .functype fabs (f32) -> (f32)
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push3=, 0
+; ALL-NEXT: call $push0=, __truncsfhf2, $pop3
+; ALL-NEXT: call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT: f32.abs $push2=, $pop1
+; ALL-NEXT: return $pop2
+ %a = call half @llvm.fabs.f16(half %x)
+ ret half %a
+}
+
+define half @fcopysign(half %x, half %y) nounwind {
+; ALL-LABEL: fcopysign:
+; ALL: .functype fcopysign (f32, f32) -> (f32)
+; ALL-NEXT: # %bb.0:
+; ALL-NEXT: local.get $push3=, 0
+; ALL-NEXT: call $push0=, __truncsfhf2, $pop3
+; ALL-NEXT: call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT: local.get $push4=, 1
+; ALL-NEXT: f32.copysign $push2=, $pop1, $pop4
+; ALL-NEXT: return $pop2
+ %a = call half @llvm.copysign.f16(half %x, half %y)
+ ret half %a
}
diff --git a/llvm/test/CodeGen/WebAssembly/global-set.ll b/llvm/test/CodeGen/WebAssembly/global-set.ll
index 7db374528fe9..4553957ae758 100644
--- a/llvm/test/CodeGen/WebAssembly/global-set.ll
+++ b/llvm/test/CodeGen/WebAssembly/global-set.ll
@@ -45,6 +45,22 @@ define void @set_f64_global(double %v) {
ret void
}
+declare i32 @get_i32()
+define i32 @stackifyAcrossGlobalSet() {
+; https://github.com/llvm/llvm-project/issues/156055
+; CHECK-LABEL: stackifyAcrossGlobalSet:
+; CHECK-NEXT: .functype
+; CHECK-NEXT: .local
+; CHECK-NEXT: call get_i32
+; CHECK-NEXT: local.tee
+; CHECK-NEXT: global.set i32_global
+; CHECK-NEXT: local.get
+; CHECK-NEXT: end_function
+ %1 = call i32 @get_i32()
+ store i32 %1, ptr addrspace(1) @i32_global
+ ret i32 %1
+}
+
; CHECK: .globaltype i32_global, i32
; CHECK: .globl i32_global
; CHECK-LABEL: i32_global:
diff --git a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
index 0184e22a3b40..04a2268db175 100644
--- a/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
+++ b/llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll
@@ -191,13 +191,9 @@ define hidden i32 @i32_mac_u8(ptr nocapture noundef readonly %a, ptr nocapture n
; MAX-BANDWIDTH: v128.load
; MAX-BANDWIDTH: v128.load
; MAX-BANDWIDTH: i16x8.extmul_low_i8x16_u
-; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u
-; MAX-BANDWIDTH: i32x4.extend_high_i16x8_u
-; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
; MAX-BANDWIDTH: i16x8.extmul_high_i8x16_u
-; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u
-; MAX-BANDWIDTH: i32x4.extend_high_i16x8_u
-; MAX-BANDWIDTH: i32x4.add
+; MAX-BANDWIDTH: i32x4.extadd_pairwise_i16x8_u
; MAX-BANDWIDTH: i32x4.add
; MAX-BANDWIDTH: i32x4.add
diff --git a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
index 97c2311c2558..94efe0f4157f 100644
--- a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
+++ b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -mattr=+simd128 -passes=loop-vectorize %s | llc -mtriple=wasm32 -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s
+; RUN: opt -mtriple=wasm32 -mattr=+simd128 -passes=loop-vectorize %s | llc -mtriple=wasm32 -mattr=+simd128 -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
@@ -15,14 +15,20 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20
; CHECK-LABEL: two_ints_same_op:
; CHECK: loop
-; CHECK: i32.load
-; CHECK: i32.load
-; CHECK: i32.add
-; CHECK: i32.store
-; CHECK: i32.load
-; CHECK: i32.load
-; CHECK: i32.add
-; CHECK: i32.store
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i32x4.add
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i32x4.add
+; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: v128.store
define hidden void @two_ints_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
br i1 %5, label %6, label %7
@@ -53,14 +59,20 @@ define hidden void @two_ints_same_op(ptr noalias nocapture noundef writeonly %0,
; CHECK-LABEL: two_ints_vary_op:
; CHECK: loop
-; CHECK: i32.load
-; CHECK: i32.load
-; CHECK: i32.add
-; CHECK: i32.store
-; CHECK: i32.load
-; CHECK: i32.load
-; CHECK: i32.sub
-; CHECK: i32.store
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: i32x4.add
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i32x4.sub
+; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: v128.store
define hidden void @two_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
br i1 %5, label %6, label %7
@@ -91,6 +103,7 @@ define hidden void @two_ints_vary_op(ptr noalias nocapture noundef writeonly %0,
; CHECK-LABEL: three_ints:
; CHECK: loop
+; CHECK-NOT: v128.load
; CHECK: i32.load
; CHECK: i32.load
; CHECK: i32.add
@@ -140,6 +153,7 @@ define hidden void @three_ints(ptr noalias nocapture noundef writeonly %0, ptr n
; CHECK-LABEL: three_shorts:
; CHECK: loop
+; CHECK-NOT: v128.load
; CHECK: i32.load16_u
; CHECK: i32.load16_u
; CHECK: i32.mul
@@ -189,22 +203,30 @@ define hidden void @three_shorts(ptr noalias nocapture noundef writeonly %0, ptr
; CHECK-LABEL: four_shorts_same_op:
; CHECK: loop
-; CHECK: i32.load16_u
-; CHECK: i32.load16_u
-; CHECK: i32.sub
-; CHECK: i32.store16
-; CHECK: i32.load16_u
-; CHECK: i32.load16_u
-; CHECK: i32.sub
-; CHECK: i32.store16
-; CHECK: i32.load16_u
-; CHECK: i32.load16_u
-; CHECK: i32.sub
-; CHECK: i32.store16
-; CHECK: i32.load16_u
-; CHECK: i32.load16_u
-; CHECK: i32.sub
-; CHECK: i32.store16
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i16x8.sub
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i16x8.sub
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i16x8.sub
+; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i16x8.sub
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
+; CHECK: v128.store
define hidden void @four_shorts_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
br i1 %5, label %6, label %7
@@ -249,22 +271,30 @@ define hidden void @four_shorts_same_op(ptr noalias nocapture noundef writeonly
; CHECK-LABEL: four_shorts_split_op:
; CHECK: loop
-; CHECK: i32.load16_u
-; CHECK: i32.load16_u
-; CHECK: i32.or
-; CHECK: i32.store16
-; CHECK: i32.load16_u
-; CHECK: i32.load16_u
-; CHECK: i32.or
-; CHECK: i32.store16
-; CHECK: i32.load16_u
-; CHECK: i32.load16_u
-; CHECK: i32.xor
-; CHECK: i32.store16
-; CHECK: i32.load16_u
-; CHECK: i32.load16_u
-; CHECK: i32.xor
-; CHECK: i32.store16
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: v128.or
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: v128.or
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: v128.xor
+; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: v128.xor
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
+; CHECK: v128.store
define hidden void @four_shorts_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
br i1 %5, label %6, label %7
@@ -308,23 +338,30 @@ define hidden void @four_shorts_split_op(ptr noalias nocapture noundef writeonly
}
; CHECK-LABEL: four_shorts_interleave_op:
-; CHECK: loop
-; CHECK: i32.load16_u
-; CHECK: i32.load16_u
-; CHECK: i32.or
-; CHECK: i32.store16
-; CHECK: i32.load16_u
-; CHECK: i32.load16_u
-; CHECK: i32.xor
-; CHECK: i32.store16
-; CHECK: i32.load16_u
-; CHECK: i32.load16_u
-; CHECK: i32.or
-; CHECK: i32.store16
-; CHECK: i32.load16_u
-; CHECK: i32.load16_u
-; CHECK: i32.xor
-; CHECK: i32.store16
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: v128.or
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: v128.xor
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: v128.or
+; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: v128.xor
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31
+; CHECK: v128.store
define hidden void @four_shorts_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
br i1 %5, label %6, label %7
@@ -369,6 +406,7 @@ define hidden void @four_shorts_interleave_op(ptr noalias nocapture noundef writ
; CHECK-LABEL: five_shorts:
; CHECK: loop
+; CHECK-NOT: v128.load
; CHECK: i32.load16_u
; CHECK: i32.load16_u
; CHECK: i32.sub
@@ -440,14 +478,22 @@ define hidden void @five_shorts(ptr noalias nocapture noundef writeonly %0, ptr
; CHECK-LABEL: two_bytes_same_op:
; CHECK: loop
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.mul
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.mul
-; CHECK: i32.store8
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK: i16x8.extmul_high_i8x16_u
+; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
+; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
+; CHECK: i16x8.extmul_high_i8x16_u
+; CHECK: i8x16.shuffle {{.*}} 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30
+; CHECK: v128.store
+; CHECK: i16x8.extmul_low_i8x16_u
+; CHECK: i16x8.extmul_low_i8x16_u
+; CHECK: i8x16.shuffle {{.*}} 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30
+; CHECK: v128.store
define hidden void @two_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
br i1 %5, label %6, label %7
@@ -478,14 +524,21 @@ define hidden void @two_bytes_same_op(ptr noalias nocapture noundef writeonly %0
; CHECK-LABEL: two_bytes_vary_op:
; CHECK: loop
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.mul
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.sub
-; CHECK: i32.store8
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK: i16x8.extmul_high_i8x16_u
+; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
+; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31
+; CHECK: i8x16.sub
+; CHECK: i8x16.shuffle {{.*}} 0, 24, 2, 25, 4, 26, 6, 27, 8, 28, 10, 29, 12, 30, 14, 31
+; CHECK: v128.store
+; CHECK: i16x8.extmul_low_i8x16_u
+; CHECK: i8x16.shuffle {{.*}} 0, 16, 2, 17, 4, 18, 6, 19, 8, 20, 10, 21, 12, 22, 14, 23
+; CHECK: v128.store
define hidden void @two_bytes_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
br i1 %5, label %6, label %7
@@ -614,22 +667,30 @@ define hidden void @three_bytes_interleave_op(ptr noalias nocapture noundef writ
; CHECK-LABEL: four_bytes_same_op:
; CHECK: loop
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.and
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.and
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.and
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.and
-; CHECK: i32.store8
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: v128.and
+; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: v128.and
+; CHECK: i8x16.shuffle {{.*}} 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23, 0, 0
+; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: v128.and
+; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: v128.and
+; CHECK: i8x16.shuffle {{.*}} 0, 0, 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19, 0, 0
+; CHECK: i8x16.shuffle {{.*}} 0, 0, 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31
+; CHECK: v128.store
define hidden void @four_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
br i1 %5, label %6, label %7
@@ -674,22 +735,28 @@ define hidden void @four_bytes_same_op(ptr noalias nocapture noundef writeonly %
; CHECK-LABEL: four_bytes_split_op:
; CHECK: loop
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.mul
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.mul
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.sub
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.sub
-; CHECK: i32.store8
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}}, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}}, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extmul_low_i8x16_u
+; CHECK: i8x16.shuffle {{.*}}, 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle {{.*}}, 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extmul_low_i8x16_u
+; CHECK: i8x16.shuffle {{.*}}, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK: i8x16.shuffle {{.*}}, 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle {{.*}}, 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.sub
+; CHECK: i8x16.shuffle {{.*}}, 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle {{.*}}, 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.sub
+; CHECK: i8x16.shuffle {{.*}}, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23
+; CHECK: i8x16.shuffle {{.*}}, 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}}, 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27
+; CHECK: v128.store
define hidden void @four_bytes_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
br i1 %5, label %6, label %7
@@ -734,22 +801,30 @@ define hidden void @four_bytes_split_op(ptr noalias nocapture noundef writeonly
; CHECK-LABEL: four_bytes_interleave_op:
; CHECK: loop
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.add
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.sub
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.add
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.sub
-; CHECK: i32.store8
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.add
+; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.sub
+; CHECK: i8x16.shuffle {{.*}} 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23, 0, 0
+; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.add
+; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.sub
+; CHECK: i8x16.shuffle {{.*}} 0, 0, 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19, 0, 0
+; CHECK: i8x16.shuffle {{.*}} 0, 0, 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31
+; CHECK: v128.store
define hidden void @four_bytes_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
br i1 %5, label %6, label %7
@@ -794,6 +869,7 @@ define hidden void @four_bytes_interleave_op(ptr noalias nocapture noundef write
; CHECK-LABEL: eight_bytes_same_op:
; CHECK: loop
+; CHECK-NOT: v128.load
; CHECK: i32.load8_u
; CHECK: i32.load8_u
; CHECK: i32.mul
@@ -898,6 +974,7 @@ define hidden void @eight_bytes_same_op(ptr noalias nocapture noundef writeonly
; CHECK-LABEL: eight_bytes_split_op:
; CHECK: loop
+; CHECK-NOT: v128.load
; CHECK: i32.load8_u
; CHECK: i32.load8_u
; CHECK: i32.add
@@ -1002,6 +1079,7 @@ define hidden void @eight_bytes_split_op(ptr noalias nocapture noundef writeonly
; CHECK-LABEL: eight_bytes_interleave_op:
; CHECK: loop
+; CHECK-NOT: v128.load
; CHECK: i32.load8_u
; CHECK: i32.load8_u
; CHECK: i32.add
@@ -1190,22 +1268,47 @@ define hidden void @four_bytes_into_four_ints_same_op(ptr noalias nocapture noun
; CHECK-LABEL: four_bytes_into_four_ints_vary_op:
; CHECK: loop
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.add
-; CHECK: i32.store
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.sub
-; CHECK: i32.store
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.mul
-; CHECK: i32.store
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.and
-; CHECK: i32.store
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i32x4.extend_low_i16x8_u
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i32x4.extend_low_i16x8_u
+; CHECK: i32x4.add
+; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i32x4.extend_low_i16x8_u
+; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i32x4.extend_low_i16x8_u
+; CHECK: i32x4.sub
+; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i32x4.extmul_low_i16x8_u
+; CHECK: v128.and
+; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i32x4.extend_low_i16x8_u
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31
+; CHECK: v128.store
define hidden void @four_bytes_into_four_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) {
%5 = icmp eq i32 %3, 0
br i1 %5, label %6, label %7
@@ -1257,10 +1360,10 @@ define hidden void @four_bytes_into_four_ints_vary_op(ptr noalias nocapture noun
; CHECK-LABEL: scale_uv_row_down2:
; CHECK: loop
-; CHECK: i32.load8_u
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.store8
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK: v128.store
define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) {
%5 = icmp sgt i32 %3, 0
br i1 %5, label %6, label %19
@@ -1288,18 +1391,38 @@ define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 no
; CHECK-LABEL: scale_uv_row_down2_box:
; CHECK: loop
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.shr_u
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.shr_u
-; CHECK: i32.store8
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i16x8.add
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i16x8.add
+; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i16x8.add
+; CHECK: i16x8.add
+; CHECK: i16x8.shr_u
+; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i16x8.add
+; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i16x8.add
+; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_u
+; CHECK: i16x8.add
+; CHECK: i16x8.add
+; CHECK: i16x8.shr_u
+; CHECK: i8x16.shuffle {{.*}} 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30
+; CHECK: v128.store
define hidden void @scale_uv_row_down2_box(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) {
%5 = icmp sgt i32 %3, 0
br i1 %5, label %6, label %54
@@ -1364,14 +1487,16 @@ define hidden void @scale_uv_row_down2_box(ptr nocapture noundef readonly %0, i3
; CHECK-LABEL: scale_uv_row_down2_linear:
; CHECK: loop
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.shr_u
-; CHECK: i32.store8
-; CHECK: i32.load8_u
-; CHECK: i32.load8_u
-; CHECK: i32.shr_u
-; CHECK: i32.store8
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.avgr_u
+; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i8x16.avgr_u
+; CHECK: i8x16.shuffle {{.*}} 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23
+; CHECK: v128.store
define hidden void @scale_uv_row_down2_linear(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) {
%5 = icmp sgt i32 %3, 0
br i1 %5, label %6, label %34
diff --git a/llvm/test/CodeGen/WebAssembly/narrow-simd-mul.ll b/llvm/test/CodeGen/WebAssembly/narrow-simd-mul.ll
index 1f6c960c27aa..310636d4c07d 100644
--- a/llvm/test/CodeGen/WebAssembly/narrow-simd-mul.ll
+++ b/llvm/test/CodeGen/WebAssembly/narrow-simd-mul.ll
@@ -5,71 +5,9 @@ define <8 x i8> @mul_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: mul_v8i8:
; CHECK: .functype mul_v8i8 (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.extract_lane_u $push4=, $0, 0
-; CHECK-NEXT: i8x16.extract_lane_u $push3=, $1, 0
-; CHECK-NEXT: i32.mul $push5=, $pop4, $pop3
-; CHECK-NEXT: i8x16.splat $push6=, $pop5
-; CHECK-NEXT: i8x16.extract_lane_u $push1=, $0, 1
-; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 1
-; CHECK-NEXT: i32.mul $push2=, $pop1, $pop0
-; CHECK-NEXT: i8x16.replace_lane $push7=, $pop6, 1, $pop2
-; CHECK-NEXT: i8x16.extract_lane_u $push9=, $0, 2
-; CHECK-NEXT: i8x16.extract_lane_u $push8=, $1, 2
-; CHECK-NEXT: i32.mul $push10=, $pop9, $pop8
-; CHECK-NEXT: i8x16.replace_lane $push11=, $pop7, 2, $pop10
-; CHECK-NEXT: i8x16.extract_lane_u $push13=, $0, 3
-; CHECK-NEXT: i8x16.extract_lane_u $push12=, $1, 3
-; CHECK-NEXT: i32.mul $push14=, $pop13, $pop12
-; CHECK-NEXT: i8x16.replace_lane $push15=, $pop11, 3, $pop14
-; CHECK-NEXT: i8x16.extract_lane_u $push17=, $0, 4
-; CHECK-NEXT: i8x16.extract_lane_u $push16=, $1, 4
-; CHECK-NEXT: i32.mul $push18=, $pop17, $pop16
-; CHECK-NEXT: i8x16.replace_lane $push19=, $pop15, 4, $pop18
-; CHECK-NEXT: i8x16.extract_lane_u $push21=, $0, 5
-; CHECK-NEXT: i8x16.extract_lane_u $push20=, $1, 5
-; CHECK-NEXT: i32.mul $push22=, $pop21, $pop20
-; CHECK-NEXT: i8x16.replace_lane $push23=, $pop19, 5, $pop22
-; CHECK-NEXT: i8x16.extract_lane_u $push25=, $0, 6
-; CHECK-NEXT: i8x16.extract_lane_u $push24=, $1, 6
-; CHECK-NEXT: i32.mul $push26=, $pop25, $pop24
-; CHECK-NEXT: i8x16.replace_lane $push27=, $pop23, 6, $pop26
-; CHECK-NEXT: i8x16.extract_lane_u $push29=, $0, 7
-; CHECK-NEXT: i8x16.extract_lane_u $push28=, $1, 7
-; CHECK-NEXT: i32.mul $push30=, $pop29, $pop28
-; CHECK-NEXT: i8x16.replace_lane $push31=, $pop27, 7, $pop30
-; CHECK-NEXT: i8x16.extract_lane_u $push33=, $0, 8
-; CHECK-NEXT: i8x16.extract_lane_u $push32=, $1, 8
-; CHECK-NEXT: i32.mul $push34=, $pop33, $pop32
-; CHECK-NEXT: i8x16.replace_lane $push35=, $pop31, 8, $pop34
-; CHECK-NEXT: i8x16.extract_lane_u $push37=, $0, 9
-; CHECK-NEXT: i8x16.extract_lane_u $push36=, $1, 9
-; CHECK-NEXT: i32.mul $push38=, $pop37, $pop36
-; CHECK-NEXT: i8x16.replace_lane $push39=, $pop35, 9, $pop38
-; CHECK-NEXT: i8x16.extract_lane_u $push41=, $0, 10
-; CHECK-NEXT: i8x16.extract_lane_u $push40=, $1, 10
-; CHECK-NEXT: i32.mul $push42=, $pop41, $pop40
-; CHECK-NEXT: i8x16.replace_lane $push43=, $pop39, 10, $pop42
-; CHECK-NEXT: i8x16.extract_lane_u $push45=, $0, 11
-; CHECK-NEXT: i8x16.extract_lane_u $push44=, $1, 11
-; CHECK-NEXT: i32.mul $push46=, $pop45, $pop44
-; CHECK-NEXT: i8x16.replace_lane $push47=, $pop43, 11, $pop46
-; CHECK-NEXT: i8x16.extract_lane_u $push49=, $0, 12
-; CHECK-NEXT: i8x16.extract_lane_u $push48=, $1, 12
-; CHECK-NEXT: i32.mul $push50=, $pop49, $pop48
-; CHECK-NEXT: i8x16.replace_lane $push51=, $pop47, 12, $pop50
-; CHECK-NEXT: i8x16.extract_lane_u $push53=, $0, 13
-; CHECK-NEXT: i8x16.extract_lane_u $push52=, $1, 13
-; CHECK-NEXT: i32.mul $push54=, $pop53, $pop52
-; CHECK-NEXT: i8x16.replace_lane $push55=, $pop51, 13, $pop54
-; CHECK-NEXT: i8x16.extract_lane_u $push57=, $0, 14
-; CHECK-NEXT: i8x16.extract_lane_u $push56=, $1, 14
-; CHECK-NEXT: i32.mul $push58=, $pop57, $pop56
-; CHECK-NEXT: i8x16.replace_lane $push59=, $pop55, 14, $pop58
-; CHECK-NEXT: i8x16.extract_lane_u $push61=, $0, 15
-; CHECK-NEXT: i8x16.extract_lane_u $push60=, $1, 15
-; CHECK-NEXT: i32.mul $push62=, $pop61, $pop60
-; CHECK-NEXT: i8x16.replace_lane $push63=, $pop59, 15, $pop62
-; CHECK-NEXT: return $pop63
+; CHECK-NEXT: i16x8.extmul_low_i8x16_u $push0=, $0, $1
+; CHECK-NEXT: i8x16.shuffle $push1=, $pop0, $1, 0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: return $pop1
%mul = mul <8 x i8> %a, %b
ret <8 x i8> %mul
}