diff options
Diffstat (limited to 'llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll')
| -rw-r--r-- | llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll | 125 |
1 files changed, 104 insertions, 21 deletions
diff --git a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll index 976924bdca68..4aa2bd76ab80 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll @@ -1,16 +1,31 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 -; TODO: Load a element and splat it to a vector could be lowerd to xvldrepl -; A load has more than one user shouldn't be lowered to xvldrepl define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) { -; CHECK-LABEL: should_not_be_optimized: -; CHECK: # %bb.0: -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0 -; CHECK-NEXT: st.d $a0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: should_not_be_optimized: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: st.w $a2, $a1, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 6 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7 +; LA32-NEXT: st.w $a0, $a1, 4 +; LA32-NEXT: ret +; +; LA64-LABEL: should_not_be_optimized: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: xvreplgr2vr.d $xr0, $a0 +; LA64-NEXT: st.d $a0, $a1, 0 +; LA64-NEXT: ret %tmp = load i64, ptr %ptr store i64 %tmp, ptr %dst %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0 @@ -18,12 +33,52 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) { ret <4 x i64> %tmp2 } -define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) { -; CHECK-LABEL: xvldrepl_d_unaligned_offset: +define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) { +; CHECK-LABEL: should_not_be_optimized_sext_load: ; CHECK: # %bb.0: -; CHECK-NEXT: addi.d $a0, $a0, 4 -; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0 +; CHECK-NEXT: ld.b $a0, $a0, 0 +; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0 +; CHECK-NEXT: ret + %tmp = load i8, ptr %ptr + %tmp1 = sext i8 %tmp to i16 + %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0 + %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer + ret <16 x i16> %tmp3 +} + +define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) { +; CHECK-LABEL: should_not_be_optimized_zext_load: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.bu $a0, $a0, 0 +; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0 ; CHECK-NEXT: ret + %tmp = load i8, ptr %ptr + %tmp1 = zext i8 %tmp to i16 + %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0 + %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer + ret <16 x i16> %tmp3 +} + +define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) { +; LA32-LABEL: xvldrepl_d_unaligned_offset: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a1, $a0, 4 +; LA32-NEXT: ld.w $a0, $a0, 8 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7 +; LA32-NEXT: ret +; +; LA64-LABEL: xvldrepl_d_unaligned_offset: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $a0, $a0, 4 +; LA64-NEXT: xvldrepl.d $xr0, $a0, 0 +; LA64-NEXT: ret %p = getelementptr i32, ptr %ptr, i32 1 %tmp = load i64, ptr %p %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0 @@ -103,10 +158,24 @@ define <8 x i32> @xvldrepl_w_offset(ptr %ptr) { define <4 x i64> @xvldrepl_d(ptr %ptr) { -; CHECK-LABEL: xvldrepl_d: -; CHECK: # %bb.0: -; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: xvldrepl_d: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a1, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7 +; LA32-NEXT: ret +; +; LA64-LABEL: xvldrepl_d: +; LA64: # %bb.0: +; LA64-NEXT: xvldrepl.d $xr0, $a0, 0 +; LA64-NEXT: ret %tmp = load i64, ptr %ptr %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0 %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> poison, <4 x i32> zeroinitializer @@ -114,10 +183,24 @@ define <4 x i64> @xvldrepl_d(ptr %ptr) { } define <4 x i64> @xvldrepl_d_offset(ptr %ptr) { -; CHECK-LABEL: xvldrepl_d_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: xvldrepl.d $xr0, $a0, 264 -; CHECK-NEXT: ret +; LA32-LABEL: xvldrepl_d_offset: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a1, $a0, 264 +; LA32-NEXT: ld.w $a0, $a0, 268 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7 +; LA32-NEXT: ret +; +; LA64-LABEL: xvldrepl_d_offset: +; LA64: # %bb.0: +; LA64-NEXT: xvldrepl.d $xr0, $a0, 264 +; LA64-NEXT: ret %p = getelementptr i64, ptr %ptr, i64 33 %tmp = load i64, ptr %p %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0 |
