summary | refs | log | tree | commit | diff
path: root/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll')
-rw-r--r-- llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll | 109
1 files changed, 88 insertions, 21 deletions
diff --git a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
index c46747ef3050..349684ff22be 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
@@ -1,16 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
-
-; TODO: Load a element and splat it to a vector could be lowerd to vldrepl
+; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
; A load has more than one user shouldn't be lowered to vldrepl
define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
-; CHECK-LABEL: should_not_be_optimized:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
-; CHECK-NEXT: st.d $a0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: should_not_be_optimized:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: st.w $a2, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3
+; LA32-NEXT: st.w $a0, $a1, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: should_not_be_optimized:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vreplgr2vr.d $vr0, $a0
+; LA64-NEXT: st.d $a0, $a1, 0
+; LA64-NEXT: ret
%tmp = load i64, ptr %ptr
store i64 %tmp, ptr %dst
%tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
@@ -18,12 +29,48 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
ret <2 x i64> %tmp2
}
-define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
-; CHECK-LABEL: vldrepl_d_unaligned_offset:
+define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $a0, $a0, 4
-; CHECK-NEXT: vldrepl.d $vr0, $a0, 0
+; CHECK-NEXT: ld.b $a0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = sext i8 %tmp to i16
+ %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.bu $a0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = zext i8 %tmp to i16
+ %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
+; LA32-LABEL: vldrepl_d_unaligned_offset:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a1, $a0, 4
+; LA32-NEXT: ld.w $a0, $a0, 8
+; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vldrepl_d_unaligned_offset:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $a0, $a0, 4
+; LA64-NEXT: vldrepl.d $vr0, $a0, 0
+; LA64-NEXT: ret
%p = getelementptr i32, ptr %ptr, i32 1
%tmp = load i64, ptr %p
%tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
@@ -102,10 +149,20 @@ define <4 x i32> @vldrepl_w_offset(ptr %ptr) {
}
define <2 x i64> @vldrepl_d(ptr %ptr) {
-; CHECK-LABEL: vldrepl_d:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vldrepl.d $vr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vldrepl_d:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a1, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vldrepl_d:
+; LA64: # %bb.0:
+; LA64-NEXT: vldrepl.d $vr0, $a0, 0
+; LA64-NEXT: ret
%tmp = load i64, ptr %ptr
%tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
%tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> poison, <2 x i32> zeroinitializer
@@ -113,10 +170,20 @@ define <2 x i64> @vldrepl_d(ptr %ptr) {
}
define <2 x i64> @vldrepl_d_offset(ptr %ptr) {
-; CHECK-LABEL: vldrepl_d_offset:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vldrepl.d $vr0, $a0, 264
-; CHECK-NEXT: ret
+; LA32-LABEL: vldrepl_d_offset:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a1, $a0, 264
+; LA32-NEXT: ld.w $a0, $a0, 268
+; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vldrepl_d_offset:
+; LA64: # %bb.0:
+; LA64-NEXT: vldrepl.d $vr0, $a0, 264
+; LA64-NEXT: ret
%p = getelementptr i64, ptr %ptr, i64 33
%tmp = load i64, ptr %p
%tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0