diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/test/CodeGen/LoongArch | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-formatusers/mingmingl-llvm/samplefdo-profile-format
Diffstat (limited to 'llvm/test/CodeGen/LoongArch')
539 files changed, 11018 insertions, 2932 deletions
diff --git a/llvm/test/CodeGen/LoongArch/bittest.ll b/llvm/test/CodeGen/LoongArch/bittest.ll new file mode 100644 index 000000000000..9cf24dc5f65c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/bittest.ll @@ -0,0 +1,3304 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 -mattr=+d < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 -mattr=+d < %s | FileCheck %s --check-prefixes=CHECK,LA64 + +define signext i32 @bittest_7_i32(i32 signext %a) nounwind { +; LA32-LABEL: bittest_7_i32: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: srli.w $a0, $a0, 7 +; LA32-NEXT: andi $a0, $a0, 1 +; LA32-NEXT: ret +; +; LA64-LABEL: bittest_7_i32: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: bstrpick.d $a0, $a0, 7, 7 +; LA64-NEXT: ret + %shr = lshr i32 %a, 7 + %not = xor i32 %shr, -1 + %and = and i32 %not, 1 + ret i32 %and +} + +define signext i32 @bittest_10_i32(i32 signext %a) nounwind { +; LA32-LABEL: bittest_10_i32: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: srli.w $a0, $a0, 10 +; LA32-NEXT: andi $a0, $a0, 1 +; LA32-NEXT: ret +; +; LA64-LABEL: bittest_10_i32: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: bstrpick.d $a0, $a0, 10, 10 +; LA64-NEXT: ret + %shr = lshr i32 %a, 10 + %not = xor i32 %shr, -1 + %and = and i32 %not, 1 + ret i32 %and +} + +define signext i32 @bittest_11_i32(i32 signext %a) nounwind { +; LA32-LABEL: bittest_11_i32: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: srli.w $a0, $a0, 11 +; LA32-NEXT: andi $a0, $a0, 1 +; LA32-NEXT: ret +; +; LA64-LABEL: bittest_11_i32: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: bstrpick.d $a0, $a0, 11, 11 +; LA64-NEXT: ret + %shr = lshr i32 %a, 11 + %not = xor i32 %shr, -1 + %and = and i32 %not, 1 + ret i32 %and +} + +define signext i32 @bittest_31_i32(i32 signext %a) nounwind { +; LA32-LABEL: bittest_31_i32: +; LA32: 
# %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: srli.w $a0, $a0, 31 +; LA32-NEXT: ret +; +; LA64-LABEL: bittest_31_i32: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 31 +; LA64-NEXT: ret + %shr = lshr i32 %a, 31 + %not = xor i32 %shr, -1 + %and = and i32 %not, 1 + ret i32 %and +} + +define i64 @bittest_7_i64(i64 %a) nounwind { +; LA32-LABEL: bittest_7_i64: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: srli.w $a0, $a0, 7 +; LA32-NEXT: andi $a0, $a0, 1 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: bittest_7_i64: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: bstrpick.d $a0, $a0, 7, 7 +; LA64-NEXT: ret + %shr = lshr i64 %a, 7 + %not = xor i64 %shr, -1 + %and = and i64 %not, 1 + ret i64 %and +} + +define i64 @bittest_10_i64(i64 %a) nounwind { +; LA32-LABEL: bittest_10_i64: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: srli.w $a0, $a0, 10 +; LA32-NEXT: andi $a0, $a0, 1 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: bittest_10_i64: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: bstrpick.d $a0, $a0, 10, 10 +; LA64-NEXT: ret + %shr = lshr i64 %a, 10 + %not = xor i64 %shr, -1 + %and = and i64 %not, 1 + ret i64 %and +} + +define i64 @bittest_11_i64(i64 %a) nounwind { +; LA32-LABEL: bittest_11_i64: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: srli.w $a0, $a0, 11 +; LA32-NEXT: andi $a0, $a0, 1 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: bittest_11_i64: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: bstrpick.d $a0, $a0, 11, 11 +; LA64-NEXT: ret + %shr = lshr i64 %a, 11 + %not = xor i64 %shr, -1 + %and = and i64 %not, 1 + ret i64 %and +} + +define i64 @bittest_31_i64(i64 %a) nounwind { +; LA32-LABEL: bittest_31_i64: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a0, $zero +; LA32-NEXT: srli.w $a0, $a0, 31 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; 
+; LA64-LABEL: bittest_31_i64: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 31 +; LA64-NEXT: ret + %shr = lshr i64 %a, 31 + %not = xor i64 %shr, -1 + %and = and i64 %not, 1 + ret i64 %and +} + +define i64 @bittest_32_i64(i64 %a) nounwind { +; LA32-LABEL: bittest_32_i64: +; LA32: # %bb.0: +; LA32-NEXT: ori $a0, $zero, 1 +; LA32-NEXT: andn $a0, $a0, $a1 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: bittest_32_i64: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: bstrpick.d $a0, $a0, 32, 32 +; LA64-NEXT: ret + %shr = lshr i64 %a, 32 + %not = xor i64 %shr, -1 + %and = and i64 %not, 1 + ret i64 %and +} + +define i64 @bittest_63_i64(i64 %a) nounwind { +; LA32-LABEL: bittest_63_i64: +; LA32: # %bb.0: +; LA32-NEXT: nor $a0, $a1, $zero +; LA32-NEXT: srli.w $a0, $a0, 31 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: ret +; +; LA64-LABEL: bittest_63_i64: +; LA64: # %bb.0: +; LA64-NEXT: nor $a0, $a0, $zero +; LA64-NEXT: srli.d $a0, $a0, 63 +; LA64-NEXT: ret + %shr = lshr i64 %a, 63 + %not = xor i64 %shr, -1 + %and = and i64 %not, 1 + ret i64 %and +} + +define i1 @bittest_constant_by_var_shr_i32(i32 signext %b) nounwind { +; CHECK-LABEL: bittest_constant_by_var_shr_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $a1, 301408 +; CHECK-NEXT: ori $a1, $a1, 722 +; CHECK-NEXT: srl.w $a0, $a1, $a0 +; CHECK-NEXT: andi $a0, $a0, 1 +; CHECK-NEXT: ret + %shl = lshr i32 1234567890, %b + %and = and i32 %shl, 1 + %cmp = icmp ne i32 %and, 0 + ret i1 %cmp +} + +define i1 @bittest_constant_by_var_shl_i32(i32 signext %b) nounwind { +; CHECK-LABEL: bittest_constant_by_var_shl_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $a1, $zero, 1 +; CHECK-NEXT: sll.w $a0, $a1, $a0 +; CHECK-NEXT: lu12i.w $a1, 301408 +; CHECK-NEXT: ori $a1, $a1, 722 +; CHECK-NEXT: and $a0, $a0, $a1 +; CHECK-NEXT: sltu $a0, $zero, $a0 +; CHECK-NEXT: ret + %shl = shl i32 1, %b + %and = and i32 %shl, 1234567890 + %cmp = icmp ne i32 %and, 0 + ret i1 %cmp 
+} + +define i1 @bittest_constant_by_var_shr_i64(i64 %b) nounwind { +; LA32-LABEL: bittest_constant_by_var_shr_i64: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a1, 301408 +; LA32-NEXT: ori $a1, $a1, 722 +; LA32-NEXT: srl.w $a1, $a1, $a0 +; LA32-NEXT: addi.w $a0, $a0, -32 +; LA32-NEXT: slti $a0, $a0, 0 +; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: bittest_constant_by_var_shr_i64: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a1, 301408 +; LA64-NEXT: ori $a1, $a1, 722 +; LA64-NEXT: srl.d $a0, $a1, $a0 +; LA64-NEXT: andi $a0, $a0, 1 +; LA64-NEXT: ret + %shl = lshr i64 1234567890, %b + %and = and i64 %shl, 1 + %cmp = icmp ne i64 %and, 0 + ret i1 %cmp +} + +define i1 @bittest_constant_by_var_shl_i64(i64 %b) nounwind { +; LA32-LABEL: bittest_constant_by_var_shl_i64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $a1, $a0, -32 +; LA32-NEXT: slti $a1, $a1, 0 +; LA32-NEXT: sub.w $a1, $zero, $a1 +; LA32-NEXT: ori $a2, $zero, 1 +; LA32-NEXT: sll.w $a0, $a2, $a0 +; LA32-NEXT: and $a0, $a1, $a0 +; LA32-NEXT: lu12i.w $a1, 301408 +; LA32-NEXT: ori $a1, $a1, 722 +; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: sltu $a0, $zero, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: bittest_constant_by_var_shl_i64: +; LA64: # %bb.0: +; LA64-NEXT: ori $a1, $zero, 1 +; LA64-NEXT: sll.d $a0, $a1, $a0 +; LA64-NEXT: lu12i.w $a1, 301408 +; LA64-NEXT: ori $a1, $a1, 722 +; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: ret + %shl = shl i64 1, %b + %and = and i64 %shl, 1234567890 + %cmp = icmp ne i64 %and, 0 + ret i1 %cmp +} + +define void @bittest_switch(i32 signext %0) { +; LA32-LABEL: bittest_switch: +; LA32: # %bb.0: +; LA32-NEXT: ori $a1, $zero, 31 +; LA32-NEXT: bltu $a1, $a0, .LBB14_3 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ori $a1, $zero, 1 +; LA32-NEXT: sll.w $a0, $a1, $a0 +; LA32-NEXT: lu12i.w $a1, -524285 +; LA32-NEXT: ori $a1, $a1, 768 +; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: beq $a0, $zero, .LBB14_3 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: b bar +; LA32-NEXT: 
.LBB14_3: +; LA32-NEXT: ret +; +; LA64-LABEL: bittest_switch: +; LA64: # %bb.0: +; LA64-NEXT: ori $a1, $zero, 31 +; LA64-NEXT: bltu $a1, $a0, .LBB14_3 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ori $a1, $zero, 1 +; LA64-NEXT: sll.d $a0, $a1, $a0 +; LA64-NEXT: lu12i.w $a1, -524285 +; LA64-NEXT: ori $a1, $a1, 768 +; LA64-NEXT: lu32i.d $a1, 0 +; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: beqz $a0, .LBB14_3 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB14_3: +; LA64-NEXT: ret + switch i32 %0, label %3 [ + i32 8, label %2 + i32 9, label %2 + i32 12, label %2 + i32 13, label %2 + i32 31, label %2 + ] + +2: + tail call void @bar() + br label %3 + +3: + ret void +} + +declare void @bar() + +define signext i32 @bit_10_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_10_z_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: andi $a3, $a0, 1024 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: beq $a3, $zero, .LBB15_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB15_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_z_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1024 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 1024 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_10_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_10_nz_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a3, $a0, 21 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bltz $a3, .LBB16_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB16_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_nz_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 10, 10 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 
%a, 1024 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_11_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_11_z_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a3, $a0, 20 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bgez $a3, .LBB17_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB17_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_z_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 2048 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 2048 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_11_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_11_nz_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a3, $a0, 20 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bltz $a3, .LBB18_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB18_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_nz_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 11, 11 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 2048 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_20_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_20_z_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a3, $a0, 11 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bgez $a3, .LBB19_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB19_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_20_z_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a3, 256 +; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: 
or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 1048576 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_20_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_20_nz_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a3, $a0, 11 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bltz $a3, .LBB20_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB20_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_20_nz_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 20, 20 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 1048576 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_31_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_31_z_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: bgez $a0, .LBB21_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a1, $a2 +; LA32-NEXT: .LBB21_2: +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_z_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrins.d $a0, $zero, 30, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 2147483648 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_31_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_31_nz_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: srli.w $a3, $a0, 31 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bne $a3, $zero, .LBB22_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB22_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_nz_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrins.d $a0, $zero, 30, 0 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz 
$a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 2147483648 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define i64 @bit_10_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_10_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a6, $a0, 1024 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: beq $a6, $zero, .LBB23_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB23_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1024 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 1024 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_10_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_10_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: srli.w $a0, $a0, 10 +; LA32-NEXT: andi $a6, $a0, 1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB24_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB24_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 10, 10 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 1024 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_11_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_11_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a6, $a0, 20 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bgez $a6, .LBB25_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB25_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_z_select_i64: 
+; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 2048 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 2048 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_11_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_11_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: srli.w $a0, $a0, 11 +; LA32-NEXT: andi $a6, $a0, 1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB26_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB26_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 11, 11 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 2048 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_20_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_20_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a6, $a0, 11 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bgez $a6, .LBB27_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB27_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_20_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a3, 256 +; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 1048576 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_20_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_20_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: srli.w $a0, $a0, 20 +; LA32-NEXT: andi $a6, $a0, 1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 
+; LA32-NEXT: bne $a6, $zero, .LBB28_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB28_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_20_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 20, 20 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 1048576 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_31_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_31_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: bgez $a0, .LBB29_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a2, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB29_2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a3, -524288 +; LA64-NEXT: lu32i.d $a3, 0 +; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 2147483648 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_31_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_31_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: srli.w $a6, $a0, 31 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB30_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB30_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 31 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 2147483648 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_32_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: 
bit_32_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a6, $a1, 1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: beq $a6, $zero, .LBB31_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB31_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_32_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: ori $a3, $zero, 0 +; LA64-NEXT: lu32i.d $a3, 1 +; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 4294967296 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_32_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_32_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a6, $a1, 1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB32_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB32_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_32_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 32, 32 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 4294967296 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_55_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_55_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a6, $a1, 8 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bgez $a6, .LBB33_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB33_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_55_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: lu52i.d $a3, $zero, 8 +; LA64-NEXT: and $a0, $a0, $a3 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, 
$a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 36028797018963968 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_55_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_55_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: srli.w $a0, $a1, 23 +; LA32-NEXT: andi $a6, $a0, 1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB34_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB34_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_55_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 55, 55 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 36028797018963968 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_63_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_63_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bgez $a1, .LBB35_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a3, $a5 +; LA32-NEXT: .LBB35_2: +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: ret +; +; LA64-LABEL: bit_63_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrins.d $a0, $zero, 62, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 9223372036854775808 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_63_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_63_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: srli.w $a6, $a1, 31 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB36_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB36_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_63_nz_select_i64: +; LA64: # %bb.0: +; 
LA64-NEXT: srli.d $a0, $a0, 63 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 9223372036854775808 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define void @bit_10_z_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_10_z_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 1024 +; LA32-NEXT: bne $a0, $zero, .LBB37_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB37_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_z_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1024 +; LA64-NEXT: bnez $a0, .LBB37_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB37_2: +; LA64-NEXT: ret + %2 = and i32 %0, 1024 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_10_nz_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_10_nz_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 1024 +; LA32-NEXT: beq $a0, $zero, .LBB38_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB38_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_nz_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1024 +; LA64-NEXT: beqz $a0, .LBB38_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB38_2: +; LA64-NEXT: ret + %2 = and i32 %0, 1024 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_11_z_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_11_z_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 20 +; LA32-NEXT: bltz $a0, .LBB39_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB39_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_z_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 52 +; LA64-NEXT: bltz $a0, .LBB39_2 +; LA64-NEXT: # 
%bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB39_2: +; LA64-NEXT: ret + %2 = and i32 %0, 2048 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_11_nz_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_11_nz_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 20 +; LA32-NEXT: bgez $a0, .LBB40_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB40_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_nz_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 52 +; LA64-NEXT: bgez $a0, .LBB40_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB40_2: +; LA64-NEXT: ret + %2 = and i32 %0, 2048 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_24_z_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_24_z_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 7 +; LA32-NEXT: bltz $a0, .LBB41_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB41_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_24_z_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 39 +; LA64-NEXT: bltz $a0, .LBB41_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB41_2: +; LA64-NEXT: ret + %2 = and i32 %0, 16777216 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_24_nz_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_24_nz_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 7 +; LA32-NEXT: bgez $a0, .LBB42_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB42_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_24_nz_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 39 +; LA64-NEXT: bgez $a0, .LBB42_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: 
pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB42_2: +; LA64-NEXT: ret + %2 = and i32 %0, 16777216 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_31_z_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_31_z_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: bltz $a0, .LBB43_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB43_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_z_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrins.d $a0, $zero, 30, 0 +; LA64-NEXT: bnez $a0, .LBB43_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB43_2: +; LA64-NEXT: ret + %2 = and i32 %0, 2147483648 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_31_nz_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_31_nz_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: bgez $a0, .LBB44_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB44_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_nz_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrins.d $a0, $zero, 30, 0 +; LA64-NEXT: beqz $a0, .LBB44_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB44_2: +; LA64-NEXT: ret + %2 = and i32 %0, 2147483648 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_10_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_10_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 1024 +; LA32-NEXT: bne $a0, $zero, .LBB45_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB45_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1024 +; LA64-NEXT: bnez $a0, .LBB45_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: 
.LBB45_2: +; LA64-NEXT: ret + %2 = and i64 %0, 1024 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_10_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_10_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 1024 +; LA32-NEXT: beq $a0, $zero, .LBB46_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB46_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1024 +; LA64-NEXT: beqz $a0, .LBB46_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB46_2: +; LA64-NEXT: ret + %2 = and i64 %0, 1024 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_11_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_11_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 20 +; LA32-NEXT: bltz $a0, .LBB47_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB47_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 52 +; LA64-NEXT: bltz $a0, .LBB47_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB47_2: +; LA64-NEXT: ret + %2 = and i64 %0, 2048 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_11_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_11_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 20 +; LA32-NEXT: bgez $a0, .LBB48_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB48_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 52 +; LA64-NEXT: bgez $a0, .LBB48_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB48_2: +; LA64-NEXT: ret + %2 = and i64 
%0, 2048 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_24_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_24_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 7 +; LA32-NEXT: bltz $a0, .LBB49_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB49_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_24_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 39 +; LA64-NEXT: bltz $a0, .LBB49_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB49_2: +; LA64-NEXT: ret + %2 = and i64 %0, 16777216 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_24_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_24_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 7 +; LA32-NEXT: bgez $a0, .LBB50_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB50_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_24_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 39 +; LA64-NEXT: bgez $a0, .LBB50_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB50_2: +; LA64-NEXT: ret + %2 = and i64 %0, 16777216 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_31_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_31_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: bltz $a0, .LBB51_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB51_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 32 +; LA64-NEXT: bltz $a0, .LBB51_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB51_2: +; LA64-NEXT: ret + %2 = and i64 %0, 2147483648 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + 
tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_31_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_31_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: bgez $a0, .LBB52_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB52_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 32 +; LA64-NEXT: bgez $a0, .LBB52_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB52_2: +; LA64-NEXT: ret + %2 = and i64 %0, 2147483648 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_32_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_32_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a1, 1 +; LA32-NEXT: bne $a0, $zero, .LBB53_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB53_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_32_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 31 +; LA64-NEXT: bltz $a0, .LBB53_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB53_2: +; LA64-NEXT: ret + %2 = and i64 %0, 4294967296 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_32_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_32_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a1, 1 +; LA32-NEXT: beq $a0, $zero, .LBB54_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB54_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_32_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 31 +; LA64-NEXT: bgez $a0, .LBB54_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB54_2: +; LA64-NEXT: ret + %2 = and i64 %0, 4294967296 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + 
+define void @bit_62_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_62_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a1, 1 +; LA32-NEXT: bltz $a0, .LBB55_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB55_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_62_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 1 +; LA64-NEXT: bltz $a0, .LBB55_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB55_2: +; LA64-NEXT: ret + %2 = and i64 %0, 4611686018427387904 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_62_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_62_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a1, 1 +; LA32-NEXT: bgez $a0, .LBB56_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB56_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_62_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 1 +; LA64-NEXT: bgez $a0, .LBB56_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB56_2: +; LA64-NEXT: ret + %2 = and i64 %0, 4611686018427387904 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_63_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_63_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: bltz $a1, .LBB57_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB57_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_63_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: bltz $a0, .LBB57_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB57_2: +; LA64-NEXT: ret + %2 = and i64 %0, 9223372036854775808 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_63_nz_branch_i64(i64 %0) { +; LA32-LABEL: 
bit_63_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: bgez $a1, .LBB58_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB58_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_63_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: bgez $a0, .LBB58_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB58_2: +; LA64-NEXT: ret + %2 = and i64 %0, 9223372036854775808 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define signext i32 @bit_10_1_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_10_1_z_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: andi $a3, $a0, 1023 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: beq $a3, $zero, .LBB59_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB59_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_1_z_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1023 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 1023 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_10_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_10_1_nz_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: andi $a3, $a0, 1023 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bne $a3, $zero, .LBB60_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB60_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_1_nz_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1023 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 1023 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_11_1_z_select_i32(i32 signext %a, i32 signext 
%b, i32 signext %c) { +; LA32-LABEL: bit_11_1_z_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: andi $a3, $a0, 2047 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: beq $a3, $zero, .LBB61_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB61_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_1_z_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 2047 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 2047 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_11_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_11_1_nz_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: andi $a3, $a0, 2047 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bne $a3, $zero, .LBB62_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB62_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_1_nz_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 2047 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 2047 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_16_1_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_16_1_z_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a3, $a0, 16 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: beq $a3, $zero, .LBB63_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB63_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_16_1_z_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 65535 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 
%c + ret i32 %3 +} + +define signext i32 @bit_16_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_16_1_nz_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a3, $a0, 16 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bne $a3, $zero, .LBB64_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB64_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_16_1_nz_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 65535 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_20_1_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_20_1_z_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a3, $a0, 12 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: beq $a3, $zero, .LBB65_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB65_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_20_1_z_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 19, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 1048575 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_20_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_20_1_nz_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a3, $a0, 12 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bne $a3, $zero, .LBB66_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB66_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_20_1_nz_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 19, 0 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or 
$a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 1048575 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_31_1_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_31_1_z_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a3, $a0, 1 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: beq $a3, $zero, .LBB67_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB67_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_1_z_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 30, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 2147483647 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_31_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_31_1_nz_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a3, $a0, 1 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bne $a3, $zero, .LBB68_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: .LBB68_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_1_nz_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 30, 0 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 2147483647 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_32_1_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_32_1_z_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: beq $a0, $zero, .LBB69_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a1, $a2 +; LA32-NEXT: .LBB69_2: +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: bit_32_1_z_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; 
LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 4294967295 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_32_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; LA32-LABEL: bit_32_1_nz_select_i32: +; LA32: # %bb.0: +; LA32-NEXT: bne $a0, $zero, .LBB70_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a1, $a2 +; LA32-NEXT: .LBB70_2: +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: bit_32_1_nz_select_i32: +; LA64: # %bb.0: +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i32 %a, 4294967295 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define i64 @bit_10_1_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_10_1_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a6, $a0, 1023 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: beq $a6, $zero, .LBB71_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB71_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_1_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1023 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 1023 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_10_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_10_1_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a6, $a0, 1023 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB72_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB72_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_1_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, 
$a0, 1023 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 1023 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_11_1_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_11_1_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a6, $a0, 2047 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: beq $a6, $zero, .LBB73_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB73_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_1_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 2047 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 2047 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_11_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_11_1_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a6, $a0, 2047 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB74_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB74_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_1_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 2047 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 2047 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_16_1_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_16_1_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a6, $a0, 16 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: beq $a6, $zero, .LBB75_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, 
$a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB75_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_16_1_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 65535 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +; Selects %b when (%a & 65535) != 0, else %c (low-16-bit counterpart of bit_16_1_z_select_i64). NOTE(review): CHECK lines mirror the sibling low-bit select tests; regenerate with utils/update_llc_test_checks.py to confirm. +define i64 @bit_16_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_16_1_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a6, $a0, 16 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB76_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB76_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_16_1_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 65535 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_20_1_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_20_1_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a6, $a0, 12 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: beq $a6, $zero, .LBB77_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB77_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_20_1_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 19, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 1048575 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_20_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_20_1_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w
$a6, $a0, 12 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB78_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB78_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_20_1_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 19, 0 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 1048575 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_31_1_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_31_1_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a6, $a0, 1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: beq $a6, $zero, .LBB79_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB79_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_1_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 30, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 2147483647 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_31_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_31_1_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a6, $a0, 1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB80_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB80_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_1_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 30, 0 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 2147483647 + %2 = icmp ne i64 %1, 0 
+ %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_32_1_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_32_1_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: beq $a0, $zero, .LBB81_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a2, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB81_2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: bit_32_1_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 4294967295 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_32_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_32_1_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: bne $a0, $zero, .LBB82_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a2, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB82_2: +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: bit_32_1_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 4294967295 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_55_1_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_55_1_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a6, 2047 +; LA32-NEXT: ori $a6, $a6, 4095 +; LA32-NEXT: and $a1, $a1, $a6 +; LA32-NEXT: or $a6, $a0, $a1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: beq $a6, $zero, .LBB83_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB83_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_55_1_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, 
$a0, 54, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 36028797018963967 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_55_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_55_1_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a6, 2047 +; LA32-NEXT: ori $a6, $a6, 4095 +; LA32-NEXT: and $a1, $a1, $a6 +; LA32-NEXT: or $a6, $a0, $a1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB84_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB84_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_55_1_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 54, 0 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 36028797018963967 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_63_1_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_63_1_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a6, 524287 +; LA32-NEXT: ori $a6, $a6, 4095 +; LA32-NEXT: and $a1, $a1, $a6 +; LA32-NEXT: or $a6, $a0, $a1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: beq $a6, $zero, .LBB85_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB85_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_63_1_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 62, 0 +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 9223372036854775807 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_63_1_nz_select_i64(i64 %a, i64 
%b, i64 %c) { +; LA32-LABEL: bit_63_1_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a6, 524287 +; LA32-NEXT: ori $a6, $a6, 4095 +; LA32-NEXT: and $a1, $a1, $a6 +; LA32-NEXT: or $a6, $a0, $a1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB86_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB86_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_63_1_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: bstrpick.d $a0, $a0, 62, 0 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 9223372036854775807 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_64_1_z_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_64_1_z_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: or $a6, $a0, $a1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: beq $a6, $zero, .LBB87_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB87_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_64_1_z_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: sltui $a0, $a0, 1 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 18446744073709551615 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_64_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; LA32-LABEL: bit_64_1_nz_select_i64: +; LA32: # %bb.0: +; LA32-NEXT: or $a6, $a0, $a1 +; LA32-NEXT: move $a1, $a3 +; LA32-NEXT: move $a0, $a2 +; LA32-NEXT: bne $a6, $zero, .LBB88_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: move $a0, $a4 +; LA32-NEXT: move $a1, $a5 +; LA32-NEXT: .LBB88_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_64_1_nz_select_i64: +; LA64: # %bb.0: +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: masknez $a2, $a2, $a0 +; LA64-NEXT: 
maskeqz $a0, $a1, $a0 +; LA64-NEXT: or $a0, $a0, $a2 +; LA64-NEXT: ret + %1 = and i64 %a, 18446744073709551615 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define void @bit_10_1_z_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_10_1_z_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 1023 +; LA32-NEXT: beq $a0, $zero, .LBB89_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB89_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_10_1_z_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1023 +; LA64-NEXT: beqz $a0, .LBB89_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB89_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i32 %0, 1023 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_10_1_nz_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_10_1_nz_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 1023 +; LA32-NEXT: beq $a0, $zero, .LBB90_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB90_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_1_nz_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1023 +; LA64-NEXT: beqz $a0, .LBB90_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB90_2: +; LA64-NEXT: ret + %2 = and i32 %0, 1023 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_11_1_z_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_11_1_z_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 2047 +; LA32-NEXT: beq $a0, $zero, .LBB91_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB91_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_11_1_z_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 2047 +; LA64-NEXT: beqz $a0, .LBB91_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB91_2: +; 
LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i32 %0, 2047 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_11_1_nz_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_11_1_nz_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 2047 +; LA32-NEXT: beq $a0, $zero, .LBB92_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB92_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_1_nz_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 2047 +; LA64-NEXT: beqz $a0, .LBB92_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB92_2: +; LA64-NEXT: ret + %2 = and i32 %0, 2047 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_16_1_z_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_16_1_z_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 16 +; LA32-NEXT: beq $a0, $zero, .LBB93_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB93_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_16_1_z_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 48 +; LA64-NEXT: beqz $a0, .LBB93_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB93_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i32 %0, 65535 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_16_1_nz_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_16_1_nz_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 16 +; LA32-NEXT: beq $a0, $zero, .LBB94_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB94_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_16_1_nz_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 48 +; LA64-NEXT: beqz $a0, .LBB94_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, 
%call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB94_2: +; LA64-NEXT: ret + %2 = and i32 %0, 65535 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_24_1_z_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_24_1_z_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 8 +; LA32-NEXT: beq $a0, $zero, .LBB95_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB95_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_24_1_z_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 40 +; LA64-NEXT: beqz $a0, .LBB95_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB95_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i32 %0, 16777215 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_24_1_nz_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_24_1_nz_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 8 +; LA32-NEXT: beq $a0, $zero, .LBB96_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB96_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_24_1_nz_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 40 +; LA64-NEXT: beqz $a0, .LBB96_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB96_2: +; LA64-NEXT: ret + %2 = and i32 %0, 16777215 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_31_1_z_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_31_1_z_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 1 +; LA32-NEXT: beq $a0, $zero, .LBB97_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB97_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_31_1_z_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 33 +; LA64-NEXT: beqz $a0, .LBB97_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: 
ret +; LA64-NEXT: .LBB97_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i32 %0, 2147483647 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_31_1_nz_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_31_1_nz_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 1 +; LA32-NEXT: beq $a0, $zero, .LBB98_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB98_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_1_nz_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 33 +; LA64-NEXT: beqz $a0, .LBB98_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB98_2: +; LA64-NEXT: ret + %2 = and i32 %0, 2147483647 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_32_1_z_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_32_1_z_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: beq $a0, $zero, .LBB99_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB99_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_32_1_z_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: beqz $a0, .LBB99_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB99_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i32 %0, 4294967295 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_32_1_nz_branch_i32(i32 signext %0) { +; LA32-LABEL: bit_32_1_nz_branch_i32: +; LA32: # %bb.0: +; LA32-NEXT: beq $a0, $zero, .LBB100_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB100_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_32_1_nz_branch_i32: +; LA64: # %bb.0: +; LA64-NEXT: beqz $a0, .LBB100_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB100_2: +; LA64-NEXT: ret + %2 = and 
i32 %0, 4294967295 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_10_1_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_10_1_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 1023 +; LA32-NEXT: beq $a0, $zero, .LBB101_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB101_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_10_1_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1023 +; LA64-NEXT: beqz $a0, .LBB101_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB101_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i64 %0, 1023 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_10_1_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_10_1_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 1023 +; LA32-NEXT: beq $a0, $zero, .LBB102_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB102_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_10_1_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 1023 +; LA64-NEXT: beqz $a0, .LBB102_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB102_2: +; LA64-NEXT: ret + %2 = and i64 %0, 1023 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_11_1_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_11_1_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 2047 +; LA32-NEXT: beq $a0, $zero, .LBB103_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB103_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_11_1_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 2047 +; LA64-NEXT: beqz $a0, .LBB103_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB103_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = 
and i64 %0, 2047 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_11_1_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_11_1_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: andi $a0, $a0, 2047 +; LA32-NEXT: beq $a0, $zero, .LBB104_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB104_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_11_1_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: andi $a0, $a0, 2047 +; LA64-NEXT: beqz $a0, .LBB104_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB104_2: +; LA64-NEXT: ret + %2 = and i64 %0, 2047 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_16_1_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_16_1_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 16 +; LA32-NEXT: beq $a0, $zero, .LBB105_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB105_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_16_1_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 48 +; LA64-NEXT: beqz $a0, .LBB105_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB105_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i64 %0, 65535 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_16_1_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_16_1_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 16 +; LA32-NEXT: beq $a0, $zero, .LBB106_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB106_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_16_1_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 48 +; LA64-NEXT: beqz $a0, .LBB106_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB106_2: +; LA64-NEXT: ret + %2 = 
and i64 %0, 65535 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_24_1_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_24_1_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 8 +; LA32-NEXT: beq $a0, $zero, .LBB107_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB107_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_24_1_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 40 +; LA64-NEXT: beqz $a0, .LBB107_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB107_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i64 %0, 16777215 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_24_1_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_24_1_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 8 +; LA32-NEXT: beq $a0, $zero, .LBB108_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB108_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_24_1_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 40 +; LA64-NEXT: beqz $a0, .LBB108_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB108_2: +; LA64-NEXT: ret + %2 = and i64 %0, 16777215 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_31_1_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_31_1_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 1 +; LA32-NEXT: beq $a0, $zero, .LBB109_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB109_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_31_1_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 33 +; LA64-NEXT: beqz $a0, .LBB109_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB109_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 
= and i64 %0, 2147483647 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_31_1_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_31_1_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a0, $a0, 1 +; LA32-NEXT: beq $a0, $zero, .LBB110_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB110_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_31_1_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 33 +; LA64-NEXT: beqz $a0, .LBB110_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB110_2: +; LA64-NEXT: ret + %2 = and i64 %0, 2147483647 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_32_1_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_32_1_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: beq $a0, $zero, .LBB111_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB111_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_32_1_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 32 +; LA64-NEXT: beqz $a0, .LBB111_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB111_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i64 %0, 4294967295 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_32_1_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_32_1_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: beq $a0, $zero, .LBB112_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB112_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_32_1_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 32 +; LA64-NEXT: beqz $a0, .LBB112_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB112_2: +; LA64-NEXT: ret + %2 = and i64 %0, 4294967295 + %3 = icmp ne i64 %2, 0 + 
br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_62_1_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_62_1_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 262143 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: and $a1, $a1, $a2 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: beq $a0, $zero, .LBB113_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB113_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_62_1_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 2 +; LA64-NEXT: beqz $a0, .LBB113_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB113_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i64 %0, 4611686018427387903 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_62_1_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_62_1_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 262143 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: and $a1, $a1, $a2 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: beq $a0, $zero, .LBB114_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB114_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_62_1_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 2 +; LA64-NEXT: beqz $a0, .LBB114_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB114_2: +; LA64-NEXT: ret + %2 = and i64 %0, 4611686018427387903 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_63_1_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_63_1_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 524287 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: and $a1, $a1, $a2 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: beq $a0, $zero, .LBB115_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB115_2: +; LA32-NEXT: b 
bar +; +; LA64-LABEL: bit_63_1_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 1 +; LA64-NEXT: beqz $a0, .LBB115_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB115_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i64 %0, 9223372036854775807 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_63_1_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_63_1_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a2, 524287 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: and $a1, $a1, $a2 +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: beq $a0, $zero, .LBB116_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB116_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_63_1_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a0, $a0, 1 +; LA64-NEXT: beqz $a0, .LBB116_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB116_2: +; LA64-NEXT: ret + %2 = and i64 %0, 9223372036854775807 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_64_1_z_branch_i64(i64 %0) { +; LA32-LABEL: bit_64_1_z_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: beq $a0, $zero, .LBB117_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: ret +; LA32-NEXT: .LBB117_2: +; LA32-NEXT: b bar +; +; LA64-LABEL: bit_64_1_z_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: beqz $a0, .LBB117_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: ret +; LA64-NEXT: .LBB117_2: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 + %2 = and i64 %0, 18446744073709551615 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_64_1_nz_branch_i64(i64 %0) { +; LA32-LABEL: bit_64_1_nz_branch_i64: +; LA32: # %bb.0: +; LA32-NEXT: or $a0, $a0, $a1 +; LA32-NEXT: beq $a0, 
$zero, .LBB118_2 +; LA32-NEXT: # %bb.1: +; LA32-NEXT: b bar +; LA32-NEXT: .LBB118_2: +; LA32-NEXT: ret +; +; LA64-LABEL: bit_64_1_nz_branch_i64: +; LA64: # %bb.0: +; LA64-NEXT: beqz $a0, .LBB118_2 +; LA64-NEXT: # %bb.1: +; LA64-NEXT: pcaddu18i $t8, %call36(bar) +; LA64-NEXT: jr $t8 +; LA64-NEXT: .LBB118_2: +; LA64-NEXT: ret + %2 = and i64 %0, 18446744073709551615 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-preserve-most.ll b/llvm/test/CodeGen/LoongArch/calling-conv-preserve-most.ll new file mode 100644 index 000000000000..26c53d4ef824 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/calling-conv-preserve-most.ll @@ -0,0 +1,321 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=loongarch32 < %s | FileCheck %s -check-prefix=LA32 +; RUN: llc -mtriple=loongarch64 < %s | FileCheck %s -check-prefix=LA64 + +; Check the PreserveMost calling convention works. 
+ +declare void @standard_cc_func() +declare preserve_mostcc void @preserve_mostcc_func() + +define preserve_mostcc void @preserve_mostcc1() nounwind { +; LA32-LABEL: preserve_mostcc1: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -64 +; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill +; LA32-NEXT: st.w $a0, $sp, 56 # 4-byte Folded Spill +; LA32-NEXT: st.w $a1, $sp, 52 # 4-byte Folded Spill +; LA32-NEXT: st.w $a2, $sp, 48 # 4-byte Folded Spill +; LA32-NEXT: st.w $a3, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $a4, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: st.w $a5, $sp, 36 # 4-byte Folded Spill +; LA32-NEXT: st.w $a6, $sp, 32 # 4-byte Folded Spill +; LA32-NEXT: st.w $a7, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $t4, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: st.w $t5, $sp, 20 # 4-byte Folded Spill +; LA32-NEXT: st.w $t6, $sp, 16 # 4-byte Folded Spill +; LA32-NEXT: st.w $t7, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl standard_cc_func +; LA32-NEXT: ld.w $t7, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: ld.w $t6, $sp, 16 # 4-byte Folded Reload +; LA32-NEXT: ld.w $t5, $sp, 20 # 4-byte Folded Reload +; LA32-NEXT: ld.w $t4, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $a7, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: ld.w $a6, $sp, 32 # 4-byte Folded Reload +; LA32-NEXT: ld.w $a5, $sp, 36 # 4-byte Folded Reload +; LA32-NEXT: ld.w $a4, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $a3, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: ld.w $a2, $sp, 48 # 4-byte Folded Reload +; LA32-NEXT: ld.w $a1, $sp, 52 # 4-byte Folded Reload +; LA32-NEXT: ld.w $a0, $sp, 56 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 64 +; LA32-NEXT: ret +; +; LA64-LABEL: preserve_mostcc1: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -112 +; LA64-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill +; LA64-NEXT: st.d $a0, $sp, 96 # 8-byte Folded Spill +; LA64-NEXT: st.d $a1, $sp, 88 # 8-byte 
Folded Spill +; LA64-NEXT: st.d $a2, $sp, 80 # 8-byte Folded Spill +; LA64-NEXT: st.d $a3, $sp, 72 # 8-byte Folded Spill +; LA64-NEXT: st.d $a4, $sp, 64 # 8-byte Folded Spill +; LA64-NEXT: st.d $a5, $sp, 56 # 8-byte Folded Spill +; LA64-NEXT: st.d $a6, $sp, 48 # 8-byte Folded Spill +; LA64-NEXT: st.d $a7, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $t4, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: st.d $t5, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $t6, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: st.d $t7, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, %call36(standard_cc_func) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $t7, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: ld.d $t6, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $t5, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: ld.d $t4, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $a7, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: ld.d $a6, $sp, 48 # 8-byte Folded Reload +; LA64-NEXT: ld.d $a5, $sp, 56 # 8-byte Folded Reload +; LA64-NEXT: ld.d $a4, $sp, 64 # 8-byte Folded Reload +; LA64-NEXT: ld.d $a3, $sp, 72 # 8-byte Folded Reload +; LA64-NEXT: ld.d $a2, $sp, 80 # 8-byte Folded Reload +; LA64-NEXT: ld.d $a1, $sp, 88 # 8-byte Folded Reload +; LA64-NEXT: ld.d $a0, $sp, 96 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 112 +; LA64-NEXT: ret +entry: + call void @standard_cc_func() + ret void +} + +define preserve_mostcc void @preserve_mostcc2() nounwind { +; LA32-LABEL: preserve_mostcc2: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl preserve_mostcc_func +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: preserve_mostcc2: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: pcaddu18i $ra, 
%call36(preserve_mostcc_func) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + call preserve_mostcc void @preserve_mostcc_func() + ret void +} + +define void @preserve_mostcc3() nounwind { +; LA32-LABEL: preserve_mostcc3: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 4 # 4-byte Folded Spill +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: move $a0, $t0 +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: move $a1, $t1 +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: move $a2, $t2 +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: move $a3, $t3 +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: move $a4, $t8 +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: bl preserve_mostcc_func +; LA32-NEXT: move $t0, $a0 +; LA32-NEXT: move $t1, $a1 +; LA32-NEXT: move $t2, $a2 +; LA32-NEXT: move $t3, $a3 +; LA32-NEXT: move $t8, $a4 +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: ld.w $s1, $sp, 4 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: preserve_mostcc3: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -96 +; LA64-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 80 # 8-byte Folded Spill +; LA64-NEXT: st.d $s1, $sp, 72 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs0, $sp, 64 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs1, $sp, 56 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs2, $sp, 48 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs3, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs4, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs5, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs6, 
$sp, 16 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: move $a0, $t0 +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: move $a1, $t1 +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: move $a2, $t2 +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: move $a3, $t3 +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: move $a4, $t8 +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: pcaddu18i $ra, %call36(preserve_mostcc_func) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: move $t0, $a0 +; LA64-NEXT: move $t1, $a1 +; LA64-NEXT: move $t2, $a2 +; LA64-NEXT: move $t3, $a3 +; LA64-NEXT: move $t8, $a4 +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs6, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs5, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs4, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs3, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs2, $sp, 48 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs1, $sp, 56 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs0, $sp, 64 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s1, $sp, 72 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 80 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 96 +; LA64-NEXT: ret + %1 = call i32 asm sideeffect "", "={r12}"() nounwind + %2 = call i32 asm sideeffect "", "={r13}"() nounwind + %3 = call i32 asm sideeffect "", "={r14}"() nounwind + %4 = call i32 asm sideeffect "", "={r15}"() nounwind + %5 = call i32 asm sideeffect "", "={r20}"() nounwind + %6 = call i32 asm sideeffect "", "={r23}"() nounwind + %7 = call i32 asm sideeffect "", "={r24}"() nounwind + call preserve_mostcc void @preserve_mostcc_func() + call void asm sideeffect "", "{r12},{r13},{r14},{r15},{r20},{r23},{r24}"(i32 %1, i32 %2, i32 %3, i32 %4, 
i32 %5, i32 %6, i32 %7) + ret void +} + +define void @preserve_mostcc4() nounwind { +; LA32-LABEL: preserve_mostcc4: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -32 +; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 4 # 4-byte Folded Spill +; LA32-NEXT: st.w $s5, $sp, 0 # 4-byte Folded Spill +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: move $fp, $t0 +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: move $s2, $t1 +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: move $s3, $t2 +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: move $s4, $t3 +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: move $s5, $t8 +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: bl standard_cc_func +; LA32-NEXT: move $t0, $fp +; LA32-NEXT: move $t1, $s2 +; LA32-NEXT: move $t2, $s3 +; LA32-NEXT: move $t3, $s4 +; LA32-NEXT: move $t8, $s5 +; LA32-NEXT: #APP +; LA32-NEXT: #NO_APP +; LA32-NEXT: ld.w $s5, $sp, 0 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s4, $sp, 4 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 16 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: ret +; +; LA64-LABEL: preserve_mostcc4: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -80 +; LA64-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64-NEXT: st.d 
$s1, $sp, 48 # 8-byte Folded Spill +; LA64-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $s5, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: move $fp, $t0 +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: move $s2, $t1 +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: move $s3, $t2 +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: move $s4, $t3 +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: move $s5, $t8 +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: pcaddu18i $ra, %call36(standard_cc_func) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: move $t0, $fp +; LA64-NEXT: move $t1, $s2 +; LA64-NEXT: move $t2, $s3 +; LA64-NEXT: move $t3, $s4 +; LA64-NEXT: move $t8, $s5 +; LA64-NEXT: #APP +; LA64-NEXT: #NO_APP +; LA64-NEXT: ld.d $s5, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 80 +; LA64-NEXT: ret + %1 = call i32 asm sideeffect "", "={r12}"() nounwind + %2 = call i32 asm sideeffect "", "={r13}"() nounwind + %3 = call i32 asm sideeffect "", "={r14}"() nounwind + %4 = call i32 asm sideeffect "", "={r15}"() nounwind + %5 = call i32 asm sideeffect "", "={r20}"() nounwind + %6 = call i32 asm sideeffect "", "={r23}"() nounwind + %7 = call i32 asm sideeffect "", "={r24}"() nounwind + call void @standard_cc_func() + call void asm sideeffect "", "{r12},{r13},{r14},{r15},{r20},{r23},{r24}"(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) + ret void +} 
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll index cff348493421..713af3fd9c84 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll @@ -263,8 +263,7 @@ define i1 @fcmp_fast_olt(double %a, double %b, i1 %c) nounwind { ; LA32-NEXT: movgr2fr.w $fa1, $zero ; LA32-NEXT: movgr2frh.w $fa1, $zero ; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 -; LA32-NEXT: movcf2gr $a1, $fcc0 -; LA32-NEXT: bnez $a1, .LBB16_2 +; LA32-NEXT: bcnez $fcc0, .LBB16_2 ; LA32-NEXT: # %bb.1: # %if.then ; LA32-NEXT: ret ; LA32-NEXT: .LBB16_2: # %if.else @@ -276,8 +275,7 @@ define i1 @fcmp_fast_olt(double %a, double %b, i1 %c) nounwind { ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.d $fa1, $zero ; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 -; LA64-NEXT: movcf2gr $a1, $fcc0 -; LA64-NEXT: bnez $a1, .LBB16_2 +; LA64-NEXT: bcnez $fcc0, .LBB16_2 ; LA64-NEXT: # %bb.1: # %if.then ; LA64-NEXT: ret ; LA64-NEXT: .LBB16_2: # %if.else @@ -300,9 +298,7 @@ define i1 @fcmp_fast_oeq(double %a, double %b, i1 %c) nounwind { ; LA32-NEXT: movgr2fr.w $fa1, $zero ; LA32-NEXT: movgr2frh.w $fa1, $zero ; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a1, $fcc0 -; LA32-NEXT: xori $a1, $a1, 1 -; LA32-NEXT: bnez $a1, .LBB17_2 +; LA32-NEXT: bceqz $fcc0, .LBB17_2 ; LA32-NEXT: # %bb.1: # %if.then ; LA32-NEXT: ret ; LA32-NEXT: .LBB17_2: # %if.else @@ -313,9 +309,7 @@ define i1 @fcmp_fast_oeq(double %a, double %b, i1 %c) nounwind { ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.d $fa1, $zero ; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a1, $fcc0 -; LA64-NEXT: xori $a1, $a1, 1 -; LA64-NEXT: bnez $a1, .LBB17_2 +; LA64-NEXT: bceqz $fcc0, .LBB17_2 ; LA64-NEXT: # %bb.1: # %if.then ; LA64-NEXT: ret ; LA64-NEXT: .LBB17_2: # %if.else diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll index 
8b682ecac50f..4a97f693fafd 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll @@ -262,8 +262,7 @@ define i1 @fcmp_fast_olt(float %a, float %b, i1 %c) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: movgr2fr.w $fa1, $zero ; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 -; LA32-NEXT: movcf2gr $a1, $fcc0 -; LA32-NEXT: bnez $a1, .LBB16_2 +; LA32-NEXT: bcnez $fcc0, .LBB16_2 ; LA32-NEXT: # %bb.1: # %if.then ; LA32-NEXT: ret ; LA32-NEXT: .LBB16_2: # %if.else @@ -275,8 +274,7 @@ define i1 @fcmp_fast_olt(float %a, float %b, i1 %c) nounwind { ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.w $fa1, $zero ; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 -; LA64-NEXT: movcf2gr $a1, $fcc0 -; LA64-NEXT: bnez $a1, .LBB16_2 +; LA64-NEXT: bcnez $fcc0, .LBB16_2 ; LA64-NEXT: # %bb.1: # %if.then ; LA64-NEXT: ret ; LA64-NEXT: .LBB16_2: # %if.else @@ -298,9 +296,7 @@ define i1 @fcmp_fast_oeq(float %a, float %b, i1 %c) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: movgr2fr.w $fa1, $zero ; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a1, $fcc0 -; LA32-NEXT: xori $a1, $a1, 1 -; LA32-NEXT: bnez $a1, .LBB17_2 +; LA32-NEXT: bceqz $fcc0, .LBB17_2 ; LA32-NEXT: # %bb.1: # %if.then ; LA32-NEXT: ret ; LA32-NEXT: .LBB17_2: # %if.else @@ -311,9 +307,7 @@ define i1 @fcmp_fast_oeq(float %a, float %b, i1 %c) nounwind { ; LA64: # %bb.0: ; LA64-NEXT: movgr2fr.w $fa1, $zero ; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a1, $fcc0 -; LA64-NEXT: xori $a1, $a1, 1 -; LA64-NEXT: bnez $a1, .LBB17_2 +; LA64-NEXT: bceqz $fcc0, .LBB17_2 ; LA64-NEXT: # %bb.1: # %if.then ; LA64-NEXT: ret ; LA64-NEXT: .LBB17_2: # %if.else diff --git a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll index 976924bdca68..4aa2bd76ab80 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll @@ -1,16 +1,31 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 -; TODO: Load a element and splat it to a vector could be lowerd to xvldrepl -; A load has more than one user shouldn't be lowered to xvldrepl define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) { -; CHECK-LABEL: should_not_be_optimized: -; CHECK: # %bb.0: -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0 -; CHECK-NEXT: st.d $a0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: should_not_be_optimized: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: st.w $a2, $a1, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 6 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7 +; LA32-NEXT: st.w $a0, $a1, 4 +; LA32-NEXT: ret +; +; LA64-LABEL: should_not_be_optimized: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: xvreplgr2vr.d $xr0, $a0 +; LA64-NEXT: st.d $a0, $a1, 0 +; LA64-NEXT: ret %tmp = load i64, ptr %ptr store i64 %tmp, ptr %dst %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0 @@ -18,12 +33,52 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) { ret <4 x i64> %tmp2 } -define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) { -; CHECK-LABEL: xvldrepl_d_unaligned_offset: +define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) { +; CHECK-LABEL: should_not_be_optimized_sext_load: ; CHECK: # %bb.0: -; CHECK-NEXT: addi.d $a0, $a0, 4 -; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0 +; CHECK-NEXT: ld.b $a0, $a0, 0 +; 
CHECK-NEXT: xvreplgr2vr.h $xr0, $a0 +; CHECK-NEXT: ret + %tmp = load i8, ptr %ptr + %tmp1 = sext i8 %tmp to i16 + %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0 + %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer + ret <16 x i16> %tmp3 +} + +define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) { +; CHECK-LABEL: should_not_be_optimized_zext_load: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.bu $a0, $a0, 0 +; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0 ; CHECK-NEXT: ret + %tmp = load i8, ptr %ptr + %tmp1 = zext i8 %tmp to i16 + %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0 + %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer + ret <16 x i16> %tmp3 +} + +define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) { +; LA32-LABEL: xvldrepl_d_unaligned_offset: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a1, $a0, 4 +; LA32-NEXT: ld.w $a0, $a0, 8 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7 +; LA32-NEXT: ret +; +; LA64-LABEL: xvldrepl_d_unaligned_offset: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $a0, $a0, 4 +; LA64-NEXT: xvldrepl.d $xr0, $a0, 0 +; LA64-NEXT: ret %p = getelementptr i32, ptr %ptr, i32 1 %tmp = load i64, ptr %p %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0 @@ -103,10 +158,24 @@ define <8 x i32> @xvldrepl_w_offset(ptr %ptr) { define <4 x i64> @xvldrepl_d(ptr %ptr) { -; CHECK-LABEL: xvldrepl_d: -; CHECK: # %bb.0: -; CHECK-NEXT: xvldrepl.d $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: xvldrepl_d: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a1, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; LA32-NEXT: xvinsgr2vr.w 
$xr0, $a1, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7 +; LA32-NEXT: ret +; +; LA64-LABEL: xvldrepl_d: +; LA64: # %bb.0: +; LA64-NEXT: xvldrepl.d $xr0, $a0, 0 +; LA64-NEXT: ret %tmp = load i64, ptr %ptr %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0 %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> poison, <4 x i32> zeroinitializer @@ -114,10 +183,24 @@ define <4 x i64> @xvldrepl_d(ptr %ptr) { } define <4 x i64> @xvldrepl_d_offset(ptr %ptr) { -; CHECK-LABEL: xvldrepl_d_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: xvldrepl.d $xr0, $a0, 264 -; CHECK-NEXT: ret +; LA32-LABEL: xvldrepl_d_offset: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a1, $a0, 264 +; LA32-NEXT: ld.w $a0, $a0, 268 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 3 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 5 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 7 +; LA32-NEXT: ret +; +; LA64-LABEL: xvldrepl_d_offset: +; LA64: # %bb.0: +; LA64-NEXT: xvldrepl.d $xr0, $a0, 264 +; LA64-NEXT: ret %p = getelementptr i64, ptr %ptr, i64 33 %tmp = load i64, ptr %p %tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0 diff --git a/llvm/test/CodeGen/LoongArch/lasx/bswap.ll b/llvm/test/CodeGen/LoongArch/lasx/bswap.ll index 1b0132d25ed5..a4c9abac7dcc 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/bswap.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/bswap.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @bswap_v16i16(ptr %src, ptr %dst) nounwind { diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll index cc9ad633b427..23245726c896 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @buildvector_v32i8_splat(ptr %dst, i8 %a0) nounwind { ; CHECK-LABEL: buildvector_v32i8_splat: @@ -41,11 +42,24 @@ entry: } define void @buildvector_v4i64_splat(ptr %dst, i64 %a0) nounwind { -; CHECK-LABEL: buildvector_v4i64_splat: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: buildvector_v4i64_splat: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 1 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 3 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 5 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 6 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 7 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v4i64_splat: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvreplgr2vr.d $xr0, $a1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %insert = insertelement <4 x i64> undef, i64 %a0, i8 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer @@ -138,12 +152,19 @@ entry: } define void @buildvector_v4f64_const_splat(ptr %dst) nounwind { -; CHECK-LABEL: buildvector_v4f64_const_splat: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lu52i.d $a1, $zero, 1023 -; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 -; 
CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: buildvector_v4f64_const_splat: +; LA32: # %bb.0: # %entry +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0) +; LA32-NEXT: xvld $xr0, $a1, %pc_lo12(.LCPI11_0) +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v4f64_const_splat: +; LA64: # %bb.0: # %entry +; LA64-NEXT: lu52i.d $a1, $zero, 1023 +; LA64-NEXT: xvreplgr2vr.d $xr0, $a1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: store <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, ptr %dst ret void @@ -222,147 +243,289 @@ entry: } define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15, i8 %a16, i8 %a17, i8 %a18, i8 %a19, i8 %a20, i8 %a21, i8 %a22, i8 %a23, i8 %a24, i8 %a25, i8 %a26, i8 %a27, i8 %a28, i8 %a29, i8 %a30, i8 %a31) nounwind { -; CHECK-LABEL: buildvector_v32i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -80 -; CHECK-NEXT: fst.d $fs0, $sp, 72 # 8-byte Folded Spill -; CHECK-NEXT: fst.d $fs1, $sp, 64 # 8-byte Folded Spill -; CHECK-NEXT: fst.d $fs2, $sp, 56 # 8-byte Folded Spill -; CHECK-NEXT: fst.d $fs3, $sp, 48 # 8-byte Folded Spill -; CHECK-NEXT: fst.d $fs4, $sp, 40 # 8-byte Folded Spill -; CHECK-NEXT: fst.d $fs5, $sp, 32 # 8-byte Folded Spill -; CHECK-NEXT: fst.d $fs6, $sp, 24 # 8-byte Folded Spill -; CHECK-NEXT: fst.d $fs7, $sp, 16 # 8-byte Folded Spill -; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2 -; CHECK-NEXT: xvreplgr2vr.b $xr2, $a3 -; CHECK-NEXT: xvreplgr2vr.b $xr3, $a4 -; CHECK-NEXT: ld.b $a1, $sp, 264 -; CHECK-NEXT: xvreplgr2vr.b $xr4, $a5 -; CHECK-NEXT: ld.b $a2, $sp, 80 -; CHECK-NEXT: xvreplgr2vr.b $xr5, $a6 -; CHECK-NEXT: ld.b $a3, $sp, 88 -; CHECK-NEXT: xvreplgr2vr.b $xr6, $a7 -; CHECK-NEXT: ld.b $a4, $sp, 96 -; CHECK-NEXT: xvreplgr2vr.b $xr7, $a2 -; CHECK-NEXT: ld.b $a2, $sp, 104 -; CHECK-NEXT: xvreplgr2vr.b 
$xr8, $a3 -; CHECK-NEXT: ld.b $a3, $sp, 112 -; CHECK-NEXT: xvreplgr2vr.b $xr9, $a4 -; CHECK-NEXT: ld.b $a4, $sp, 120 -; CHECK-NEXT: xvreplgr2vr.b $xr10, $a2 -; CHECK-NEXT: ld.b $a2, $sp, 128 -; CHECK-NEXT: xvreplgr2vr.b $xr11, $a3 -; CHECK-NEXT: ld.b $a3, $sp, 136 -; CHECK-NEXT: xvreplgr2vr.b $xr12, $a4 -; CHECK-NEXT: ld.b $a4, $sp, 144 -; CHECK-NEXT: xvreplgr2vr.b $xr13, $a2 -; CHECK-NEXT: ld.b $a2, $sp, 152 -; CHECK-NEXT: xvreplgr2vr.b $xr14, $a3 -; CHECK-NEXT: ld.b $a3, $sp, 160 -; CHECK-NEXT: xvreplgr2vr.b $xr15, $a4 -; CHECK-NEXT: ld.b $a4, $sp, 168 -; CHECK-NEXT: xvreplgr2vr.b $xr16, $a2 -; CHECK-NEXT: ld.b $a2, $sp, 176 -; CHECK-NEXT: xvreplgr2vr.b $xr17, $a3 -; CHECK-NEXT: ld.b $a3, $sp, 184 -; CHECK-NEXT: xvreplgr2vr.b $xr18, $a4 -; CHECK-NEXT: ld.b $a4, $sp, 192 -; CHECK-NEXT: xvreplgr2vr.b $xr19, $a2 -; CHECK-NEXT: ld.b $a2, $sp, 200 -; CHECK-NEXT: xvreplgr2vr.b $xr20, $a3 -; CHECK-NEXT: ld.b $a3, $sp, 208 -; CHECK-NEXT: xvreplgr2vr.b $xr21, $a4 -; CHECK-NEXT: ld.b $a4, $sp, 216 -; CHECK-NEXT: xvreplgr2vr.b $xr22, $a2 -; CHECK-NEXT: ld.b $a2, $sp, 224 -; CHECK-NEXT: xvreplgr2vr.b $xr23, $a3 -; CHECK-NEXT: ld.b $a3, $sp, 232 -; CHECK-NEXT: xvreplgr2vr.b $xr24, $a4 -; CHECK-NEXT: ld.b $a4, $sp, 240 -; CHECK-NEXT: xvreplgr2vr.b $xr25, $a2 -; CHECK-NEXT: ld.b $a2, $sp, 248 -; CHECK-NEXT: xvreplgr2vr.b $xr26, $a3 -; CHECK-NEXT: ld.b $a3, $sp, 256 -; CHECK-NEXT: xvreplgr2vr.b $xr27, $a4 -; CHECK-NEXT: ld.b $a4, $sp, 272 -; CHECK-NEXT: xvreplgr2vr.b $xr28, $a2 -; CHECK-NEXT: xvreplgr2vr.b $xr29, $a3 -; CHECK-NEXT: xvreplgr2vr.b $xr30, $a1 -; CHECK-NEXT: xvreplgr2vr.b $xr31, $a4 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 17 -; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr2, 34 -; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr3, 51 -; CHECK-NEXT: xvpermi.q $xr4, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr4, 68 -; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 -; CHECK-NEXT: 
xvextrins.b $xr0, $xr5, 85 -; CHECK-NEXT: xvpermi.q $xr6, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr6, 102 -; CHECK-NEXT: xvpermi.q $xr7, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr7, 119 -; CHECK-NEXT: xvpermi.q $xr8, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr8, 136 -; CHECK-NEXT: xvpermi.q $xr9, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr9, 153 -; CHECK-NEXT: xvpermi.q $xr10, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr10, 170 -; CHECK-NEXT: xvpermi.q $xr11, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr11, 187 -; CHECK-NEXT: xvpermi.q $xr12, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr12, 204 -; CHECK-NEXT: xvpermi.q $xr13, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr13, 221 -; CHECK-NEXT: xvpermi.q $xr14, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr14, 238 -; CHECK-NEXT: xvpermi.q $xr15, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr15, 255 -; CHECK-NEXT: xvpermi.q $xr16, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr16, 0 -; CHECK-NEXT: xvpermi.q $xr17, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr17, 17 -; CHECK-NEXT: xvpermi.q $xr18, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr18, 34 -; CHECK-NEXT: xvpermi.q $xr19, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr19, 51 -; CHECK-NEXT: xvpermi.q $xr20, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr20, 68 -; CHECK-NEXT: xvpermi.q $xr21, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr21, 85 -; CHECK-NEXT: xvpermi.q $xr22, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr22, 102 -; CHECK-NEXT: xvpermi.q $xr23, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr23, 119 -; CHECK-NEXT: xvpermi.q $xr24, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr24, 136 -; CHECK-NEXT: xvpermi.q $xr25, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr25, 153 -; CHECK-NEXT: xvpermi.q $xr26, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr26, 170 -; CHECK-NEXT: xvpermi.q $xr27, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr27, 187 -; CHECK-NEXT: xvpermi.q $xr28, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr28, 204 -; CHECK-NEXT: xvpermi.q $xr29, 
$xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr29, 221 -; CHECK-NEXT: xvpermi.q $xr30, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr30, 238 -; CHECK-NEXT: xvpermi.q $xr31, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr31, 255 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: fld.d $fs7, $sp, 16 # 8-byte Folded Reload -; CHECK-NEXT: fld.d $fs6, $sp, 24 # 8-byte Folded Reload -; CHECK-NEXT: fld.d $fs5, $sp, 32 # 8-byte Folded Reload -; CHECK-NEXT: fld.d $fs4, $sp, 40 # 8-byte Folded Reload -; CHECK-NEXT: fld.d $fs3, $sp, 48 # 8-byte Folded Reload -; CHECK-NEXT: fld.d $fs2, $sp, 56 # 8-byte Folded Reload -; CHECK-NEXT: fld.d $fs1, $sp, 64 # 8-byte Folded Reload -; CHECK-NEXT: fld.d $fs0, $sp, 72 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 80 -; CHECK-NEXT: ret +; LA32-LABEL: buildvector_v32i8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -80 +; LA32-NEXT: fst.d $fs0, $sp, 72 # 8-byte Folded Spill +; LA32-NEXT: fst.d $fs1, $sp, 64 # 8-byte Folded Spill +; LA32-NEXT: fst.d $fs2, $sp, 56 # 8-byte Folded Spill +; LA32-NEXT: fst.d $fs3, $sp, 48 # 8-byte Folded Spill +; LA32-NEXT: fst.d $fs4, $sp, 40 # 8-byte Folded Spill +; LA32-NEXT: fst.d $fs5, $sp, 32 # 8-byte Folded Spill +; LA32-NEXT: fst.d $fs6, $sp, 24 # 8-byte Folded Spill +; LA32-NEXT: fst.d $fs7, $sp, 16 # 8-byte Folded Spill +; LA32-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a2 +; LA32-NEXT: xvreplgr2vr.b $xr2, $a3 +; LA32-NEXT: xvreplgr2vr.b $xr3, $a4 +; LA32-NEXT: ld.b $a1, $sp, 172 +; LA32-NEXT: xvreplgr2vr.b $xr4, $a5 +; LA32-NEXT: ld.b $a2, $sp, 80 +; LA32-NEXT: xvreplgr2vr.b $xr5, $a6 +; LA32-NEXT: ld.b $a3, $sp, 84 +; LA32-NEXT: xvreplgr2vr.b $xr6, $a7 +; LA32-NEXT: ld.b $a4, $sp, 88 +; LA32-NEXT: xvreplgr2vr.b $xr7, $a2 +; LA32-NEXT: ld.b $a2, $sp, 92 +; LA32-NEXT: xvreplgr2vr.b $xr8, $a3 +; LA32-NEXT: ld.b $a3, $sp, 96 +; LA32-NEXT: xvreplgr2vr.b $xr9, $a4 +; LA32-NEXT: ld.b $a4, $sp, 100 +; LA32-NEXT: xvreplgr2vr.b $xr10, $a2 +; LA32-NEXT: ld.b $a2, $sp, 104 +; 
LA32-NEXT: xvreplgr2vr.b $xr11, $a3 +; LA32-NEXT: ld.b $a3, $sp, 108 +; LA32-NEXT: xvreplgr2vr.b $xr12, $a4 +; LA32-NEXT: ld.b $a4, $sp, 112 +; LA32-NEXT: xvreplgr2vr.b $xr13, $a2 +; LA32-NEXT: ld.b $a2, $sp, 116 +; LA32-NEXT: xvreplgr2vr.b $xr14, $a3 +; LA32-NEXT: ld.b $a3, $sp, 120 +; LA32-NEXT: xvreplgr2vr.b $xr15, $a4 +; LA32-NEXT: ld.b $a4, $sp, 124 +; LA32-NEXT: xvreplgr2vr.b $xr16, $a2 +; LA32-NEXT: ld.b $a2, $sp, 128 +; LA32-NEXT: xvreplgr2vr.b $xr17, $a3 +; LA32-NEXT: ld.b $a3, $sp, 132 +; LA32-NEXT: xvreplgr2vr.b $xr18, $a4 +; LA32-NEXT: ld.b $a4, $sp, 136 +; LA32-NEXT: xvreplgr2vr.b $xr19, $a2 +; LA32-NEXT: ld.b $a2, $sp, 140 +; LA32-NEXT: xvreplgr2vr.b $xr20, $a3 +; LA32-NEXT: ld.b $a3, $sp, 144 +; LA32-NEXT: xvreplgr2vr.b $xr21, $a4 +; LA32-NEXT: ld.b $a4, $sp, 148 +; LA32-NEXT: xvreplgr2vr.b $xr22, $a2 +; LA32-NEXT: ld.b $a2, $sp, 152 +; LA32-NEXT: xvreplgr2vr.b $xr23, $a3 +; LA32-NEXT: ld.b $a3, $sp, 156 +; LA32-NEXT: xvreplgr2vr.b $xr24, $a4 +; LA32-NEXT: ld.b $a4, $sp, 160 +; LA32-NEXT: xvreplgr2vr.b $xr25, $a2 +; LA32-NEXT: ld.b $a2, $sp, 164 +; LA32-NEXT: xvreplgr2vr.b $xr26, $a3 +; LA32-NEXT: ld.b $a3, $sp, 168 +; LA32-NEXT: xvreplgr2vr.b $xr27, $a4 +; LA32-NEXT: ld.b $a4, $sp, 176 +; LA32-NEXT: xvreplgr2vr.b $xr28, $a2 +; LA32-NEXT: xvreplgr2vr.b $xr29, $a3 +; LA32-NEXT: xvreplgr2vr.b $xr30, $a1 +; LA32-NEXT: xvreplgr2vr.b $xr31, $a4 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 17 +; LA32-NEXT: xvpermi.q $xr2, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr2, 34 +; LA32-NEXT: xvpermi.q $xr3, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr3, 51 +; LA32-NEXT: xvpermi.q $xr4, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr4, 68 +; LA32-NEXT: xvpermi.q $xr5, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr5, 85 +; LA32-NEXT: xvpermi.q $xr6, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr6, 102 +; LA32-NEXT: xvpermi.q $xr7, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr7, 119 +; LA32-NEXT: xvpermi.q $xr8, $xr0, 18 +; LA32-NEXT: 
xvextrins.b $xr0, $xr8, 136 +; LA32-NEXT: xvpermi.q $xr9, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr9, 153 +; LA32-NEXT: xvpermi.q $xr10, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr10, 170 +; LA32-NEXT: xvpermi.q $xr11, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr11, 187 +; LA32-NEXT: xvpermi.q $xr12, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr12, 204 +; LA32-NEXT: xvpermi.q $xr13, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr13, 221 +; LA32-NEXT: xvpermi.q $xr14, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr14, 238 +; LA32-NEXT: xvpermi.q $xr15, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr15, 255 +; LA32-NEXT: xvpermi.q $xr16, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr16, 0 +; LA32-NEXT: xvpermi.q $xr17, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr17, 17 +; LA32-NEXT: xvpermi.q $xr18, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr18, 34 +; LA32-NEXT: xvpermi.q $xr19, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr19, 51 +; LA32-NEXT: xvpermi.q $xr20, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr20, 68 +; LA32-NEXT: xvpermi.q $xr21, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr21, 85 +; LA32-NEXT: xvpermi.q $xr22, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr22, 102 +; LA32-NEXT: xvpermi.q $xr23, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr23, 119 +; LA32-NEXT: xvpermi.q $xr24, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr24, 136 +; LA32-NEXT: xvpermi.q $xr25, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr25, 153 +; LA32-NEXT: xvpermi.q $xr26, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr26, 170 +; LA32-NEXT: xvpermi.q $xr27, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr27, 187 +; LA32-NEXT: xvpermi.q $xr28, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr28, 204 +; LA32-NEXT: xvpermi.q $xr29, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr29, 221 +; LA32-NEXT: xvpermi.q $xr30, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr30, 238 +; LA32-NEXT: xvpermi.q $xr31, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr31, 255 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: fld.d $fs7, $sp, 16 # 
8-byte Folded Reload +; LA32-NEXT: fld.d $fs6, $sp, 24 # 8-byte Folded Reload +; LA32-NEXT: fld.d $fs5, $sp, 32 # 8-byte Folded Reload +; LA32-NEXT: fld.d $fs4, $sp, 40 # 8-byte Folded Reload +; LA32-NEXT: fld.d $fs3, $sp, 48 # 8-byte Folded Reload +; LA32-NEXT: fld.d $fs2, $sp, 56 # 8-byte Folded Reload +; LA32-NEXT: fld.d $fs1, $sp, 64 # 8-byte Folded Reload +; LA32-NEXT: fld.d $fs0, $sp, 72 # 8-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 80 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v32i8: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -80 +; LA64-NEXT: fst.d $fs0, $sp, 72 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs1, $sp, 64 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs2, $sp, 56 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs3, $sp, 48 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs4, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs5, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs6, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs7, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a2 +; LA64-NEXT: xvreplgr2vr.b $xr2, $a3 +; LA64-NEXT: xvreplgr2vr.b $xr3, $a4 +; LA64-NEXT: ld.b $a1, $sp, 264 +; LA64-NEXT: xvreplgr2vr.b $xr4, $a5 +; LA64-NEXT: ld.b $a2, $sp, 80 +; LA64-NEXT: xvreplgr2vr.b $xr5, $a6 +; LA64-NEXT: ld.b $a3, $sp, 88 +; LA64-NEXT: xvreplgr2vr.b $xr6, $a7 +; LA64-NEXT: ld.b $a4, $sp, 96 +; LA64-NEXT: xvreplgr2vr.b $xr7, $a2 +; LA64-NEXT: ld.b $a2, $sp, 104 +; LA64-NEXT: xvreplgr2vr.b $xr8, $a3 +; LA64-NEXT: ld.b $a3, $sp, 112 +; LA64-NEXT: xvreplgr2vr.b $xr9, $a4 +; LA64-NEXT: ld.b $a4, $sp, 120 +; LA64-NEXT: xvreplgr2vr.b $xr10, $a2 +; LA64-NEXT: ld.b $a2, $sp, 128 +; LA64-NEXT: xvreplgr2vr.b $xr11, $a3 +; LA64-NEXT: ld.b $a3, $sp, 136 +; LA64-NEXT: xvreplgr2vr.b $xr12, $a4 +; LA64-NEXT: ld.b $a4, $sp, 144 +; LA64-NEXT: xvreplgr2vr.b $xr13, $a2 +; LA64-NEXT: ld.b $a2, $sp, 152 +; LA64-NEXT: xvreplgr2vr.b $xr14, $a3 +; LA64-NEXT: ld.b $a3, $sp, 160 +; 
LA64-NEXT: xvreplgr2vr.b $xr15, $a4 +; LA64-NEXT: ld.b $a4, $sp, 168 +; LA64-NEXT: xvreplgr2vr.b $xr16, $a2 +; LA64-NEXT: ld.b $a2, $sp, 176 +; LA64-NEXT: xvreplgr2vr.b $xr17, $a3 +; LA64-NEXT: ld.b $a3, $sp, 184 +; LA64-NEXT: xvreplgr2vr.b $xr18, $a4 +; LA64-NEXT: ld.b $a4, $sp, 192 +; LA64-NEXT: xvreplgr2vr.b $xr19, $a2 +; LA64-NEXT: ld.b $a2, $sp, 200 +; LA64-NEXT: xvreplgr2vr.b $xr20, $a3 +; LA64-NEXT: ld.b $a3, $sp, 208 +; LA64-NEXT: xvreplgr2vr.b $xr21, $a4 +; LA64-NEXT: ld.b $a4, $sp, 216 +; LA64-NEXT: xvreplgr2vr.b $xr22, $a2 +; LA64-NEXT: ld.b $a2, $sp, 224 +; LA64-NEXT: xvreplgr2vr.b $xr23, $a3 +; LA64-NEXT: ld.b $a3, $sp, 232 +; LA64-NEXT: xvreplgr2vr.b $xr24, $a4 +; LA64-NEXT: ld.b $a4, $sp, 240 +; LA64-NEXT: xvreplgr2vr.b $xr25, $a2 +; LA64-NEXT: ld.b $a2, $sp, 248 +; LA64-NEXT: xvreplgr2vr.b $xr26, $a3 +; LA64-NEXT: ld.b $a3, $sp, 256 +; LA64-NEXT: xvreplgr2vr.b $xr27, $a4 +; LA64-NEXT: ld.b $a4, $sp, 272 +; LA64-NEXT: xvreplgr2vr.b $xr28, $a2 +; LA64-NEXT: xvreplgr2vr.b $xr29, $a3 +; LA64-NEXT: xvreplgr2vr.b $xr30, $a1 +; LA64-NEXT: xvreplgr2vr.b $xr31, $a4 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 17 +; LA64-NEXT: xvpermi.q $xr2, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr2, 34 +; LA64-NEXT: xvpermi.q $xr3, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr3, 51 +; LA64-NEXT: xvpermi.q $xr4, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr4, 68 +; LA64-NEXT: xvpermi.q $xr5, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr5, 85 +; LA64-NEXT: xvpermi.q $xr6, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr6, 102 +; LA64-NEXT: xvpermi.q $xr7, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr7, 119 +; LA64-NEXT: xvpermi.q $xr8, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr8, 136 +; LA64-NEXT: xvpermi.q $xr9, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr9, 153 +; LA64-NEXT: xvpermi.q $xr10, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr10, 170 +; LA64-NEXT: xvpermi.q $xr11, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr11, 187 +; LA64-NEXT: 
xvpermi.q $xr12, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr12, 204 +; LA64-NEXT: xvpermi.q $xr13, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr13, 221 +; LA64-NEXT: xvpermi.q $xr14, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr14, 238 +; LA64-NEXT: xvpermi.q $xr15, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr15, 255 +; LA64-NEXT: xvpermi.q $xr16, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr16, 0 +; LA64-NEXT: xvpermi.q $xr17, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr17, 17 +; LA64-NEXT: xvpermi.q $xr18, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr18, 34 +; LA64-NEXT: xvpermi.q $xr19, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr19, 51 +; LA64-NEXT: xvpermi.q $xr20, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr20, 68 +; LA64-NEXT: xvpermi.q $xr21, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr21, 85 +; LA64-NEXT: xvpermi.q $xr22, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr22, 102 +; LA64-NEXT: xvpermi.q $xr23, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr23, 119 +; LA64-NEXT: xvpermi.q $xr24, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr24, 136 +; LA64-NEXT: xvpermi.q $xr25, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr25, 153 +; LA64-NEXT: xvpermi.q $xr26, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr26, 170 +; LA64-NEXT: xvpermi.q $xr27, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr27, 187 +; LA64-NEXT: xvpermi.q $xr28, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr28, 204 +; LA64-NEXT: xvpermi.q $xr29, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr29, 221 +; LA64-NEXT: xvpermi.q $xr30, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr30, 238 +; LA64-NEXT: xvpermi.q $xr31, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr31, 255 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: fld.d $fs7, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs6, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs5, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs4, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs3, $sp, 48 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs2, $sp, 56 # 8-byte 
Folded Reload +; LA64-NEXT: fld.d $fs1, $sp, 64 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs0, $sp, 72 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 80 +; LA64-NEXT: ret entry: %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 @@ -401,61 +564,117 @@ entry: } define void @buildvector_v32i8_partial(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a5, i8 %a7, i8 %a8, i8 %a15, i8 %a17, i8 %a18, i8 %a20, i8 %a22, i8 %a23, i8 %a27, i8 %a28, i8 %a31) nounwind { -; CHECK-LABEL: buildvector_v32i8_partial: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.b $t0, $sp, 56 -; CHECK-NEXT: ld.b $t1, $sp, 48 -; CHECK-NEXT: ld.b $t2, $sp, 40 -; CHECK-NEXT: ld.b $t3, $sp, 32 -; CHECK-NEXT: ld.b $t4, $sp, 24 -; CHECK-NEXT: ld.b $t5, $sp, 16 -; CHECK-NEXT: ld.b $t6, $sp, 8 -; CHECK-NEXT: ld.b $t7, $sp, 0 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 17 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a3 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 34 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a4 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 85 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a5 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 119 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a6 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 136 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a7 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 255 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t7 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 17 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t6 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 34 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t5 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 68 -; CHECK-NEXT: 
xvreplgr2vr.b $xr1, $t4 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 102 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t3 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 119 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t2 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 187 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t1 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 204 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t0 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 255 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: buildvector_v32i8_partial: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.b $t0, $sp, 28 +; LA32-NEXT: ld.b $t1, $sp, 24 +; LA32-NEXT: ld.b $t2, $sp, 20 +; LA32-NEXT: ld.b $t3, $sp, 16 +; LA32-NEXT: ld.b $t4, $sp, 12 +; LA32-NEXT: ld.b $t5, $sp, 8 +; LA32-NEXT: ld.b $t6, $sp, 4 +; LA32-NEXT: ld.b $t7, $sp, 0 +; LA32-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a2 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 17 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a3 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 34 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a4 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 85 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a5 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 119 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a6 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 136 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a7 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 255 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t7 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 17 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t6 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 34 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t5 +; 
LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 68 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t4 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 102 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t3 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 119 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t2 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 187 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t1 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 204 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t0 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 255 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v32i8_partial: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.b $t0, $sp, 56 +; LA64-NEXT: ld.b $t1, $sp, 48 +; LA64-NEXT: ld.b $t2, $sp, 40 +; LA64-NEXT: ld.b $t3, $sp, 32 +; LA64-NEXT: ld.b $t4, $sp, 24 +; LA64-NEXT: ld.b $t5, $sp, 16 +; LA64-NEXT: ld.b $t6, $sp, 8 +; LA64-NEXT: ld.b $t7, $sp, 0 +; LA64-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a2 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 17 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a3 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 34 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a4 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 85 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a5 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 119 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a6 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 136 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a7 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 255 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t7 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 17 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t6 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; 
LA64-NEXT: xvextrins.b $xr0, $xr1, 34 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t5 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 68 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t4 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 102 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t3 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 119 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t2 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 187 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t1 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 204 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t0 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 255 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 @@ -494,64 +713,123 @@ entry: } define void @buildvector_v32i8_with_constant(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a5, i8 %a8, i8 %a9, i8 %a15, i8 %a17, i8 %a18, i8 %a20, i8 %a22, i8 %a23, i8 %a27, i8 %a28, i8 %a31) nounwind { -; CHECK-LABEL: buildvector_v32i8_with_constant: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.b $t0, $sp, 56 -; CHECK-NEXT: ld.b $t1, $sp, 48 -; CHECK-NEXT: ld.b $t2, $sp, 40 -; CHECK-NEXT: ld.b $t3, $sp, 32 -; CHECK-NEXT: ld.b $t4, $sp, 24 -; CHECK-NEXT: ld.b $t5, $sp, 16 -; CHECK-NEXT: ld.b $t6, $sp, 8 -; CHECK-NEXT: ld.b $t7, $sp, 0 -; CHECK-NEXT: xvrepli.b $xr0, 0 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a1 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 0 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 17 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a3 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 34 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a4 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 85 -; 
CHECK-NEXT: xvreplgr2vr.b $xr1, $a5 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 136 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a6 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 153 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $a7 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 255 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t7 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 17 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t6 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 34 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t5 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 68 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t4 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 102 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t3 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 119 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t2 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 187 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t1 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 204 -; CHECK-NEXT: xvreplgr2vr.b $xr1, $t0 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.b $xr0, $xr1, 255 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: buildvector_v32i8_with_constant: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.b $t0, $sp, 28 +; LA32-NEXT: ld.b $t1, $sp, 24 +; LA32-NEXT: ld.b $t2, $sp, 20 +; LA32-NEXT: ld.b $t3, $sp, 16 +; LA32-NEXT: ld.b $t4, $sp, 12 +; LA32-NEXT: ld.b $t5, $sp, 8 +; LA32-NEXT: ld.b $t6, $sp, 4 +; LA32-NEXT: ld.b $t7, $sp, 0 +; LA32-NEXT: xvrepli.b $xr0, 0 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a1 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 0 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a2 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 17 +; LA32-NEXT: 
xvreplgr2vr.b $xr1, $a3 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 34 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a4 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 85 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a5 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 136 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a6 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 153 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a7 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 255 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t7 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 17 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t6 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 34 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t5 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 68 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t4 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 102 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t3 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 119 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t2 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 187 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t1 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 204 +; LA32-NEXT: xvreplgr2vr.b $xr1, $t0 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 255 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v32i8_with_constant: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.b $t0, $sp, 56 +; LA64-NEXT: ld.b $t1, $sp, 48 +; LA64-NEXT: ld.b $t2, $sp, 40 +; LA64-NEXT: ld.b $t3, $sp, 32 +; LA64-NEXT: ld.b $t4, $sp, 24 +; LA64-NEXT: ld.b $t5, $sp, 16 +; LA64-NEXT: ld.b $t6, $sp, 8 +; LA64-NEXT: ld.b $t7, $sp, 0 +; LA64-NEXT: xvrepli.b $xr0, 0 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a1 +; LA64-NEXT: 
xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 0 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a2 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 17 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a3 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 34 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a4 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 85 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a5 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 136 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a6 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 153 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a7 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 255 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t7 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 17 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t6 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 34 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t5 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 68 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t4 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 102 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t3 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 119 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t2 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 187 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t1 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 204 +; LA64-NEXT: xvreplgr2vr.b $xr1, $t0 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 255 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 @@ -589,67 +867,789 @@ entry: ret void } -define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, 
i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind { -; CHECK-LABEL: buildvector_v16i16: +define void @buildvector_v32i8_subseq_2(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { +; LA32-LABEL: buildvector_v32i8_subseq_2: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.b $t0, $sp, 32 +; LA32-NEXT: ld.b $t1, $sp, 28 +; LA32-NEXT: ld.b $t2, $sp, 24 +; LA32-NEXT: ld.b $t3, $sp, 20 +; LA32-NEXT: ld.b $t4, $sp, 16 +; LA32-NEXT: ld.b $t5, $sp, 12 +; LA32-NEXT: ld.b $t6, $sp, 8 +; LA32-NEXT: ld.b $t7, $sp, 4 +; LA32-NEXT: ld.b $t8, $sp, 0 +; LA32-NEXT: xvreplgr2vr.b $xr1, $a2 +; LA32-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; LA32-NEXT: xvori.b $xr3, $xr1, 0 +; LA32-NEXT: xvpermi.q $xr3, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr2, $a3 +; LA32-NEXT: xvextrins.b $xr0, $xr3, 17 +; LA32-NEXT: xvori.b $xr4, $xr2, 0 +; LA32-NEXT: xvpermi.q $xr4, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr3, $a4 +; LA32-NEXT: xvextrins.b $xr0, $xr4, 34 +; LA32-NEXT: xvori.b $xr5, $xr3, 0 +; LA32-NEXT: xvpermi.q $xr5, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr4, $a5 +; LA32-NEXT: xvextrins.b $xr0, $xr5, 51 +; LA32-NEXT: xvori.b $xr6, $xr4, 0 +; LA32-NEXT: xvpermi.q $xr6, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr5, $a6 +; LA32-NEXT: xvextrins.b $xr0, $xr6, 68 +; LA32-NEXT: xvori.b $xr7, $xr5, 0 +; LA32-NEXT: xvpermi.q $xr7, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr6, $a7 +; LA32-NEXT: xvextrins.b $xr0, $xr7, 85 +; LA32-NEXT: xvori.b $xr8, $xr6, 0 +; LA32-NEXT: xvpermi.q $xr8, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr7, $t8 +; LA32-NEXT: xvextrins.b $xr0, $xr8, 102 +; LA32-NEXT: xvori.b $xr9, $xr7, 0 +; LA32-NEXT: xvpermi.q $xr9, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr8, $t7 +; LA32-NEXT: xvextrins.b $xr0, $xr9, 119 +; LA32-NEXT: xvori.b $xr10, $xr8, 0 +; LA32-NEXT: xvpermi.q $xr10, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr9, $t6 +; 
LA32-NEXT: xvextrins.b $xr0, $xr10, 136 +; LA32-NEXT: xvori.b $xr11, $xr9, 0 +; LA32-NEXT: xvpermi.q $xr11, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr10, $t5 +; LA32-NEXT: xvextrins.b $xr0, $xr11, 153 +; LA32-NEXT: xvori.b $xr12, $xr10, 0 +; LA32-NEXT: xvpermi.q $xr12, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr11, $t4 +; LA32-NEXT: xvextrins.b $xr0, $xr12, 170 +; LA32-NEXT: xvori.b $xr13, $xr11, 0 +; LA32-NEXT: xvpermi.q $xr13, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr12, $t3 +; LA32-NEXT: xvextrins.b $xr0, $xr13, 187 +; LA32-NEXT: xvori.b $xr14, $xr12, 0 +; LA32-NEXT: xvpermi.q $xr14, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr13, $t2 +; LA32-NEXT: xvextrins.b $xr0, $xr14, 204 +; LA32-NEXT: xvori.b $xr15, $xr13, 0 +; LA32-NEXT: xvpermi.q $xr15, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr14, $t1 +; LA32-NEXT: xvextrins.b $xr0, $xr15, 221 +; LA32-NEXT: xvori.b $xr16, $xr14, 0 +; LA32-NEXT: xvpermi.q $xr16, $xr0, 18 +; LA32-NEXT: xvreplgr2vr.b $xr15, $t0 +; LA32-NEXT: xvextrins.b $xr0, $xr16, 238 +; LA32-NEXT: xvori.b $xr16, $xr15, 0 +; LA32-NEXT: xvpermi.q $xr16, $xr0, 18 +; LA32-NEXT: xvextrins.b $xr0, $xr16, 255 +; LA32-NEXT: xvreplgr2vr.b $xr16, $a1 +; LA32-NEXT: xvpermi.q $xr16, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr16, 0 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr1, 17 +; LA32-NEXT: xvpermi.q $xr2, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr2, 34 +; LA32-NEXT: xvpermi.q $xr3, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr3, 51 +; LA32-NEXT: xvpermi.q $xr4, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr4, 68 +; LA32-NEXT: xvpermi.q $xr5, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr5, 85 +; LA32-NEXT: xvpermi.q $xr6, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr6, 102 +; LA32-NEXT: xvpermi.q $xr7, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr7, 119 +; LA32-NEXT: xvpermi.q $xr8, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr8, 136 +; LA32-NEXT: xvpermi.q $xr9, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr9, 153 +; LA32-NEXT: xvpermi.q $xr10, 
$xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr10, 170 +; LA32-NEXT: xvpermi.q $xr11, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr11, 187 +; LA32-NEXT: xvpermi.q $xr12, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr12, 204 +; LA32-NEXT: xvpermi.q $xr13, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr13, 221 +; LA32-NEXT: xvpermi.q $xr14, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr14, 238 +; LA32-NEXT: xvpermi.q $xr15, $xr0, 48 +; LA32-NEXT: xvextrins.b $xr0, $xr15, 255 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v32i8_subseq_2: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.b $t0, $sp, 64 +; LA64-NEXT: ld.b $t1, $sp, 56 +; LA64-NEXT: ld.b $t2, $sp, 48 +; LA64-NEXT: ld.b $t3, $sp, 40 +; LA64-NEXT: ld.b $t4, $sp, 32 +; LA64-NEXT: ld.b $t5, $sp, 24 +; LA64-NEXT: ld.b $t6, $sp, 16 +; LA64-NEXT: ld.b $t7, $sp, 8 +; LA64-NEXT: ld.b $t8, $sp, 0 +; LA64-NEXT: xvreplgr2vr.b $xr1, $a2 +; LA64-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; LA64-NEXT: xvori.b $xr3, $xr1, 0 +; LA64-NEXT: xvpermi.q $xr3, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr2, $a3 +; LA64-NEXT: xvextrins.b $xr0, $xr3, 17 +; LA64-NEXT: xvori.b $xr4, $xr2, 0 +; LA64-NEXT: xvpermi.q $xr4, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr3, $a4 +; LA64-NEXT: xvextrins.b $xr0, $xr4, 34 +; LA64-NEXT: xvori.b $xr5, $xr3, 0 +; LA64-NEXT: xvpermi.q $xr5, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr4, $a5 +; LA64-NEXT: xvextrins.b $xr0, $xr5, 51 +; LA64-NEXT: xvori.b $xr6, $xr4, 0 +; LA64-NEXT: xvpermi.q $xr6, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr5, $a6 +; LA64-NEXT: xvextrins.b $xr0, $xr6, 68 +; LA64-NEXT: xvori.b $xr7, $xr5, 0 +; LA64-NEXT: xvpermi.q $xr7, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr6, $a7 +; LA64-NEXT: xvextrins.b $xr0, $xr7, 85 +; LA64-NEXT: xvori.b $xr8, $xr6, 0 +; LA64-NEXT: xvpermi.q $xr8, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr7, $t8 +; LA64-NEXT: xvextrins.b $xr0, $xr8, 102 +; LA64-NEXT: xvori.b $xr9, $xr7, 0 +; LA64-NEXT: xvpermi.q $xr9, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr8, $t7 +; 
LA64-NEXT: xvextrins.b $xr0, $xr9, 119 +; LA64-NEXT: xvori.b $xr10, $xr8, 0 +; LA64-NEXT: xvpermi.q $xr10, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr9, $t6 +; LA64-NEXT: xvextrins.b $xr0, $xr10, 136 +; LA64-NEXT: xvori.b $xr11, $xr9, 0 +; LA64-NEXT: xvpermi.q $xr11, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr10, $t5 +; LA64-NEXT: xvextrins.b $xr0, $xr11, 153 +; LA64-NEXT: xvori.b $xr12, $xr10, 0 +; LA64-NEXT: xvpermi.q $xr12, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr11, $t4 +; LA64-NEXT: xvextrins.b $xr0, $xr12, 170 +; LA64-NEXT: xvori.b $xr13, $xr11, 0 +; LA64-NEXT: xvpermi.q $xr13, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr12, $t3 +; LA64-NEXT: xvextrins.b $xr0, $xr13, 187 +; LA64-NEXT: xvori.b $xr14, $xr12, 0 +; LA64-NEXT: xvpermi.q $xr14, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr13, $t2 +; LA64-NEXT: xvextrins.b $xr0, $xr14, 204 +; LA64-NEXT: xvori.b $xr15, $xr13, 0 +; LA64-NEXT: xvpermi.q $xr15, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr14, $t1 +; LA64-NEXT: xvextrins.b $xr0, $xr15, 221 +; LA64-NEXT: xvori.b $xr16, $xr14, 0 +; LA64-NEXT: xvpermi.q $xr16, $xr0, 18 +; LA64-NEXT: xvreplgr2vr.b $xr15, $t0 +; LA64-NEXT: xvextrins.b $xr0, $xr16, 238 +; LA64-NEXT: xvori.b $xr16, $xr15, 0 +; LA64-NEXT: xvpermi.q $xr16, $xr0, 18 +; LA64-NEXT: xvextrins.b $xr0, $xr16, 255 +; LA64-NEXT: xvreplgr2vr.b $xr16, $a1 +; LA64-NEXT: xvpermi.q $xr16, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr16, 0 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr1, 17 +; LA64-NEXT: xvpermi.q $xr2, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr2, 34 +; LA64-NEXT: xvpermi.q $xr3, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr3, 51 +; LA64-NEXT: xvpermi.q $xr4, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr4, 68 +; LA64-NEXT: xvpermi.q $xr5, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr5, 85 +; LA64-NEXT: xvpermi.q $xr6, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr6, 102 +; LA64-NEXT: xvpermi.q $xr7, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr7, 119 +; LA64-NEXT: xvpermi.q $xr8, $xr0, 48 
+; LA64-NEXT: xvextrins.b $xr0, $xr8, 136 +; LA64-NEXT: xvpermi.q $xr9, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr9, 153 +; LA64-NEXT: xvpermi.q $xr10, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr10, 170 +; LA64-NEXT: xvpermi.q $xr11, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr11, 187 +; LA64-NEXT: xvpermi.q $xr12, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr12, 204 +; LA64-NEXT: xvpermi.q $xr13, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr13, 221 +; LA64-NEXT: xvpermi.q $xr14, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr14, 238 +; LA64-NEXT: xvpermi.q $xr15, $xr0, 48 +; LA64-NEXT: xvextrins.b $xr0, $xr15, 255 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret +entry: + %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 + %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 + %ins2 = insertelement <32 x i8> %ins1, i8 %a2, i32 2 + %ins3 = insertelement <32 x i8> %ins2, i8 %a3, i32 3 + %ins4 = insertelement <32 x i8> %ins3, i8 %a4, i32 4 + %ins5 = insertelement <32 x i8> %ins4, i8 %a5, i32 5 + %ins6 = insertelement <32 x i8> %ins5, i8 %a6, i32 6 + %ins7 = insertelement <32 x i8> %ins6, i8 %a7, i32 7 + %ins8 = insertelement <32 x i8> %ins7, i8 %a8, i32 8 + %ins9 = insertelement <32 x i8> %ins8, i8 %a9, i32 9 + %ins10 = insertelement <32 x i8> %ins9, i8 %a10, i32 10 + %ins11 = insertelement <32 x i8> %ins10, i8 %a11, i32 11 + %ins12 = insertelement <32 x i8> %ins11, i8 %a12, i32 12 + %ins13 = insertelement <32 x i8> %ins12, i8 %a13, i32 13 + %ins14 = insertelement <32 x i8> %ins13, i8 %a14, i32 14 + %ins15 = insertelement <32 x i8> %ins14, i8 %a15, i32 15 + %ins16 = insertelement <32 x i8> %ins15, i8 %a0, i32 16 + %ins17 = insertelement <32 x i8> %ins16, i8 %a1, i32 17 + %ins18 = insertelement <32 x i8> %ins17, i8 %a2, i32 18 + %ins19 = insertelement <32 x i8> %ins18, i8 %a3, i32 19 + %ins20 = insertelement <32 x i8> %ins19, i8 %a4, i32 20 + %ins21 = insertelement <32 x i8> %ins20, i8 %a5, i32 21 + %ins22 = insertelement <32 x i8> %ins21, i8 %a6, i32 22 + %ins23 = 
insertelement <32 x i8> %ins22, i8 %a7, i32 23 + %ins24 = insertelement <32 x i8> %ins23, i8 %a8, i32 24 + %ins25 = insertelement <32 x i8> %ins24, i8 %a9, i32 25 + %ins26 = insertelement <32 x i8> %ins25, i8 %a10, i32 26 + %ins27 = insertelement <32 x i8> %ins26, i8 %a11, i32 27 + %ins28 = insertelement <32 x i8> %ins27, i8 %a12, i32 28 + %ins29 = insertelement <32 x i8> %ins28, i8 %a13, i32 29 + %ins30 = insertelement <32 x i8> %ins29, i8 %a14, i32 30 + %ins31 = insertelement <32 x i8> %ins30, i8 %a15, i32 31 + store <32 x i8> %ins31, ptr %dst + ret void +} + +define void @buildvector_v32i8_subseq_4(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind { +; CHECK-LABEL: buildvector_v32i8_subseq_4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.h $t0, $sp, 64 -; CHECK-NEXT: ld.h $t1, $sp, 56 -; CHECK-NEXT: ld.h $t2, $sp, 48 -; CHECK-NEXT: ld.h $t3, $sp, 40 -; CHECK-NEXT: ld.h $t4, $sp, 32 -; CHECK-NEXT: ld.h $t5, $sp, 24 -; CHECK-NEXT: ld.h $t6, $sp, 16 -; CHECK-NEXT: ld.h $t7, $sp, 8 -; CHECK-NEXT: ld.h $t8, $sp, 0 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 17 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $a3 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 34 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $a4 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 51 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $a5 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 68 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $a6 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 85 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $a7 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 102 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $t8 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 18 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 119 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $t7 -; 
CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 0 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $t6 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 17 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $t5 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 34 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $t4 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 51 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $t3 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 68 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $t2 +; CHECK-NEXT: ld.b $t0, $sp, 0 +; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.b $xr2, $a3 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 17 +; CHECK-NEXT: xvori.b $xr4, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr4, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.b $xr3, $a4 +; CHECK-NEXT: xvextrins.b $xr0, $xr4, 34 +; CHECK-NEXT: xvori.b $xr5, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.b $xr4, $a5 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 51 +; CHECK-NEXT: xvori.b $xr6, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr6, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.b $xr5, $a6 +; CHECK-NEXT: xvextrins.b $xr0, $xr6, 68 +; CHECK-NEXT: xvori.b $xr7, $xr5, 0 +; CHECK-NEXT: xvpermi.q $xr7, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.b $xr6, $a7 +; CHECK-NEXT: xvextrins.b $xr0, $xr7, 85 +; CHECK-NEXT: xvori.b $xr8, $xr6, 0 +; CHECK-NEXT: xvpermi.q $xr8, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.b $xr7, $t0 +; CHECK-NEXT: xvextrins.b $xr0, $xr8, 102 +; CHECK-NEXT: xvori.b $xr9, $xr7, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.b $xr8, $a1 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 119 +; CHECK-NEXT: xvori.b $xr9, $xr8, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 136 +; CHECK-NEXT: xvori.b $xr9, $xr1, 0 +; CHECK-NEXT: 
xvpermi.q $xr9, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 153 +; CHECK-NEXT: xvori.b $xr9, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 170 +; CHECK-NEXT: xvori.b $xr9, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 187 +; CHECK-NEXT: xvori.b $xr9, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 204 +; CHECK-NEXT: xvori.b $xr9, $xr5, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 221 +; CHECK-NEXT: xvori.b $xr9, $xr6, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 238 +; CHECK-NEXT: xvori.b $xr9, $xr7, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 255 +; CHECK-NEXT: xvori.b $xr9, $xr8, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 0 +; CHECK-NEXT: xvori.b $xr9, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 17 +; CHECK-NEXT: xvori.b $xr9, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 34 +; CHECK-NEXT: xvori.b $xr9, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 51 +; CHECK-NEXT: xvori.b $xr9, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 68 +; CHECK-NEXT: xvori.b $xr9, $xr5, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 85 +; CHECK-NEXT: xvori.b $xr9, $xr6, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 102 +; CHECK-NEXT: xvori.b $xr9, $xr7, 0 +; CHECK-NEXT: xvpermi.q $xr9, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr9, 119 +; CHECK-NEXT: xvpermi.q $xr8, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr8, 136 ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 85 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $t1 +; CHECK-NEXT: xvextrins.b $xr0, $xr1, 153 +; 
CHECK-NEXT: xvpermi.q $xr2, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr2, 170 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 187 +; CHECK-NEXT: xvpermi.q $xr4, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr4, 204 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 221 +; CHECK-NEXT: xvpermi.q $xr6, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr6, 238 +; CHECK-NEXT: xvpermi.q $xr7, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr7, 255 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 + %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 + %ins2 = insertelement <32 x i8> %ins1, i8 %a2, i32 2 + %ins3 = insertelement <32 x i8> %ins2, i8 %a3, i32 3 + %ins4 = insertelement <32 x i8> %ins3, i8 %a4, i32 4 + %ins5 = insertelement <32 x i8> %ins4, i8 %a5, i32 5 + %ins6 = insertelement <32 x i8> %ins5, i8 %a6, i32 6 + %ins7 = insertelement <32 x i8> %ins6, i8 %a7, i32 7 + %ins8 = insertelement <32 x i8> %ins7, i8 %a0, i32 8 + %ins9 = insertelement <32 x i8> %ins8, i8 %a1, i32 9 + %ins10 = insertelement <32 x i8> %ins9, i8 %a2, i32 10 + %ins11 = insertelement <32 x i8> %ins10, i8 %a3, i32 11 + %ins12 = insertelement <32 x i8> %ins11, i8 %a4, i32 12 + %ins13 = insertelement <32 x i8> %ins12, i8 %a5, i32 13 + %ins14 = insertelement <32 x i8> %ins13, i8 %a6, i32 14 + %ins15 = insertelement <32 x i8> %ins14, i8 %a7, i32 15 + %ins16 = insertelement <32 x i8> %ins15, i8 %a0, i32 16 + %ins17 = insertelement <32 x i8> %ins16, i8 %a1, i32 17 + %ins18 = insertelement <32 x i8> %ins17, i8 %a2, i32 18 + %ins19 = insertelement <32 x i8> %ins18, i8 %a3, i32 19 + %ins20 = insertelement <32 x i8> %ins19, i8 %a4, i32 20 + %ins21 = insertelement <32 x i8> %ins20, i8 %a5, i32 21 + %ins22 = insertelement <32 x i8> %ins21, i8 %a6, i32 22 + %ins23 = insertelement <32 x i8> %ins22, i8 %a7, i32 23 + %ins24 = insertelement <32 x i8> %ins23, i8 %a0, i32 24 + %ins25 = insertelement 
<32 x i8> %ins24, i8 %a1, i32 25 + %ins26 = insertelement <32 x i8> %ins25, i8 %a2, i32 26 + %ins27 = insertelement <32 x i8> %ins26, i8 %a3, i32 27 + %ins28 = insertelement <32 x i8> %ins27, i8 %a4, i32 28 + %ins29 = insertelement <32 x i8> %ins28, i8 %a5, i32 29 + %ins30 = insertelement <32 x i8> %ins29, i8 %a6, i32 30 + %ins31 = insertelement <32 x i8> %ins30, i8 %a7, i32 31 + store <32 x i8> %ins31, ptr %dst + ret void +} + +define void @buildvector_v32i8_subseq_8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3) nounwind { +; CHECK-LABEL: buildvector_v32i8_subseq_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.b $xr2, $a3 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 17 +; CHECK-NEXT: xvori.b $xr4, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr4, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.b $xr3, $a4 +; CHECK-NEXT: xvextrins.b $xr0, $xr4, 34 +; CHECK-NEXT: xvori.b $xr5, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.b $xr4, $a1 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 51 +; CHECK-NEXT: xvori.b $xr5, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 68 +; CHECK-NEXT: xvori.b $xr5, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 85 +; CHECK-NEXT: xvori.b $xr5, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 102 +; CHECK-NEXT: xvori.b $xr5, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 119 +; CHECK-NEXT: xvori.b $xr5, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 136 +; CHECK-NEXT: xvori.b $xr5, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 153 +; CHECK-NEXT: xvori.b $xr5, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 170 +; 
CHECK-NEXT: xvori.b $xr5, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 187 +; CHECK-NEXT: xvori.b $xr5, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 204 +; CHECK-NEXT: xvori.b $xr5, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 221 +; CHECK-NEXT: xvori.b $xr5, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 238 +; CHECK-NEXT: xvori.b $xr5, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 255 +; CHECK-NEXT: xvori.b $xr5, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 0 +; CHECK-NEXT: xvori.b $xr5, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 17 +; CHECK-NEXT: xvori.b $xr5, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 34 +; CHECK-NEXT: xvori.b $xr5, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 51 +; CHECK-NEXT: xvori.b $xr5, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 68 +; CHECK-NEXT: xvori.b $xr5, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 85 +; CHECK-NEXT: xvori.b $xr5, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 102 +; CHECK-NEXT: xvori.b $xr5, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 119 +; CHECK-NEXT: xvori.b $xr5, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 136 +; CHECK-NEXT: xvori.b $xr5, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 153 +; CHECK-NEXT: xvori.b $xr5, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr5, 170 +; CHECK-NEXT: xvori.b $xr5, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; 
CHECK-NEXT: xvextrins.b $xr0, $xr5, 187 +; CHECK-NEXT: xvpermi.q $xr4, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr4, 204 ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 102 -; CHECK-NEXT: xvreplgr2vr.h $xr1, $t0 +; CHECK-NEXT: xvextrins.b $xr0, $xr1, 221 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr2, 238 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 255 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 + %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 + %ins2 = insertelement <32 x i8> %ins1, i8 %a2, i32 2 + %ins3 = insertelement <32 x i8> %ins2, i8 %a3, i32 3 + %ins4 = insertelement <32 x i8> %ins3, i8 %a0, i32 4 + %ins5 = insertelement <32 x i8> %ins4, i8 %a1, i32 5 + %ins6 = insertelement <32 x i8> %ins5, i8 %a2, i32 6 + %ins7 = insertelement <32 x i8> %ins6, i8 %a3, i32 7 + %ins8 = insertelement <32 x i8> %ins7, i8 %a0, i32 8 + %ins9 = insertelement <32 x i8> %ins8, i8 %a1, i32 9 + %ins10 = insertelement <32 x i8> %ins9, i8 %a2, i32 10 + %ins11 = insertelement <32 x i8> %ins10, i8 %a3, i32 11 + %ins12 = insertelement <32 x i8> %ins11, i8 %a0, i32 12 + %ins13 = insertelement <32 x i8> %ins12, i8 %a1, i32 13 + %ins14 = insertelement <32 x i8> %ins13, i8 %a2, i32 14 + %ins15 = insertelement <32 x i8> %ins14, i8 %a3, i32 15 + %ins16 = insertelement <32 x i8> %ins15, i8 %a0, i32 16 + %ins17 = insertelement <32 x i8> %ins16, i8 %a1, i32 17 + %ins18 = insertelement <32 x i8> %ins17, i8 %a2, i32 18 + %ins19 = insertelement <32 x i8> %ins18, i8 %a3, i32 19 + %ins20 = insertelement <32 x i8> %ins19, i8 %a0, i32 20 + %ins21 = insertelement <32 x i8> %ins20, i8 %a1, i32 21 + %ins22 = insertelement <32 x i8> %ins21, i8 %a2, i32 22 + %ins23 = insertelement <32 x i8> %ins22, i8 %a3, i32 23 + %ins24 = insertelement <32 x i8> %ins23, i8 %a0, i32 24 + %ins25 = insertelement <32 x i8> %ins24, i8 %a1, i32 25 + %ins26 
= insertelement <32 x i8> %ins25, i8 %a2, i32 26 + %ins27 = insertelement <32 x i8> %ins26, i8 %a3, i32 27 + %ins28 = insertelement <32 x i8> %ins27, i8 %a0, i32 28 + %ins29 = insertelement <32 x i8> %ins28, i8 %a1, i32 29 + %ins30 = insertelement <32 x i8> %ins29, i8 %a2, i32 30 + %ins31 = insertelement <32 x i8> %ins30, i8 %a3, i32 31 + store <32 x i8> %ins31, ptr %dst + ret void +} + +define void @buildvector_v32i8_subseq_16(ptr %dst, i8 %a0, i8 %a1) nounwind { +; CHECK-LABEL: buildvector_v32i8_subseq_16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.b $xr2, $a1 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 17 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 34 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 51 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 68 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 85 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 102 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 119 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 136 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 153 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 170 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 187 +; CHECK-NEXT: xvori.b 
$xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 204 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 221 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 238 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 18 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 255 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 0 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 17 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 34 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 51 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 68 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 85 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 102 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 119 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 136 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 153 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 170 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 187 +; CHECK-NEXT: xvori.b $xr3, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, 
$xr3, 204 +; CHECK-NEXT: xvori.b $xr3, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr3, 221 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 48 +; CHECK-NEXT: xvextrins.b $xr0, $xr2, 238 ; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 -; CHECK-NEXT: xvextrins.h $xr0, $xr1, 119 +; CHECK-NEXT: xvextrins.b $xr0, $xr1, 255 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: + %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 + %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 + %ins2 = insertelement <32 x i8> %ins1, i8 %a0, i32 2 + %ins3 = insertelement <32 x i8> %ins2, i8 %a1, i32 3 + %ins4 = insertelement <32 x i8> %ins3, i8 %a0, i32 4 + %ins5 = insertelement <32 x i8> %ins4, i8 %a1, i32 5 + %ins6 = insertelement <32 x i8> %ins5, i8 %a0, i32 6 + %ins7 = insertelement <32 x i8> %ins6, i8 %a1, i32 7 + %ins8 = insertelement <32 x i8> %ins7, i8 %a0, i32 8 + %ins9 = insertelement <32 x i8> %ins8, i8 %a1, i32 9 + %ins10 = insertelement <32 x i8> %ins9, i8 %a0, i32 10 + %ins11 = insertelement <32 x i8> %ins10, i8 %a1, i32 11 + %ins12 = insertelement <32 x i8> %ins11, i8 %a0, i32 12 + %ins13 = insertelement <32 x i8> %ins12, i8 %a1, i32 13 + %ins14 = insertelement <32 x i8> %ins13, i8 %a0, i32 14 + %ins15 = insertelement <32 x i8> %ins14, i8 %a1, i32 15 + %ins16 = insertelement <32 x i8> %ins15, i8 %a0, i32 16 + %ins17 = insertelement <32 x i8> %ins16, i8 %a1, i32 17 + %ins18 = insertelement <32 x i8> %ins17, i8 %a0, i32 18 + %ins19 = insertelement <32 x i8> %ins18, i8 %a1, i32 19 + %ins20 = insertelement <32 x i8> %ins19, i8 %a0, i32 20 + %ins21 = insertelement <32 x i8> %ins20, i8 %a1, i32 21 + %ins22 = insertelement <32 x i8> %ins21, i8 %a0, i32 22 + %ins23 = insertelement <32 x i8> %ins22, i8 %a1, i32 23 + %ins24 = insertelement <32 x i8> %ins23, i8 %a0, i32 24 + %ins25 = insertelement <32 x i8> %ins24, i8 %a1, i32 25 + %ins26 = insertelement <32 x i8> %ins25, i8 %a0, i32 26 + %ins27 = insertelement <32 x i8> %ins26, i8 %a1, i32 27 + %ins28 = 
insertelement <32 x i8> %ins27, i8 %a0, i32 28 + %ins29 = insertelement <32 x i8> %ins28, i8 %a1, i32 29 + %ins30 = insertelement <32 x i8> %ins29, i8 %a0, i32 30 + %ins31 = insertelement <32 x i8> %ins30, i8 %a1, i32 31 + store <32 x i8> %ins31, ptr %dst + ret void +} + +define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind { +; LA32-LABEL: buildvector_v16i16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.h $t0, $sp, 32 +; LA32-NEXT: ld.h $t1, $sp, 28 +; LA32-NEXT: ld.h $t2, $sp, 24 +; LA32-NEXT: ld.h $t3, $sp, 20 +; LA32-NEXT: ld.h $t4, $sp, 16 +; LA32-NEXT: ld.h $t5, $sp, 12 +; LA32-NEXT: ld.h $t6, $sp, 8 +; LA32-NEXT: ld.h $t7, $sp, 4 +; LA32-NEXT: ld.h $t8, $sp, 0 +; LA32-NEXT: vinsgr2vr.h $vr0, $a1, 0 +; LA32-NEXT: xvreplgr2vr.h $xr1, $a2 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 17 +; LA32-NEXT: xvreplgr2vr.h $xr1, $a3 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 34 +; LA32-NEXT: xvreplgr2vr.h $xr1, $a4 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 51 +; LA32-NEXT: xvreplgr2vr.h $xr1, $a5 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 68 +; LA32-NEXT: xvreplgr2vr.h $xr1, $a6 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 85 +; LA32-NEXT: xvreplgr2vr.h $xr1, $a7 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 102 +; LA32-NEXT: xvreplgr2vr.h $xr1, $t8 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 119 +; LA32-NEXT: xvreplgr2vr.h $xr1, $t7 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 0 +; LA32-NEXT: xvreplgr2vr.h $xr1, $t6 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 17 +; LA32-NEXT: xvreplgr2vr.h $xr1, $t5 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 
48 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 34 +; LA32-NEXT: xvreplgr2vr.h $xr1, $t4 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 51 +; LA32-NEXT: xvreplgr2vr.h $xr1, $t3 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 68 +; LA32-NEXT: xvreplgr2vr.h $xr1, $t2 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 85 +; LA32-NEXT: xvreplgr2vr.h $xr1, $t1 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 102 +; LA32-NEXT: xvreplgr2vr.h $xr1, $t0 +; LA32-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA32-NEXT: xvextrins.h $xr0, $xr1, 119 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v16i16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.h $t0, $sp, 64 +; LA64-NEXT: ld.h $t1, $sp, 56 +; LA64-NEXT: ld.h $t2, $sp, 48 +; LA64-NEXT: ld.h $t3, $sp, 40 +; LA64-NEXT: ld.h $t4, $sp, 32 +; LA64-NEXT: ld.h $t5, $sp, 24 +; LA64-NEXT: ld.h $t6, $sp, 16 +; LA64-NEXT: ld.h $t7, $sp, 8 +; LA64-NEXT: ld.h $t8, $sp, 0 +; LA64-NEXT: vinsgr2vr.h $vr0, $a1, 0 +; LA64-NEXT: xvreplgr2vr.h $xr1, $a2 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 17 +; LA64-NEXT: xvreplgr2vr.h $xr1, $a3 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 34 +; LA64-NEXT: xvreplgr2vr.h $xr1, $a4 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 51 +; LA64-NEXT: xvreplgr2vr.h $xr1, $a5 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 68 +; LA64-NEXT: xvreplgr2vr.h $xr1, $a6 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 85 +; LA64-NEXT: xvreplgr2vr.h $xr1, $a7 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 102 +; LA64-NEXT: xvreplgr2vr.h $xr1, $t8 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 18 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 119 +; LA64-NEXT: xvreplgr2vr.h $xr1, $t7 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: 
xvextrins.h $xr0, $xr1, 0 +; LA64-NEXT: xvreplgr2vr.h $xr1, $t6 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 17 +; LA64-NEXT: xvreplgr2vr.h $xr1, $t5 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 34 +; LA64-NEXT: xvreplgr2vr.h $xr1, $t4 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 51 +; LA64-NEXT: xvreplgr2vr.h $xr1, $t3 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 68 +; LA64-NEXT: xvreplgr2vr.h $xr1, $t2 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 85 +; LA64-NEXT: xvreplgr2vr.h $xr1, $t1 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 102 +; LA64-NEXT: xvreplgr2vr.h $xr1, $t0 +; LA64-NEXT: xvpermi.q $xr1, $xr0, 48 +; LA64-NEXT: xvextrins.h $xr0, $xr1, 119 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret +entry: %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0 %ins1 = insertelement <16 x i16> %ins0, i16 %a1, i32 1 %ins2 = insertelement <16 x i16> %ins1, i16 %a2, i32 2 @@ -763,6 +1763,223 @@ entry: ret void } +define void @buildvector_v16i16_subseq_2(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { +; CHECK-LABEL: buildvector_v16i16_subseq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld.h $t0, $sp, 0 +; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 +; CHECK-NEXT: xvori.b $xr2, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.h $xr3, $a3 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 17 +; CHECK-NEXT: xvori.b $xr2, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.h $xr4, $a4 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 34 +; CHECK-NEXT: xvori.b $xr2, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.h $xr5, $a5 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 51 +; CHECK-NEXT: xvori.b $xr2, $xr5, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; 
CHECK-NEXT: xvreplgr2vr.h $xr6, $a6 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 68 +; CHECK-NEXT: xvori.b $xr2, $xr6, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.h $xr7, $a7 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 85 +; CHECK-NEXT: xvori.b $xr2, $xr7, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.h $xr8, $t0 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 102 +; CHECK-NEXT: xvori.b $xr2, $xr8, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 119 +; CHECK-NEXT: xvreplgr2vr.h $xr2, $a1 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr1, 17 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr3, 34 +; CHECK-NEXT: xvpermi.q $xr4, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr4, 51 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr5, 68 +; CHECK-NEXT: xvpermi.q $xr6, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr6, 85 +; CHECK-NEXT: xvpermi.q $xr7, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr7, 102 +; CHECK-NEXT: xvpermi.q $xr8, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr8, 119 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0 + %ins1 = insertelement <16 x i16> %ins0, i16 %a1, i32 1 + %ins2 = insertelement <16 x i16> %ins1, i16 %a2, i32 2 + %ins3 = insertelement <16 x i16> %ins2, i16 %a3, i32 3 + %ins4 = insertelement <16 x i16> %ins3, i16 %a4, i32 4 + %ins5 = insertelement <16 x i16> %ins4, i16 %a5, i32 5 + %ins6 = insertelement <16 x i16> %ins5, i16 %a6, i32 6 + %ins7 = insertelement <16 x i16> %ins6, i16 %a7, i32 7 + %ins8 = insertelement <16 x i16> %ins7, i16 %a0, i32 8 + %ins9 = insertelement <16 x i16> %ins8, i16 %a1, i32 9 + %ins10 = insertelement <16 x i16> %ins9, i16 %a2, i32 10 + %ins11 = insertelement <16 x i16> %ins10, i16 %a3, i32 11 + %ins12 = insertelement <16 x i16> 
%ins11, i16 %a4, i32 12 + %ins13 = insertelement <16 x i16> %ins12, i16 %a5, i32 13 + %ins14 = insertelement <16 x i16> %ins13, i16 %a6, i32 14 + %ins15 = insertelement <16 x i16> %ins14, i16 %a7, i32 15 + store <16 x i16> %ins15, ptr %dst + ret void +} + +define void @buildvector_v16i16_subseq_4(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind { +; CHECK-LABEL: buildvector_v16i16_subseq_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 +; CHECK-NEXT: xvori.b $xr2, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.h $xr3, $a3 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 17 +; CHECK-NEXT: xvori.b $xr2, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.h $xr4, $a4 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 34 +; CHECK-NEXT: xvori.b $xr2, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvreplgr2vr.h $xr5, $a1 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 51 +; CHECK-NEXT: xvori.b $xr2, $xr5, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 68 +; CHECK-NEXT: xvori.b $xr2, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 85 +; CHECK-NEXT: xvori.b $xr2, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 102 +; CHECK-NEXT: xvori.b $xr2, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 18 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 119 +; CHECK-NEXT: xvori.b $xr2, $xr5, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 0 +; CHECK-NEXT: xvori.b $xr2, $xr1, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 17 +; CHECK-NEXT: xvori.b $xr2, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 34 +; CHECK-NEXT: xvori.b $xr2, $xr4, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr2, 51 +; CHECK-NEXT: xvpermi.q $xr5, $xr0, 48 +; CHECK-NEXT: 
xvextrins.h $xr0, $xr5, 68 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr1, 85 +; CHECK-NEXT: xvpermi.q $xr3, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr3, 102 +; CHECK-NEXT: xvpermi.q $xr4, $xr0, 48 +; CHECK-NEXT: xvextrins.h $xr0, $xr4, 119 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0 + %ins1 = insertelement <16 x i16> %ins0, i16 %a1, i32 1 + %ins2 = insertelement <16 x i16> %ins1, i16 %a2, i32 2 + %ins3 = insertelement <16 x i16> %ins2, i16 %a3, i32 3 + %ins4 = insertelement <16 x i16> %ins3, i16 %a0, i32 4 + %ins5 = insertelement <16 x i16> %ins4, i16 %a1, i32 5 + %ins6 = insertelement <16 x i16> %ins5, i16 %a2, i32 6 + %ins7 = insertelement <16 x i16> %ins6, i16 %a3, i32 7 + %ins8 = insertelement <16 x i16> %ins7, i16 %a0, i32 8 + %ins9 = insertelement <16 x i16> %ins8, i16 %a1, i32 9 + %ins10 = insertelement <16 x i16> %ins9, i16 %a2, i32 10 + %ins11 = insertelement <16 x i16> %ins10, i16 %a3, i32 11 + %ins12 = insertelement <16 x i16> %ins11, i16 %a0, i32 12 + %ins13 = insertelement <16 x i16> %ins12, i16 %a1, i32 13 + %ins14 = insertelement <16 x i16> %ins13, i16 %a2, i32 14 + %ins15 = insertelement <16 x i16> %ins14, i16 %a3, i32 15 + store <16 x i16> %ins15, ptr %dst + ret void +} + +define void @buildvector_v16i16_subseq_8(ptr %dst, i16 %a0, i16 %a1) nounwind { +; CHECK-LABEL: buildvector_v16i16_subseq_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.h $xr0, $a2 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0 +; CHECK-NEXT: xvori.b $xr2, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 18 +; CHECK-NEXT: xvreplgr2vr.h $xr3, $a1 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 17 +; CHECK-NEXT: xvori.b $xr2, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 18 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 34 +; CHECK-NEXT: xvori.b $xr2, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 18 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 51 +; CHECK-NEXT: xvori.b $xr2, $xr3, 0 +; CHECK-NEXT: 
xvpermi.q $xr2, $xr1, 18 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 68 +; CHECK-NEXT: xvori.b $xr2, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 18 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 85 +; CHECK-NEXT: xvori.b $xr2, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 18 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 102 +; CHECK-NEXT: xvori.b $xr2, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 18 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 119 +; CHECK-NEXT: xvori.b $xr2, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 48 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 0 +; CHECK-NEXT: xvori.b $xr2, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 48 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 17 +; CHECK-NEXT: xvori.b $xr2, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 48 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 34 +; CHECK-NEXT: xvori.b $xr2, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 48 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 51 +; CHECK-NEXT: xvori.b $xr2, $xr3, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 48 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 68 +; CHECK-NEXT: xvori.b $xr2, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr2, $xr1, 48 +; CHECK-NEXT: xvextrins.h $xr1, $xr2, 85 +; CHECK-NEXT: xvpermi.q $xr3, $xr1, 48 +; CHECK-NEXT: xvextrins.h $xr1, $xr3, 102 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: xvextrins.h $xr1, $xr0, 119 +; CHECK-NEXT: xvst $xr1, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0 + %ins1 = insertelement <16 x i16> %ins0, i16 %a1, i32 1 + %ins2 = insertelement <16 x i16> %ins1, i16 %a0, i32 2 + %ins3 = insertelement <16 x i16> %ins2, i16 %a1, i32 3 + %ins4 = insertelement <16 x i16> %ins3, i16 %a0, i32 4 + %ins5 = insertelement <16 x i16> %ins4, i16 %a1, i32 5 + %ins6 = insertelement <16 x i16> %ins5, i16 %a0, i32 6 + %ins7 = insertelement <16 x i16> %ins6, i16 %a1, i32 7 + %ins8 = insertelement <16 x i16> %ins7, i16 %a0, i32 8 + %ins9 = insertelement <16 x i16> %ins8, i16 %a1, i32 9 + %ins10 = insertelement <16 x i16> %ins9, i16 %a0, i32 10 + 
%ins11 = insertelement <16 x i16> %ins10, i16 %a1, i32 11 + %ins12 = insertelement <16 x i16> %ins11, i16 %a0, i32 12 + %ins13 = insertelement <16 x i16> %ins12, i16 %a1, i32 13 + %ins14 = insertelement <16 x i16> %ins13, i16 %a0, i32 14 + %ins15 = insertelement <16 x i16> %ins14, i16 %a1, i32 15 + store <16 x i16> %ins15, ptr %dst + ret void +} + define void @buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind { ; CHECK-LABEL: buildvector_v8i32: ; CHECK: # %bb.0: # %entry @@ -835,16 +2052,82 @@ entry: ret void } -define void @buildvector_v4i64(ptr %dst, i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind { -; CHECK-LABEL: buildvector_v4i64: +define void @buildvector_v8i32_subseq_2(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { +; CHECK-LABEL: buildvector_v8i32_subseq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 2 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 3 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 4 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 5 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 6 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 7 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x i32> undef, i32 %a0, i32 0 + %ins1 = insertelement <8 x i32> %ins0, i32 %a1, i32 1 + %ins2 = insertelement <8 x i32> %ins1, i32 %a2, i32 2 + %ins3 = insertelement <8 x i32> %ins2, i32 %a3, i32 3 + %ins4 = insertelement <8 x i32> %ins3, i32 %a0, i32 4 + %ins5 = insertelement <8 x i32> %ins4, i32 %a1, i32 5 + %ins6 = insertelement <8 x i32> %ins5, i32 %a2, i32 6 + %ins7 = insertelement <8 x i32> %ins6, i32 %a3, i32 7 + store <8 x i32> %ins7, ptr %dst + ret void +} + +define void @buildvector_v8i32_subseq_4(ptr %dst, i32 %a0, i32 %a1) nounwind { +; CHECK-LABEL: buildvector_v8i32_subseq_4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 -; 
CHECK-NEXT: xvinsgr2vr.d $xr0, $a3, 2 -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a4, 3 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 2 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 3 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 4 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 5 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 6 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 7 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: + %ins0 = insertelement <8 x i32> undef, i32 %a0, i32 0 + %ins1 = insertelement <8 x i32> %ins0, i32 %a1, i32 1 + %ins2 = insertelement <8 x i32> %ins1, i32 %a0, i32 2 + %ins3 = insertelement <8 x i32> %ins2, i32 %a1, i32 3 + %ins4 = insertelement <8 x i32> %ins3, i32 %a0, i32 4 + %ins5 = insertelement <8 x i32> %ins4, i32 %a1, i32 5 + %ins6 = insertelement <8 x i32> %ins5, i32 %a0, i32 6 + %ins7 = insertelement <8 x i32> %ins6, i32 %a1, i32 7 + store <8 x i32> %ins7, ptr %dst + ret void +} + +define void @buildvector_v4i64(ptr %dst, i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind { +; LA32-LABEL: buildvector_v4i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $t0, $sp, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 1 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a3, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a4, 3 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a5, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a6, 5 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a7, 6 +; LA32-NEXT: xvinsgr2vr.w $xr0, $t0, 7 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v4i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvinsgr2vr.d $xr0, $a1, 0 +; LA64-NEXT: xvinsgr2vr.d $xr0, $a2, 1 +; LA64-NEXT: xvinsgr2vr.d $xr0, $a3, 2 +; LA64-NEXT: xvinsgr2vr.d $xr0, $a4, 3 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret +entry: %ins0 = insertelement <4 x i64> undef, i64 %a0, i32 0 %ins1 = insertelement <4 x i64> %ins0, i64 %a1, i32 1 %ins2 = insertelement <4 x i64> %ins1, i64 %a2, i32 2 @@ -854,12 +2137,21 @@ entry: } define 
void @buildvector_v4i64_partial(ptr %dst, i64 %a1, i64 %a2) nounwind { -; CHECK-LABEL: buildvector_v4i64_partial: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1 -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 2 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: buildvector_v4i64_partial: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 3 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a3, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a4, 5 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v4i64_partial: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvinsgr2vr.d $xr0, $a1, 1 +; LA64-NEXT: xvinsgr2vr.d $xr0, $a2, 2 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %ins0 = insertelement <4 x i64> undef, i64 undef, i32 0 %ins1 = insertelement <4 x i64> %ins0, i64 %a1, i32 1 @@ -870,13 +2162,23 @@ entry: } define void @buildvector_v4i64_with_constant(ptr %dst, i64 %a0, i64 %a2) nounwind { -; CHECK-LABEL: buildvector_v4i64_with_constant: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvrepli.b $xr0, 0 -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 2 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: buildvector_v4i64_with_constant: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvrepli.b $xr0, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 1 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a3, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a4, 5 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v4i64_with_constant: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvrepli.b $xr0, 0 +; LA64-NEXT: xvinsgr2vr.d $xr0, $a1, 0 +; LA64-NEXT: xvinsgr2vr.d $xr0, $a2, 2 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %ins0 = insertelement <4 x i64> undef, i64 %a0, i32 0 %ins1 = insertelement <4 x i64> %ins0, i64 0, i32 1 @@ -886,6 +2188,37 @@ entry: ret void } +define void @buildvector_v4i64_subseq_2(ptr %dst, i64 
%a0, i64 %a1) nounwind { +; LA32-LABEL: buildvector_v4i64_subseq_2: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 1 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a3, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a4, 3 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 4 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 5 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a3, 6 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a4, 7 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v4i64_subseq_2: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvinsgr2vr.d $xr0, $a1, 0 +; LA64-NEXT: xvinsgr2vr.d $xr0, $a2, 1 +; LA64-NEXT: xvinsgr2vr.d $xr0, $a1, 2 +; LA64-NEXT: xvinsgr2vr.d $xr0, $a2, 3 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret +entry: + %ins0 = insertelement <4 x i64> undef, i64 %a0, i32 0 + %ins1 = insertelement <4 x i64> %ins0, i64 %a1, i32 1 + %ins2 = insertelement <4 x i64> %ins1, i64 %a0, i32 2 + %ins3 = insertelement <4 x i64> %ins2, i64 %a1, i32 3 + store <4 x i64> %ins3, ptr %dst + ret void +} + define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind { ; CHECK-LABEL: buildvector_v8f32: ; CHECK: # %bb.0: # %entry @@ -973,6 +2306,64 @@ entry: ret void } +define void @buildvector_v8f32_subseq_2(ptr %dst, float %a0, float %a1, float %a2, float %a3) nounwind { +; CHECK-LABEL: buildvector_v8f32_subseq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f3 killed $f3 def $xr3 +; CHECK-NEXT: # kill: def $f2 killed $f2 def $xr2 +; CHECK-NEXT: # kill: def $f1 killed $f1 def $xr1 +; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 +; CHECK-NEXT: xvori.b $xr4, $xr0, 0 +; CHECK-NEXT: xvinsve0.w $xr4, $xr1, 1 +; CHECK-NEXT: xvinsve0.w $xr4, $xr2, 2 +; CHECK-NEXT: xvinsve0.w $xr4, $xr3, 3 +; CHECK-NEXT: xvinsve0.w $xr4, $xr0, 4 +; CHECK-NEXT: xvinsve0.w $xr4, $xr1, 5 +; CHECK-NEXT: xvinsve0.w $xr4, $xr2, 6 +; CHECK-NEXT: xvinsve0.w $xr4, $xr3, 7 +; CHECK-NEXT: xvst $xr4, 
$a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x float> undef, float %a0, i32 0 + %ins1 = insertelement <8 x float> %ins0, float %a1, i32 1 + %ins2 = insertelement <8 x float> %ins1, float %a2, i32 2 + %ins3 = insertelement <8 x float> %ins2, float %a3, i32 3 + %ins4 = insertelement <8 x float> %ins3, float %a0, i32 4 + %ins5 = insertelement <8 x float> %ins4, float %a1, i32 5 + %ins6 = insertelement <8 x float> %ins5, float %a2, i32 6 + %ins7 = insertelement <8 x float> %ins6, float %a3, i32 7 + store <8 x float> %ins7, ptr %dst + ret void +} + +define void @buildvector_v8f32_subseq_4(ptr %dst, float %a0, float %a1) nounwind { +; CHECK-LABEL: buildvector_v8f32_subseq_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f1 killed $f1 def $xr1 +; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 +; CHECK-NEXT: xvori.b $xr2, $xr0, 0 +; CHECK-NEXT: xvinsve0.w $xr2, $xr1, 1 +; CHECK-NEXT: xvinsve0.w $xr2, $xr0, 2 +; CHECK-NEXT: xvinsve0.w $xr2, $xr1, 3 +; CHECK-NEXT: xvinsve0.w $xr2, $xr0, 4 +; CHECK-NEXT: xvinsve0.w $xr2, $xr1, 5 +; CHECK-NEXT: xvinsve0.w $xr2, $xr0, 6 +; CHECK-NEXT: xvinsve0.w $xr2, $xr1, 7 +; CHECK-NEXT: xvst $xr2, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x float> undef, float %a0, i32 0 + %ins1 = insertelement <8 x float> %ins0, float %a1, i32 1 + %ins2 = insertelement <8 x float> %ins1, float %a0, i32 2 + %ins3 = insertelement <8 x float> %ins2, float %a1, i32 3 + %ins4 = insertelement <8 x float> %ins3, float %a0, i32 4 + %ins5 = insertelement <8 x float> %ins4, float %a1, i32 5 + %ins6 = insertelement <8 x float> %ins5, float %a0, i32 6 + %ins7 = insertelement <8 x float> %ins6, float %a1, i32 7 + store <8 x float> %ins7, ptr %dst + ret void +} + define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, double %a3) nounwind { ; CHECK-LABEL: buildvector_v4f64: ; CHECK: # %bb.0: # %entry @@ -1029,3 +2420,23 @@ entry: store <4 x double> %ins3, ptr %dst ret void } + +define void 
@buildvector_v4f64_subseq_2(ptr %dst, double %a0, double %a1) nounwind { +; CHECK-LABEL: buildvector_v4f64_subseq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $xr1 +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; CHECK-NEXT: xvori.b $xr2, $xr0, 0 +; CHECK-NEXT: xvinsve0.d $xr2, $xr1, 1 +; CHECK-NEXT: xvinsve0.d $xr2, $xr0, 2 +; CHECK-NEXT: xvinsve0.d $xr2, $xr1, 3 +; CHECK-NEXT: xvst $xr2, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x double> undef, double %a0, i32 0 + %ins1 = insertelement <4 x double> %ins0, double %a1, i32 1 + %ins2 = insertelement <4 x double> %ins1, double %a0, i32 2 + %ins3 = insertelement <4 x double> %ins2, double %a1, i32 3 + store <4 x double> %ins3, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll b/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll index 231e82a6d53a..d1868a949a07 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define <32 x i8> @concat_poison_v32i8_1(<16 x i8> %a) { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll index 7786e399c95f..ba2118fb94f6 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @ctpop_v32i8(ptr %src, ptr %dst) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll 
b/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll index 769d9ef81faf..7514dafa8000 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/fdiv-reciprocal-estimate.ll @@ -1,25 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx,-frecipe < %s | FileCheck %s --check-prefix=FAULT -; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx,-frecipe < %s | FileCheck %s --check-prefixes=FAULT,FAULT-LA32 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx,+frecipe < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx,-frecipe < %s | FileCheck %s --check-prefixes=FAULT,FAULT-LA64 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @fdiv_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { ; FAULT-LABEL: fdiv_v8f32: -; FAULT: # %bb.0: -; FAULT-NEXT: xvld $xr0, $a1, 0 -; FAULT-NEXT: xvld $xr1, $a2, 0 -; FAULT-NEXT: xvfdiv.s $xr0, $xr0, $xr1 -; FAULT-NEXT: xvst $xr0, $a0, 0 +; FAULT: # %bb.0: # %entry +; FAULT-NEXT: xvld $xr0, $a1, 0 +; FAULT-NEXT: xvld $xr1, $a2, 0 +; FAULT-NEXT: xvfdiv.s $xr0, $xr0, $xr1 +; FAULT-NEXT: xvst $xr0, $a0, 0 ; FAULT-NEXT: ret ; ; CHECK-LABEL: fdiv_v8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a2, 0 -; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: xvfrecipe.s $xr2, $xr0 -; CHECK-NEXT: xvfmul.s $xr3, $xr1, $xr2 -; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr3, $xr1 -; CHECK-NEXT: xvfmadd.s $xr0, $xr2, $xr0, $xr3 -; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfrecipe.s $xr2, $xr0 +; CHECK-NEXT: xvfmul.s $xr3, $xr1, $xr2 +; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr3, $xr1 +; CHECK-NEXT: xvfmadd.s $xr0, $xr2, $xr0, $xr3 +; CHECK-NEXT: xvst 
$xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -31,27 +33,42 @@ entry: define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { ; FAULT-LABEL: fdiv_v4f64: -; FAULT: # %bb.0: -; FAULT-NEXT: xvld $xr0, $a1, 0 -; FAULT-NEXT: xvld $xr1, $a2, 0 -; FAULT-NEXT: xvfdiv.d $xr0, $xr0, $xr1 -; FAULT-NEXT: xvst $xr0, $a0, 0 +; FAULT: # %bb.0: # %entry +; FAULT-NEXT: xvld $xr0, $a1, 0 +; FAULT-NEXT: xvld $xr1, $a2, 0 +; FAULT-NEXT: xvfdiv.d $xr0, $xr0, $xr1 +; FAULT-NEXT: xvst $xr0, $a0, 0 ; FAULT-NEXT: ret ; -; CHECK-LABEL: fdiv_v4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a2, 0 -; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: lu52i.d $a1, $zero, -1025 -; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1 -; CHECK-NEXT: xvfrecipe.d $xr3, $xr0 -; CHECK-NEXT: xvfmadd.d $xr2, $xr0, $xr3, $xr2 -; CHECK-NEXT: xvfnmsub.d $xr2, $xr2, $xr3, $xr3 -; CHECK-NEXT: xvfmul.d $xr3, $xr1, $xr2 -; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr3, $xr1 -; CHECK-NEXT: xvfmadd.d $xr0, $xr2, $xr0, $xr3 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: fdiv_v4f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI1_0) +; LA32-NEXT: xvld $xr0, $a2, 0 +; LA32-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI1_0) +; LA32-NEXT: xvld $xr2, $a1, 0 +; LA32-NEXT: xvfrecipe.d $xr3, $xr0 +; LA32-NEXT: xvfmadd.d $xr1, $xr0, $xr3, $xr1 +; LA32-NEXT: xvfnmsub.d $xr1, $xr1, $xr3, $xr3 +; LA32-NEXT: xvfmul.d $xr3, $xr2, $xr1 +; LA32-NEXT: xvfnmsub.d $xr0, $xr0, $xr3, $xr2 +; LA32-NEXT: xvfmadd.d $xr0, $xr1, $xr0, $xr3 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: fdiv_v4f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a2, 0 +; LA64-NEXT: xvld $xr1, $a1, 0 +; LA64-NEXT: lu52i.d $a1, $zero, -1025 +; LA64-NEXT: xvreplgr2vr.d $xr2, $a1 +; LA64-NEXT: xvfrecipe.d $xr3, $xr0 +; LA64-NEXT: xvfmadd.d $xr2, $xr0, $xr3, $xr2 +; LA64-NEXT: xvfnmsub.d $xr2, $xr2, $xr3, $xr3 +; LA64-NEXT: xvfmul.d $xr3, $xr1, $xr2 +; LA64-NEXT: xvfnmsub.d $xr0, 
$xr0, $xr3, $xr1 +; LA64-NEXT: xvfmadd.d $xr0, $xr2, $xr0, $xr3 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -63,21 +80,21 @@ entry: ;; 1.0 / vec define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind { ; FAULT-LABEL: one_fdiv_v8f32: -; FAULT: # %bb.0: -; FAULT-NEXT: xvld $xr0, $a1, 0 -; FAULT-NEXT: xvfrecip.s $xr0, $xr0 -; FAULT-NEXT: xvst $xr0, $a0, 0 +; FAULT: # %bb.0: # %entry +; FAULT-NEXT: xvld $xr0, $a1, 0 +; FAULT-NEXT: xvfrecip.s $xr0, $xr0 +; FAULT-NEXT: xvst $xr0, $a0, 0 ; FAULT-NEXT: ret ; ; CHECK-LABEL: one_fdiv_v8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvfrecipe.s $xr1, $xr0 -; CHECK-NEXT: lu12i.w $a1, -264192 -; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1 -; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr1 -; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvfrecipe.s $xr1, $xr0 +; CHECK-NEXT: lu12i.w $a1, -264192 +; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1 +; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -87,25 +104,47 @@ entry: } define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind { -; FAULT-LABEL: one_fdiv_v4f64: -; FAULT: # %bb.0: -; FAULT-NEXT: xvld $xr0, $a1, 0 -; FAULT-NEXT: xvfrecip.d $xr0, $xr0 -; FAULT-NEXT: xvst $xr0, $a0, 0 -; FAULT-NEXT: ret +; FAULT-LA32-LABEL: one_fdiv_v4f64: +; FAULT-LA32: # %bb.0: # %entry +; FAULT-LA32-NEXT: xvld $xr0, $a1, 0 +; FAULT-LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; FAULT-LA32-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI3_0) +; FAULT-LA32-NEXT: xvfdiv.d $xr0, $xr1, $xr0 +; FAULT-LA32-NEXT: xvst $xr0, $a0, 0 +; FAULT-LA32-NEXT: ret ; -; CHECK-LABEL: one_fdiv_v4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvfrecipe.d $xr1, $xr0 -; CHECK-NEXT: lu52i.d 
$a1, $zero, 1023 -; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1 -; CHECK-NEXT: xvfnmsub.d $xr3, $xr0, $xr1, $xr2 -; CHECK-NEXT: xvfmadd.d $xr1, $xr1, $xr3, $xr1 -; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -; CHECK-NEXT: xvfmadd.d $xr0, $xr1, $xr0, $xr1 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: one_fdiv_v4f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; LA32-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI3_0) +; LA32-NEXT: xvfrecipe.d $xr2, $xr0 +; LA32-NEXT: xvfnmsub.d $xr3, $xr0, $xr2, $xr1 +; LA32-NEXT: xvfmadd.d $xr2, $xr2, $xr3, $xr2 +; LA32-NEXT: xvfnmsub.d $xr0, $xr0, $xr2, $xr1 +; LA32-NEXT: xvfmadd.d $xr0, $xr2, $xr0, $xr2 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; FAULT-LA64-LABEL: one_fdiv_v4f64: +; FAULT-LA64: # %bb.0: # %entry +; FAULT-LA64-NEXT: xvld $xr0, $a1, 0 +; FAULT-LA64-NEXT: xvfrecip.d $xr0, $xr0 +; FAULT-LA64-NEXT: xvst $xr0, $a0, 0 +; FAULT-LA64-NEXT: ret +; +; LA64-LABEL: one_fdiv_v4f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvfrecipe.d $xr1, $xr0 +; LA64-NEXT: lu52i.d $a1, $zero, 1023 +; LA64-NEXT: xvreplgr2vr.d $xr2, $a1 +; LA64-NEXT: xvfnmsub.d $xr3, $xr0, $xr1, $xr2 +; LA64-NEXT: xvfmadd.d $xr1, $xr1, $xr3, $xr1 +; LA64-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-NEXT: xvfmadd.d $xr0, $xr1, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %div = fdiv fast <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %v0 diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll index 0f3df3d573b6..8e1ba7ea1601 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll @@ -1,40 +1,75 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx --fp-contract=fast < %s \ +; RUN: | FileCheck 
%s --check-prefix=LA32-CONTRACT-FAST +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=LA32-CONTRACT-ON +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=LA32-CONTRACT-OFF ; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ -; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST +; RUN: | FileCheck %s --check-prefix=LA64-CONTRACT-FAST ; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ -; RUN: | FileCheck %s --check-prefix=CONTRACT-ON +; RUN: | FileCheck %s --check-prefix=LA64-CONTRACT-ON ; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ -; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF +; RUN: | FileCheck %s --check-prefix=LA64-CONTRACT-OFF define void @xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfmadd_d: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfmadd_d: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfmadd_d: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfmadd_d: +; LA32-CONTRACT-FAST: # %bb.0: 
# %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfmadd_d: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfmadd_d: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfmadd_d: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfmadd_d: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfmadd_d: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 
+; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -46,34 +81,63 @@ entry: } define void @xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfmsub_d: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfmsub_d: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfmsub_d: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfmsub_d: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; 
LA32-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfmsub_d: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -85,36 +149,67 @@ entry: } define void @xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfnmadd_d: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld 
$xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfnmadd_d: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr2 -; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfnmadd_d: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr2 -; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfnmadd_d: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfnmadd_d: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr2 +; LA32-CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfnmadd_d: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; 
LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfnmadd_d: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfnmadd_d: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr2 +; LA64-CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfnmadd_d: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -127,36 +222,67 @@ entry: } define void @xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfnmadd_d_nsz: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: 
xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfnmadd_d_nsz: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfnmadd_d_nsz: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfnmadd_d_nsz: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfnmadd_d_nsz: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfnmadd_d_nsz: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; 
LA32-CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfnmadd_d_nsz: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfnmadd_d_nsz: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfnmadd_d_nsz: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -171,37 +297,69 @@ entry: ;; Check that xvfnmadd.d is not emitted. 
define void @not_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: not_xvfnmadd_d: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: not_xvfnmadd_d: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: not_xvfnmadd_d: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: not_xvfnmadd_d: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_xvfnmadd_d: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; 
LA32-CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_xvfnmadd_d: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_xvfnmadd_d: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_xvfnmadd_d: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_xvfnmadd_d: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -215,36 +373,67 
@@ entry: } define void @xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfnmsub_d: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfnmsub_d: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr2 -; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfnmsub_d: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr2 -; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfnmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfnmsub_d: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA32-CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; 
LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfnmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfnmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfnmsub_d: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA64-CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfnmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -258,34 +447,63 @@ entry: } define void @xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfnmsub_d_nsz: -; CONTRACT-FAST: # %bb.0: # %entry 
-; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfnmsub_d_nsz: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr2, $xr0 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfnmsub_d_nsz: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr2, $xr0 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfnmsub_d_nsz: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfnmsub_d_nsz: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr2, $xr0 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfnmsub_d_nsz: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; 
LA32-CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr2, $xr0 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfnmsub_d_nsz: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfnmsub_d_nsz: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr2, $xr0 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfnmsub_d_nsz: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr2, $xr0 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -299,35 +517,65 @@ entry: ;; Check that xvfnmsub.d is not emitted. 
define void @not_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: not_xvfnmsub_d: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: not_xvfnmsub_d: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr2, $xr0 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: not_xvfnmsub_d: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr2, $xr0 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: not_xvfnmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_xvfnmsub_d: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr2, $xr0 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; 
LA32-CONTRACT-OFF-LABEL: not_xvfnmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr2, $xr0 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_xvfnmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_xvfnmsub_d: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr2, $xr0 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_xvfnmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr2, $xr0 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -340,32 +588,59 @@ entry: } define void @contract_xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: contract_xvfmadd_d: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, 
$a3, 0 -; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: contract_xvfmadd_d: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: contract_xvfmadd_d: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: contract_xvfmadd_d: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_xvfmadd_d: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_xvfmadd_d: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_xvfmadd_d: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; 
LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_xvfmadd_d: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_xvfmadd_d: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -377,32 +652,59 @@ entry: } define void @contract_xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: contract_xvfmsub_d: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: contract_xvfmsub_d: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: contract_xvfmsub_d: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: 
xvfmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: contract_xvfmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_xvfmsub_d: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_xvfmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_xvfmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_xvfmsub_d: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_xvfmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; 
LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -414,32 +716,59 @@ entry: } define void @contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: contract_xvfnmadd_d: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: contract_xvfnmadd_d: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: contract_xvfnmadd_d: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: contract_xvfnmadd_d: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_xvfnmadd_d: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld 
$xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_xvfnmadd_d: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_xvfnmadd_d: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_xvfnmadd_d: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_xvfnmadd_d: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -452,32 +781,59 @@ entry: } define void @contract_xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: contract_xvfnmadd_d_nsz: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 
-; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: contract_xvfnmadd_d_nsz: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: contract_xvfnmadd_d_nsz: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: contract_xvfnmadd_d_nsz: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_xvfnmadd_d_nsz: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_xvfnmadd_d_nsz: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_xvfnmadd_d_nsz: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: 
xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_xvfnmadd_d_nsz: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_xvfnmadd_d_nsz: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -492,35 +848,65 @@ entry: ;; Check that xvfnmadd.d is not emitted. 
define void @not_contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_d: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: not_contract_xvfnmadd_d: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_d: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: not_contract_xvfnmadd_d: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_contract_xvfnmadd_d: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; 
LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_contract_xvfnmadd_d: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_contract_xvfnmadd_d: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_contract_xvfnmadd_d: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_contract_xvfnmadd_d: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -534,32 +920,59 @@ entry: } define void @contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: contract_xvfnmsub_d: -; 
CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: contract_xvfnmsub_d: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: contract_xvfnmsub_d: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: contract_xvfnmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_xvfnmsub_d: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_xvfnmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; 
LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_xvfnmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_xvfnmsub_d: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_xvfnmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -573,32 +986,59 @@ entry: } define void @contract_xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: contract_xvfnmsub_d_nsz: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: contract_xvfnmsub_d_nsz: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: 
contract_xvfnmsub_d_nsz: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: contract_xvfnmsub_d_nsz: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_xvfnmsub_d_nsz: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_xvfnmsub_d_nsz: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_xvfnmsub_d_nsz: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_xvfnmsub_d_nsz: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, 
$a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_xvfnmsub_d_nsz: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -612,35 +1052,65 @@ entry: ;; Check that xvfnmsub.d is not emitted. define void @not_contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_d: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: not_contract_xvfnmsub_d: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_d: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 -; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: not_contract_xvfnmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, 
$a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_contract_xvfnmsub_d: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_contract_xvfnmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA32-CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_contract_xvfnmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_contract_xvfnmsub_d: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: 
not_contract_xvfnmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; LA64-CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -653,32 +1123,59 @@ entry: } define void @xvfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfmadd_d_contract: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfmadd_d_contract: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfmadd_d_contract: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfmadd_d_contract: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfmadd_d_contract: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; 
LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfmadd_d_contract: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfmadd_d_contract: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfmadd_d_contract: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfmadd_d_contract: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -690,32 +1187,59 @@ entry: } define void @xvfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfmsub_d_contract: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: 
xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfmsub_d_contract: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfmsub_d_contract: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfmsub_d_contract: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfmsub_d_contract: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfmsub_d_contract: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfmsub_d_contract: +; 
LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfmsub_d_contract: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfmsub_d_contract: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -727,32 +1251,59 @@ entry: } define void @xvfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfnmadd_d_contract: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfnmadd_d_contract: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfnmadd_d_contract: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; 
CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfnmadd_d_contract: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfnmadd_d_contract: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfnmadd_d_contract: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfnmadd_d_contract: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfnmadd_d_contract: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; 
LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfnmadd_d_contract: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 @@ -765,32 +1316,59 @@ entry: } define void @xvfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfnmsub_d_contract: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfnmsub_d_contract: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfnmsub_d_contract: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfnmsub_d_contract: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfnmsub_d_contract: +; LA32-CONTRACT-ON: # %bb.0: # 
%entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfnmsub_d_contract: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfnmsub_d_contract: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfnmsub_d_contract: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfnmsub_d_contract: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %v1 = load <4 x double>, ptr %a1 diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll index 6fd14d93a751..57b283801675 100644 --- 
a/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll @@ -1,40 +1,75 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=LA32-CONTRACT-FAST +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=LA32-CONTRACT-ON +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=LA32-CONTRACT-OFF ; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ -; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST +; RUN: | FileCheck %s --check-prefix=LA64-CONTRACT-FAST ; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ -; RUN: | FileCheck %s --check-prefix=CONTRACT-ON +; RUN: | FileCheck %s --check-prefix=LA64-CONTRACT-ON ; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ -; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF +; RUN: | FileCheck %s --check-prefix=LA64-CONTRACT-OFF define void @xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfmadd_s: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfmadd_s: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfmadd_s: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; 
CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfmadd_s: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfmadd_s: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfmadd_s: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfmadd_s: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfmadd_s: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmul.s $xr0, 
$xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfmadd_s: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -46,34 +81,63 @@ entry: } define void @xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfmsub_s: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfmsub_s: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfmsub_s: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, 
$xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfmsub_s: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfmsub_s: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; 
LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -85,36 +149,67 @@ entry: } define void @xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfnmadd_s: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfnmadd_s: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr2 -; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfnmadd_s: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr2 -; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfnmadd_s: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfnmadd_s: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr2 +; 
LA32-CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfnmadd_s: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfnmadd_s: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfnmadd_s: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr2 +; LA64-CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfnmadd_s: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -127,36 +222,67 @@ entry: } define void @xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, 
ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfnmadd_s_nsz: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfnmadd_s_nsz: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfnmadd_s_nsz: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfnmadd_s_nsz: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfnmadd_s_nsz: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: 
xvfnmadd_s_nsz: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfnmadd_s_nsz: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfnmadd_s_nsz: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfnmadd_s_nsz: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -171,37 +297,69 @@ entry: ;; Check that fnmadd.s is not emitted. 
define void @not_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: not_xvfnmadd_s: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: not_xvfnmadd_s: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: not_xvfnmadd_s: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: not_xvfnmadd_s: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_xvfnmadd_s: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; 
LA32-CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_xvfnmadd_s: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_xvfnmadd_s: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_xvfnmadd_s: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_xvfnmadd_s: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -215,36 +373,67 
@@ entry: } define void @xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfnmsub_s: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfnmsub_s: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr2 -; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfnmsub_s: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr2 -; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfnmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfnmsub_s: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA32-CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; 
LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfnmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfnmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfnmsub_s: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA64-CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfnmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -258,34 +447,63 @@ entry: } define void @xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfnmsub_s_nsz: -; CONTRACT-FAST: # %bb.0: # %entry -; 
CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfnmsub_s_nsz: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr2, $xr0 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfnmsub_s_nsz: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr2, $xr0 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfnmsub_s_nsz: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfnmsub_s_nsz: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr2, $xr0 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfnmsub_s_nsz: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; 
LA32-CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr2, $xr0 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfnmsub_s_nsz: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfnmsub_s_nsz: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr2, $xr0 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfnmsub_s_nsz: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr2, $xr0 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -299,35 +517,65 @@ entry: ;; Check that fnmsub.s is not emitted. 
define void @not_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: not_xvfnmsub_s: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: not_xvfnmsub_s: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr2, $xr0 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: not_xvfnmsub_s: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr2, $xr0 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: not_xvfnmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_xvfnmsub_s: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr2, $xr0 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; 
LA32-CONTRACT-OFF-LABEL: not_xvfnmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr2, $xr0 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_xvfnmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_xvfnmsub_s: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr2, $xr0 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_xvfnmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr2, $xr0 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -340,32 +588,59 @@ entry: } define void @contract_xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: contract_xvfmadd_s: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, 
$a3, 0 -; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: contract_xvfmadd_s: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: contract_xvfmadd_s: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: contract_xvfmadd_s: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_xvfmadd_s: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_xvfmadd_s: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_xvfmadd_s: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; 
LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_xvfmadd_s: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_xvfmadd_s: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -377,32 +652,59 @@ entry: } define void @contract_xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: contract_xvfmsub_s: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: contract_xvfmsub_s: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: contract_xvfmsub_s: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmsub.s 
$xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: contract_xvfmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_xvfmsub_s: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_xvfmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_xvfmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_xvfmsub_s: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_xvfmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; 
LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -414,32 +716,59 @@ entry: } define void @contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: contract_xvfnmadd_s: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: contract_xvfnmadd_s: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: contract_xvfnmadd_s: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: contract_xvfnmadd_s: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_xvfnmadd_s: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld 
$xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_xvfnmadd_s: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_xvfnmadd_s: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_xvfnmadd_s: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_xvfnmadd_s: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -452,32 +781,59 @@ entry: } define void @contract_xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: contract_xvfnmadd_s_nsz: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 
-; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: contract_xvfnmadd_s_nsz: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: contract_xvfnmadd_s_nsz: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: contract_xvfnmadd_s_nsz: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_xvfnmadd_s_nsz: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_xvfnmadd_s_nsz: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_xvfnmadd_s_nsz: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: 
xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_xvfnmadd_s_nsz: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_xvfnmadd_s_nsz: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -492,35 +848,65 @@ entry: ;; Check that fnmadd.s is not emitted. 
define void @not_contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_s: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: not_contract_xvfnmadd_s: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_s: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: not_contract_xvfnmadd_s: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_contract_xvfnmadd_s: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; 
LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_contract_xvfnmadd_s: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_contract_xvfnmadd_s: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_contract_xvfnmadd_s: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_contract_xvfnmadd_s: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -534,32 +920,59 @@ entry: } define void @contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: contract_xvfnmsub_s: -; 
CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: contract_xvfnmsub_s: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: contract_xvfnmsub_s: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: contract_xvfnmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_xvfnmsub_s: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_xvfnmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; 
LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_xvfnmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_xvfnmsub_s: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_xvfnmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -573,32 +986,59 @@ entry: } define void @contract_xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: contract_xvfnmsub_s_nsz: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: contract_xvfnmsub_s_nsz: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: 
contract_xvfnmsub_s_nsz: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: contract_xvfnmsub_s_nsz: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_xvfnmsub_s_nsz: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_xvfnmsub_s_nsz: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_xvfnmsub_s_nsz: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_xvfnmsub_s_nsz: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, 
$a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_xvfnmsub_s_nsz: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -612,35 +1052,65 @@ entry: ;; Check that fnmsub.s is not emitted. define void @not_contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_s: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: not_contract_xvfnmsub_s: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_s: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 -; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: not_contract_xvfnmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 
+; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_contract_xvfnmsub_s: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_contract_xvfnmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA32-CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_contract_xvfnmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_contract_xvfnmsub_s: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: 
not_contract_xvfnmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; LA64-CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -653,32 +1123,59 @@ entry: } define void @xvfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfmadd_s_contract: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfmadd_s_contract: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfmadd_s_contract: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfmadd_s_contract: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfmadd_s_contract: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; 
LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfmadd_s_contract: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfmadd_s_contract: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfmadd_s_contract: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfmadd_s_contract: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -690,32 +1187,59 @@ entry: } define void @xvfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfmsub_s_contract: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: 
xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfmsub_s_contract: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfmsub_s_contract: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfmsub_s_contract: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfmsub_s_contract: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfmsub_s_contract: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfmsub_s_contract: +; 
LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfmsub_s_contract: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfmsub_s_contract: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -727,32 +1251,59 @@ entry: } define void @xvfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfnmadd_s_contract: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfnmadd_s_contract: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfnmadd_s_contract: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; 
CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfnmadd_s_contract: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfnmadd_s_contract: +; LA32-CONTRACT-ON: # %bb.0: # %entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfnmadd_s_contract: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfnmadd_s_contract: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfnmadd_s_contract: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; 
LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfnmadd_s_contract: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 @@ -765,32 +1316,59 @@ entry: } define void @xvfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { -; CONTRACT-FAST-LABEL: xvfnmsub_s_contract: -; CONTRACT-FAST: # %bb.0: # %entry -; CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-FAST-NEXT: ret -; -; CONTRACT-ON-LABEL: xvfnmsub_s_contract: -; CONTRACT-ON: # %bb.0: # %entry -; CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-ON-NEXT: ret -; -; CONTRACT-OFF-LABEL: xvfnmsub_s_contract: -; CONTRACT-OFF: # %bb.0: # %entry -; CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 -; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 -; CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 -; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 -; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 -; CONTRACT-OFF-NEXT: ret +; LA32-CONTRACT-FAST-LABEL: xvfnmsub_s_contract: +; LA32-CONTRACT-FAST: # %bb.0: # %entry +; LA32-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: xvfnmsub_s_contract: +; LA32-CONTRACT-ON: # %bb.0: # 
%entry +; LA32-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: xvfnmsub_s_contract: +; LA32-CONTRACT-OFF: # %bb.0: # %entry +; LA32-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA32-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA32-CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA32-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: xvfnmsub_s_contract: +; LA64-CONTRACT-FAST: # %bb.0: # %entry +; LA64-CONTRACT-FAST-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-FAST-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: xvfnmsub_s_contract: +; LA64-CONTRACT-ON: # %bb.0: # %entry +; LA64-CONTRACT-ON-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-ON-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: xvfnmsub_s_contract: +; LA64-CONTRACT-OFF: # %bb.0: # %entry +; LA64-CONTRACT-OFF-NEXT: xvld $xr0, $a1, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; LA64-CONTRACT-OFF-NEXT: xvld $xr2, $a3, 0 +; LA64-CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; LA64-CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; LA64-CONTRACT-OFF-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 %v1 = load <8 x float>, ptr %a1 diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll index 48fd12697417..4e475daa8ced 100644 --- 
a/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt-reciprocal-estimate.ll @@ -1,31 +1,114 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lasx,-frecipe < %s | FileCheck %s --check-prefix=FAULT -; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx,-frecipe < %s | FileCheck %s --check-prefix=FAULT-LA32 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx,+frecipe < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx,-frecipe < %s | FileCheck %s --check-prefix=FAULT-LA64 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s --check-prefix=LA64 ;; 1.0 / (fsqrt vec) define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind { -; FAULT-LABEL: one_div_sqrt_v8f32: -; FAULT: # %bb.0: # %entry -; FAULT-NEXT: xvld $xr0, $a1, 0 -; FAULT-NEXT: xvfrsqrt.s $xr0, $xr0 -; FAULT-NEXT: xvst $xr0, $a0, 0 -; FAULT-NEXT: ret +; FAULT-LA32-LABEL: one_div_sqrt_v8f32: +; FAULT-LA32: # %bb.0: # %entry +; FAULT-LA32-NEXT: addi.w $sp, $sp, -128 +; FAULT-LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill +; FAULT-LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill +; FAULT-LA32-NEXT: addi.w $fp, $sp, 128 +; FAULT-LA32-NEXT: bstrins.w $sp, $zero, 4, 0 +; FAULT-LA32-NEXT: vld $vr0, $a1, 16 +; FAULT-LA32-NEXT: vst $vr0, $sp, 48 +; FAULT-LA32-NEXT: ld.w $a2, $a1, 12 +; FAULT-LA32-NEXT: st.w $a2, $sp, 44 +; FAULT-LA32-NEXT: ld.w $a2, $a1, 8 +; FAULT-LA32-NEXT: st.w $a2, $sp, 40 +; FAULT-LA32-NEXT: ld.w $a2, $a1, 4 +; FAULT-LA32-NEXT: st.w $a2, $sp, 36 +; FAULT-LA32-NEXT: ld.w $a1, $a1, 0 +; FAULT-LA32-NEXT: st.w $a1, $sp, 32 +; FAULT-LA32-NEXT: xvld $xr0, $sp, 32 +; FAULT-LA32-NEXT: xvfrsqrt.s $xr0, $xr0 +; FAULT-LA32-NEXT: xvst $xr0, $sp, 64 +; FAULT-LA32-NEXT: vld $vr0, $sp, 80 +; FAULT-LA32-NEXT: vst $vr0, $a0, 16 +; 
FAULT-LA32-NEXT: ld.w $a1, $sp, 76 +; FAULT-LA32-NEXT: st.w $a1, $a0, 12 +; FAULT-LA32-NEXT: ld.w $a1, $sp, 72 +; FAULT-LA32-NEXT: st.w $a1, $a0, 8 +; FAULT-LA32-NEXT: ld.w $a1, $sp, 68 +; FAULT-LA32-NEXT: st.w $a1, $a0, 4 +; FAULT-LA32-NEXT: ld.w $a1, $sp, 64 +; FAULT-LA32-NEXT: st.w $a1, $a0, 0 +; FAULT-LA32-NEXT: addi.w $sp, $fp, -128 +; FAULT-LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload +; FAULT-LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload +; FAULT-LA32-NEXT: addi.w $sp, $sp, 128 +; FAULT-LA32-NEXT: ret ; -; CHECK-LABEL: one_div_sqrt_v8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvfrsqrte.s $xr1, $xr0 -; CHECK-NEXT: xvfmul.s $xr1, $xr0, $xr1 -; CHECK-NEXT: xvfmul.s $xr0, $xr0, $xr1 -; CHECK-NEXT: lu12i.w $a1, -261120 -; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1 -; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 -; CHECK-NEXT: lu12i.w $a1, -266240 -; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1 -; CHECK-NEXT: xvfmul.s $xr1, $xr1, $xr2 -; CHECK-NEXT: xvfmul.s $xr0, $xr1, $xr0 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: one_div_sqrt_v8f32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -128 +; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill +; LA32-NEXT: addi.w $fp, $sp, 128 +; LA32-NEXT: bstrins.w $sp, $zero, 4, 0 +; LA32-NEXT: vld $vr0, $a1, 16 +; LA32-NEXT: vst $vr0, $sp, 48 +; LA32-NEXT: ld.w $a2, $a1, 12 +; LA32-NEXT: st.w $a2, $sp, 44 +; LA32-NEXT: ld.w $a2, $a1, 8 +; LA32-NEXT: st.w $a2, $sp, 40 +; LA32-NEXT: ld.w $a2, $a1, 4 +; LA32-NEXT: st.w $a2, $sp, 36 +; LA32-NEXT: ld.w $a1, $a1, 0 +; LA32-NEXT: st.w $a1, $sp, 32 +; LA32-NEXT: xvld $xr0, $sp, 32 +; LA32-NEXT: xvfrsqrte.s $xr1, $xr0 +; LA32-NEXT: xvfmul.s $xr1, $xr0, $xr1 +; LA32-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA32-NEXT: lu12i.w $a1, -261120 +; LA32-NEXT: xvreplgr2vr.w $xr2, $a1 +; LA32-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA32-NEXT: lu12i.w $a1, -266240 +; LA32-NEXT: 
xvreplgr2vr.w $xr2, $a1 +; LA32-NEXT: xvfmul.s $xr1, $xr1, $xr2 +; LA32-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; LA32-NEXT: xvst $xr0, $sp, 64 +; LA32-NEXT: vld $vr0, $sp, 80 +; LA32-NEXT: vst $vr0, $a0, 16 +; LA32-NEXT: ld.w $a1, $sp, 76 +; LA32-NEXT: st.w $a1, $a0, 12 +; LA32-NEXT: ld.w $a1, $sp, 72 +; LA32-NEXT: st.w $a1, $a0, 8 +; LA32-NEXT: ld.w $a1, $sp, 68 +; LA32-NEXT: st.w $a1, $a0, 4 +; LA32-NEXT: ld.w $a1, $sp, 64 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: addi.w $sp, $fp, -128 +; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 128 +; LA32-NEXT: ret +; +; FAULT-LA64-LABEL: one_div_sqrt_v8f32: +; FAULT-LA64: # %bb.0: # %entry +; FAULT-LA64-NEXT: xvld $xr0, $a1, 0 +; FAULT-LA64-NEXT: xvfrsqrt.s $xr0, $xr0 +; FAULT-LA64-NEXT: xvst $xr0, $a0, 0 +; FAULT-LA64-NEXT: ret +; +; LA64-LABEL: one_div_sqrt_v8f32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvfrsqrte.s $xr1, $xr0 +; LA64-NEXT: xvfmul.s $xr1, $xr0, $xr1 +; LA64-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; LA64-NEXT: lu12i.w $a1, -261120 +; LA64-NEXT: xvreplgr2vr.w $xr2, $a1 +; LA64-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; LA64-NEXT: lu12i.w $a1, -266240 +; LA64-NEXT: xvreplgr2vr.w $xr2, $a1 +; LA64-NEXT: xvfmul.s $xr1, $xr1, $xr2 +; LA64-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0, align 16 %sqrt = call fast <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) @@ -35,34 +118,122 @@ entry: } define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind { -; FAULT-LABEL: one_div_sqrt_v4f64: -; FAULT: # %bb.0: # %entry -; FAULT-NEXT: xvld $xr0, $a1, 0 -; FAULT-NEXT: xvfrsqrt.d $xr0, $xr0 -; FAULT-NEXT: xvst $xr0, $a0, 0 -; FAULT-NEXT: ret +; FAULT-LA32-LABEL: one_div_sqrt_v4f64: +; FAULT-LA32: # %bb.0: # %entry +; FAULT-LA32-NEXT: addi.w $sp, $sp, -128 +; FAULT-LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill +; 
FAULT-LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill +; FAULT-LA32-NEXT: addi.w $fp, $sp, 128 +; FAULT-LA32-NEXT: bstrins.w $sp, $zero, 4, 0 +; FAULT-LA32-NEXT: vld $vr0, $a1, 16 +; FAULT-LA32-NEXT: vst $vr0, $sp, 48 +; FAULT-LA32-NEXT: ld.w $a2, $a1, 12 +; FAULT-LA32-NEXT: st.w $a2, $sp, 44 +; FAULT-LA32-NEXT: ld.w $a2, $a1, 8 +; FAULT-LA32-NEXT: st.w $a2, $sp, 40 +; FAULT-LA32-NEXT: ld.w $a2, $a1, 4 +; FAULT-LA32-NEXT: st.w $a2, $sp, 36 +; FAULT-LA32-NEXT: ld.w $a1, $a1, 0 +; FAULT-LA32-NEXT: st.w $a1, $sp, 32 +; FAULT-LA32-NEXT: xvld $xr0, $sp, 32 +; FAULT-LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) +; FAULT-LA32-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI1_0) +; FAULT-LA32-NEXT: xvfsqrt.d $xr0, $xr0 +; FAULT-LA32-NEXT: xvfdiv.d $xr0, $xr1, $xr0 +; FAULT-LA32-NEXT: xvst $xr0, $sp, 64 +; FAULT-LA32-NEXT: vld $vr0, $sp, 80 +; FAULT-LA32-NEXT: vst $vr0, $a0, 16 +; FAULT-LA32-NEXT: ld.w $a1, $sp, 76 +; FAULT-LA32-NEXT: st.w $a1, $a0, 12 +; FAULT-LA32-NEXT: ld.w $a1, $sp, 72 +; FAULT-LA32-NEXT: st.w $a1, $a0, 8 +; FAULT-LA32-NEXT: ld.w $a1, $sp, 68 +; FAULT-LA32-NEXT: st.w $a1, $a0, 4 +; FAULT-LA32-NEXT: ld.w $a1, $sp, 64 +; FAULT-LA32-NEXT: st.w $a1, $a0, 0 +; FAULT-LA32-NEXT: addi.w $sp, $fp, -128 +; FAULT-LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload +; FAULT-LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload +; FAULT-LA32-NEXT: addi.w $sp, $sp, 128 +; FAULT-LA32-NEXT: ret +; +; LA32-LABEL: one_div_sqrt_v4f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -128 +; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill +; LA32-NEXT: addi.w $fp, $sp, 128 +; LA32-NEXT: bstrins.w $sp, $zero, 4, 0 +; LA32-NEXT: vld $vr0, $a1, 16 +; LA32-NEXT: vst $vr0, $sp, 48 +; LA32-NEXT: ld.w $a2, $a1, 12 +; LA32-NEXT: st.w $a2, $sp, 44 +; LA32-NEXT: ld.w $a2, $a1, 8 +; LA32-NEXT: st.w $a2, $sp, 40 +; LA32-NEXT: ld.w $a2, $a1, 4 +; LA32-NEXT: st.w $a2, $sp, 36 +; LA32-NEXT: ld.w $a1, $a1, 0 +; LA32-NEXT: st.w 
$a1, $sp, 32 +; LA32-NEXT: xvld $xr0, $sp, 32 +; LA32-NEXT: xvfrsqrte.d $xr1, $xr0 +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) +; LA32-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI1_0) +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_1) +; LA32-NEXT: xvld $xr3, $a1, %pc_lo12(.LCPI1_1) +; LA32-NEXT: xvfmul.d $xr1, $xr0, $xr1 +; LA32-NEXT: xvfmul.d $xr4, $xr0, $xr1 +; LA32-NEXT: xvfmadd.d $xr4, $xr4, $xr1, $xr2 +; LA32-NEXT: xvfmul.d $xr1, $xr1, $xr3 +; LA32-NEXT: xvfmul.d $xr1, $xr1, $xr4 +; LA32-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; LA32-NEXT: xvfmul.d $xr1, $xr1, $xr3 +; LA32-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; LA32-NEXT: xvst $xr0, $sp, 64 +; LA32-NEXT: vld $vr0, $sp, 80 +; LA32-NEXT: vst $vr0, $a0, 16 +; LA32-NEXT: ld.w $a1, $sp, 76 +; LA32-NEXT: st.w $a1, $a0, 12 +; LA32-NEXT: ld.w $a1, $sp, 72 +; LA32-NEXT: st.w $a1, $a0, 8 +; LA32-NEXT: ld.w $a1, $sp, 68 +; LA32-NEXT: st.w $a1, $a0, 4 +; LA32-NEXT: ld.w $a1, $sp, 64 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: addi.w $sp, $fp, -128 +; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 128 +; LA32-NEXT: ret +; +; FAULT-LA64-LABEL: one_div_sqrt_v4f64: +; FAULT-LA64: # %bb.0: # %entry +; FAULT-LA64-NEXT: xvld $xr0, $a1, 0 +; FAULT-LA64-NEXT: xvfrsqrt.d $xr0, $xr0 +; FAULT-LA64-NEXT: xvst $xr0, $a0, 0 +; FAULT-LA64-NEXT: ret ; -; CHECK-LABEL: one_div_sqrt_v4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvfrsqrte.d $xr1, $xr0 -; CHECK-NEXT: xvfmul.d $xr1, $xr0, $xr1 -; CHECK-NEXT: xvfmul.d $xr2, $xr0, $xr1 -; CHECK-NEXT: ori $a1, $zero, 0 -; CHECK-NEXT: lu32i.d $a1, -524288 -; CHECK-NEXT: lu52i.d $a1, $a1, -1024 -; CHECK-NEXT: xvreplgr2vr.d $xr3, $a1 -; CHECK-NEXT: xvfmadd.d $xr2, $xr2, $xr1, $xr3 -; CHECK-NEXT: lu52i.d $a1, $zero, -1026 -; CHECK-NEXT: xvreplgr2vr.d $xr4, $a1 -; CHECK-NEXT: xvfmul.d $xr1, $xr1, $xr4 -; CHECK-NEXT: xvfmul.d $xr1, $xr1, $xr2 -; 
CHECK-NEXT: xvfmul.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr3 -; CHECK-NEXT: xvfmul.d $xr1, $xr1, $xr4 -; CHECK-NEXT: xvfmul.d $xr0, $xr1, $xr0 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA64-LABEL: one_div_sqrt_v4f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvfrsqrte.d $xr1, $xr0 +; LA64-NEXT: xvfmul.d $xr1, $xr0, $xr1 +; LA64-NEXT: xvfmul.d $xr2, $xr0, $xr1 +; LA64-NEXT: ori $a1, $zero, 0 +; LA64-NEXT: lu32i.d $a1, -524288 +; LA64-NEXT: lu52i.d $a1, $a1, -1024 +; LA64-NEXT: xvreplgr2vr.d $xr3, $a1 +; LA64-NEXT: xvfmadd.d $xr2, $xr2, $xr1, $xr3 +; LA64-NEXT: lu52i.d $a1, $zero, -1026 +; LA64-NEXT: xvreplgr2vr.d $xr4, $a1 +; LA64-NEXT: xvfmul.d $xr1, $xr1, $xr4 +; LA64-NEXT: xvfmul.d $xr1, $xr1, $xr2 +; LA64-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; LA64-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr3 +; LA64-NEXT: xvfmul.d $xr1, $xr1, $xr4 +; LA64-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0, align 16 %sqrt = call fast <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll index c4a881bdeae9..f8a3284f04dc 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll @@ -1,14 +1,51 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64 ;; fsqrt define void @sqrt_v8f32(ptr %res, ptr %a0) nounwind { -; CHECK-LABEL: sqrt_v8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvfsqrt.s $xr0, $xr0 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: sqrt_v8f32: +; LA32: # %bb.0: # %entry +; 
LA32-NEXT: addi.w $sp, $sp, -128 +; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill +; LA32-NEXT: addi.w $fp, $sp, 128 +; LA32-NEXT: bstrins.w $sp, $zero, 4, 0 +; LA32-NEXT: vld $vr0, $a1, 16 +; LA32-NEXT: vst $vr0, $sp, 48 +; LA32-NEXT: ld.w $a2, $a1, 12 +; LA32-NEXT: st.w $a2, $sp, 44 +; LA32-NEXT: ld.w $a2, $a1, 8 +; LA32-NEXT: st.w $a2, $sp, 40 +; LA32-NEXT: ld.w $a2, $a1, 4 +; LA32-NEXT: st.w $a2, $sp, 36 +; LA32-NEXT: ld.w $a1, $a1, 0 +; LA32-NEXT: st.w $a1, $sp, 32 +; LA32-NEXT: xvld $xr0, $sp, 32 +; LA32-NEXT: xvfsqrt.s $xr0, $xr0 +; LA32-NEXT: xvst $xr0, $sp, 64 +; LA32-NEXT: vld $vr0, $sp, 80 +; LA32-NEXT: vst $vr0, $a0, 16 +; LA32-NEXT: ld.w $a1, $sp, 76 +; LA32-NEXT: st.w $a1, $a0, 12 +; LA32-NEXT: ld.w $a1, $sp, 72 +; LA32-NEXT: st.w $a1, $a0, 8 +; LA32-NEXT: ld.w $a1, $sp, 68 +; LA32-NEXT: st.w $a1, $a0, 4 +; LA32-NEXT: ld.w $a1, $sp, 64 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: addi.w $sp, $fp, -128 +; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 128 +; LA32-NEXT: ret +; +; LA64-LABEL: sqrt_v8f32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvfsqrt.s $xr0, $xr0 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0, align 16 %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) @@ -17,12 +54,48 @@ entry: } define void @sqrt_v4f64(ptr %res, ptr %a0) nounwind { -; CHECK-LABEL: sqrt_v4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvfsqrt.d $xr0, $xr0 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: sqrt_v4f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -128 +; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill +; LA32-NEXT: addi.w $fp, $sp, 128 +; LA32-NEXT: bstrins.w $sp, $zero, 4, 0 +; LA32-NEXT: vld $vr0, 
$a1, 16 +; LA32-NEXT: vst $vr0, $sp, 48 +; LA32-NEXT: ld.w $a2, $a1, 12 +; LA32-NEXT: st.w $a2, $sp, 44 +; LA32-NEXT: ld.w $a2, $a1, 8 +; LA32-NEXT: st.w $a2, $sp, 40 +; LA32-NEXT: ld.w $a2, $a1, 4 +; LA32-NEXT: st.w $a2, $sp, 36 +; LA32-NEXT: ld.w $a1, $a1, 0 +; LA32-NEXT: st.w $a1, $sp, 32 +; LA32-NEXT: xvld $xr0, $sp, 32 +; LA32-NEXT: xvfsqrt.d $xr0, $xr0 +; LA32-NEXT: xvst $xr0, $sp, 64 +; LA32-NEXT: vld $vr0, $sp, 80 +; LA32-NEXT: vst $vr0, $a0, 16 +; LA32-NEXT: ld.w $a1, $sp, 76 +; LA32-NEXT: st.w $a1, $a0, 12 +; LA32-NEXT: ld.w $a1, $sp, 72 +; LA32-NEXT: st.w $a1, $a0, 8 +; LA32-NEXT: ld.w $a1, $sp, 68 +; LA32-NEXT: st.w $a1, $a0, 4 +; LA32-NEXT: ld.w $a1, $sp, 64 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: addi.w $sp, $fp, -128 +; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 128 +; LA32-NEXT: ret +; +; LA64-LABEL: sqrt_v4f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvfsqrt.d $xr0, $xr0 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0, align 16 %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) @@ -32,12 +105,48 @@ entry: ;; 1.0 / (fsqrt vec) define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind { -; CHECK-LABEL: one_div_sqrt_v8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: one_div_sqrt_v8f32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -128 +; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill +; LA32-NEXT: addi.w $fp, $sp, 128 +; LA32-NEXT: bstrins.w $sp, $zero, 4, 0 +; LA32-NEXT: vld $vr0, $a1, 16 +; LA32-NEXT: vst $vr0, $sp, 48 +; LA32-NEXT: ld.w $a2, $a1, 12 +; LA32-NEXT: st.w $a2, $sp, 44 +; LA32-NEXT: ld.w $a2, $a1, 8 +; LA32-NEXT: st.w $a2, $sp, 40 +; LA32-NEXT: ld.w $a2, $a1, 
4 +; LA32-NEXT: st.w $a2, $sp, 36 +; LA32-NEXT: ld.w $a1, $a1, 0 +; LA32-NEXT: st.w $a1, $sp, 32 +; LA32-NEXT: xvld $xr0, $sp, 32 +; LA32-NEXT: xvfrsqrt.s $xr0, $xr0 +; LA32-NEXT: xvst $xr0, $sp, 64 +; LA32-NEXT: vld $vr0, $sp, 80 +; LA32-NEXT: vst $vr0, $a0, 16 +; LA32-NEXT: ld.w $a1, $sp, 76 +; LA32-NEXT: st.w $a1, $a0, 12 +; LA32-NEXT: ld.w $a1, $sp, 72 +; LA32-NEXT: st.w $a1, $a0, 8 +; LA32-NEXT: ld.w $a1, $sp, 68 +; LA32-NEXT: st.w $a1, $a0, 4 +; LA32-NEXT: ld.w $a1, $sp, 64 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: addi.w $sp, $fp, -128 +; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 128 +; LA32-NEXT: ret +; +; LA64-LABEL: one_div_sqrt_v8f32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvfrsqrt.s $xr0, $xr0 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0, align 16 %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) @@ -47,12 +156,51 @@ entry: } define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind { -; CHECK-LABEL: one_div_sqrt_v4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: one_div_sqrt_v4f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -128 +; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill +; LA32-NEXT: addi.w $fp, $sp, 128 +; LA32-NEXT: bstrins.w $sp, $zero, 4, 0 +; LA32-NEXT: vld $vr0, $a1, 16 +; LA32-NEXT: vst $vr0, $sp, 48 +; LA32-NEXT: ld.w $a2, $a1, 12 +; LA32-NEXT: st.w $a2, $sp, 44 +; LA32-NEXT: ld.w $a2, $a1, 8 +; LA32-NEXT: st.w $a2, $sp, 40 +; LA32-NEXT: ld.w $a2, $a1, 4 +; LA32-NEXT: st.w $a2, $sp, 36 +; LA32-NEXT: ld.w $a1, $a1, 0 +; LA32-NEXT: st.w $a1, $sp, 32 +; LA32-NEXT: xvld $xr0, $sp, 32 +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; LA32-NEXT: xvld $xr1, $a1, 
%pc_lo12(.LCPI3_0) +; LA32-NEXT: xvfsqrt.d $xr0, $xr0 +; LA32-NEXT: xvfdiv.d $xr0, $xr1, $xr0 +; LA32-NEXT: xvst $xr0, $sp, 64 +; LA32-NEXT: vld $vr0, $sp, 80 +; LA32-NEXT: vst $vr0, $a0, 16 +; LA32-NEXT: ld.w $a1, $sp, 76 +; LA32-NEXT: st.w $a1, $a0, 12 +; LA32-NEXT: ld.w $a1, $sp, 72 +; LA32-NEXT: st.w $a1, $a0, 8 +; LA32-NEXT: ld.w $a1, $sp, 68 +; LA32-NEXT: st.w $a1, $a0, 4 +; LA32-NEXT: ld.w $a1, $sp, 64 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: addi.w $sp, $fp, -128 +; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 128 +; LA32-NEXT: ret +; +; LA64-LABEL: one_div_sqrt_v4f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvfrsqrt.d $xr0, $xr0 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0, align 16 %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) diff --git a/llvm/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.ll b/llvm/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.ll index 8b25a6525381..1e60b389bb23 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @test_u() nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/inline-asm-reg-names.ll b/llvm/test/CodeGen/LoongArch/lasx/inline-asm-reg-names.ll index dd400ecfcf91..1289892b2c03 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/inline-asm-reg-names.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/inline-asm-reg-names.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc --mtriple=loongarch64 
--mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @register_xr1() nounwind { ; CHECK-LABEL: register_xr1: @@ -42,16 +43,27 @@ entry: ;; is a callee-saved register which is preserved across calls. ;; That's why the fst.d and fld.d instructions are emitted. define void @register_xr31() nounwind { -; CHECK-LABEL: register_xr31: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: xvldi $xr31, 1 -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 16 -; CHECK-NEXT: ret +; LA32-LABEL: register_xr31: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: #APP +; LA32-NEXT: xvldi $xr31, 1 +; LA32-NEXT: #NO_APP +; LA32-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: register_xr31: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: #APP +; LA64-NEXT: xvldi $xr31, 1 +; LA64-NEXT: #NO_APP +; LA64-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret entry: %0 = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "={$xr31}"() ret void diff --git a/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll b/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll index 7a90afca376d..48d6e0130105 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: 
llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll index bf54f44357b0..09c4161728bd 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll index 0c2f2ace29fc..2eac147a860c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll index c1258d53e913..0f5bdad6c777 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8>, <32 x i8>) diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll index 4998847f0910..8855a8a6cc1b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll index f25f0e61a28e..cb7ae06a5f56 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll index 09b5d07a0151..fb39220b27c7 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll index ef7a1b5a50ef..7f6fc9c4dbc1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll index 15f3a8094770..e726657bd629 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll index 60f0b765f954..cd85cdacdbb1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll index 1273dc6b450b..ca12d53ddf29 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll index 88cf142d6968..924b69745ea2 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll index f385ef3661cb..4eabdc021ec0 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll index 488d3b96b003..b89511cbeeb5 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll index b5ab5a5366aa..0aa573e64c49 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll index ecc287e89bbc..462ea24510be 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll index 09da85411082..0ecd45ca296d 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll index cec71bab2fe8..9f148e5a447a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 
declare <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8>, <32 x i8>) @@ -40,10 +41,21 @@ entry: declare <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64>, <4 x i64>) define <4 x i64> @lasx_xvbitclr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -; CHECK-LABEL: lasx_xvbitclr_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvbitclr.d $xr0, $xr0, $xr1 -; CHECK-NEXT: ret +; LA32-LABEL: lasx_xvbitclr_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvrepli.d $xr2, 63 +; LA32-NEXT: xvand.v $xr1, $xr1, $xr2 +; LA32-NEXT: xvrepli.d $xr2, 1 +; LA32-NEXT: xvsll.d $xr1, $xr2, $xr1 +; LA32-NEXT: xvrepli.b $xr2, -1 +; LA32-NEXT: xvxor.v $xr1, $xr1, $xr2 +; LA32-NEXT: xvand.v $xr0, $xr0, $xr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lasx_xvbitclr_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvbitclr.d $xr0, $xr0, $xr1 +; LA64-NEXT: ret entry: %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> %va, <4 x i64> %vb) ret <4 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll index dff0884fdd5a..9532684ab420 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll index e1aef1a82f0c..800ce010e317 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x 
i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll index fb4f9fbc2e4b..01c56eebf6c5 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8>, <32 x i8>) @@ -40,10 +41,19 @@ entry: declare <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64>, <4 x i64>) define <4 x i64> @lasx_xvbitrev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -; CHECK-LABEL: lasx_xvbitrev_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvbitrev.d $xr0, $xr0, $xr1 -; CHECK-NEXT: ret +; LA32-LABEL: lasx_xvbitrev_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvrepli.d $xr2, 63 +; LA32-NEXT: xvand.v $xr1, $xr1, $xr2 +; LA32-NEXT: xvrepli.d $xr2, 1 +; LA32-NEXT: xvsll.d $xr1, $xr2, $xr1 +; LA32-NEXT: xvxor.v $xr0, $xr0, $xr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lasx_xvbitrev_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvbitrev.d $xr0, $xr0, $xr1 +; LA64-NEXT: ret entry: %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> %va, <4 x i64> %vb) ret <4 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll index 2e91407590ac..f112b58b28c8 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; 
RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8>, <32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll index 3f6fd44f842c..7b0b60ebbf45 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll index 40533ab96d86..6f582fd4a9a1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll index 79dd55cbfef9..1a3d5799194a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll index 17a77ece7775..f05b5c7681a2 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll index 613285804e0e..a684c1bfec44 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll index 83d1f0ef60c6..0baad661ad59 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8>, <32 x i8>) @@ -40,10 +41,19 @@ entry: declare <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64>, <4 x i64>) define <4 x i64> @lasx_xvbitset_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -; CHECK-LABEL: lasx_xvbitset_d: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: xvbitset.d $xr0, $xr0, $xr1 -; CHECK-NEXT: ret +; LA32-LABEL: lasx_xvbitset_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvrepli.d $xr2, 63 +; LA32-NEXT: xvand.v $xr1, $xr1, $xr2 +; LA32-NEXT: xvrepli.d $xr2, 1 +; LA32-NEXT: xvsll.d $xr1, $xr2, $xr1 +; LA32-NEXT: xvor.v $xr0, $xr0, $xr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lasx_xvbitset_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvbitset.d $xr0, $xr0, $xr1 +; LA64-NEXT: ret entry: %res = call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> %va, <4 x i64> %vb) ret <4 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll index 1da08a633bd2..fbe6b69b679a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll index e19a3232c179..694de9ec5a03 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll index cbb63ced5cc0..f6e3542ed7cc 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll index 5d2b63391e67..72a75f491a91 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll index 8dfd0ca579b8..b74676a73ad9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll index b0c26cbe3e35..3e865eaa95d6 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll index 29b2be03d54e..4e8110f74fc8 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll index 5247ceedbd14..526bc8ee176d 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll index 813204092e94..9e23ee1bf240 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll index 48721b52af00..faf2a58f90a2 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll index 543589e61b12..1580a09ca975 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll index 7040c8c784cd..b437c1f87687 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll index 1301b8a146eb..337f46a45fc3 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll index bca8f8b3c778..9164f8f623fa 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll index c8774a7b29c0..af1b3e2959a7 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll index 563a0ce9e384..6b309d89b62e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll index 901ca5bb0260..1a22e3a7eca7 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll index b01f908e71af..581d1a68bdda 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll index 82bf1d3df72c..62206d7b8496 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll index e1a6a2923e67..c562e2923135 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16>) diff 
--git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll index 0b3e693c7f51..4e270284568b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll index 49923ddd4e8d..45a4d4c4d5ca 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll index 24da0bd33838..fb96482795c5 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll index bccef4504d70..7e8273956331 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll 
@@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll index 0fc06f971660..1caf43fb4299 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float>, <8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll index 2422fa0c00d8..eb2fa913311c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll index cd9ccc656aef..adc4649d3933 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x 
float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll index effb3f9e1d75..61205890a440 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll index 753a6f31ba06..214ab30ee7b3 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll index 57909d0dd168..132aae31c4ed 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float>, <8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll index 9cad6f383066..aca107fd4c18 100644 --- 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll index c30993590f98..007b9739fe6a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float>, <8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll index 2e7ca695be62..3e711803828c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float>, <8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll index da3a26df2824..aa3581018bef 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll index ddead27cd14b..b410d76c395a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll index 6efa8122baf1..692cfc19218d 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll index 64b4632669d2..e9d1bf38eb5a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll index ca92cff9b2d1..767d69ccc09c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll index e83e55a52a11..853778f292a1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8>, <32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll index a13333d8d81c..4669f2c92ed1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll index b52774a03618..3dde655fd568 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll @@ -1,4 +1,5 @@ ; 
NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float>, <8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll index 74cd507f16d2..b716c32d43e2 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll index 2c64ab23806b..a75325228213 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll index a5223c1d89a0..a40e80b730d0 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <16 x i16> 
@llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll index c9d0ca6b0324..7e83feb2ed9a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll index 4982f2c7d43a..1dd4ffad8fb3 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll index 3accabf6dbd9..ef3c09cc1c42 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll index ea98c96464ae..1d1b33b4f9e6 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll 
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) @@ -17,11 +18,18 @@ entry: declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) define <4 x i64> @lasx_xvinsgr2vr_d(<4 x i64> %va) nounwind { -; CHECK-LABEL: lasx_xvinsgr2vr_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ori $a0, $zero, 1 -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: lasx_xvinsgr2vr_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ori $a0, $zero, 1 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 2 +; LA32-NEXT: xvinsgr2vr.w $xr0, $zero, 3 +; LA32-NEXT: ret +; +; LA64-LABEL: lasx_xvinsgr2vr_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ori $a0, $zero, 1 +; LA64-NEXT: xvinsgr2vr.d $xr0, $a0, 1 +; LA64-NEXT: ret entry: %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 1) ret <4 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll index a54fa8515fba..98d1532b26d1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll index 
53e59db11aa6..04f64e7a9c29 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll index 27ae819c4144..561ba17e8ff1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll index 27c690c91aec..4dd6fc037aa2 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvld(ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll index 1d8d5c764ce8..c0e3239dfb89 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 
2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvld(ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll index f3dd3650cf8a..c574833ec8f3 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll index 6466818bf674..eb41e993b9d1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll index 59f79dd32af3..e65ecf5fda19 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll index 6fe6de82e1c0..14cf0593c2de 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll index 74c22298db50..05a44e9d2806 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll index ccd969a9f299..bb5bb6539d2a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll index d3b09396727e..8dd45221d421 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8>, <32 x i8>, <32 x 
i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll index 146624a764a2..eb7189cf0ae7 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16>, <32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll index b85798b53c92..33035ffb1413 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll index 9cf09df4439a..4739ed6be91d 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll index b81931977aad..f853ca45fce1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll index c94b1e4ea44c..dde42a0239ad 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll index a177246bb235..9b13612c8d04 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll index da87c20ad6ee..afb8472d18a7 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> 
@llvm.loongarch.lasx.xvmskgez.b(<32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll index b2218487535c..4a6640ead171 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll index becd2c883a7e..f6d4998f47d4 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll index c89f9578b77d..5a7f5bffe228 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8>, <32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll index 97461512ce16..4d7b14cc5fe0 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll index d5d852e58a9f..305604ed6c08 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll index f69e64aa7698..d7a8aaade203 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll index ecbedf334657..e4b7ef728490 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | 
FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll index 674746b7624e..fa6f5a22cdbc 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll index 1130e094bf1f..e3f061e6a1c9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll index 8f2333064d64..92feeeb00aa1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll index 55eebf87ee92..fc3c6398d6f1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll index 16462cfafc54..b1b5e4b7e009 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll index 90dec8e55f2d..de37309a623f 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll index ae6571d98f4a..49296350000b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, 
i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll index 8e53d88bac37..96eaf0140a41 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll index 3a335cdd3716..ea2c69bbb1c8 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll index 512b30234917..89ca5d4883c2 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll index d77f1d2082c8..80c8f0a19c8e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll @@ -1,4 +1,5 
@@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll index 4ec434edd4ec..086b250324b1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32>, <8 x i32>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll index 41f4856bd8f7..78f876cf4661 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll index afb335c5d6ca..50723bb1960e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll index 0d9f9daabc44..73797899431b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll index bbd6d693ca0b..cc3c4bb3ed38 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll index cfc6ec42874e..f113ee2c3e9a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll index be1f19a89737..eae84ac93d7e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll index 546777bc72ab..a4f0bad28d41 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll index 0fa8c94adc60..f3d6e3149187 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll index a0cb309c54e1..4a20a21baf4c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> 
@llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll index c537ffa66ba7..fd99f3b52f65 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll index 25fab44f461f..500ab529b725 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll index 21d36ff7bb5e..b95c5323d973 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll index 7996bb36ef03..c70fb4e01e91 100644 --- 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll index 40abdf497605..1f7434c77594 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll index dd38301d0534..69ed350f3480 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll index 64d2773864e9..c65f8d073757 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < 
%s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll index 54a5e2e9c833..bec705422d19 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll index 839fbc9990d3..e9d9e9f5d18a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll index b73b32ebd3b0..9d9027465340 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll index 293b9dc9eb4d..77fc912e78f9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll index bb6ef0cc6574..39ef86cc36b4 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll index fb2c6206da7b..24cf5c0b940a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll index 83bc93c88c73..d4e48e8575a6 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8>, 
<32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll index 9b9140f6ad62..35fab17aa611 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8>, <32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll index 9217d1f6a05d..65660dbbe780 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll index 8d6d1c694193..0f2652472a64 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll index 31205086759c..7e0e6a8a2aa9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll @@ 
-1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll index e6c6d8ccd0d3..2ee27d612888 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll index 5b10aca9801d..791f7b284580 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll index 903bc10d88b7..6a3c6efe1e60 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll index 8895efc84b84..fa3e3389ed8e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll index bf8205376a6c..2514788b1dff 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll index b5368a86b5c3..0cc5f7962199 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll index 14110b613dbe..228facde7b96 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8>, <32 x i8>) @@ -40,10 +41,17 @@ entry: declare <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64>, <4 x i64>) define <4 x i64> @lasx_xvsll_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -; CHECK-LABEL: lasx_xvsll_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvsll.d $xr0, $xr0, $xr1 -; CHECK-NEXT: ret +; LA32-LABEL: lasx_xvsll_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvrepli.d $xr2, 63 +; LA32-NEXT: xvand.v $xr1, $xr1, $xr2 +; LA32-NEXT: xvsll.d $xr0, $xr0, $xr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lasx_xvsll_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvsll.d $xr0, $xr0, $xr1 +; LA64-NEXT: ret entry: %res = call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> %va, <4 x i64> %vb) ret <4 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll index 18803767d6c0..28df3fcc37ec 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll index 3f5d4d631671..49e1a1d4108c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 
--mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll index a72b8a6cbb4f..9cd6312d07ef 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll index dc0567da4e47..c92c006672aa 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll index a2cedc8d3ef3..b129cb00caf6 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll index 
3ea87adff110..94e587288c13 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll index 15b522d5e7e3..004f0ffb277c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll index fefee7246ae6..1438becbea98 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll index a7498682559b..12bc60695304 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc 
--mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8>, <32 x i8>) @@ -40,10 +41,17 @@ entry: declare <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64>, <4 x i64>) define <4 x i64> @lasx_xvsra_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -; CHECK-LABEL: lasx_xvsra_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvsra.d $xr0, $xr0, $xr1 -; CHECK-NEXT: ret +; LA32-LABEL: lasx_xvsra_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvrepli.d $xr2, 63 +; LA32-NEXT: xvand.v $xr1, $xr1, $xr2 +; LA32-NEXT: xvsra.d $xr0, $xr0, $xr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lasx_xvsra_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvsra.d $xr0, $xr0, $xr1 +; LA64-NEXT: ret entry: %res = call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> %va, <4 x i64> %vb) ret <4 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll index f59ae4c19662..9690b9b960d3 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16>, <16 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll index bedbfc4889d2..c4592d628e57 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < 
%s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll index 3c17f2b6090a..2ac5cd9d6399 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll index 91fb90da9c52..dc017da9edda 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll index e417e3cc5bbf..49f8505746eb 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll index 15fed7966f1c..430187b11b74 100644 --- 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll index e2c160557c4d..fcba1aa005e2 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll index 02dd989773ca..bfb725d13547 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16>, <16 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll index 83e977827e2d..35786248752c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx 
< %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll index eb577a29fb33..fb7bcc380aab 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll index a7d2c3739793..9c3848b39611 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll index 3ab02dcb97ed..2824c3944b88 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll index bc085aeaa232..52a84c8b5bcc 100644 --- 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll index 7b2992f2ca3b..315ca74ad6dc 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8>, <32 x i8>) @@ -40,10 +41,17 @@ entry: declare <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64>, <4 x i64>) define <4 x i64> @lasx_xvsrl_d(<4 x i64> %va, <4 x i64> %vb) nounwind { -; CHECK-LABEL: lasx_xvsrl_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvsrl.d $xr0, $xr0, $xr1 -; CHECK-NEXT: ret +; LA32-LABEL: lasx_xvsrl_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvrepli.d $xr2, 63 +; LA32-NEXT: xvand.v $xr1, $xr1, $xr2 +; LA32-NEXT: xvsrl.d $xr0, $xr0, $xr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lasx_xvsrl_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvsrl.d $xr0, $xr0, $xr1 +; LA64-NEXT: ret entry: %res = call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> %va, <4 x i64> %vb) ret <4 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll index dc5c0e016ea0..f72672ffba5a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16>, <16 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll index 9e7c94305630..67f9fb70fcaf 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll index 66d800470003..58b2182e929b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll index 0301ebb195e2..f87b41304f03 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | 
FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll index 52621ddc6f49..de78e88c3e48 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll index 5663e3475b12..e088643d97b3 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll index e04504158e27..0a57b07b38f6 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll index 1e7df379c6e1..546da1030698 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16>, <16 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll index 2d65a75b175a..eafdbf5b2bde 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll index 82da0d21d013..8a401ef8a992 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll index 56dbafe8b1ac..2851d4715473 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx 
< %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll index da1857dad145..29640da6f7c6 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16>, <16 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll index e10d5d7bd488..cd3b3baa9a58 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll index a928cc2de8c8..3d912502e0cc 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll index 9efa659b4a1e..7461f4c71c01 100644 --- 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll index b5d59ff06f4d..9502b29b5d67 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16>, <16 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll index 42cd6ac99754..5d0153fd4c8d 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll index f050e7d79b0f..d7ba76e878df 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 
| FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll index da411dad645b..90abf2fab74d 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll index c60b5bdf81a0..1352baecd12a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16>, <16 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll index 26be21a83aa4..fc41df8c6fbf 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll index 72da2a746dd5..99b0d4561792 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll index e57dd426bde8..1a49761dedae 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll index 774cf1bd5e84..d89511ba180e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16>, <16 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll index cd778e2c0627..36d19b5ce9f4 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll index a10c54329149..69f5baa8536b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll index 9a80516d8d78..b3119d6b27bb 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll index cd3ccd9f5262..e600eac3f002 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s 
declare <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll index 6108ae1883da..d659f1d95865 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare void @llvm.loongarch.lasx.xvst(<32 x i8>, ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll index 969fb5765dd8..0882834b23a3 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare void @llvm.loongarch.lasx.xvst(<32 x i8>, ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll index 4593de13fbff..a1de4f49b580 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, ptr, i32, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll index faa7d501eb74..d970689e63d4 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, ptr, i32, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll index 34d1866e9d5e..87c5946270ef 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, ptr, i32, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll index 4d69dd83dcde..f8778adb458e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll index 810008c17f7e..8e24ec98d0c5 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> 
@llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll index 924b89ce9d6c..73ff9268230d 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll index cc3235ff4657..1992628aefbc 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll index 6f203e894990..afdd7b2e7935 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll index 6395b3d6f2e7..98d180f3560f 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8>, <32 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll index 0170d204cf42..d12082b3eafc 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll index 1478f691a1cc..e1c9574aeafa 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll index c71d7e731165..8cc53991fb6e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <32 x i8> 
@llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/absd.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/absd.ll index c5df9f842083..20934480dce9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/absd.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/absd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc -mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s ;; 1. trunc(abs(sub(sext(a),sext(b)))) -> abds(a,b) or abdu(a,b) diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll index 136f34bafb32..030f75b775ad 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @add_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll index b06d1bea4ef6..a2df7aa1ae6d 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @and_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll index 
4dd2cee7a2ed..edcbe1583945 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @ashr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/bitcast-extract-element.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/bitcast-extract-element.ll index 09ce1a04d6c9..073fc54f5654 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/bitcast-extract-element.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/bitcast-extract-element.ll @@ -1,11 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64 define i32 @bitcast_extract_v8f32(<8 x float> %a) nounwind { -; CHECK-LABEL: bitcast_extract_v8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 -; CHECK-NEXT: ret +; LA32-LABEL: bitcast_extract_v8f32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvpickve.w $xr0, $xr0, 7 +; LA32-NEXT: movfr2gr.s $a0, $fa0 +; LA32-NEXT: ret +; +; LA64-LABEL: bitcast_extract_v8f32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvpickve2gr.w $a0, $xr0, 7 +; LA64-NEXT: ret entry: %b = extractelement <8 x float> %a, i32 7 %c = bitcast float %b to i32 @@ -13,10 +20,17 @@ entry: } define i64 @bitcast_extract_v4f64(<4 x double> %a) nounwind { -; CHECK-LABEL: bitcast_extract_v4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 -; CHECK-NEXT: ret +; LA32-LABEL: 
bitcast_extract_v4f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvpickve.d $xr0, $xr0, 3 +; LA32-NEXT: movfr2gr.s $a0, $fa0 +; LA32-NEXT: movfrh2gr.s $a1, $fa0 +; LA32-NEXT: ret +; +; LA64-LABEL: bitcast_extract_v4f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 3 +; LA64-NEXT: ret entry: %b = extractelement <4 x double> %a, i32 3 %c = bitcast double %b to i64 diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll index 2e1618748688..dddee35fb9e7 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll @@ -76,21 +76,11 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; CHECK-LABEL: extract_32xi8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: addi.d $sp, $sp, -96 -; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; CHECK-NEXT: addi.d $fp, $sp, 96 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvst $xr0, $sp, 32 -; CHECK-NEXT: addi.d $a0, $sp, 32 -; CHECK-NEXT: bstrins.d $a0, $a2, 4, 0 -; CHECK-NEXT: ld.b $a0, $a0, 0 -; CHECK-NEXT: st.b $a0, $a1, 0 -; CHECK-NEXT: addi.d $sp, $fp, -96 -; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: movgr2fr.w $fa2, $a2 +; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load volatile <32 x i8>, ptr %src %e = extractelement <32 x i8> %v, i32 %idx @@ -101,21 +91,11 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; CHECK-LABEL: extract_16xi16_idx: ; 
CHECK: # %bb.0: -; CHECK-NEXT: addi.d $sp, $sp, -96 -; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; CHECK-NEXT: addi.d $fp, $sp, 96 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvst $xr0, $sp, 32 -; CHECK-NEXT: addi.d $a0, $sp, 32 -; CHECK-NEXT: bstrins.d $a0, $a2, 4, 1 -; CHECK-NEXT: ld.h $a0, $a0, 0 -; CHECK-NEXT: st.h $a0, $a1, 0 -; CHECK-NEXT: addi.d $sp, $fp, -96 -; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: movgr2fr.w $fa2, $a2 +; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0 +; CHECK-NEXT: xvstelm.h $xr2, $a1, 0, 0 ; CHECK-NEXT: ret %v = load volatile <16 x i16>, ptr %src %e = extractelement <16 x i16> %v, i32 %idx @@ -126,21 +106,10 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; CHECK-LABEL: extract_8xi32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: addi.d $sp, $sp, -96 -; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; CHECK-NEXT: addi.d $fp, $sp, 96 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvst $xr0, $sp, 32 -; CHECK-NEXT: addi.d $a0, $sp, 32 -; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 -; CHECK-NEXT: ld.w $a0, $a0, 0 -; CHECK-NEXT: st.w $a0, $a1, 0 -; CHECK-NEXT: addi.d $sp, $fp, -96 -; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2 +; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1 +; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load volatile <8 x i32>, ptr %src %e = extractelement <8 x i32> %v, i32 %idx @@ -151,21 +120,11 @@ define void @extract_8xi32_idx(ptr %src, 
ptr %dst, i32 %idx) nounwind { define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; CHECK-LABEL: extract_4xi64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: addi.d $sp, $sp, -96 -; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; CHECK-NEXT: addi.d $fp, $sp, 96 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvst $xr0, $sp, 32 -; CHECK-NEXT: addi.d $a0, $sp, 32 -; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: st.d $a0, $a1, 0 -; CHECK-NEXT: addi.d $sp, $fp, -96 -; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: movgr2fr.w $fa2, $a2 +; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 +; CHECK-NEXT: xvstelm.d $xr2, $a1, 0, 0 ; CHECK-NEXT: ret %v = load volatile <4 x i64>, ptr %src %e = extractelement <4 x i64> %v, i32 %idx @@ -176,21 +135,10 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; CHECK-LABEL: extract_8xfloat_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: addi.d $sp, $sp, -96 -; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; CHECK-NEXT: addi.d $fp, $sp, 96 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvst $xr0, $sp, 32 -; CHECK-NEXT: addi.d $a0, $sp, 32 -; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 -; CHECK-NEXT: fld.s $fa0, $a0, 0 -; CHECK-NEXT: fst.s $fa0, $a1, 0 -; CHECK-NEXT: addi.d $sp, $fp, -96 -; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2 +; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1 +; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0 ; CHECK-NEXT: ret 
%v = load volatile <8 x float>, ptr %src %e = extractelement <8 x float> %v, i32 %idx @@ -201,21 +149,11 @@ define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; CHECK-LABEL: extract_4xdouble_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: addi.d $sp, $sp, -96 -; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; CHECK-NEXT: addi.d $fp, $sp, 96 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvst $xr0, $sp, 32 -; CHECK-NEXT: addi.d $a0, $sp, 32 -; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 -; CHECK-NEXT: fld.d $fa0, $a0, 0 -; CHECK-NEXT: fst.d $fa0, $a1, 0 -; CHECK-NEXT: addi.d $sp, $fp, -96 -; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: movgr2fr.w $fa2, $a2 +; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0 +; CHECK-NEXT: xvstelm.d $xr2, $a1, 0, 0 ; CHECK-NEXT: ret %v = load volatile <4 x double>, ptr %src %e = extractelement <4 x double> %v, i32 %idx diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll index b3eb328e8d44..b4d5dc10c20c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @fadd_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll index 4f56dd29c1b2..be60b7518e39 100644 --- 
a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ;; TREU diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll index 63d8c222ae54..ae6f091ddb49 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @fdiv_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: fdiv_v8f32: @@ -49,12 +50,21 @@ entry: } define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind { -; CHECK-LABEL: one_fdiv_v4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvfrecip.d $xr0, $xr0 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: one_fdiv_v4f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; LA32-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI3_0) +; LA32-NEXT: xvfdiv.d $xr0, $xr1, $xr0 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: one_fdiv_v4f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvfrecip.d $xr0, $xr0 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <4 x double>, ptr %a0 %div = fdiv <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %v0 diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll index 8ee567c2a92f..ffb793dd1301 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s ;; Fix https://github.com/llvm/llvm-project/issues/137000. diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll index f777151cdb0a..430ab98a5c69 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @fmul_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll index 5eb468fc55a0..515403f8362c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @fneg_v8f32(ptr %res, ptr %a0) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll index ed333c303879..5b63ef3e53a4 100644 --- 
a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @fptosi_v8f32_v8i32(ptr %res, ptr %in){ diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll index 9c499ba71d64..4c699a0721bf 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @fptoui_v8f32_v8i32(ptr %res, ptr %in){ diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll index 201ba5f5df66..99074e08c0de 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @fsub_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll index d15c4133855f..47229fc9a0fc 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ;; SETEQ diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-bitcast-element.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-bitcast-element.ll index b37b525981fd..4a9d2579766b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-bitcast-element.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-bitcast-element.ll @@ -1,11 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64 define <8 x float> @insert_bitcast_v8f32(<8 x float> %a, i32 %b) nounwind { -; CHECK-LABEL: insert_bitcast_v8f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: insert_bitcast_v8f32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: movgr2fr.w $fa1, $a0 +; LA32-NEXT: xvinsve0.w $xr0, $xr1, 1 +; LA32-NEXT: ret +; +; LA64-LABEL: insert_bitcast_v8f32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; LA64-NEXT: ret entry: %c = bitcast i32 %b to float %d = insertelement <8 x float> %a, float %c, i32 1 @@ -13,10 +20,17 @@ entry: } define <4 x double> @insert_bitcast_v4f64(<4 x double> %a, i64 %b) nounwind { -; CHECK-LABEL: insert_bitcast_v4f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: insert_bitcast_v4f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: movgr2fr.w $fa1, $a0 +; LA32-NEXT: movgr2frh.w $fa1, $a1 +; LA32-NEXT: xvinsve0.d $xr0, $xr1, 1 +; LA32-NEXT: ret +; +; LA64-LABEL: insert_bitcast_v4f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: 
xvinsgr2vr.d $xr0, $a0, 1 +; LA64-NEXT: ret entry: %c = bitcast i64 %b to double %d = insertelement <4 x double> %a, double %c, i32 1 diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll index 5b992b5e38de..4baa6e2bf1a2 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @lshr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll index 4745e7003cb1..620d2233c904 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @mul_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll index f32b8897bebc..07ccb88f2bba 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @or_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll index 879caa5a6700..2c783e3a5e22 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @sdiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll index 56c69171c9d4..3e496eac475c 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @shl_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll index 22ab19b9fa44..382c0f551692 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s ;; xvilvl.b diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll index 2ff9af4069b9..c36a87f77bb2 
100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s ;; xvpackev.b diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll index 294d292d1764..327f1d4f5481 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s ;; xvpickev.b diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll index 208a758ea4e9..3673f594094b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @sitofp_v8i32_v8f32(ptr %res, ptr %in){ diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll index 5102abac83d8..bf9f29b578d7 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @sub_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll index 43f558f3cdf3..c3bd69a8a0bc 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @udiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll index 70cf71c4cec2..6e417032acff 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @uitofp_v8i32_v8f32(ptr %res, ptr %in){ diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll index e062e10b21d9..5365685d395e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx 
< %s | FileCheck %s define void @xor_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll b/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll index 506b5c1232f2..249414f5b1bb 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64 ;; Without this patch(codegen for concat_vectors), the test will hang. @g_156 = external global [12 x i32] @@ -7,23 +8,51 @@ @g_813 = external global i32 define void @foo() { -; CHECK-LABEL: foo: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(g_156) -; CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(g_156) -; CHECK-NEXT: pcalau12i $a1, %got_pc_hi20(g_490) -; CHECK-NEXT: ld.d $a1, $a1, %got_pc_lo12(g_490) -; CHECK-NEXT: ld.w $a2, $a0, 24 -; CHECK-NEXT: pcalau12i $a3, %got_pc_hi20(g_813) -; CHECK-NEXT: ld.d $a3, $a3, %got_pc_lo12(g_813) -; CHECK-NEXT: st.w $zero, $a1, 0 -; CHECK-NEXT: st.w $a2, $a3, 0 -; CHECK-NEXT: xvrepli.b $xr0, 0 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: vrepli.b $vr0, 0 -; CHECK-NEXT: vst $vr0, $a0, 32 -; CHECK-NEXT: st.w $zero, $a0, 20 -; CHECK-NEXT: ret +; LA32-LABEL: foo: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: pcalau12i $a0, %got_pc_hi20(g_156) +; LA32-NEXT: ld.w $fp, $a0, %got_pc_lo12(g_156) +; LA32-NEXT: pcalau12i $a0, %got_pc_hi20(g_490) +; LA32-NEXT: ld.w $a0, $a0, 
%got_pc_lo12(g_490) +; LA32-NEXT: ld.w $a1, $fp, 24 +; LA32-NEXT: pcalau12i $a2, %got_pc_hi20(g_813) +; LA32-NEXT: ld.w $a2, $a2, %got_pc_lo12(g_813) +; LA32-NEXT: st.w $zero, $fp, 20 +; LA32-NEXT: st.w $zero, $a0, 0 +; LA32-NEXT: st.w $a1, $a2, 0 +; LA32-NEXT: ori $a2, $zero, 48 +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: bl memset +; LA32-NEXT: st.w $zero, $fp, 20 +; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: foo: +; LA64: # %bb.0: # %entry +; LA64-NEXT: pcalau12i $a0, %got_pc_hi20(g_156) +; LA64-NEXT: ld.d $a0, $a0, %got_pc_lo12(g_156) +; LA64-NEXT: pcalau12i $a1, %got_pc_hi20(g_490) +; LA64-NEXT: ld.d $a1, $a1, %got_pc_lo12(g_490) +; LA64-NEXT: ld.w $a2, $a0, 24 +; LA64-NEXT: pcalau12i $a3, %got_pc_hi20(g_813) +; LA64-NEXT: ld.d $a3, $a3, %got_pc_lo12(g_813) +; LA64-NEXT: st.w $zero, $a1, 0 +; LA64-NEXT: st.w $a2, $a3, 0 +; LA64-NEXT: xvrepli.b $xr0, 0 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: vrepli.b $vr0, 0 +; LA64-NEXT: vst $vr0, $a0, 32 +; LA64-NEXT: st.w $zero, $a0, 20 +; LA64-NEXT: ret entry: store i32 0, ptr getelementptr inbounds (i8, ptr @g_156, i64 20), align 4 store i32 0, ptr @g_490, align 4 diff --git a/llvm/test/CodeGen/LoongArch/lasx/mulh.ll b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll index db3cc7f38774..f99cc0a6cc33 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/mulh.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @mulhs_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll index 05fbb746bd9d..5593890bb768 100644 --- 
a/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/scalar-to-vector.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 ;; Test scalar_to_vector expansion. @@ -31,10 +32,16 @@ define <8 x i32> @scalar_to_8xi32(i32 %val) { } define <4 x i64> @scalar_to_4xi64(i64 %val) { -; CHECK-LABEL: scalar_to_4xi64: -; CHECK: # %bb.0: -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: scalar_to_4xi64: +; LA32: # %bb.0: +; LA32-NEXT: xvinsgr2vr.w $xr0, $a0, 0 +; LA32-NEXT: xvinsgr2vr.w $xr0, $a1, 1 +; LA32-NEXT: ret +; +; LA64-LABEL: scalar_to_4xi64: +; LA64: # %bb.0: +; LA64-NEXT: xvinsgr2vr.d $xr0, $a0, 0 +; LA64-NEXT: ret %ret = insertelement <4 x i64> poison, i64 %val, i32 0 ret <4 x i64> %ret } diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll index fed085843485..5f76d9951df9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll @@ -61,13 +61,8 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) { ; CHECK-LABEL: shuffle_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) -; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI4_0) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_1) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI4_1) -; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3 -; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0 -; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1 ; 
CHECK-NEXT: ret %shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7> ret <8 x i32> %shuffle @@ -117,13 +112,8 @@ define <8 x float> @shuffle_v8f32(<8 x float> %a) { ; CHECK-LABEL: shuffle_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0) -; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI8_0) -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_1) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI8_1) -; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3 -; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0 -; CHECK-NEXT: xvori.b $xr0, $xr1, 0 +; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI8_0) +; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1 ; CHECK-NEXT: ret %shuffle = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7> ret <8 x float> %shuffle diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll index bf5effd7b391..7268eb24ee51 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll @@ -1,27 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 - ; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s define void @vec_reduce_add_v32i8(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_add_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; 
CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0 +; CHECK-NEXT: xvhaddw.h.b $xr0, $xr0, $xr0 +; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr0 +; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0 +; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 2 +; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: st.b $a0, $a1, 0 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %src %res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %v) @@ -33,19 +24,13 @@ define void @vec_reduce_add_v16i16(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_add_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0 +; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr0 +; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0 +; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 2 +; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: st.h $a0, $a1, 0 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %src %res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %v) @@ -57,16 +42,12 @@ define void @vec_reduce_add_v8i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_add_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228 -; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -; 
CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14 -; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1 -; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0 +; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0 +; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 2 +; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: st.w $a0, $a1, 0 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %src %res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v) @@ -78,14 +59,9 @@ define void @vec_reduce_add_v4i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_add_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0) -; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2 -; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1 -; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 2 +; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0 ; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i64>, ptr %src @@ -93,4 +69,3 @@ define void @vec_reduce_add_v4i64(ptr %src, ptr %dst) nounwind { store i64 %res, ptr %dst ret void } - diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll index a3160f10c8ca..fd64beab57bf 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll @@ -5,22 +5,17 @@ define void @vec_reduce_and_v32i8(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_and_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 
228 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %src %res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %v) @@ -32,19 +27,15 @@ define void @vec_reduce_and_v16i16(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_and_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; 
CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %src %res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %v) @@ -56,16 +47,13 @@ define void @vec_reduce_and_v8i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_and_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %src %res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %v) @@ -77,15 +65,11 @@ define void @vec_reduce_and_v4i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_and_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0) -; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1 -; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load 
<4 x i64>, ptr %src %res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v) diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll index bc910c23e4b1..cdb08d9de382 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll @@ -5,22 +5,17 @@ define void @vec_reduce_or_v32i8(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_or_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %src %res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %v) @@ -32,19 +27,15 @@ define void @vec_reduce_or_v16i16(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_or_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d 
$xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %src %res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %v) @@ -56,16 +47,13 @@ define void @vec_reduce_or_v8i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_or_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %src %res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %v) @@ -77,15 +65,11 @@ define void @vec_reduce_or_v4i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_or_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; 
CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0) -; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1 -; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i64>, ptr %src %res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v) diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll index 378088c9f828..1d182731c93b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll @@ -5,22 +5,17 @@ define void @vec_reduce_smax_v32i8(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_smax_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228 -; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32 -; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14 -; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1 -; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; 
CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %src %res = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %v) @@ -32,19 +27,15 @@ define void @vec_reduce_smax_v16i16(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_smax_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228 -; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14 -; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1 -; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %src %res = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %v) @@ -56,16 +47,13 @@ define void @vec_reduce_smax_v8i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_smax_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228 -; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14 -; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1 -; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 
0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmax.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %src %res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %v) @@ -77,15 +65,11 @@ define void @vec_reduce_smax_v4i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_smax_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0) -; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2 -; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1 -; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmax.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i64>, ptr %src %res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v) diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll index 1c7f2054cd4e..369afdd1fc7b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll @@ -5,22 +5,17 @@ define void @vec_reduce_smin_v32i8(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_smin_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228 -; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvsrli.d $xr1, 
$xr1, 32 -; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14 -; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1 -; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %src %res = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %v) @@ -32,19 +27,15 @@ define void @vec_reduce_smin_v16i16(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_smin_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228 -; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14 -; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1 -; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmin.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %src %res = call i16 
@llvm.vector.reduce.smin.v16i16(<16 x i16> %v) @@ -56,16 +47,13 @@ define void @vec_reduce_smin_v8i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_smin_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228 -; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14 -; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1 -; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmin.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %src %res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %v) @@ -77,15 +65,11 @@ define void @vec_reduce_smin_v4i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_smin_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0) -; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2 -; CHECK-NEXT: xvmin.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1 -; CHECK-NEXT: xvmin.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmin.d $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmin.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i64>, ptr %src %res = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v) diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll index 
152f093cbd02..5256a72ad7d9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll @@ -5,22 +5,17 @@ define void @vec_reduce_umax_v32i8(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_umax_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228 -; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32 -; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14 -; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1 -; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %src %res = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %v) @@ -32,19 +27,15 @@ define void @vec_reduce_umax_v16i16(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_umax_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228 -; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.h $xr1, 
$xr1, 14 -; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1 -; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmax.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %src %res = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %v) @@ -56,16 +47,13 @@ define void @vec_reduce_umax_v8i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_umax_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228 -; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14 -; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1 -; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmax.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %src %res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %v) @@ -77,15 +65,11 @@ define void @vec_reduce_umax_v4i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_umax_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0) -; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2 
-; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1 -; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmax.du $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmax.du $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i64>, ptr %src %res = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v) diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll index 64ed377535ab..a82c886d8eed 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll @@ -5,22 +5,17 @@ define void @vec_reduce_umin_v32i8(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_umin_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228 -; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32 -; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14 -; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1 -; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 +; 
CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %src %res = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %v) @@ -32,19 +27,15 @@ define void @vec_reduce_umin_v16i16(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_umin_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228 -; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14 -; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1 -; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmin.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %src %res = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %v) @@ -56,16 +47,13 @@ define void @vec_reduce_umin_v8i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_umin_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228 -; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14 -; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1 -; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v 
$vr1, $vr0, 8 +; CHECK-NEXT: vmin.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %src %res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %v) @@ -77,15 +65,11 @@ define void @vec_reduce_umin_v4i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_umin_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0) -; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2 -; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1 -; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vmin.du $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmin.du $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i64>, ptr %src %res = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v) diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll index 5dbf37e73263..429fadcdd156 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll @@ -5,22 +5,17 @@ define void @vec_reduce_xor_v32i8(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_xor_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; 
CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %src %res = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %v) @@ -32,19 +27,15 @@ define void @vec_reduce_xor_v16i16(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_xor_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %src %res = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %v) @@ -56,16 +47,13 @@ define void @vec_reduce_xor_v8i32(ptr %src, ptr 
%dst) nounwind { ; CHECK-LABEL: vec_reduce_xor_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i32>, ptr %src %res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %v) @@ -77,15 +65,11 @@ define void @vec_reduce_xor_v4i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_xor_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a0, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) -; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0) -; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78 -; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68 -; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1 -; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 -; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i64>, ptr %src %res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v) diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-bit-shift.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-bit-shift.ll index 72e06f680e43..48a534f43a0d 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-bit-shift.ll +++ 
b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-bit-shift.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define <32 x i8> @shuffle_to_xvslli_h_8(<32 x i8> %a) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-shift.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-shift.ll index 15bfce902f9d..cea604e96bc1 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-shift.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-shift.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define <32 x i8> @shuffle_32i8_byte_left_shift_1(<32 x i8> %a) { diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll index 17ba28afc81f..44e4f71c8d08 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: select_v32i8_imm: @@ -49,16 +50,26 @@ define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { } define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { -; CHECK-LABEL: select_v8i32: -; CHECK: # %bb.0: -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: ori $a1, $zero, 
0 -; CHECK-NEXT: lu32i.d $a1, -1 -; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1 -; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: select_v8i32: +; LA32: # %bb.0: +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvld $xr1, $a2, 0 +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; LA32-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI3_0) +; LA32-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: select_v8i32: +; LA64: # %bb.0: +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvld $xr1, $a2, 0 +; LA64-NEXT: ori $a1, $zero, 0 +; LA64-NEXT: lu32i.d $a1, -1 +; LA64-NEXT: xvreplgr2vr.d $xr2, $a1 +; LA64-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret %v0 = load <8 x i32>, ptr %a0 %v1 = load <8 x i32>, ptr %a1 %sel = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i32> %v0, <8 x i32> %v1 diff --git a/llvm/test/CodeGen/LoongArch/lasx/widen-shuffle-mask.ll b/llvm/test/CodeGen/LoongArch/lasx/widen-shuffle-mask.ll index df639cb78cd1..2efe96fe18d4 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/widen-shuffle-mask.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/widen-shuffle-mask.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define <32 x i8> @widen_shuffle_mask_v32i8_to_v16i16(<32 x i8> %a, <32 x i8> %b) { diff --git a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll index c46747ef3050..349684ff22be 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll @@ -1,16 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: 
llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s - -; TODO: Load a element and splat it to a vector could be lowerd to vldrepl +; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 ; A load has more than one user shouldn't be lowered to vldrepl define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){ -; CHECK-LABEL: should_not_be_optimized: -; CHECK: # %bb.0: -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: vreplgr2vr.d $vr0, $a0 -; CHECK-NEXT: st.d $a0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: should_not_be_optimized: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: st.w $a2, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 2 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3 +; LA32-NEXT: st.w $a0, $a1, 4 +; LA32-NEXT: ret +; +; LA64-LABEL: should_not_be_optimized: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vreplgr2vr.d $vr0, $a0 +; LA64-NEXT: st.d $a0, $a1, 0 +; LA64-NEXT: ret %tmp = load i64, ptr %ptr store i64 %tmp, ptr %dst %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0 @@ -18,12 +29,48 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){ ret <2 x i64> %tmp2 } -define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) { -; CHECK-LABEL: vldrepl_d_unaligned_offset: +define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) { +; CHECK-LABEL: should_not_be_optimized_sext_load: ; CHECK: # %bb.0: -; CHECK-NEXT: addi.d $a0, $a0, 4 -; CHECK-NEXT: vldrepl.d $vr0, $a0, 0 +; CHECK-NEXT: ld.b $a0, $a0, 0 +; CHECK-NEXT: vreplgr2vr.h $vr0, $a0 +; CHECK-NEXT: ret + %tmp = load i8, ptr %ptr + %tmp1 = sext i8 %tmp to i16 + %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0 + %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer 
+ ret <8 x i16> %tmp3 +} + +define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) { +; CHECK-LABEL: should_not_be_optimized_zext_load: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.bu $a0, $a0, 0 +; CHECK-NEXT: vreplgr2vr.h $vr0, $a0 ; CHECK-NEXT: ret + %tmp = load i8, ptr %ptr + %tmp1 = zext i8 %tmp to i16 + %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0 + %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer + ret <8 x i16> %tmp3 +} + +define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) { +; LA32-LABEL: vldrepl_d_unaligned_offset: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a1, $a0, 4 +; LA32-NEXT: ld.w $a0, $a0, 8 +; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3 +; LA32-NEXT: ret +; +; LA64-LABEL: vldrepl_d_unaligned_offset: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $a0, $a0, 4 +; LA64-NEXT: vldrepl.d $vr0, $a0, 0 +; LA64-NEXT: ret %p = getelementptr i32, ptr %ptr, i32 1 %tmp = load i64, ptr %p %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0 @@ -102,10 +149,20 @@ define <4 x i32> @vldrepl_w_offset(ptr %ptr) { } define <2 x i64> @vldrepl_d(ptr %ptr) { -; CHECK-LABEL: vldrepl_d: -; CHECK: # %bb.0: -; CHECK-NEXT: vldrepl.d $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: vldrepl_d: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a1, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3 +; LA32-NEXT: ret +; +; LA64-LABEL: vldrepl_d: +; LA64: # %bb.0: +; LA64-NEXT: vldrepl.d $vr0, $a0, 0 +; LA64-NEXT: ret %tmp = load i64, ptr %ptr %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0 %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> poison, <2 x i32> zeroinitializer @@ -113,10 +170,20 @@ define <2 x i64> @vldrepl_d(ptr %ptr) { } define <2 x i64> @vldrepl_d_offset(ptr 
%ptr) { -; CHECK-LABEL: vldrepl_d_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: vldrepl.d $vr0, $a0, 264 -; CHECK-NEXT: ret +; LA32-LABEL: vldrepl_d_offset: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a1, $a0, 264 +; LA32-NEXT: ld.w $a0, $a0, 268 +; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 3 +; LA32-NEXT: ret +; +; LA64-LABEL: vldrepl_d_offset: +; LA64: # %bb.0: +; LA64-NEXT: vldrepl.d $vr0, $a0, 264 +; LA64-NEXT: ret %p = getelementptr i64, ptr %ptr, i64 33 %tmp = load i64, ptr %p %tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0 diff --git a/llvm/test/CodeGen/LoongArch/lsx/bswap.ll b/llvm/test/CodeGen/LoongArch/lsx/bswap.ll index 8172e21eae34..ecfb82627fe9 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/bswap.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/bswap.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s define void @bswap_v8i16(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: bswap_v8i16: diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll index 9517558a92ed..24df71c2ad71 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @buildvector_v16i8_splat(ptr %dst, i8 %a0) nounwind { ; 
CHECK-LABEL: buildvector_v16i8_splat: @@ -41,11 +42,20 @@ entry: } define void @buildvector_v2i64_splat(ptr %dst, i64 %a0) nounwind { -; CHECK-LABEL: buildvector_v2i64_splat: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: buildvector_v2i64_splat: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 1 +; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 3 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v2i64_splat: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vreplgr2vr.d $vr0, $a1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %insert = insertelement <2 x i64> undef, i64 %a0, i8 0 %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer @@ -138,12 +148,19 @@ entry: } define void @buildvector_v2f64_const_splat(ptr %dst) nounwind { -; CHECK-LABEL: buildvector_v2f64_const_splat: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lu52i.d $a1, $zero, 1023 -; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: buildvector_v2f64_const_splat: +; LA32: # %bb.0: # %entry +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0) +; LA32-NEXT: vld $vr0, $a1, %pc_lo12(.LCPI11_0) +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v2f64_const_splat: +; LA64: # %bb.0: # %entry +; LA64-NEXT: lu52i.d $a1, $zero, 1023 +; LA64-NEXT: vreplgr2vr.d $vr0, $a1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: store <2 x double> <double 1.0, double 1.0>, ptr %dst ret void @@ -222,35 +239,65 @@ entry: } define void @buildvector_v16i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { -; CHECK-LABEL: buildvector_v16i8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.b $t0, $sp, 64 -; CHECK-NEXT: ld.b $t1, $sp, 
56 -; CHECK-NEXT: ld.b $t2, $sp, 48 -; CHECK-NEXT: ld.b $t3, $sp, 40 -; CHECK-NEXT: ld.b $t4, $sp, 32 -; CHECK-NEXT: ld.b $t5, $sp, 24 -; CHECK-NEXT: ld.b $t6, $sp, 16 -; CHECK-NEXT: ld.b $t7, $sp, 8 -; CHECK-NEXT: ld.b $t8, $sp, 0 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t8, 7 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t7, 8 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t6, 9 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t5, 10 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t4, 11 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t3, 12 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t2, 13 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t1, 14 -; CHECK-NEXT: vinsgr2vr.b $vr0, $t0, 15 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: buildvector_v16i8: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.b $t0, $sp, 32 +; LA32-NEXT: ld.b $t1, $sp, 28 +; LA32-NEXT: ld.b $t2, $sp, 24 +; LA32-NEXT: ld.b $t3, $sp, 20 +; LA32-NEXT: ld.b $t4, $sp, 16 +; LA32-NEXT: ld.b $t5, $sp, 12 +; LA32-NEXT: ld.b $t6, $sp, 8 +; LA32-NEXT: ld.b $t7, $sp, 4 +; LA32-NEXT: ld.b $t8, $sp, 0 +; LA32-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; LA32-NEXT: vinsgr2vr.b $vr0, $a3, 2 +; LA32-NEXT: vinsgr2vr.b $vr0, $a4, 3 +; LA32-NEXT: vinsgr2vr.b $vr0, $a5, 4 +; LA32-NEXT: vinsgr2vr.b $vr0, $a6, 5 +; LA32-NEXT: vinsgr2vr.b $vr0, $a7, 6 +; LA32-NEXT: vinsgr2vr.b $vr0, $t8, 7 +; LA32-NEXT: vinsgr2vr.b $vr0, $t7, 8 +; LA32-NEXT: vinsgr2vr.b $vr0, $t6, 9 +; LA32-NEXT: vinsgr2vr.b $vr0, $t5, 10 +; LA32-NEXT: vinsgr2vr.b $vr0, $t4, 11 +; LA32-NEXT: vinsgr2vr.b $vr0, $t3, 12 +; LA32-NEXT: vinsgr2vr.b $vr0, $t2, 13 +; LA32-NEXT: vinsgr2vr.b $vr0, $t1, 14 +; LA32-NEXT: vinsgr2vr.b $vr0, $t0, 15 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v16i8: +; LA64: 
# %bb.0: # %entry +; LA64-NEXT: ld.b $t0, $sp, 64 +; LA64-NEXT: ld.b $t1, $sp, 56 +; LA64-NEXT: ld.b $t2, $sp, 48 +; LA64-NEXT: ld.b $t3, $sp, 40 +; LA64-NEXT: ld.b $t4, $sp, 32 +; LA64-NEXT: ld.b $t5, $sp, 24 +; LA64-NEXT: ld.b $t6, $sp, 16 +; LA64-NEXT: ld.b $t7, $sp, 8 +; LA64-NEXT: ld.b $t8, $sp, 0 +; LA64-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; LA64-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; LA64-NEXT: vinsgr2vr.b $vr0, $a3, 2 +; LA64-NEXT: vinsgr2vr.b $vr0, $a4, 3 +; LA64-NEXT: vinsgr2vr.b $vr0, $a5, 4 +; LA64-NEXT: vinsgr2vr.b $vr0, $a6, 5 +; LA64-NEXT: vinsgr2vr.b $vr0, $a7, 6 +; LA64-NEXT: vinsgr2vr.b $vr0, $t8, 7 +; LA64-NEXT: vinsgr2vr.b $vr0, $t7, 8 +; LA64-NEXT: vinsgr2vr.b $vr0, $t6, 9 +; LA64-NEXT: vinsgr2vr.b $vr0, $t5, 10 +; LA64-NEXT: vinsgr2vr.b $vr0, $t4, 11 +; LA64-NEXT: vinsgr2vr.b $vr0, $t3, 12 +; LA64-NEXT: vinsgr2vr.b $vr0, $t2, 13 +; LA64-NEXT: vinsgr2vr.b $vr0, $t1, 14 +; LA64-NEXT: vinsgr2vr.b $vr0, $t0, 15 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0 %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1 @@ -338,6 +385,133 @@ entry: ret void } +define void @buildvector_v16i8_subseq_2(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind { +; CHECK-LABEL: buildvector_v16i8_subseq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld.b $t0, $sp, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 +; CHECK-NEXT: vinsgr2vr.b $vr0, $t0, 7 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 9 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 10 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 11 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 12 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 13 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 14 +; CHECK-NEXT: vinsgr2vr.b 
$vr0, $t0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0 + %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1 + %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2 + %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3 + %ins4 = insertelement <16 x i8> %ins3, i8 %a4, i32 4 + %ins5 = insertelement <16 x i8> %ins4, i8 %a5, i32 5 + %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6 + %ins7 = insertelement <16 x i8> %ins6, i8 %a7, i32 7 + %ins8 = insertelement <16 x i8> %ins7, i8 %a0, i32 8 + %ins9 = insertelement <16 x i8> %ins8, i8 %a1, i32 9 + %ins10 = insertelement <16 x i8> %ins9, i8 %a2, i32 10 + %ins11 = insertelement <16 x i8> %ins10, i8 %a3, i32 11 + %ins12 = insertelement <16 x i8> %ins11, i8 %a4, i32 12 + %ins13 = insertelement <16 x i8> %ins12, i8 %a5, i32 13 + %ins14 = insertelement <16 x i8> %ins13, i8 %a6, i32 14 + %ins15 = insertelement <16 x i8> %ins14, i8 %a7, i32 15 + store <16 x i8> %ins15, ptr %dst + ret void +} + +define void @buildvector_v16i8_subseq_4(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3) nounwind { +; CHECK-LABEL: buildvector_v16i8_subseq_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 4 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 5 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 6 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 7 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 9 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 10 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 11 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 13 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 14 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0 + %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 
1 + %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2 + %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3 + %ins4 = insertelement <16 x i8> %ins3, i8 %a0, i32 4 + %ins5 = insertelement <16 x i8> %ins4, i8 %a1, i32 5 + %ins6 = insertelement <16 x i8> %ins5, i8 %a2, i32 6 + %ins7 = insertelement <16 x i8> %ins6, i8 %a3, i32 7 + %ins8 = insertelement <16 x i8> %ins7, i8 %a0, i32 8 + %ins9 = insertelement <16 x i8> %ins8, i8 %a1, i32 9 + %ins10 = insertelement <16 x i8> %ins9, i8 %a2, i32 10 + %ins11 = insertelement <16 x i8> %ins10, i8 %a3, i32 11 + %ins12 = insertelement <16 x i8> %ins11, i8 %a0, i32 12 + %ins13 = insertelement <16 x i8> %ins12, i8 %a1, i32 13 + %ins14 = insertelement <16 x i8> %ins13, i8 %a2, i32 14 + %ins15 = insertelement <16 x i8> %ins14, i8 %a3, i32 15 + store <16 x i8> %ins15, ptr %dst + ret void +} + +define void @buildvector_v16i8_subseq_8(ptr %dst, i8 %a0, i8 %a1) nounwind { +; CHECK-LABEL: buildvector_v16i8_subseq_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 2 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 3 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 4 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 5 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 6 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 7 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 9 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 11 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 13 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0 + %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1 + %ins2 = insertelement <16 x i8> %ins1, i8 %a0, i32 2 + %ins3 = insertelement <16 x i8> %ins2, i8 %a1, i32 3 + %ins4 = insertelement <16 x i8> %ins3, i8 %a0, i32 4 + %ins5 = insertelement <16 x 
i8> %ins4, i8 %a1, i32 5 + %ins6 = insertelement <16 x i8> %ins5, i8 %a0, i32 6 + %ins7 = insertelement <16 x i8> %ins6, i8 %a1, i32 7 + %ins8 = insertelement <16 x i8> %ins7, i8 %a0, i32 8 + %ins9 = insertelement <16 x i8> %ins8, i8 %a1, i32 9 + %ins10 = insertelement <16 x i8> %ins9, i8 %a0, i32 10 + %ins11 = insertelement <16 x i8> %ins10, i8 %a1, i32 11 + %ins12 = insertelement <16 x i8> %ins11, i8 %a0, i32 12 + %ins13 = insertelement <16 x i8> %ins12, i8 %a1, i32 13 + %ins14 = insertelement <16 x i8> %ins13, i8 %a0, i32 14 + %ins15 = insertelement <16 x i8> %ins14, i8 %a1, i32 15 + store <16 x i8> %ins15, ptr %dst + ret void +} + define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { ; CHECK-LABEL: buildvector_v8i16: ; CHECK: # %bb.0: # %entry @@ -410,6 +584,58 @@ entry: ret void } +define void @buildvector_v8i16_subseq_2(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind { +; CHECK-LABEL: buildvector_v8i16_subseq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 5 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 6 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 7 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0 + %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1 + %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2 + %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3 + %ins4 = insertelement <8 x i16> %ins3, i16 %a0, i32 4 + %ins5 = insertelement <8 x i16> %ins4, i16 %a1, i32 5 + %ins6 = insertelement <8 x i16> %ins5, i16 %a2, i32 6 + %ins7 = insertelement <8 x i16> %ins6, i16 %a3, i32 7 + store <8 x i16> %ins7, ptr %dst + ret void +} + +define void @buildvector_v8i16_subseq_4(ptr %dst, i16 %a0, i16 %a1) nounwind { +; 
CHECK-LABEL: buildvector_v8i16_subseq_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 3 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 5 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 7 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0 + %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1 + %ins2 = insertelement <8 x i16> %ins1, i16 %a0, i32 2 + %ins3 = insertelement <8 x i16> %ins2, i16 %a1, i32 3 + %ins4 = insertelement <8 x i16> %ins3, i16 %a0, i32 4 + %ins5 = insertelement <8 x i16> %ins4, i16 %a1, i32 5 + %ins6 = insertelement <8 x i16> %ins5, i16 %a0, i32 6 + %ins7 = insertelement <8 x i16> %ins6, i16 %a1, i32 7 + store <8 x i16> %ins7, ptr %dst + ret void +} + define void @buildvector_v4i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { ; CHECK-LABEL: buildvector_v4i32: ; CHECK: # %bb.0: # %entry @@ -462,14 +688,41 @@ entry: ret void } -define void @buildvector_v2i64(ptr %dst, i64 %a0, i64 %a1) nounwind { -; CHECK-LABEL: buildvector_v2i64: +define void @buildvector_v4i32_subseq_2(ptr %dst, i32 %a0, i32 %a1) nounwind { +; CHECK-LABEL: buildvector_v4i32_subseq_2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 2 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 3 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: + %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0 + %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1 + %ins2 = insertelement <4 x i32> %ins1, i32 %a0, i32 2 + %ins3 = insertelement <4 x i32> %ins2, i32 %a1, i32 3 + store <4 x i32> %ins3, ptr %dst + ret void +} + +define void @buildvector_v2i64(ptr 
%dst, i64 %a0, i64 %a1) nounwind { +; LA32-LABEL: buildvector_v2i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 1 +; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 2 +; LA32-NEXT: vinsgr2vr.w $vr0, $a4, 3 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v2i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a2, 1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret +entry: %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0 %ins1 = insertelement <2 x i64> %ins0, i64 %a1, i32 1 store <2 x i64> %ins1, ptr %dst @@ -477,11 +730,18 @@ entry: } define void @buildvector_v2i64_partial(ptr %dst, i64 %a0) nounwind { -; CHECK-LABEL: buildvector_v2i64_partial: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: buildvector_v2i64_partial: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v2i64_partial: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0 %ins1 = insertelement <2 x i64> %ins0, i64 undef, i32 1 @@ -490,12 +750,20 @@ entry: } define void @buildvector_v2i64_with_constant(ptr %dst, i64 %a1) nounwind { -; CHECK-LABEL: buildvector_v2i64_with_constant: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vrepli.b $vr0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: buildvector_v2i64_with_constant: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vrepli.b $vr0, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 2 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 3 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: buildvector_v2i64_with_constant: +; LA64: # %bb.0: # %entry +; 
LA64-NEXT: vrepli.b $vr0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %ins0 = insertelement <2 x i64> undef, i64 0, i32 0 %ins1 = insertelement <2 x i64> %ins0, i64 %a1, i32 1 @@ -562,6 +830,26 @@ entry: ret void } +define void @buildvector_v4f32_subseq_2(ptr %dst, float %a0, float %a1) nounwind { +; CHECK-LABEL: buildvector_v4f32_subseq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1 +; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 +; CHECK-NEXT: vori.b $vr2, $vr0, 0 +; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 +; CHECK-NEXT: vextrins.w $vr2, $vr0, 32 +; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 +; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x float> undef, float %a0, i32 0 + %ins1 = insertelement <4 x float> %ins0, float %a1, i32 1 + %ins2 = insertelement <4 x float> %ins1, float %a0, i32 2 + %ins3 = insertelement <4 x float> %ins2, float %a1, i32 3 + store <4 x float> %ins3, ptr %dst + ret void +} + define void @buildvector_v2f64(ptr %dst, double %a0, double %a1) nounwind { ; CHECK-LABEL: buildvector_v2f64: ; CHECK: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll index 5df553fba7ef..a9a38e8f75f9 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s define void @ctpop_v16i8(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: ctpop_v16i8: diff --git a/llvm/test/CodeGen/LoongArch/lsx/fdiv-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lsx/fdiv-reciprocal-estimate.ll index 21dbbf310ad8..58e16d37ae27 100644 --- 
a/llvm/test/CodeGen/LoongArch/lsx/fdiv-reciprocal-estimate.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/fdiv-reciprocal-estimate.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx,-frecipe < %s | FileCheck %s --check-prefix=FAULT -; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx,-frecipe < %s | FileCheck %s --check-prefixes=FAULT,FAULT-LA32 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx,+frecipe < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx,-frecipe < %s | FileCheck %s --check-prefixes=FAULT,FAULT-LA64 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { ; FAULT-LABEL: fdiv_v4f32: @@ -13,13 +15,13 @@ define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { ; ; CHECK-LABEL: fdiv_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a2, 0 -; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: vfrecipe.s $vr2, $vr0 -; CHECK-NEXT: vfmul.s $vr3, $vr1, $vr2 -; CHECK-NEXT: vfnmsub.s $vr0, $vr0, $vr3, $vr1 -; CHECK-NEXT: vfmadd.s $vr0, $vr2, $vr0, $vr3 -; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfrecipe.s $vr2, $vr0 +; CHECK-NEXT: vfmul.s $vr3, $vr1, $vr2 +; CHECK-NEXT: vfnmsub.s $vr0, $vr0, $vr3, $vr1 +; CHECK-NEXT: vfmadd.s $vr0, $vr2, $vr0, $vr3 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 @@ -38,20 +40,35 @@ define void @fdiv_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { ; FAULT-NEXT: vst $vr0, $a0, 0 ; FAULT-NEXT: ret ; -; CHECK-LABEL: fdiv_v2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a2, 0 -; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: lu52i.d $a1, $zero, -1025 -; CHECK-NEXT: vreplgr2vr.d 
$vr2, $a1 -; CHECK-NEXT: vfrecipe.d $vr3, $vr0 -; CHECK-NEXT: vfmadd.d $vr2, $vr0, $vr3, $vr2 -; CHECK-NEXT: vfnmsub.d $vr2, $vr2, $vr3, $vr3 -; CHECK-NEXT: vfmul.d $vr3, $vr1, $vr2 -; CHECK-NEXT: vfnmsub.d $vr0, $vr0, $vr3, $vr1 -; CHECK-NEXT: vfmadd.d $vr0, $vr2, $vr0, $vr3 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: fdiv_v2f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI1_0) +; LA32-NEXT: vld $vr0, $a2, 0 +; LA32-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI1_0) +; LA32-NEXT: vld $vr2, $a1, 0 +; LA32-NEXT: vfrecipe.d $vr3, $vr0 +; LA32-NEXT: vfmadd.d $vr1, $vr0, $vr3, $vr1 +; LA32-NEXT: vfnmsub.d $vr1, $vr1, $vr3, $vr3 +; LA32-NEXT: vfmul.d $vr3, $vr2, $vr1 +; LA32-NEXT: vfnmsub.d $vr0, $vr0, $vr3, $vr2 +; LA32-NEXT: vfmadd.d $vr0, $vr1, $vr0, $vr3 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: fdiv_v2f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a2, 0 +; LA64-NEXT: vld $vr1, $a1, 0 +; LA64-NEXT: lu52i.d $a1, $zero, -1025 +; LA64-NEXT: vreplgr2vr.d $vr2, $a1 +; LA64-NEXT: vfrecipe.d $vr3, $vr0 +; LA64-NEXT: vfmadd.d $vr2, $vr0, $vr3, $vr2 +; LA64-NEXT: vfnmsub.d $vr2, $vr2, $vr3, $vr3 +; LA64-NEXT: vfmul.d $vr3, $vr1, $vr2 +; LA64-NEXT: vfnmsub.d $vr0, $vr0, $vr3, $vr1 +; LA64-NEXT: vfmadd.d $vr0, $vr2, $vr0, $vr3 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <2 x double>, ptr %a0 %v1 = load <2 x double>, ptr %a1 @@ -71,13 +88,13 @@ define void @one_fdiv_v4f32(ptr %res, ptr %a0) nounwind { ; ; CHECK-LABEL: one_fdiv_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vfrecipe.s $vr1, $vr0 -; CHECK-NEXT: lu12i.w $a1, -264192 -; CHECK-NEXT: vreplgr2vr.w $vr2, $a1 -; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2 -; CHECK-NEXT: vfnmsub.s $vr0, $vr0, $vr1, $vr1 -; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vfrecipe.s $vr1, $vr0 +; CHECK-NEXT: lu12i.w $a1, -264192 +; CHECK-NEXT: vreplgr2vr.w $vr2, $a1 +; CHECK-NEXT: 
vfmadd.s $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: vfnmsub.s $vr0, $vr0, $vr1, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 @@ -87,25 +104,47 @@ entry: } define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind { -; FAULT-LABEL: one_fdiv_v2f64: -; FAULT: # %bb.0: # %entry -; FAULT-NEXT: vld $vr0, $a1, 0 -; FAULT-NEXT: vfrecip.d $vr0, $vr0 -; FAULT-NEXT: vst $vr0, $a0, 0 -; FAULT-NEXT: ret +; FAULT-LA32-LABEL: one_fdiv_v2f64: +; FAULT-LA32: # %bb.0: # %entry +; FAULT-LA32-NEXT: vld $vr0, $a1, 0 +; FAULT-LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; FAULT-LA32-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI3_0) +; FAULT-LA32-NEXT: vfdiv.d $vr0, $vr1, $vr0 +; FAULT-LA32-NEXT: vst $vr0, $a0, 0 +; FAULT-LA32-NEXT: ret ; -; CHECK-LABEL: one_fdiv_v2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vfrecipe.d $vr1, $vr0 -; CHECK-NEXT: lu52i.d $a1, $zero, 1023 -; CHECK-NEXT: vreplgr2vr.d $vr2, $a1 -; CHECK-NEXT: vfnmsub.d $vr3, $vr0, $vr1, $vr2 -; CHECK-NEXT: vfmadd.d $vr1, $vr1, $vr3, $vr1 -; CHECK-NEXT: vfnmsub.d $vr0, $vr0, $vr1, $vr2 -; CHECK-NEXT: vfmadd.d $vr0, $vr1, $vr0, $vr1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: one_fdiv_v2f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; LA32-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI3_0) +; LA32-NEXT: vfrecipe.d $vr2, $vr0 +; LA32-NEXT: vfnmsub.d $vr3, $vr0, $vr2, $vr1 +; LA32-NEXT: vfmadd.d $vr2, $vr2, $vr3, $vr2 +; LA32-NEXT: vfnmsub.d $vr0, $vr0, $vr2, $vr1 +; LA32-NEXT: vfmadd.d $vr0, $vr2, $vr0, $vr2 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; FAULT-LA64-LABEL: one_fdiv_v2f64: +; FAULT-LA64: # %bb.0: # %entry +; FAULT-LA64-NEXT: vld $vr0, $a1, 0 +; FAULT-LA64-NEXT: vfrecip.d $vr0, $vr0 +; FAULT-LA64-NEXT: vst $vr0, $a0, 0 +; FAULT-LA64-NEXT: ret +; +; LA64-LABEL: one_fdiv_v2f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vfrecipe.d $vr1, 
$vr0 +; LA64-NEXT: lu52i.d $a1, $zero, 1023 +; LA64-NEXT: vreplgr2vr.d $vr2, $a1 +; LA64-NEXT: vfnmsub.d $vr3, $vr0, $vr1, $vr2 +; LA64-NEXT: vfmadd.d $vr1, $vr1, $vr3, $vr1 +; LA64-NEXT: vfnmsub.d $vr0, $vr0, $vr1, $vr2 +; LA64-NEXT: vfmadd.d $vr0, $vr1, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <2 x double>, ptr %a0 %div = fdiv fast <2 x double> <double 1.0, double 1.0>, %v0 diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll index c83c563952d4..89442908b31b 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll @@ -1,8 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST ; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ ; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-ON ; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ ; RUN: | FileCheck %s --check-prefix=CONTRACT-ON +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF ; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ ; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll index 1f316d5b1c8a..0e5cd3cdcd35 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll @@ -1,8 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST ; RUN: llc 
--mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ ; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-ON ; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ ; RUN: | FileCheck %s --check-prefix=CONTRACT-ON +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF ; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ ; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF diff --git a/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll index 735dad453660..8005318f4f62 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll @@ -1,57 +1,102 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefix=LA64 declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32) define <4 x float> @powi_v4f32(<4 x float> %va, i32 %b) nounwind { -; CHECK-LABEL: powi_v4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -48 -; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; CHECK-NEXT: addi.w $fp, $a0, 0 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2) -; CHECK-NEXT: jirl $ra, $ra, 0 -; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; CHECK-NEXT: 
vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2) -; CHECK-NEXT: jirl $ra, $ra, 0 -; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -; CHECK-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; CHECK-NEXT: vextrins.w $vr0, $vr1, 16 -; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2 -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2) -; CHECK-NEXT: jirl $ra, $ra, 0 -; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -; CHECK-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; CHECK-NEXT: vextrins.w $vr1, $vr0, 32 -; CHECK-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill -; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2) -; CHECK-NEXT: jirl $ra, $ra, 0 -; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 -; CHECK-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; CHECK-NEXT: vextrins.w $vr1, $vr0, 48 -; CHECK-NEXT: vori.b $vr0, $vr1, 0 -; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 48 -; CHECK-NEXT: ret +; LA32-LABEL: powi_v4f32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -48 +; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA32-NEXT: vreplvei.w $vr0, $vr0, 1 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA32-NEXT: bl __powisf2 +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: vld $vr0, $sp, 
16 # 16-byte Folded Reload +; LA32-NEXT: vreplvei.w $vr0, $vr0, 0 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl __powisf2 +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: vextrins.w $vr0, $vr1, 16 +; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; LA32-NEXT: vreplvei.w $vr0, $vr0, 2 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl __powisf2 +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: vextrins.w $vr1, $vr0, 32 +; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; LA32-NEXT: vreplvei.w $vr0, $vr0, 3 +; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl __powisf2 +; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: vextrins.w $vr1, $vr0, 48 +; LA32-NEXT: vori.b $vr0, $vr1, 0 +; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 48 +; LA32-NEXT: ret +; +; LA64-LABEL: powi_v4f32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA64-NEXT: addi.w $fp, $a0, 0 +; LA64-NEXT: vreplvei.w $vr0, $vr0, 1 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; LA64-NEXT: vreplvei.w $vr0, 
$vr0, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: vextrins.w $vr0, $vr1, 16 +; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; LA64-NEXT: vreplvei.w $vr0, $vr0, 2 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: vextrins.w $vr1, $vr0, 32 +; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; LA64-NEXT: vreplvei.w $vr0, $vr0, 3 +; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 +; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: vextrins.w $vr1, $vr0, 48 +; LA64-NEXT: vori.b $vr0, $vr1, 0 +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret entry: %res = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %va, i32 %b) ret <4 x float> %res @@ -60,33 +105,58 @@ entry: declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32) define <2 x double> @powi_v2f64(<2 x double> %va, i32 %b) nounwind { -; CHECK-LABEL: powi_v2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -48 -; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; CHECK-NEXT: addi.w $fp, $a0, 0 -; CHECK-NEXT: vreplvei.d $vr0, 
$vr0, 1 -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2) -; CHECK-NEXT: jirl $ra, $ra, 0 -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; CHECK-NEXT: move $a0, $fp -; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2) -; CHECK-NEXT: jirl $ra, $ra, 0 -; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; CHECK-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 48 -; CHECK-NEXT: ret +; LA32-LABEL: powi_v2f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -48 +; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA32-NEXT: vreplvei.d $vr0, $vr0, 1 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; LA32-NEXT: bl __powidf2 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; LA32-NEXT: vreplvei.d $vr0, $vr0, 0 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; LA32-NEXT: move $a0, $fp +; LA32-NEXT: bl __powidf2 +; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload +; LA32-NEXT: vextrins.d $vr0, $vr1, 16 +; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 48 +; LA32-NEXT: ret +; +; LA64-LABEL: powi_v2f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, 
$sp, -48 +; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; LA64-NEXT: addi.w $fp, $a0, 0 +; LA64-NEXT: vreplvei.d $vr0, $vr0, 1 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(__powidf2) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; LA64-NEXT: vreplvei.d $vr0, $vr0, 0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; LA64-NEXT: move $a0, $fp +; LA64-NEXT: pcaddu18i $ra, %call36(__powidf2) +; LA64-NEXT: jirl $ra, $ra, 0 +; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload +; LA64-NEXT: vextrins.d $vr0, $vr1, 16 +; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 48 +; LA64-NEXT: ret entry: %res = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %va, i32 %b) ret <2 x double> %res diff --git a/llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll b/llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll index 912d06242f7d..1f744830bd56 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/fsqrt-reciprocal-estimate.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lsx,-frecipe < %s | FileCheck %s --check-prefix=FAULT -; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx,-frecipe < %s | FileCheck %s --check-prefixes=FAULT,FAULT-LA32 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx,+frecipe < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; 
RUN: llc --mtriple=loongarch64 --mattr=+lsx,-frecipe < %s | FileCheck %s --check-prefixes=FAULT,FAULT-LA64 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s --check-prefixes=CHECK,LA64 ;; 1.0 / (fsqrt vec) define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind { @@ -11,20 +13,20 @@ define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind { ; FAULT-NEXT: vst $vr0, $a0, 0 ; FAULT-NEXT: ret ; -; CHECK-LABEL one_div_sqrt_v4f32: +; CHECK-LABEL: one_div_sqrt_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vfrsqrte.s $vr1, $vr0 -; CHECK-NEXT: vfmul.s $vr1, $vr0, $vr1 -; CHECK-NEXT: vfmul.s $vr0, $vr0, $vr1 -; CHECK-NEXT: lu12i.w $a1, -261120 -; CHECK-NEXT: vreplgr2vr.w $vr2, $a1 -; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2 -; CHECK-NEXT: lu12i.w $a1, -266240 -; CHECK-NEXT: vreplgr2vr.w $vr2, $a1 -; CHECK-NEXT: vfmul.s $vr1, $vr1, $vr2 -; CHECK-NEXT: vfmul.s $vr0, $vr1, $vr0 -; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vfrsqrte.s $vr1, $vr0 +; CHECK-NEXT: vfmul.s $vr1, $vr0, $vr1 +; CHECK-NEXT: vfmul.s $vr0, $vr0, $vr1 +; CHECK-NEXT: lu12i.w $a1, -261120 +; CHECK-NEXT: vreplgr2vr.w $vr2, $a1 +; CHECK-NEXT: vfmadd.s $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: lu12i.w $a1, -266240 +; CHECK-NEXT: vreplgr2vr.w $vr2, $a1 +; CHECK-NEXT: vfmul.s $vr1, $vr1, $vr2 +; CHECK-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0, align 16 @@ -35,34 +37,64 @@ entry: } define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind { -; FAULT-LABEL: one_div_sqrt_v2f64: -; FAULT: # %bb.0: # %entry -; FAULT-NEXT: vld $vr0, $a1, 0 -; FAULT-NEXT: vfrsqrt.d $vr0, $vr0 -; FAULT-NEXT: vst $vr0, $a0, 0 -; FAULT-NEXT: ret +; FAULT-LA32-LABEL: one_div_sqrt_v2f64: +; FAULT-LA32: # %bb.0: # %entry +; FAULT-LA32-NEXT: vld $vr0, $a1, 0 +; FAULT-LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) +; FAULT-LA32-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI1_0) +; 
FAULT-LA32-NEXT: vfsqrt.d $vr0, $vr0 +; FAULT-LA32-NEXT: vfdiv.d $vr0, $vr1, $vr0 +; FAULT-LA32-NEXT: vst $vr0, $a0, 0 +; FAULT-LA32-NEXT: ret ; -; CHECK-LABEL one_div_sqrt_v2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vfrsqrte.d $vr1, $vr0 -; CHECK-NEXT: vfmul.d $vr1, $vr0, $vr1 -; CHECK-NEXT: vfmul.d $vr2, $vr0, $vr1 -; CHECK-NEXT: ori $a1, $zero, 0 -; CHECK-NEXT: lu32i.d $a1, -524288 -; CHECK-NEXT: lu52i.d $a1, $a1, -1024 -; CHECK-NEXT: vreplgr2vr.d $vr3, $a1 -; CHECK-NEXT: vfmadd.d $vr2, $vr2, $vr1, $vr3 -; CHECK-NEXT: lu52i.d $a1, $zero, -1026 -; CHECK-NEXT: vreplgr2vr.d $vr4, $a1 -; CHECK-NEXT: vfmul.d $vr1, $vr1, $vr4 -; CHECK-NEXT: vfmul.d $vr1, $vr1, $vr2 -; CHECK-NEXT: vfmul.d $vr0, $vr0, $vr1 -; CHECK-NEXT: vfmadd.d $vr0, $vr0, $vr1, $vr3 -; CHECK-NEXT: vfmul.d $vr1, $vr1, $vr4 -; CHECK-NEXT: vfmul.d $vr0, $vr1, $vr0 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: one_div_sqrt_v2f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vfrsqrte.d $vr1, $vr0 +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) +; LA32-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI1_0) +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_1) +; LA32-NEXT: vld $vr3, $a1, %pc_lo12(.LCPI1_1) +; LA32-NEXT: vfmul.d $vr1, $vr0, $vr1 +; LA32-NEXT: vfmul.d $vr4, $vr0, $vr1 +; LA32-NEXT: vfmadd.d $vr4, $vr4, $vr1, $vr2 +; LA32-NEXT: vfmul.d $vr1, $vr1, $vr3 +; LA32-NEXT: vfmul.d $vr1, $vr1, $vr4 +; LA32-NEXT: vfmul.d $vr0, $vr0, $vr1 +; LA32-NEXT: vfmadd.d $vr0, $vr0, $vr1, $vr2 +; LA32-NEXT: vfmul.d $vr1, $vr1, $vr3 +; LA32-NEXT: vfmul.d $vr0, $vr1, $vr0 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; FAULT-LA64-LABEL: one_div_sqrt_v2f64: +; FAULT-LA64: # %bb.0: # %entry +; FAULT-LA64-NEXT: vld $vr0, $a1, 0 +; FAULT-LA64-NEXT: vfrsqrt.d $vr0, $vr0 +; FAULT-LA64-NEXT: vst $vr0, $a0, 0 +; FAULT-LA64-NEXT: ret +; +; LA64-LABEL: one_div_sqrt_v2f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vfrsqrte.d 
$vr1, $vr0 +; LA64-NEXT: vfmul.d $vr1, $vr0, $vr1 +; LA64-NEXT: vfmul.d $vr2, $vr0, $vr1 +; LA64-NEXT: ori $a1, $zero, 0 +; LA64-NEXT: lu32i.d $a1, -524288 +; LA64-NEXT: lu52i.d $a1, $a1, -1024 +; LA64-NEXT: vreplgr2vr.d $vr3, $a1 +; LA64-NEXT: vfmadd.d $vr2, $vr2, $vr1, $vr3 +; LA64-NEXT: lu52i.d $a1, $zero, -1026 +; LA64-NEXT: vreplgr2vr.d $vr4, $a1 +; LA64-NEXT: vfmul.d $vr1, $vr1, $vr4 +; LA64-NEXT: vfmul.d $vr1, $vr1, $vr2 +; LA64-NEXT: vfmul.d $vr0, $vr0, $vr1 +; LA64-NEXT: vfmadd.d $vr0, $vr0, $vr1, $vr3 +; LA64-NEXT: vfmul.d $vr1, $vr1, $vr4 +; LA64-NEXT: vfmul.d $vr0, $vr1, $vr0 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <2 x double>, ptr %a0, align 16 %sqrt = call fast <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) diff --git a/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll index a57bc1ca0e94..d88e0d1ea7c2 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 ;; fsqrt define void @sqrt_v4f32(ptr %res, ptr %a0) nounwind { @@ -47,12 +48,22 @@ entry: } define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind { -; CHECK-LABEL: one_div_sqrt_v2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vfrsqrt.d $vr0, $vr0 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: one_div_sqrt_v2f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; LA32-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI3_0) +; LA32-NEXT: vfsqrt.d $vr0, $vr0 +; LA32-NEXT: vfdiv.d $vr0, $vr1, $vr0 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; 
+; LA64-LABEL: one_div_sqrt_v2f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vfrsqrt.d $vr0, $vr0 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <2 x double>, ptr %a0, align 16 %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) diff --git a/llvm/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.ll b/llvm/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.ll index c46e624ddaa8..951ccbe4d08c 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s define void @test_w() nounwind { ; CHECK-LABEL: test_w: diff --git a/llvm/test/CodeGen/LoongArch/lsx/inline-asm-reg-names.ll b/llvm/test/CodeGen/LoongArch/lsx/inline-asm-reg-names.ll index ceea3621be2f..eacfca88fc74 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/inline-asm-reg-names.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/inline-asm-reg-names.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @register_vr1() nounwind { ; CHECK-LABEL: register_vr1: @@ -42,16 +43,27 @@ entry: ;; register which is preserved across calls. That's why the ;; fst.d and fld.d instructions are emitted. 
define void @register_vr31() nounwind { -; CHECK-LABEL: register_vr31: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: vldi $vr31, 1 -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 16 -; CHECK-NEXT: ret +; LA32-LABEL: register_vr31: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: #APP +; LA32-NEXT: vldi $vr31, 1 +; LA32-NEXT: #NO_APP +; LA32-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: register_vr31: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: #APP +; LA64-NEXT: vldi $vr31, 1 +; LA64-NEXT: #NO_APP +; LA64-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret entry: %0 = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "={$vr31}"() ret void diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll index 811d9d712de4..0b3c559ef97d 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll index fac16c8308da..daaed8388bca 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll index 79be0a184bfb..c2d5264d5441 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll index 6875872b6f83..a1fa4f3449b2 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll index 87d32b3ce02a..19100bc1426c 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll index b9134e0724fe..68ce36ccd801 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll index 086e3bec12d2..3951e121bdc6 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll index 77496239c3a9..6fe22ea83b7d 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll index 82a117b2aba5..f36b2f39bc1e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc 
--mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll index c0c35c775266..43f60d9e56e4 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll index 9a1c38a641d0..f6966213e32f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll index b08c759ecc32..2484051943c4 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll index 
fb0861f4cd5e..189e95ccb0c0 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll index 8bf7d0ed8817..0675361263e2 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll index b020806cd86c..3b968483dee8 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll index df6cdb99cdbc..335dd66ba740 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc 
--mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll index f5fba6dbb141..ac0eca2fc33e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8>, <16 x i8>) @@ -40,10 +41,21 @@ entry: declare <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64>, <2 x i64>) define <2 x i64> @lsx_vbitclr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -; CHECK-LABEL: lsx_vbitclr_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vbitclr.d $vr0, $vr0, $vr1 -; CHECK-NEXT: ret +; LA32-LABEL: lsx_vbitclr_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vrepli.d $vr2, 63 +; LA32-NEXT: vand.v $vr1, $vr1, $vr2 +; LA32-NEXT: vrepli.d $vr2, 1 +; LA32-NEXT: vsll.d $vr1, $vr2, $vr1 +; LA32-NEXT: vrepli.b $vr2, -1 +; LA32-NEXT: vxor.v $vr1, $vr1, $vr2 +; LA32-NEXT: vand.v $vr0, $vr0, $vr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lsx_vbitclr_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vbitclr.d $vr0, $vr0, $vr1 +; LA64-NEXT: ret entry: %res = call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> %va, <2 x i64> %vb) ret <2 x i64> %res @@ -88,10 +100,17 @@ entry: declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va) nounwind { -; CHECK-LABEL: lsx_vbitclri_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vbitclri.d $vr0, $vr0, 63 -; CHECK-NEXT: ret +; LA32-LABEL: lsx_vbitclri_d: +; LA32: # %bb.0: # %entry +; 
LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0) +; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI7_0) +; LA32-NEXT: vand.v $vr0, $vr0, $vr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lsx_vbitclri_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vbitclri.d $vr0, $vr0, 63 +; LA64-NEXT: ret entry: %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 63) ret <2 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll index 24b6ec3284cb..a664bdebf642 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll index 3ffb494c9907..379ba45863a9 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll index ad56e88fdb88..ece12db99ce2 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s 
--check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8>, <16 x i8>) @@ -40,10 +41,19 @@ entry: declare <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64>, <2 x i64>) define <2 x i64> @lsx_vbitrev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -; CHECK-LABEL: lsx_vbitrev_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vbitrev.d $vr0, $vr0, $vr1 -; CHECK-NEXT: ret +; LA32-LABEL: lsx_vbitrev_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vrepli.d $vr2, 63 +; LA32-NEXT: vand.v $vr1, $vr1, $vr2 +; LA32-NEXT: vrepli.d $vr2, 1 +; LA32-NEXT: vsll.d $vr1, $vr2, $vr1 +; LA32-NEXT: vxor.v $vr0, $vr0, $vr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lsx_vbitrev_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vbitrev.d $vr0, $vr0, $vr1 +; LA64-NEXT: ret entry: %res = call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> %va, <2 x i64> %vb) ret <2 x i64> %res @@ -88,10 +98,17 @@ entry: declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) define <2 x i64> @lsx_vbitrevi_d(<2 x i64> %va) nounwind { -; CHECK-LABEL: lsx_vbitrevi_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63 -; CHECK-NEXT: ret +; LA32-LABEL: lsx_vbitrevi_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0) +; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI7_0) +; LA32-NEXT: vxor.v $vr0, $vr0, $vr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lsx_vbitrevi_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vbitrevi.d $vr0, $vr0, 63 +; LA64-NEXT: ret entry: %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 63) ret <2 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll index 4b4b5ff1fc8c..23caf9c12836 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8>, <16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll index bc63b40e9fca..5fa9db3bd0ea 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll index 52c1eb7d2024..05724df0a3dd 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll index 28d342b5c378..93111bc86c2b 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll index e57e14d8cb07..089c192b567a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll index 9b2bde015ed9..182c46078505 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll index 75d98e6f8bce..b01e533a1c8b 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8>, <16 x i8>) @@ -40,10 +41,19 @@ entry: declare <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64>, <2 x i64>) define <2 x i64> @lsx_vbitset_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -; CHECK-LABEL: lsx_vbitset_d: -; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: vbitset.d $vr0, $vr0, $vr1 -; CHECK-NEXT: ret +; LA32-LABEL: lsx_vbitset_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vrepli.d $vr2, 63 +; LA32-NEXT: vand.v $vr1, $vr1, $vr2 +; LA32-NEXT: vrepli.d $vr2, 1 +; LA32-NEXT: vsll.d $vr1, $vr2, $vr1 +; LA32-NEXT: vor.v $vr0, $vr0, $vr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lsx_vbitset_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vbitset.d $vr0, $vr0, $vr1 +; LA64-NEXT: ret entry: %res = call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> %va, <2 x i64> %vb) ret <2 x i64> %res @@ -88,10 +98,17 @@ entry: declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va) nounwind { -; CHECK-LABEL: lsx_vbitseti_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vbitseti.d $vr0, $vr0, 63 -; CHECK-NEXT: ret +; LA32-LABEL: lsx_vbitseti_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0) +; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI7_0) +; LA32-NEXT: vor.v $vr0, $vr0, $vr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lsx_vbitseti_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vbitseti.d $vr0, $vr0, 63 +; LA64-NEXT: ret entry: %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 63) ret <2 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll index eb49af49c9be..84771bf4ea95 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll index 5b10c9e91a4f..8a4fb0c9364c 100644 --- 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll index e7eb1cfcb407..5613b4dd0c7f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll index bf56822e2ef5..8ff1a23c58d0 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll index 0bc038c869ce..89fb0c6b922a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, 
i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll index fe0565297641..04b4894b6cff 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll index c581109f3fd0..7f7f25c69013 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll index 25c37b64349b..d205c171c091 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll index 53166e84d269..04434d9fb014 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll index 2f3e891a9eef..93b08b2cfe00 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll index cbf19e2a3919..c4c86f928e93 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll index 7f94234ed603..9ef68997b4af 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll index e834002bb60b..aa0fa319c82e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll index 8f03a2b81291..895cffc07208 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll index 569002314c92..9d6c1cc22e8a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll index 0c6682187101..aa35970017cb 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll @@ -1,4 +1,5 @@ 
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll index a6a151a96d84..4da977ca398d 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll index a9e4328bd011..81897ecf5dea 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll index 9a69964bb227..0abbbe3dba13 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16>) diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll index 1ca8e5e2c0e9..4f39b2d5cee9 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll index 62fbcfa339cd..c3286a529898 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll index d8382acc70ed..d382b862b506 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll index adbaf6c76b1b..6f921a490ac3 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions 
have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float>, <4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll index 89f757c4e456..850170db39f2 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll index 5662acc0b9a1..3365841b804a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll index 0f844240277f..da7e4d203c14 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float>, <4 
x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll index 27f70b5fba32..4fb1c5bfeb84 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll index 856ca9cadbd9..090c1dd0d09c 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float>, <4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll index 1e6c4c77d536..50b335f17563 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll index e1a9ea78ef9d..bdb7667e1962 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll +++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float>, <4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll index 46db0f4a5061..ac035b3bc7f1 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float>, <4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll index 669fde5912d4..db63deb618e6 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll index 8d872fc72962..ba40b6d8ceae 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc 
--mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll index 326d87308b0b..9ddd45b5dd53 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll index 0184c855c9c1..8758478f4735 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll index 9583f672a305..164c0676b76c 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll index 5c072b194d4f..dfe75b2181a0 100644 --- 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8>, <16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll index 55bffba9e99e..f8d0713dad08 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll index 2beba4a70dc9..cc0f88d9d32c 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float>, <4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll index 2a494cd7fa87..1024ee34715a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; 
RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll index 05725582334a..349c2ddccf3b 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll index dd5815b2ea85..039f5994bfba 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll index 77b0b3484df8..0c731f45f7ea 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll index 
3d4f84fb6e03..4ba83bc5cb66 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll index 2a4c2218de8c..3633ba0a2d91 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll index 61d2cbd28066..8ededfd38d1f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32) @@ -43,11 +44,18 @@ entry: declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32) define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va) nounwind { -; CHECK-LABEL: lsx_vinsgr2vr_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ori $a0, $zero, 1 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 -; CHECK-NEXT: ret 
+; LA32-LABEL: lsx_vinsgr2vr_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ori $a0, $zero, 1 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2 +; LA32-NEXT: vinsgr2vr.w $vr0, $zero, 3 +; LA32-NEXT: ret +; +; LA64-LABEL: lsx_vinsgr2vr_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ori $a0, $zero, 1 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 1 +; LA64-NEXT: ret entry: %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 1) ret <2 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll index 9375f9f01a92..de2189a8e892 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vld(ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll index f8b4c42326df..be7365d0ee5a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vld(ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll index 57f6f8e81d91..106b8e6e6dfd 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <2 x i64> 
@llvm.loongarch.lsx.vldi(i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll index a8f8278f8097..2f4a577f36c3 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll index ace910b54d9a..582b49a74cb4 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll index 34bf945c9df4..f0dd80063189 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll index 9613c1a62540..8f8f0aeade05 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc 
--mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll index 9ebe0c2fccd5..74f20745a3d0 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll index 89503724fd73..14634fe8daec 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8>, <16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll index 1e3ab25a5fcf..58ee70aacae9 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16>, <16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll index 34bbe3495670..6c7a0657940e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll index 4dd289cf6ed7..caea2e0d6818 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll index 5d9b98cec4d0..444220f87039 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll index aa12a5ead6a3..fa26a5bc430d 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc 
--mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll index 6b3dc6865584..46630562c84f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll index 3ecd777aee67..8bc4c572e728 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll index be00c76137c7..9e36cc77080a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll index 02f1752f7190..a47f0303db27 100644 --- 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll index 98684e10c78e..1b7ae48b44ad 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8>, <16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll index a4deb8f8f823..ce584fe9ed0c 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll index aca60d1663b7..46f6f0919571 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc 
--mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll index eb55c1f809e3..74358585a444 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll index 43c6e9757614..2e6dbdbc99b1 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll index 16619225f2d1..2bf21fb2dc31 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll index 8c59d8fb9fa5..e82f9c78f2ab 100644 --- 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll index 322a39c106a6..0dd3accc8e5e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll index c2388a1e0da3..0777fab45d09 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll index ab557003d150..40b4549d1b58 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vor.v(<16 
x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll index 4a7fc7e109d9..612fecfd239e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll index 5644b8581dce..d8b78e71fcb4 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll index 85c0f432c54a..71bd9f03feea 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll index 4528628e02c3..8974703ba6e1 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll index 70a3620d1757..42d614250684 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll index 431b270ab0a1..177d732604af 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll index e439bbae6130..f719c5cf30e7 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll index bdfc08ed680a..e41d74ef2dae 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll index b8367d98caf6..ccad1b7a8250 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll index 4ebf29e1409c..d75889965f39 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll index 6dd3c1f27a81..707334640771 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 
--mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll index 3ba184dad052..35fd8d644700 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll index d625441122a6..7fdc8fe8c690 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll index 3d271bb2b307..e7f8a0708ad1 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll index 
9b8af1878cb8..216c2ae52844 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll index 3c53b36672ad..6e0b6e6e4f3e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll index fd8ba3a1c633..b7cc04338576 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll index df8650677147..5ee7b2642999 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 
--mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll index a54f955766df..82149a263bd7 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll index 45fa4e43be19..9b15c0538200 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll index afdbe0c1ce0b..ce99366ee5f8 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll index 4286842a63b9..3412b878eed4 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll +++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll index 220398ff28cd..304894dbf410 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll index 5fa1dd30475c..82db51ada1c2 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll index 3cb4acd82439..ac946cacc63a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8>, <16 x i8>) diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll index f5d516521e45..5deca45044cc 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8>, <16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll index 4d6fadf08c26..9d7f2729c0f2 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll index a7d138bcc00b..b5518763975e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll index 1ad5f2af5591..3cb712fd07fa 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll index 3997b0cc995c..46f18670e0df 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll index 4c945e296711..39787b3d0f5b 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll index 0fc137bf0549..0feb967068cb 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll index 5a9d5f06e63f..8a225a31f9da 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll index 75406f94887c..6a5b11196cae 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll index 7474b5e29734..ce005d097fa7 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll index 7bc20af41f17..d1316e04efbb 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s 
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8>, <16 x i8>) @@ -40,10 +41,17 @@ entry: declare <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64>, <2 x i64>) define <2 x i64> @lsx_vsll_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -; CHECK-LABEL: lsx_vsll_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsll.d $vr0, $vr0, $vr1 -; CHECK-NEXT: ret +; LA32-LABEL: lsx_vsll_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vrepli.d $vr2, 63 +; LA32-NEXT: vand.v $vr1, $vr1, $vr2 +; LA32-NEXT: vsll.d $vr0, $vr0, $vr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lsx_vsll_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vsll.d $vr0, $vr0, $vr1 +; LA64-NEXT: ret entry: %res = call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> %va, <2 x i64> %vb) ret <2 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll index bda3523a0b5c..74e65742cfa8 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll index a03656d5ca07..51921e7e6b43 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <8 x i16> 
@llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll index 29ab70da1ced..65f1f6b49c81 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll index f6d014b19d6c..e585f370b7f0 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll index 9a8b757dab4e..e42215291f25 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll index 18683e9dc46f..a0eb870ba9ad 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll index 2a033a21b565..cacb7d7c6448 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll index c3b328145864..f3bfe73911b4 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll index e85c8464c18e..14c3801a8040 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <16 x i8> 
@llvm.loongarch.lsx.vsra.b(<16 x i8>, <16 x i8>) @@ -40,10 +41,17 @@ entry: declare <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64>, <2 x i64>) define <2 x i64> @lsx_vsra_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -; CHECK-LABEL: lsx_vsra_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsra.d $vr0, $vr0, $vr1 -; CHECK-NEXT: ret +; LA32-LABEL: lsx_vsra_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vrepli.d $vr2, 63 +; LA32-NEXT: vand.v $vr1, $vr1, $vr2 +; LA32-NEXT: vsra.d $vr0, $vr0, $vr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lsx_vsra_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vsra.d $vr0, $vr0, $vr1 +; LA64-NEXT: ret entry: %res = call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> %va, <2 x i64> %vb) ret <2 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll index 4ffe5a704c2c..4cb38b4612f7 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16>, <8 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll index d68064e9b902..bd0cdb79d918 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll index 
38cfde214dc1..8a501434b1d2 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll index 717c641616c8..7e2a9e107f4a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll index b6c2d70cebbc..a8f59a58a0c4 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll index 2ad8adcd823b..f2122054ae4d 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | 
FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll index 8b52b7ac9631..40aa39e89bf1 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll index d4cdfb5359ea..c952fae87719 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16>, <8 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll index d24cf92a0392..5e4d9f94b385 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll index 19de7445cba1..15ac105abd76 100644 --- 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll index 2253e88372fc..af3ce7722e46 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll index 3beff790afab..0d1b8906e655 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll index 98652aca0d62..9c32772c0aba 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> 
@llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll index 1cddd9622233..9499a0ab445e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 declare <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8>, <16 x i8>) @@ -40,10 +41,17 @@ entry: declare <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64>, <2 x i64>) define <2 x i64> @lsx_vsrl_d(<2 x i64> %va, <2 x i64> %vb) nounwind { -; CHECK-LABEL: lsx_vsrl_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsrl.d $vr0, $vr0, $vr1 -; CHECK-NEXT: ret +; LA32-LABEL: lsx_vsrl_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vrepli.d $vr2, 63 +; LA32-NEXT: vand.v $vr1, $vr1, $vr2 +; LA32-NEXT: vsrl.d $vr0, $vr0, $vr1 +; LA32-NEXT: ret +; +; LA64-LABEL: lsx_vsrl_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vsrl.d $vr0, $vr0, $vr1 +; LA64-NEXT: ret entry: %res = call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> %va, <2 x i64> %vb) ret <2 x i64> %res diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll index 1c9b23243ffb..7a86688e43f4 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16>, <8 x i16>) diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll index 054c4f393548..366effc2e7c6 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll index 76341df197fd..d3f5cc07966e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll index 6e523efa1824..e8d06e7e8bc6 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll index bcbd38e26e5f..6b557dfbada3 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll +++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll index 4862b1546ccf..c0ba458cb2f5 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll index 51638fa1a47f..0d479951a57a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll index 893e51396241..9563b9459788 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16>, <8 x i16>) diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll index 8988ae88f9eb..e4085b52bbf8 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll index e5530db56fed..51488926f92c 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll index d1ea450d2237..3db7e3f3e571 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll index cecccbb730c9..96493d555da6 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll @@ -1,4 +1,5 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16>, <8 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll index f7817921ebeb..bf185ae5d974 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll index a80ede9c5243..a4f0a4ed9319 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll index 57b8eb169866..75577dbafb1c 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) diff 
--git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll index c6b7d9ec8e1d..5ad63f2bd0ab 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16>, <8 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll index 4edda8c0a24a..f54bb71881c4 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll index a77e6e764c9d..00cfc60b4314 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll index 1a2e91962ac3..af32b58b53f8 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll @@ -1,4 
+1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll index 697ccc3962a8..6fb1a3b15860 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16>, <8 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll index 6218af1fa773..4fb4b307a706 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll index 688be826f467..2c8356ed0a60 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, 
i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll index 8dd41e7abe87..deb1575bedcc 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll index a8e76cbaa7fd..a17ab94d02ca 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16>, <8 x i16>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll index 98a0c5b3cd28..61e85be08c0a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll index c389b4fd6023..f4450f25274e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll +++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll index 869e81b2b09d..840fafc038fc 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll index c594b426d650..ac0ef538cdcb 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll index a72126cd15a6..079fba79b350 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare void @llvm.loongarch.lsx.vst(<16 x i8>, 
ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll index ba9f44c59c37..aae751c17497 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare void @llvm.loongarch.lsx.vst(<16 x i8>, ptr, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll index 82dba30ed1e7..7f3c343dd5bf 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, ptr, i32, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll index a8a74819c204..feb8fbaf2952 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, ptr, i32, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll index 4f8412be9579..9b6af6fc1a96 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, ptr, i32, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll index 5c04a3d8de0d..8f1b441e92fe 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll index 96cc1241fbf3..fc1c9e110876 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll index 162f9ad131c7..68b59ca9c02f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll index 304a4e4a78cc..46642171aaa1 
100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll index 48100db74334..f41a3a1b99fd 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll index 72a1fe93c2c0..5a49605f5fa5 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll index 5f5613189ac8..a3e3fa9dfa03 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 
--mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll index 4238d89120f1..d0a6df6bc78a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll @@ -1,3 +1,4 @@ +; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s ; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll index 09669cd5ac14..7241bfbfcedd 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/absd.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/absd.ll index f77a31b60076..6546719647f3 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/absd.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/absd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc -mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s ;; 1. 
trunc(abs(sub(sext(a),sext(b)))) -> abds(a,b) or abdu(a,b) diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll index 485bd1df8d66..41164b635983 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @add_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll index d3e4efb1b1c2..2d6f0bea0712 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @and_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll index 2a3107447098..3a099cb18d37 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @ashr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/bitcast-extract-element.ll 
b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/bitcast-extract-element.ll index 9a40feb45671..514a5527e829 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/bitcast-extract-element.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/bitcast-extract-element.ll @@ -1,11 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefix=LA64 define i32 @bitcast_extract_v4f32(<4 x float> %a) nounwind { -; CHECK-LABEL: bitcast_extract_v4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3 -; CHECK-NEXT: ret +; LA32-LABEL: bitcast_extract_v4f32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vreplvei.w $vr0, $vr0, 3 +; LA32-NEXT: movfr2gr.s $a0, $fa0 +; LA32-NEXT: ret +; +; LA64-LABEL: bitcast_extract_v4f32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vpickve2gr.w $a0, $vr0, 3 +; LA64-NEXT: ret entry: %b = extractelement <4 x float> %a, i32 3 %c = bitcast float %b to i32 @@ -13,10 +20,17 @@ entry: } define i64 @bitcast_extract_v2f64(<2 x double> %a) nounwind { -; CHECK-LABEL: bitcast_extract_v2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: bitcast_extract_v2f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vreplvei.d $vr0, $vr0, 1 +; LA32-NEXT: movfr2gr.s $a0, $fa0 +; LA32-NEXT: movfrh2gr.s $a1, $fa0 +; LA32-NEXT: ret +; +; LA64-LABEL: bitcast_extract_v2f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1 +; LA64-NEXT: ret entry: %b = extractelement <2 x double> %a, i32 1 %c = bitcast double %b to i64 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll index 989ad10a44ff..c0f010a93662 100644 --- 
a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @fadd_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll index 95e46a4e71da..9a1498f55582 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ;; TREU diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll index 3b9642e31b02..603bd21ab9af 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: fdiv_v4f32: @@ -49,12 +50,21 @@ entry: } define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind { -; CHECK-LABEL: one_fdiv_v2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vfrecip.d $vr0, $vr0 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: 
one_fdiv_v2f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; LA32-NEXT: vld $vr1, $a1, %pc_lo12(.LCPI3_0) +; LA32-NEXT: vfdiv.d $vr0, $vr1, $vr0 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: one_fdiv_v2f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vfrecip.d $vr0, $vr0 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %v0 = load <2 x double>, ptr %a0 %div = fdiv <2 x double> <double 1.0, double 1.0>, %v0 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll index f604a8962958..faf461c67781 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @fmul_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll index 795c1ac8b368..007634d28d17 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @fneg_v4f32(ptr %res, ptr %a0) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll index c3008fe96e47..7ea6d7431670 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll +++ 
b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @fptosi_v4f32_v4i32(ptr %res, ptr %in){ diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll index f0aeb0bd14e7..ec3a86713ed2 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @fptoui_v4f32_v4i32(ptr %res, ptr %in){ diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll index 02350c0763ba..f7fe458f353f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @fsub_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll index 04b4831f1188..7166469bf5ce 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 
--mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ;; SETEQ diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-bitcast-element.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-bitcast-element.ll index c42e3013c113..e7e0a89a8958 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-bitcast-element.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-bitcast-element.ll @@ -1,11 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefix=LA64 define <4 x float> @insert_bitcast_v4f32(<4 x float> %a, i32 %b) nounwind { -; CHECK-LABEL: insert_bitcast_v4f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: insert_bitcast_v4f32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: movgr2fr.w $fa1, $a0 +; LA32-NEXT: vextrins.w $vr0, $vr1, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: insert_bitcast_v4f32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA64-NEXT: ret entry: %c = bitcast i32 %b to float %d = insertelement <4 x float> %a, float %c, i32 1 @@ -13,10 +20,17 @@ entry: } define <2 x double> @insert_bitcast_v2f64(<2 x double> %a, i64 %b) nounwind { -; CHECK-LABEL: insert_bitcast_v2f64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: insert_bitcast_v2f64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: movgr2fr.w $fa1, $a0 +; LA32-NEXT: movgr2frh.w $fa1, $a1 +; LA32-NEXT: vextrins.d $vr0, $vr1, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: insert_bitcast_v2f64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 1 +; LA64-NEXT: ret entry: %c = bitcast i64 %b to double %d = 
insertelement <2 x double> %a, double %c, i32 1 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll index e9a0c8a11045..65aff8071897 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define <16 x i8> @insert_extract_v16i8(<16 x i8> %a) nounwind { ; CHECK-LABEL: insert_extract_v16i8: @@ -46,10 +47,18 @@ entry: } define <2 x i64> @insert_extract_v2i64(<2 x i64> %a) nounwind { -; CHECK-LABEL: insert_extract_v2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vextrins.d $vr0, $vr0, 1 -; CHECK-NEXT: ret +; LA32-LABEL: insert_extract_v2i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vori.b $vr1, $vr0, 0 +; LA32-NEXT: vextrins.w $vr1, $vr0, 2 +; LA32-NEXT: vextrins.w $vr1, $vr0, 19 +; LA32-NEXT: vori.b $vr0, $vr1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: insert_extract_v2i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vextrins.d $vr0, $vr0, 1 +; LA64-NEXT: ret entry: %b = extractelement <2 x i64> %a, i32 1 %c = insertelement <2 x i64> %a, i64 %b, i32 0 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll index 2693310b4f50..584b202691c7 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 
--mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @lshr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll index f66cae6a1802..efe85fb13e69 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @mul_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll index 89702e60c01f..94a129438457 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @or_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll index cdff58defdae..c5dfe231b686 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @sdiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll index 4b34c04f3374..1d8ed9ec7e90 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @shl_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll index 31398c6081c0..076395e56e81 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s ;; vilvl.b diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll index 171e68306cd1..4034773a8a1f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s ;; vpackev.b diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll index ca636d942b58..c6d6019517be 100644 --- 
a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s ;; vpickev.b diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll index 1e820a37a240..37eb9e7e8dc4 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @sitofp_v4i32_v4f32(ptr %res, ptr %in){ diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll index 2813d9c97e68..ab135faa6ee3 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @sub_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll index 32dac67d36a8..65a4075cf359 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @udiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll index 3d4913f12e57..3ae1119435ef 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @uitofp_v4i32_v4f32(ptr %res, ptr %in){ diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll index 482cecb1d752..fd63b2122a8b 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @xor_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/mulh.ll b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll index b0ca556eeff3..687b3040f5e7 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/mulh.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define void @mulhs_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll b/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll index ba8ffc349318..f359b44cec4a 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define <4 x i32> @xor_shl_splat_vec_one(i32 %x, <4 x i32> %y) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll index 87b68ac59172..d2a506dd9854 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/scalar-to-vector.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 ;; Test scalar_to_vector expansion. 
@@ -31,10 +32,16 @@ define <4 x i32> @scalar_to_4xi32(i32 %val) { } define <2 x i64> @scalar_to_2xi64(i64 %val) { -; CHECK-LABEL: scalar_to_2xi64: -; CHECK: # %bb.0: -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: scalar_to_2xi64: +; LA32: # %bb.0: +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a1, 1 +; LA32-NEXT: ret +; +; LA64-LABEL: scalar_to_2xi64: +; LA64: # %bb.0: +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: ret %ret = insertelement <2 x i64> poison, i64 %val, i32 0 ret <2 x i64> %ret } diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll index a71bdea917cb..57fd09ed2e09 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll @@ -5,15 +5,12 @@ define void @vec_reduce_add_v16i8(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_add_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 +; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr0 +; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr0 +; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr0 +; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0 +; CHECK-NEXT: st.b $a0, $a1, 0 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %src %res = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %v) @@ -21,17 +18,62 @@ define void @vec_reduce_add_v16i8(ptr %src, ptr %dst) nounwind { ret void } +define void @vec_reduce_add_v8i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: vec_reduce_add_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; CHECK-NEXT: 
vhaddw.h.b $vr0, $vr0, $vr0 +; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr0 +; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0 +; CHECK-NEXT: st.b $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i8>, ptr %src + %res = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v) + store i8 %res, ptr %dst + ret void +} + +define void @vec_reduce_add_v4i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: vec_reduce_add_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr0 +; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0 +; CHECK-NEXT: st.b $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i8>, ptr %src + %res = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %v) + store i8 %res, ptr %dst + ret void +} + +define void @vec_reduce_add_v2i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: vec_reduce_add_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.h $a0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0 +; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr0 +; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 +; CHECK-NEXT: ret + %v = load <2 x i8>, ptr %src + %res = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %v) + store i8 %res, ptr %dst + ret void +} + define void @vec_reduce_add_v8i16(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_add_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 +; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr0 +; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr0 +; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0 +; CHECK-NEXT: st.h $a0, $a1, 0 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %src %res = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> 
%v) @@ -39,15 +81,44 @@ define void @vec_reduce_add_v8i16(ptr %src, ptr %dst) nounwind { ret void } +define void @vec_reduce_add_v4i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: vec_reduce_add_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr0 +; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0 +; CHECK-NEXT: st.h $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i16>, ptr %src + %res = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %v) + store i16 %res, ptr %dst + ret void +} + +define void @vec_reduce_add_v2i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: vec_reduce_add_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr0 +; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 +; CHECK-NEXT: ret + %v = load <2 x i16>, ptr %src + %res = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %v) + store i16 %res, ptr %dst + ret void +} + define void @vec_reduce_add_v4i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_add_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14 -; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 +; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr0 +; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0 +; CHECK-NEXT: st.w $a0, $a1, 0 ; CHECK-NEXT: ret %v = load <4 x i32>, ptr %src %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v) @@ -55,12 +126,25 @@ define void @vec_reduce_add_v4i32(ptr %src, ptr %dst) nounwind { ret void } +define void @vec_reduce_add_v2i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: vec_reduce_add_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr0 +; 
CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 +; CHECK-NEXT: ret + %v = load <2 x i32>, ptr %src + %res = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %v) + store i32 %res, ptr %dst + ret void +} + define void @vec_reduce_add_v2i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_add_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr0 ; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i64>, ptr %src diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll index c16de1023964..cca4ce30758f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll @@ -6,13 +6,13 @@ define void @vec_reduce_and_v16i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %src @@ -26,12 +26,12 @@ define void @vec_reduce_and_v8i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: 
vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %src @@ -45,10 +45,10 @@ define void @vec_reduce_and_v4i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i8>, ptr %src @@ -62,8 +62,8 @@ define void @vec_reduce_and_v2i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.h $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %src @@ -77,11 +77,11 @@ define void @vec_reduce_and_v8i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %src @@ -95,10 +95,10 @@ define void 
@vec_reduce_and_v4i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i16>, ptr %src @@ -112,8 +112,8 @@ define void @vec_reduce_and_v2i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i16>, ptr %src @@ -126,10 +126,10 @@ define void @vec_reduce_and_v4i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_and_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i32>, ptr %src @@ -143,8 +143,8 @@ define void @vec_reduce_and_v2i32(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i32>, ptr %src @@ -157,8 +157,8 @@ define void @vec_reduce_and_v2i64(ptr 
%src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_and_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vand.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i64>, ptr %src diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll index 52f18cce611d..ce431f0cf6a7 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll @@ -6,13 +6,13 @@ define void @vec_reduce_or_v16i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %src @@ -26,12 +26,12 @@ define void @vec_reduce_or_v8i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; 
CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %src @@ -45,10 +45,10 @@ define void @vec_reduce_or_v4i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i8>, ptr %src @@ -62,8 +62,8 @@ define void @vec_reduce_or_v2i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.h $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %src @@ -77,11 +77,11 @@ define void @vec_reduce_or_v8i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %src @@ -95,10 +95,10 @@ define void @vec_reduce_or_v4i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: 
vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i16>, ptr %src @@ -112,8 +112,8 @@ define void @vec_reduce_or_v2i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i16>, ptr %src @@ -126,10 +126,10 @@ define void @vec_reduce_or_v4i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_or_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i32>, ptr %src @@ -143,8 +143,8 @@ define void @vec_reduce_or_v2i32(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i32>, ptr %src @@ -157,8 +157,8 @@ define void @vec_reduce_or_v2i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_or_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vor.v $vr0, 
$vr1, $vr0 ; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i64>, ptr %src diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll index 5d8c3e36549d..bdf153ad7794 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll @@ -6,13 +6,13 @@ define void @vec_reduce_smax_v16i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %src @@ -26,12 +26,12 @@ define void @vec_reduce_smax_v8i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %src @@ -45,10 +45,10 @@ define void @vec_reduce_smax_v4i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; 
CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i8>, ptr %src @@ -62,8 +62,8 @@ define void @vec_reduce_smax_v2i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.h $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %src @@ -77,11 +77,11 @@ define void @vec_reduce_smax_v8i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %src @@ -95,10 +95,10 @@ define void @vec_reduce_smax_v4i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.h $vr0, 
$vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i16>, ptr %src @@ -112,8 +112,8 @@ define void @vec_reduce_smax_v2i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i16>, ptr %src @@ -126,10 +126,10 @@ define void @vec_reduce_smax_v4i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_smax_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14 -; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmax.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.w $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i32>, ptr %src @@ -143,8 +143,8 @@ define void @vec_reduce_smax_v2i32(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.w $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i32>, ptr %src @@ -157,8 +157,8 @@ define void @vec_reduce_smax_v2i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_smax_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmax.d $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i64>, ptr %src diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll 
b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll index 2d53095db89d..e3b3c5e6f241 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll @@ -6,13 +6,13 @@ define void @vec_reduce_smin_v16i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %src @@ -26,12 +26,12 @@ define void @vec_reduce_smin_v8i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %src @@ -45,10 +45,10 @@ define void @vec_reduce_smin_v4i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 -; CHECK-NEXT: 
vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i8>, ptr %src @@ -62,8 +62,8 @@ define void @vec_reduce_smin_v2i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.h $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %src @@ -77,11 +77,11 @@ define void @vec_reduce_smin_v8i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vmin.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.h $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %src @@ -95,10 +95,10 @@ define void @vec_reduce_smin_v4i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.h $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i16>, ptr %src @@ -112,8 +112,8 @@ define void @vec_reduce_smin_v2i16(ptr %src, ptr 
%dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.h $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i16>, ptr %src @@ -126,10 +126,10 @@ define void @vec_reduce_smin_v4i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_smin_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14 -; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmin.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.w $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i32>, ptr %src @@ -143,8 +143,8 @@ define void @vec_reduce_smin_v2i32(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.w $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i32>, ptr %src @@ -157,8 +157,8 @@ define void @vec_reduce_smin_v2i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_smin_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vmin.d $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmin.d $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i64>, ptr %src diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll index abe9ba7dfb24..fff2304befd6 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll +++ 
b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll @@ -6,13 +6,13 @@ define void @vec_reduce_umax_v16i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %src @@ -26,12 +26,12 @@ define void @vec_reduce_umax_v8i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %src @@ -45,10 +45,10 @@ define void @vec_reduce_umax_v4i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 +; 
CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i8>, ptr %src @@ -62,8 +62,8 @@ define void @vec_reduce_umax_v2i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.h $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %src @@ -77,11 +77,11 @@ define void @vec_reduce_umax_v8i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: vmax.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.hu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %src @@ -95,10 +95,10 @@ define void @vec_reduce_umax_v4i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.hu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i16>, ptr %src @@ -112,8 +112,8 @@ define void @vec_reduce_umax_v2i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: 
vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmax.hu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i16>, ptr %src @@ -126,10 +126,10 @@ define void @vec_reduce_umax_v4i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_umax_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14 -; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmax.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.wu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i32>, ptr %src @@ -143,8 +143,8 @@ define void @vec_reduce_umax_v2i32(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmax.wu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i32>, ptr %src @@ -157,8 +157,8 @@ define void @vec_reduce_umax_v2i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_umax_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vmax.du $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmax.du $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i64>, ptr %src diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll index 3d396f3692e7..e14a294cbcfb 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll @@ -6,13 +6,13 @@ define void @vec_reduce_umin_v16i8(ptr %src, ptr %dst) nounwind { ; 
CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %src @@ -26,12 +26,12 @@ define void @vec_reduce_umin_v8i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %src @@ -45,10 +45,10 @@ define void @vec_reduce_umin_v4i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = 
load <4 x i8>, ptr %src @@ -62,8 +62,8 @@ define void @vec_reduce_umin_v2i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.h $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %src @@ -77,11 +77,11 @@ define void @vec_reduce_umin_v8i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: vmin.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.hu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %src @@ -95,10 +95,10 @@ define void @vec_reduce_umin_v4i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.hu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i16>, ptr %src @@ -112,8 +112,8 @@ define void @vec_reduce_umin_v2i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vmin.hu $vr0, $vr1, $vr0 ; 
CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i16>, ptr %src @@ -126,10 +126,10 @@ define void @vec_reduce_umin_v4i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_umin_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14 -; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmin.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.wu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i32>, ptr %src @@ -143,8 +143,8 @@ define void @vec_reduce_umin_v2i32(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vmin.wu $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i32>, ptr %src @@ -157,8 +157,8 @@ define void @vec_reduce_umin_v2i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_umin_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vmin.du $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vmin.du $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i64>, ptr %src diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll index 1894532d6121..ae2bb8f91de0 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll @@ -6,13 +6,13 @@ define void @vec_reduce_xor_v16i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.d $vr1, 
$vr0, 32 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <16 x i8>, ptr %src @@ -26,12 +26,12 @@ define void @vec_reduce_xor_v8i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vsrli.d $vr1, $vr0, 32 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i8>, ptr %src @@ -45,10 +45,10 @@ define void @vec_reduce_xor_v4i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.b $vr1, $vr0, 14 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i8>, ptr %src @@ -62,8 +62,8 @@ define void @vec_reduce_xor_v2i8(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.h $a0, $a0, 0 ; CHECK-NEXT: 
vinsgr2vr.h $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.b $vr1, $vr0, 1 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i8>, ptr %src @@ -77,11 +77,11 @@ define void @vec_reduce_xor_v8i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <8 x i16>, ptr %src @@ -95,10 +95,10 @@ define void @vec_reduce_xor_v4i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.h $vr1, $vr0, 14 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i16>, ptr %src @@ -112,8 +112,8 @@ define void @vec_reduce_xor_v2i16(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.w $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.h $vr1, $vr0, 1 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i16>, ptr %src @@ -126,10 +126,10 @@ define void @vec_reduce_xor_v4i32(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: 
vec_reduce_xor_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vshuf4i.w $vr1, $vr0, 14 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <4 x i32>, ptr %src @@ -143,8 +143,8 @@ define void @vec_reduce_xor_v2i32(ptr %src, ptr %dst) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: ld.d $a0, $a0, 0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i32>, ptr %src @@ -157,8 +157,8 @@ define void @vec_reduce_xor_v2i64(ptr %src, ptr %dst) nounwind { ; CHECK-LABEL: vec_reduce_xor_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8 +; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0 ; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load <2 x i64>, ptr %src diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll index 48f18a35a38c..efb6b8632f95 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s ; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s define <16 x i8> @shuffle_to_vslli_h_8(<16 x i8> %a) nounwind { diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll 
b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll index 7e9f5b653d01..5e0ff9a07585 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-sign-ext.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @shuffle_sign_ext_2i8_to_2i64(ptr %ptr, ptr %dst) nounwind { ; CHECK-LABEL: shuffle_sign_ext_2i8_to_2i64: @@ -37,14 +38,24 @@ define void @shuffle_sign_ext_2i16_to_2i64(ptr %ptr, ptr %dst) nounwind { } define void @shuffle_sign_ext_2i32_to_2i64(ptr %ptr, ptr %dst) nounwind { -; CHECK-LABEL: shuffle_sign_ext_2i32_to_2i64: -; CHECK: # %bb.0: -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vrepli.b $vr1, 0 -; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0 -; CHECK-NEXT: vst $vr0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: shuffle_sign_ext_2i32_to_2i64: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vrepli.b $vr0, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2 +; LA32-NEXT: vst $vr0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: shuffle_sign_ext_2i32_to_2i64: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vrepli.b $vr1, 0 +; LA64-NEXT: vilvl.w $vr0, $vr1, $vr0 +; LA64-NEXT: vst $vr0, $a1, 0 +; LA64-NEXT: ret %x = load <2 x i32>, ptr %ptr %y = shufflevector <2 x i32> %x, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 1, i32 2> %r = bitcast <4 x i32> %y to <2 x i64> @@ -70,14 +81,25 @@ define void @shuffle_sign_ext_4i8_to_4i32(ptr %ptr, ptr %dst) nounwind { } define void @shuffle_sign_ext_4i16_to_4i32(ptr 
%ptr, ptr %dst) nounwind { -; CHECK-LABEL: shuffle_sign_ext_4i16_to_4i32: -; CHECK: # %bb.0: -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vrepli.b $vr1, 0 -; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0 -; CHECK-NEXT: vst $vr0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: shuffle_sign_ext_4i16_to_4i32: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vrepli.b $vr1, 0 +; LA32-NEXT: vilvl.h $vr0, $vr1, $vr0 +; LA32-NEXT: vst $vr0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: shuffle_sign_ext_4i16_to_4i32: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vrepli.b $vr1, 0 +; LA64-NEXT: vilvl.h $vr0, $vr1, $vr0 +; LA64-NEXT: vst $vr0, $a1, 0 +; LA64-NEXT: ret %x = load <4 x i16>, ptr %ptr %y = shufflevector <4 x i16> %x, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 7, i32 1, i32 6, i32 2, i32 5, i32 3, i32 4> %r = bitcast <8 x i16> %y to <4 x i32> @@ -86,14 +108,25 @@ define void @shuffle_sign_ext_4i16_to_4i32(ptr %ptr, ptr %dst) nounwind { } define void @shuffle_sign_ext_8i8_to_8i16(ptr %ptr, ptr %dst) nounwind { -; CHECK-LABEL: shuffle_sign_ext_8i8_to_8i16: -; CHECK: # %bb.0: -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vrepli.b $vr1, 0 -; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0 -; CHECK-NEXT: vst $vr0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: shuffle_sign_ext_8i8_to_8i16: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vrepli.b $vr1, 0 +; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0 +; LA32-NEXT: vst $vr0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: shuffle_sign_ext_8i8_to_8i16: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vrepli.b $vr1, 0 +; LA64-NEXT: 
vilvl.b $vr0, $vr1, $vr0 +; LA64-NEXT: vst $vr0, $a1, 0 +; LA64-NEXT: ret %x = load <8 x i8>, ptr %ptr %y = shufflevector <8 x i8> %x, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 15, i32 1, i32 14, i32 2, i32 13, i32 3, i32 12, i32 4, i32 11, i32 5, i32 10, i32 6, i32 9, i32 7, i32 8> %r = bitcast <16 x i8> %y to <8 x i16> diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll index bbcfbe1b0726..602c0f1a5a91 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64 define void @load_zext_2i8_to_2i64(ptr %ptr, ptr %dst) { @@ -38,14 +39,25 @@ entry: } define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_zext_8i8_to_8i16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vrepli.b $vr1, 0 -; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0 -; CHECK-NEXT: vst $vr0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: load_zext_8i8_to_8i16: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vrepli.b $vr1, 0 +; LA32-NEXT: vilvl.b $vr0, $vr1, $vr0 +; LA32-NEXT: vst $vr0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_zext_8i8_to_8i16: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vrepli.b $vr1, 0 +; LA64-NEXT: vilvl.b $vr0, $vr1, $vr0 +; LA64-NEXT: vst $vr0, $a1, 0 +; LA64-NEXT: ret entry: %A = load <8 x i8>, ptr %ptr %B = zext <8 x i8> 
%A to <8 x i16> @@ -71,14 +83,25 @@ entry: } define void @load_zext_4i16_to_4i32(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_zext_4i16_to_4i32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vrepli.b $vr1, 0 -; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0 -; CHECK-NEXT: vst $vr0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: load_zext_4i16_to_4i32: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; LA32-NEXT: vrepli.b $vr1, 0 +; LA32-NEXT: vilvl.h $vr0, $vr1, $vr0 +; LA32-NEXT: vst $vr0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_zext_4i16_to_4i32: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vrepli.b $vr1, 0 +; LA64-NEXT: vilvl.h $vr0, $vr1, $vr0 +; LA64-NEXT: vst $vr0, $a1, 0 +; LA64-NEXT: ret entry: %A = load <4 x i16>, ptr %ptr %B = zext <4 x i16> %A to <4 x i32> @@ -87,14 +110,24 @@ entry: } define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) { -; CHECK-LABEL: load_zext_2i32_to_2i64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ld.d $a0, $a0, 0 -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 -; CHECK-NEXT: vrepli.b $vr1, 0 -; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0 -; CHECK-NEXT: vst $vr0, $a1, 0 -; CHECK-NEXT: ret +; LA32-LABEL: load_zext_2i32_to_2i64: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a2, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 4 +; LA32-NEXT: vrepli.b $vr0, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2 +; LA32-NEXT: vst $vr0, $a1, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_zext_2i32_to_2i64: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vrepli.b $vr1, 0 +; LA64-NEXT: vilvl.w $vr0, $vr1, $vr0 +; LA64-NEXT: vst $vr0, $a1, 0 +; LA64-NEXT: ret entry: %A = load <2 x i32>, ptr %ptr %B = zext <2 x i32> %A to <2 x i64> diff 
--git a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll index ad57bbf9ee5c..7fa591db5d1f 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll @@ -603,3 +603,207 @@ define i4 @vmsk_eq_allzeros_v4i8(<4 x i8> %a) { %2 = bitcast <4 x i1> %1 to i4 ret i4 %2 } + +define i32 @vmsk2_eq_allzeros_i8(<32 x i8> %a) { +; CHECK-LABEL: vmsk2_eq_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vseqi.b $vr0, $vr0, 0 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 +; CHECK-NEXT: vseqi.b $vr0, $vr1, 0 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0 +; CHECK-NEXT: slli.d $a1, $a1, 16 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ret +entry: + %1 = icmp eq <32 x i8> %a, splat (i8 0) + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +define i32 @vmsk2_sgt_allzeros_i8(<32 x i8> %a) { +; CHECK-LABEL: vmsk2_sgt_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.b $vr2, 0 +; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 +; CHECK-NEXT: vslt.b $vr0, $vr2, $vr1 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0 +; CHECK-NEXT: slli.d $a1, $a1, 16 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ret +entry: + %1 = icmp sgt <32 x i8> %a, splat (i8 0) + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +define i32 @vmsk2_sgt_allones_i8(<32 x i8> %a) { +; CHECK-LABEL: vmsk2_sgt_allones_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.b $vr2, -1 +; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 +; CHECK-NEXT: vslt.b $vr0, $vr2, $vr1 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0 +; CHECK-NEXT: slli.d $a1, $a1, 16 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ret +entry: + %1 = icmp sgt <32 x i8> %a, splat (i8 -1) + %2 = bitcast 
<32 x i1> %1 to i32 + ret i32 %2 +} + +define i32 @vmsk2_sge_allzeros_i8(<32 x i8> %a) { +; CHECK-LABEL: vmsk2_sge_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.b $vr2, 0 +; CHECK-NEXT: vsle.b $vr0, $vr2, $vr0 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 +; CHECK-NEXT: vsle.b $vr0, $vr2, $vr1 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0 +; CHECK-NEXT: slli.d $a1, $a1, 16 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ret +entry: + %1 = icmp sge <32 x i8> %a, splat (i8 0) + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +define i32 @vmsk2_slt_allzeros_i8(<32 x i8> %a) { +; CHECK-LABEL: vmsk2_slt_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 +; CHECK-NEXT: vmskltz.b $vr0, $vr1 +; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0 +; CHECK-NEXT: slli.d $a1, $a1, 16 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ret +entry: + %1 = icmp slt <32 x i8> %a, splat (i8 0) + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +define i32 @vmsk2_sle_allzeros_i8(<32 x i8> %a) { +; CHECK-LABEL: vmsk2_sle_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslei.b $vr0, $vr0, 0 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 +; CHECK-NEXT: vslei.b $vr0, $vr1, 0 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0 +; CHECK-NEXT: slli.d $a1, $a1, 16 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ret +entry: + %1 = icmp sle <32 x i8> %a, splat (i8 0) + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +define i32 @vmsk2_sle_allones_i8(<32 x i8> %a) { +; CHECK-LABEL: vmsk2_sle_allones_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslei.b $vr0, $vr0, -1 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 +; CHECK-NEXT: vslei.b $vr0, $vr1, -1 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0 +; CHECK-NEXT: slli.d $a1, $a1, 16 +; 
CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ret +entry: + %1 = icmp sle <32 x i8> %a, splat (i8 -1) + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +define i32 @vmsk2_ne_allzeros_i8(<32 x i8> %a) { +; CHECK-LABEL: vmsk2_ne_allzeros_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vseqi.b $vr0, $vr0, 0 +; CHECK-NEXT: vxori.b $vr0, $vr0, 255 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 +; CHECK-NEXT: vseqi.b $vr0, $vr1, 0 +; CHECK-NEXT: vxori.b $vr0, $vr0, 255 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0 +; CHECK-NEXT: slli.d $a1, $a1, 16 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ret +entry: + %1 = icmp ne <32 x i8> %a, splat (i8 0) + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +define i32 @vmsk2_sgt_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: vmsk2_sgt_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 +; CHECK-NEXT: vslt.b $vr0, $vr3, $vr1 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0 +; CHECK-NEXT: slli.d $a1, $a1, 16 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ret + %x = icmp sgt <32 x i8> %a, %b + %res = bitcast <32 x i1> %x to i32 + ret i32 %res +} + +define i32 @vmsk2_sgt_and_sgt_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) { +; CHECK-LABEL: vmsk2_sgt_and_sgt_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0 +; CHECK-NEXT: vslt.b $vr1, $vr3, $vr1 +; CHECK-NEXT: vslt.b $vr2, $vr6, $vr4 +; CHECK-NEXT: vslt.b $vr3, $vr7, $vr5 +; CHECK-NEXT: vand.v $vr1, $vr1, $vr3 +; CHECK-NEXT: vand.v $vr0, $vr0, $vr2 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 +; CHECK-NEXT: vmskltz.b $vr0, $vr1 +; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0 +; CHECK-NEXT: slli.d $a1, $a1, 16 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ret + %x0 = icmp sgt <32 x i8> %a, %b + %x1 = icmp sgt <32 x i8> %c, %d + %y = and 
<32 x i1> %x0, %x1 + %res = bitcast <32 x i1> %y to i32 + ret i32 %res +} + +define i32 @vmsk2_trunc_i8(<32 x i8> %a) { +; CHECK-LABEL: vmsk2_trunc_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vslli.b $vr0, $vr0, 7 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0 +; CHECK-NEXT: vslli.b $vr0, $vr1, 7 +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0 +; CHECK-NEXT: slli.d $a1, $a1, 16 +; CHECK-NEXT: or $a0, $a0, $a1 +; CHECK-NEXT: ret + %y = trunc <32 x i8> %a to <32 x i1> + %res = bitcast <32 x i1> %y to i32 + ret i32 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll index 4d2ddeb2889b..5dbff4a402b3 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: select_v16i8_imm: @@ -49,16 +50,26 @@ define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { } define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { -; CHECK-LABEL: select_v4i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: ori $a1, $zero, 0 -; CHECK-NEXT: lu32i.d $a1, -1 -; CHECK-NEXT: vreplgr2vr.d $vr2, $a1 -; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: select_v4i32: +; LA32: # %bb.0: +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vld $vr1, $a2, 0 +; LA32-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; LA32-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI3_0) +; LA32-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; 
LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: select_v4i32: +; LA64: # %bb.0: +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vld $vr1, $a2, 0 +; LA64-NEXT: ori $a1, $zero, 0 +; LA64-NEXT: lu32i.d $a1, -1 +; LA64-NEXT: vreplgr2vr.d $vr2, $a1 +; LA64-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret %v0 = load <4 x i32>, ptr %a0 %v1 = load <4 x i32>, ptr %a1 %sel = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> %v0, <4 x i32> %v1 diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll index 9ed9a865ce55..97d33379913e 100644 --- a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll +++ b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll @@ -630,8 +630,7 @@ define dso_local void @tlsle_control_flow_with_mem_access() nounwind { ; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a32+4) ; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a32+4) ; LA32-NEXT: ld.w $a1, $a0, %le_lo12_r(g_a32+4) -; LA32-NEXT: ori $a2, $zero, 1 -; LA32-NEXT: blt $a1, $a2, .LBB25_2 +; LA32-NEXT: blez $a1, .LBB25_2 ; LA32-NEXT: # %bb.1: # %if.then ; LA32-NEXT: ori $a1, $zero, 10 ; LA32-NEXT: st.w $a1, $a0, %le_lo12_r(g_a32+4) @@ -643,8 +642,7 @@ define dso_local void @tlsle_control_flow_with_mem_access() nounwind { ; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a32+4) ; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a32+4) ; LA64-NEXT: ld.w $a1, $a0, %le_lo12_r(g_a32+4) -; LA64-NEXT: ori $a2, $zero, 1 -; LA64-NEXT: blt $a1, $a2, .LBB25_2 +; LA64-NEXT: blez $a1, .LBB25_2 ; LA64-NEXT: # %bb.1: # %if.then ; LA64-NEXT: ori $a1, $zero, 10 ; LA64-NEXT: st.w $a1, $a0, %le_lo12_r(g_a32+4) diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll index 2af206699d4a..1151c77c9af7 100644 --- a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll +++ b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll @@ -811,8 +811,7 @@ define dso_local 
void @control_flow_with_mem_access() nounwind { ; LA32: # %bb.0: # %entry ; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_a32+4) ; LA32-NEXT: ld.w $a1, $a0, %pc_lo12(g_a32+4) -; LA32-NEXT: ori $a2, $zero, 1 -; LA32-NEXT: blt $a1, $a2, .LBB25_2 +; LA32-NEXT: blez $a1, .LBB25_2 ; LA32-NEXT: # %bb.1: # %if.then ; LA32-NEXT: ori $a1, $zero, 10 ; LA32-NEXT: st.w $a1, $a0, %pc_lo12(g_a32+4) @@ -823,8 +822,7 @@ define dso_local void @control_flow_with_mem_access() nounwind { ; LA64: # %bb.0: # %entry ; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_a32+4) ; LA64-NEXT: ld.w $a1, $a0, %pc_lo12(g_a32+4) -; LA64-NEXT: ori $a2, $zero, 1 -; LA64-NEXT: blt $a1, $a2, .LBB25_2 +; LA64-NEXT: blez $a1, .LBB25_2 ; LA64-NEXT: # %bb.1: # %if.then ; LA64-NEXT: ori $a1, $zero, 10 ; LA64-NEXT: st.w $a1, $a0, %pc_lo12(g_a32+4) @@ -838,8 +836,7 @@ define dso_local void @control_flow_with_mem_access() nounwind { ; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_a32+4) ; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_a32+4) ; LA64-LARGE-NEXT: ldx.w $a0, $a1, $a0 -; LA64-LARGE-NEXT: ori $a1, $zero, 1 -; LA64-LARGE-NEXT: blt $a0, $a1, .LBB25_2 +; LA64-LARGE-NEXT: blez $a0, .LBB25_2 ; LA64-LARGE-NEXT: # %bb.1: # %if.then ; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_a32+4) ; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_a32+4) diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll index 90d994909264..661f67d4989c 100644 --- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll @@ -20,9 +20,9 @@ ; LAXX-NEXT: Target Pass Configuration ; LAXX-NEXT: Machine Module Information ; LAXX-NEXT: Target Transform Information +; LAXX-NEXT: Assumption Cache Tracker ; LAXX-NEXT: Type-Based Alias Analysis ; LAXX-NEXT: Scoped NoAlias Alias Analysis -; LAXX-NEXT: Assumption Cache Tracker ; LAXX-NEXT: Profile summary info ; LAXX-NEXT: Create Garbage Collector Module Metadata ; LAXX-NEXT: Machine Branch Probability Analysis diff --git 
a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll index c3618db64601..0f81f860025d 100644 --- a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll +++ b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll @@ -5,10 +5,9 @@ define signext i32 @sum(ptr noalias nocapture noundef readonly %0, i32 noundef signext %1) { ; LA464-LABEL: sum: ; LA464: # %bb.0: -; LA464-NEXT: ori $a2, $zero, 1 -; LA464-NEXT: blt $a1, $a2, .LBB0_4 -; LA464-NEXT: # %bb.1: ; LA464-NEXT: move $a2, $zero +; LA464-NEXT: blez $a1, .LBB0_3 +; LA464-NEXT: # %bb.1: ; LA464-NEXT: bstrpick.d $a1, $a1, 31, 0 ; LA464-NEXT: .p2align 4, , 16 ; LA464-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 @@ -17,11 +16,7 @@ define signext i32 @sum(ptr noalias nocapture noundef readonly %0, i32 noundef s ; LA464-NEXT: addi.d $a1, $a1, -1 ; LA464-NEXT: addi.d $a0, $a0, 4 ; LA464-NEXT: bnez $a1, .LBB0_2 -; LA464-NEXT: # %bb.3: -; LA464-NEXT: move $a0, $a2 -; LA464-NEXT: ret -; LA464-NEXT: .LBB0_4: -; LA464-NEXT: move $a2, $zero +; LA464-NEXT: .LBB0_3: ; LA464-NEXT: move $a0, $a2 ; LA464-NEXT: ret %3 = icmp sgt i32 %1, 0 diff --git a/llvm/test/CodeGen/LoongArch/select-const.ll b/llvm/test/CodeGen/LoongArch/select-const.ll index e9506b3a8359..ec69f5187e4f 100644 --- a/llvm/test/CodeGen/LoongArch/select-const.ll +++ b/llvm/test/CodeGen/LoongArch/select-const.ll @@ -301,3 +301,25 @@ define i32 @select_ne_10001_10002(i32 signext %a, i32 signext %b) { %2 = select i1 %1, i32 10001, i32 10002 ret i32 %2 } + +define i32 @select_slt_zero_constant1_constant2(i32 signext %x) { +; LA32-LABEL: select_slt_zero_constant1_constant2: +; LA32: # %bb.0: +; LA32-NEXT: srai.w $a0, $a0, 31 +; LA32-NEXT: andi $a0, $a0, 10 +; LA32-NEXT: addi.w $a0, $a0, -3 +; LA32-NEXT: ret +; +; LA64-LABEL: select_slt_zero_constant1_constant2: +; LA64: # %bb.0: +; LA64-NEXT: slti $a0, $a0, 0 +; LA64-NEXT: addi.w $a1, $zero, -3 +; LA64-NEXT: masknez $a1, $a1, $a0 +; LA64-NEXT: ori $a2, $zero, 7 +; 
LA64-NEXT: maskeqz $a0, $a2, $a0 +; LA64-NEXT: or $a0, $a0, $a1 +; LA64-NEXT: ret + %cmp = icmp slt i32 %x, 0 + %cond = select i1 %cmp, i32 7, i32 -3 + ret i32 %cond +} |
