summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Qi Zhao <zhaoqi01@loongson.cn> 2025-11-04 10:04:30 +0800
committer: Qi Zhao <zhaoqi01@loongson.cn> 2025-11-04 10:13:30 +0800
commit: 54ef5a325369e589d0cc4c79203f2ac47db365a7 (patch)
tree: 4bf2b8b3137a16ce4077aba5261fb707e5f8d4fb
parent: f7fff18ad09680056f028a99a961d4120063c55b (diff)
[LoongArch] Optimize for scalar type `ctpop` when lsx enabled (branch: users/zhaoqi5/opt-scalar-ctpop-with-lsx)
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 60
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 24
-rw-r--r-- llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll | 27
-rw-r--r-- llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll | 87
-rw-r--r-- llvm/test/CodeGen/LoongArch/sextw-removal.ll | 28
5 files changed, 112 insertions, 114 deletions
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index fe700e17d341..9c55ea35b34c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -7038,29 +7038,40 @@ static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
+
+ unsigned BroadcastOp, CTOp, PickOp;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case LoongArch::PseudoCTPOP_B:
+ BroadcastOp = LoongArch::VREPLGR2VR_B;
+ CTOp = LoongArch::VPCNT_B;
+ PickOp = LoongArch::VPICKVE2GR_B;
+ break;
+ case LoongArch::PseudoCTPOP_H:
+ case LoongArch::PseudoCTPOP_H_LA32:
+ BroadcastOp = LoongArch::VREPLGR2VR_H;
+ CTOp = LoongArch::VPCNT_H;
+ PickOp = LoongArch::VPICKVE2GR_H;
+ break;
+ case LoongArch::PseudoCTPOP_W:
+ case LoongArch::PseudoCTPOP_W_LA32:
+ BroadcastOp = LoongArch::VREPLGR2VR_W;
+ CTOp = LoongArch::VPCNT_W;
+ PickOp = LoongArch::VPICKVE2GR_W;
+ break;
+ case LoongArch::PseudoCTPOP_D:
+ BroadcastOp = LoongArch::VREPLGR2VR_D;
+ CTOp = LoongArch::VPCNT_D;
+ PickOp = LoongArch::VPICKVE2GR_D;
+ break;
+ }
+
Register ScratchReg1 = MRI.createVirtualRegister(RC);
Register ScratchReg2 = MRI.createVirtualRegister(RC);
- Register ScratchReg3 = MRI.createVirtualRegister(RC);
-
- BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
- BuildMI(*BB, MI, DL,
- TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
- : LoongArch::VINSGR2VR_W),
- ScratchReg2)
- .addReg(ScratchReg1)
- .addReg(Src)
- .addImm(0);
- BuildMI(
- *BB, MI, DL,
- TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
- ScratchReg3)
- .addReg(ScratchReg2);
- BuildMI(*BB, MI, DL,
- TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
- : LoongArch::VPICKVE2GR_W),
- Dst)
- .addReg(ScratchReg3)
- .addImm(0);
+ BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Src);
+ BuildMI(*BB, MI, DL, TII->get(CTOp), ScratchReg2).addReg(ScratchReg1);
+ BuildMI(*BB, MI, DL, TII->get(PickOp), Dst).addReg(ScratchReg2).addImm(0);
MI.eraseFromParent();
return BB;
@@ -7432,7 +7443,12 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
case LoongArch::PseudoXVINSGR2VR_B:
case LoongArch::PseudoXVINSGR2VR_H:
return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
- case LoongArch::PseudoCTPOP:
+ case LoongArch::PseudoCTPOP_B:
+ case LoongArch::PseudoCTPOP_H:
+ case LoongArch::PseudoCTPOP_W:
+ case LoongArch::PseudoCTPOP_D:
+ case LoongArch::PseudoCTPOP_H_LA32:
+ case LoongArch::PseudoCTPOP_W_LA32:
return emitPseudoCTPOP(MI, BB, Subtarget);
case LoongArch::PseudoVMSKLTZ_B:
case LoongArch::PseudoVMSKLTZ_H:
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 6b74a4b5e5f6..14543b3e1f5a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1271,9 +1271,27 @@ def PseudoVBZ_W : VecCond<loongarch_vall_zero, v4i32>;
def PseudoVBZ_D : VecCond<loongarch_vall_zero, v2i64>;
def PseudoVBZ : VecCond<loongarch_vany_zero, v16i8>;
-let usesCustomInserter = 1 in
-def PseudoCTPOP : Pseudo<(outs GPR:$rd), (ins GPR:$rj),
- [(set GPR:$rd, (ctpop GPR:$rj))]>;
+let usesCustomInserter = 1 in {
+def PseudoCTPOP_B : Pseudo<(outs GPR:$rd), (ins GPR:$rj),
+ [(set GPR:$rd, (ctpop (and GPR:$rj, 255)))]>;
+def PseudoCTPOP_H : Pseudo<(outs GPR:$rd), (ins GPR:$rj),
+ [(set GPR:$rd, (ctpop (loongarch_bstrpick GRLenVT:$rj,
+ (GRLenVT 15), (GRLenVT 0))))]>;
+let Predicates = [IsLA32] in {
+def PseudoCTPOP_H_LA32 : Pseudo<(outs GPR:$rd), (ins GPR:$rj),
+ [(set GPR:$rd, (ctpop (and GPR:$rj, 65535)))]>;
+def PseudoCTPOP_W_LA32 : Pseudo<(outs GPR:$rd), (ins GPR:$rj),
+ [(set GPR:$rd, (ctpop GPR:$rj))]>;
+} // Predicates = [IsLA32]
+
+let Predicates = [IsLA64] in {
+def PseudoCTPOP_W : Pseudo<(outs GPR:$rd), (ins GPR:$rj),
+ [(set GPR:$rd, (ctpop (loongarch_bstrpick i64:$rj,
+ (i64 31), (i64 0))))]>;
+def PseudoCTPOP_D : Pseudo<(outs GPR:$rd), (ins GPR:$rj),
+ [(set GPR:$rd, (ctpop GPR:$rj))]>;
+} // Predicates = [IsLA64]
+} // usesCustomInserter = 1
let usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
def PseudoVMSKLTZ_B : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>;
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index 27be02c50f1c..62dbeef42547 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -510,11 +510,9 @@ define i8 @test_ctpop_i8(i8 %a) nounwind {
;
; LA64-LABEL: test_ctpop_i8:
; LA64: # %bb.0:
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: vldi $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vpcnt.d $vr0, $vr0
-; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: vreplgr2vr.b $vr0, $a0
+; LA64-NEXT: vpcnt.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0
; LA64-NEXT: ret
%1 = call i8 @llvm.ctpop.i8(i8 %a)
ret i8 %1
@@ -564,11 +562,9 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
;
; LA64-LABEL: test_ctpop_i16:
; LA64: # %bb.0:
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vldi $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vpcnt.d $vr0, $vr0
-; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: vreplgr2vr.h $vr0, $a0
+; LA64-NEXT: vpcnt.h $vr0, $vr0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
; LA64-NEXT: ret
%1 = call i16 @llvm.ctpop.i16(i16 %a)
ret i16 %1
@@ -625,11 +621,9 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
;
; LA64-LABEL: test_ctpop_i32:
; LA64: # %bb.0:
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vldi $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vpcnt.d $vr0, $vr0
-; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: vreplgr2vr.w $vr0, $a0
+; LA64-NEXT: vpcnt.w $vr0, $vr0
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
; LA64-NEXT: ret
%1 = call i32 @llvm.ctpop.i32(i32 %a)
ret i32 %1
@@ -714,8 +708,7 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
;
; LA64-LABEL: test_ctpop_i64:
; LA64: # %bb.0:
-; LA64-NEXT: vldi $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vreplgr2vr.d $vr0, $a0
; LA64-NEXT: vpcnt.d $vr0, $vr0
; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
; LA64-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll b/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll
index 150a6f16804d..4bce0f4089d0 100644
--- a/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll
+++ b/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll
@@ -11,29 +11,23 @@ declare i64 @llvm.ctpop.i64(i64)
define i8 @test_ctpop_i8(i8 %a) nounwind {
; LA32R-LABEL: test_ctpop_i8:
; LA32R: # %bb.0:
-; LA32R-NEXT: andi $a0, $a0, 255
-; LA32R-NEXT: vldi $vr0, 0
-; LA32R-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; LA32R-NEXT: vpcnt.w $vr0, $vr0
-; LA32R-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32R-NEXT: vreplgr2vr.b $vr0, $a0
+; LA32R-NEXT: vpcnt.b $vr0, $vr0
+; LA32R-NEXT: vpickve2gr.b $a0, $vr0, 0
; LA32R-NEXT: ret
;
; LA32S-LABEL: test_ctpop_i8:
; LA32S: # %bb.0:
-; LA32S-NEXT: andi $a0, $a0, 255
-; LA32S-NEXT: vldi $vr0, 0
-; LA32S-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; LA32S-NEXT: vpcnt.w $vr0, $vr0
-; LA32S-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32S-NEXT: vreplgr2vr.b $vr0, $a0
+; LA32S-NEXT: vpcnt.b $vr0, $vr0
+; LA32S-NEXT: vpickve2gr.b $a0, $vr0, 0
; LA32S-NEXT: ret
;
; LA64-LABEL: test_ctpop_i8:
; LA64: # %bb.0:
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: vldi $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vpcnt.d $vr0, $vr0
-; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: vreplgr2vr.b $vr0, $a0
+; LA64-NEXT: vpcnt.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.b $a0, $vr0, 0
; LA64-NEXT: ret
%1 = call i8 @llvm.ctpop.i8(i8 %a)
ret i8 %1
@@ -42,31 +36,23 @@ define i8 @test_ctpop_i8(i8 %a) nounwind {
define i16 @test_ctpop_i16(i16 %a) nounwind {
; LA32R-LABEL: test_ctpop_i16:
; LA32R: # %bb.0:
-; LA32R-NEXT: lu12i.w $a1, 15
-; LA32R-NEXT: ori $a1, $a1, 4095
-; LA32R-NEXT: and $a0, $a0, $a1
-; LA32R-NEXT: vldi $vr0, 0
-; LA32R-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; LA32R-NEXT: vpcnt.w $vr0, $vr0
-; LA32R-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32R-NEXT: vreplgr2vr.h $vr0, $a0
+; LA32R-NEXT: vpcnt.h $vr0, $vr0
+; LA32R-NEXT: vpickve2gr.h $a0, $vr0, 0
; LA32R-NEXT: ret
;
; LA32S-LABEL: test_ctpop_i16:
; LA32S: # %bb.0:
-; LA32S-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32S-NEXT: vldi $vr0, 0
-; LA32S-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; LA32S-NEXT: vpcnt.w $vr0, $vr0
-; LA32S-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32S-NEXT: vreplgr2vr.h $vr0, $a0
+; LA32S-NEXT: vpcnt.h $vr0, $vr0
+; LA32S-NEXT: vpickve2gr.h $a0, $vr0, 0
; LA32S-NEXT: ret
;
; LA64-LABEL: test_ctpop_i16:
; LA64: # %bb.0:
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: vldi $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vpcnt.d $vr0, $vr0
-; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: vreplgr2vr.h $vr0, $a0
+; LA64-NEXT: vpcnt.h $vr0, $vr0
+; LA64-NEXT: vpickve2gr.h $a0, $vr0, 0
; LA64-NEXT: ret
%1 = call i16 @llvm.ctpop.i16(i16 %a)
ret i16 %1
@@ -75,27 +61,23 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
define i32 @test_ctpop_i32(i32 %a) nounwind {
; LA32R-LABEL: test_ctpop_i32:
; LA32R: # %bb.0:
-; LA32R-NEXT: vldi $vr0, 0
-; LA32R-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA32R-NEXT: vreplgr2vr.w $vr0, $a0
; LA32R-NEXT: vpcnt.w $vr0, $vr0
; LA32R-NEXT: vpickve2gr.w $a0, $vr0, 0
; LA32R-NEXT: ret
;
; LA32S-LABEL: test_ctpop_i32:
; LA32S: # %bb.0:
-; LA32S-NEXT: vldi $vr0, 0
-; LA32S-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA32S-NEXT: vreplgr2vr.w $vr0, $a0
; LA32S-NEXT: vpcnt.w $vr0, $vr0
; LA32S-NEXT: vpickve2gr.w $a0, $vr0, 0
; LA32S-NEXT: ret
;
; LA64-LABEL: test_ctpop_i32:
; LA64: # %bb.0:
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: vldi $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT: vpcnt.d $vr0, $vr0
-; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: vreplgr2vr.w $vr0, $a0
+; LA64-NEXT: vpcnt.w $vr0, $vr0
+; LA64-NEXT: vpickve2gr.w $a0, $vr0, 0
; LA64-NEXT: ret
%1 = call i32 @llvm.ctpop.i32(i32 %a)
ret i32 %1
@@ -104,12 +86,10 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
define i64 @test_ctpop_i64(i64 %a) nounwind {
; LA32R-LABEL: test_ctpop_i64:
; LA32R: # %bb.0:
-; LA32R-NEXT: vldi $vr0, 0
-; LA32R-NEXT: vldi $vr1, 0
-; LA32R-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32R-NEXT: vpcnt.w $vr1, $vr1
-; LA32R-NEXT: vpickve2gr.w $a1, $vr1, 0
-; LA32R-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA32R-NEXT: vreplgr2vr.w $vr0, $a1
+; LA32R-NEXT: vpcnt.w $vr0, $vr0
+; LA32R-NEXT: vpickve2gr.w $a1, $vr0, 0
+; LA32R-NEXT: vreplgr2vr.w $vr0, $a0
; LA32R-NEXT: vpcnt.w $vr0, $vr0
; LA32R-NEXT: vpickve2gr.w $a0, $vr0, 0
; LA32R-NEXT: add.w $a0, $a0, $a1
@@ -118,12 +98,10 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
;
; LA32S-LABEL: test_ctpop_i64:
; LA32S: # %bb.0:
-; LA32S-NEXT: vldi $vr0, 0
-; LA32S-NEXT: vldi $vr1, 0
-; LA32S-NEXT: vinsgr2vr.w $vr1, $a1, 0
-; LA32S-NEXT: vpcnt.w $vr1, $vr1
-; LA32S-NEXT: vpickve2gr.w $a1, $vr1, 0
-; LA32S-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA32S-NEXT: vreplgr2vr.w $vr0, $a1
+; LA32S-NEXT: vpcnt.w $vr0, $vr0
+; LA32S-NEXT: vpickve2gr.w $a1, $vr0, 0
+; LA32S-NEXT: vreplgr2vr.w $vr0, $a0
; LA32S-NEXT: vpcnt.w $vr0, $vr0
; LA32S-NEXT: vpickve2gr.w $a0, $vr0, 0
; LA32S-NEXT: add.w $a0, $a0, $a1
@@ -132,8 +110,7 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
;
; LA64-LABEL: test_ctpop_i64:
; LA64: # %bb.0:
-; LA64-NEXT: vldi $vr0, 0
-; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vreplgr2vr.d $vr0, $a0
; LA64-NEXT: vpcnt.d $vr0, $vr0
; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
; LA64-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/sextw-removal.ll b/llvm/test/CodeGen/LoongArch/sextw-removal.ll
index 0c31ff9eee1f..683d760721c3 100644
--- a/llvm/test/CodeGen/LoongArch/sextw-removal.ll
+++ b/llvm/test/CodeGen/LoongArch/sextw-removal.ll
@@ -146,19 +146,17 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind {
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
-; CHECK-NEXT: sra.w $a1, $a0, $a1
+; CHECK-NEXT: sra.w $a0, $a0, $a1
; CHECK-NEXT: .p2align 4, , 16
; CHECK-NEXT: .LBB4_1: # %bb2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: addi.w $a0, $a1, 0
; CHECK-NEXT: pcaddu18i $ra, %call36(bar)
; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: bstrpick.d $a1, $a0, 31, 0
-; CHECK-NEXT: vldi $vr0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0
-; CHECK-NEXT: vpcnt.d $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.d $a1, $vr0, 0
-; CHECK-NEXT: bnez $a0, .LBB4_1
+; CHECK-NEXT: move $a1, $a0
+; CHECK-NEXT: vreplgr2vr.w $vr0, $a0
+; CHECK-NEXT: vpcnt.w $vr0, $vr0
+; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT: bnez $a1, .LBB4_1
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 16
@@ -175,11 +173,9 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind {
; NORMV-NEXT: addi.w $a0, $a1, 0
; NORMV-NEXT: pcaddu18i $ra, %call36(bar)
; NORMV-NEXT: jirl $ra, $ra, 0
-; NORMV-NEXT: bstrpick.d $a1, $a0, 31, 0
-; NORMV-NEXT: vldi $vr0, 0
-; NORMV-NEXT: vinsgr2vr.d $vr0, $a1, 0
-; NORMV-NEXT: vpcnt.d $vr0, $vr0
-; NORMV-NEXT: vpickve2gr.d $a1, $vr0, 0
+; NORMV-NEXT: vreplgr2vr.w $vr0, $a0
+; NORMV-NEXT: vpcnt.w $vr0, $vr0
+; NORMV-NEXT: vpickve2gr.w $a1, $vr0, 0
; NORMV-NEXT: bnez $a0, .LBB4_1
; NORMV-NEXT: # %bb.2: # %bb7
; NORMV-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
@@ -275,8 +271,7 @@ define void @test7(i32 signext %arg, i32 signext %arg1) nounwind {
; CHECK-NEXT: addi.w $a0, $a0, 0
; CHECK-NEXT: pcaddu18i $ra, %call36(foo)
; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: vldi $vr0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
; CHECK-NEXT: vpcnt.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
; CHECK-NEXT: bnez $a0, .LBB6_1
@@ -296,8 +291,7 @@ define void @test7(i32 signext %arg, i32 signext %arg1) nounwind {
; NORMV-NEXT: addi.w $a0, $a0, 0
; NORMV-NEXT: pcaddu18i $ra, %call36(foo)
; NORMV-NEXT: jirl $ra, $ra, 0
-; NORMV-NEXT: vldi $vr0, 0
-; NORMV-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; NORMV-NEXT: vreplgr2vr.d $vr0, $a0
; NORMV-NEXT: vpcnt.d $vr0, $vr0
; NORMV-NEXT: vpickve2gr.d $a0, $vr0, 0
; NORMV-NEXT: bnez $a0, .LBB6_1