summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author yangzhaoxin <yangzhaoxin@loongson.cn> 2025-10-20 17:52:37 +0800
committer yangzhaoxin <yangzhaoxin@loongson.cn> 2025-10-23 09:19:26 +0800
commit e52b1d33b98949b883a826716620a7c88ecaf5a1 (patch)
tree a14b4b687f179d08f446702051ed98f713391e30
parent 1014b2a1018b5ded88f5a23c7a1fddf7725e1a63 (diff)
[LoongArch] Add support for vector FP_EXTEND from vxf32 to vxf64 (users/ylzsx/opt-vector-fpext)
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp 68
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchISelLowering.h 4
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td 5
-rw-r--r-- llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td 10
-rw-r--r-- llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fpext.ll 43
-rw-r--r-- llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fpext.ll 27
6 files changed, 101 insertions, 56 deletions
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ca4a655f0658..54f0db42ed16 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -386,6 +386,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
}
// Set operations for 'LASX' feature.
@@ -448,6 +449,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
}
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom);
}
// Set DAG combine for LA32 and LA64.
@@ -592,10 +594,74 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerVECREDUCE(Op, DAG);
case ISD::ConstantFP:
return lowerConstantFP(Op, DAG);
+ case ISD::FP_EXTEND:
+ return lowerFP_EXTEND(Op, DAG);
}
return SDValue();
}
+/// Lower a vector ISD::FP_EXTEND to LoongArchISD::VFCVTL or VFCVTH.
+/// Handles v2f64 = fp_extend v2f32 (LSX) and v4f64 = fp_extend v4f32 (LASX);
+/// returns an empty SDValue for any other type combination.
+SDValue LoongArchTargetLowering::lowerFP_EXTEND(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  SDValue Src = Op->getOperand(0);
+  EVT SVT = Src.getValueType();
+
+  // Reject unsupported types up front, so that no DAG nodes are created for
+  // them and getSimpleVT() below is only called on a known simple type.
+  bool IsLSX = VT == MVT::v2f64 && SVT == MVT::v2f32 && Subtarget.hasExtLSX();
+  bool IsLASX = VT == MVT::v4f64 && SVT == MVT::v4f32 && Subtarget.hasExtLASX();
+  if (!IsLSX && !IsLASX)
+    return SDValue();
+
+  // The VFCVT nodes take a source vector with twice as many f32 elements.
+  EVT WideOpVT = SVT.getSimpleVT().getDoubleNumVectorElementsVT();
+
+  // Return the vector whose high half is Op (looking through bitcasts), or an
+  // empty SDValue. The vector must be as wide as WideOpVT, because it is
+  // bitcast to WideOpVT below.
+  auto CheckVecHighPart = [&WideOpVT](SDValue Op) {
+    Op = peekThroughBitcasts(Op);
+    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+      return SDValue();
+
+    SDValue SOp = Op.getOperand(0);
+    EVT SOpVT = SOp.getValueType();
+    if (!SOpVT.isVector() || (SOpVT.getVectorNumElements() % 2 != 0) ||
+        SOp.getValueSizeInBits() != WideOpVT.getSizeInBits())
+      return SDValue();
+    const uint64_t Imm = Op.getConstantOperandVal(1);
+    return Imm == SOpVT.getVectorNumElements() / 2 ? SOp : SDValue();
+  };
+
+  // If the operand of ISD::FP_EXTEND comes from the high part of a vector,
+  // generate LoongArchISD::VFCVTH, otherwise LoongArchISD::VFCVTL.
+  unsigned Opcode;
+  SDValue VFCVTOp;
+  if (SDValue V = CheckVecHighPart(Src)) {
+    Opcode = LoongArchISD::VFCVTH;
+    VFCVTOp = DAG.getBitcast(WideOpVT, V);
+  } else {
+    // Place Src in the low half of an otherwise undefined wide vector.
+    Opcode = LoongArchISD::VFCVTL;
+    VFCVTOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideOpVT,
+                          DAG.getUNDEF(WideOpVT), Src,
+                          DAG.getVectorIdxConstant(0, DL));
+  }
+
+  // XVFCVT instructions operate on each 128-bit segment as a lane, so for
+  // LASX a vector_shuffle is required first.
+  if (IsLASX) {
+    SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
+    VFCVTOp = DAG.getVectorShuffle(WideOpVT, DL, VFCVTOp,
+                                   DAG.getUNDEF(WideOpVT), Mask);
+  }
+  return DAG.getNode(Opcode, DL, VT, VFCVTOp);
+}
+
SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -7515,6 +7581,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VANY_NONZERO)
NODE_NAME_CASE(FRECIPE)
NODE_NAME_CASE(FRSQRTE)
+ NODE_NAME_CASE(VFCVTL)
+ NODE_NAME_CASE(VFCVTH)
NODE_NAME_CASE(VSLLI)
NODE_NAME_CASE(VSRLI)
NODE_NAME_CASE(VBSLL)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 8a4d7748467c..cf6dffa19508 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -167,6 +167,9 @@ enum NodeType : unsigned {
FRECIPE,
FRSQRTE,
+ VFCVTL,
+ VFCVTH,
+
// Vector logicial left / right shift by immediate
VSLLI,
VSRLI,
@@ -415,6 +418,7 @@ private:
SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 613dea6093f5..2d518bb7c4e9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -2403,6 +2403,11 @@ def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm),
def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm),
(XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>;
+def : Pat<(v4f64 (loongarch_vfcvtl_d_s (v8f32 LASX256:$xj))),
+ (XVFCVTL_D_S LASX256:$xj)>;
+def : Pat<(v4f64 (loongarch_vfcvth_d_s (v8f32 LASX256:$xj))),
+ (XVFCVTH_D_S LASX256:$xj)>;
+
// load
def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm),
(XVLD GPR:$rj, (to_valid_timm timm:$imm))>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 4619c6bd248a..8477125481c7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -32,6 +32,8 @@ def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTC
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVLDREPL : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisPtrTy<1>]>;
def SDT_LoongArchVMSKCOND : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
+def SDT_LoongArchVFCVTLH_D_S : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisFP<0>,
+ SDTCisVec<1>, SDTCisFP<1>]>;
// Target nodes.
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
@@ -82,6 +84,9 @@ def loongarch_vmskgez: SDNode<"LoongArchISD::VMSKGEZ", SDT_LoongArchVMSKCOND>;
def loongarch_vmskeqz: SDNode<"LoongArchISD::VMSKEQZ", SDT_LoongArchVMSKCOND>;
def loongarch_vmsknez: SDNode<"LoongArchISD::VMSKNEZ", SDT_LoongArchVMSKCOND>;
+def loongarch_vfcvtl_d_s: SDNode<"LoongArchISD::VFCVTL", SDT_LoongArchVFCVTLH_D_S>;
+def loongarch_vfcvth_d_s: SDNode<"LoongArchISD::VFCVTH", SDT_LoongArchVFCVTLH_D_S>;
+
def immZExt1 : ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<GRLenVT, [{return isUInt<2>(Imm);}]>;
def immZExt3 : ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]>;
@@ -2519,6 +2524,11 @@ def : Pat<(f64 (froundeven FPR64:$fj)),
(f64 (EXTRACT_SUBREG (VFRINTRNE_D (VREPLVEI_D
(SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>;
+def : Pat<(v2f64 (loongarch_vfcvtl_d_s (v4f32 LSX128:$vj))),
+ (VFCVTL_D_S LSX128:$vj)>;
+def : Pat<(v2f64 (loongarch_vfcvth_d_s (v4f32 LSX128:$vj))),
+ (VFCVTH_D_S LSX128:$vj)>;
+
// load
def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm),
(VLD GPR:$rj, (to_valid_timm timm:$imm))>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fpext.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fpext.ll
index 9c370dd78fad..619c7d00b54b 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fpext.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fpext.ll
@@ -6,17 +6,8 @@ define void @fpext_v4f32_to_v4f64(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: fpext_v4f32_to_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vreplvei.w $vr1, $vr0, 3
-; CHECK-NEXT: fcvt.d.s $fa1, $fa1
-; CHECK-NEXT: vreplvei.w $vr2, $vr0, 2
-; CHECK-NEXT: fcvt.d.s $fa2, $fa2
-; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
-; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
-; CHECK-NEXT: fcvt.d.s $fa1, $fa1
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
-; CHECK-NEXT: fcvt.d.s $fa0, $fa0
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 216
+; CHECK-NEXT: xvfcvtl.d.s $xr0, $xr0
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -30,31 +21,11 @@ define void @fpext_v8f32_to_v8f64(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: fpext_v8f32_to_v8f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vreplvei.w $vr2, $vr1, 3
-; CHECK-NEXT: fcvt.d.s $fa2, $fa2
-; CHECK-NEXT: vreplvei.w $vr3, $vr1, 2
-; CHECK-NEXT: fcvt.d.s $fa3, $fa3
-; CHECK-NEXT: vextrins.d $vr3, $vr2, 16
-; CHECK-NEXT: vreplvei.w $vr2, $vr1, 1
-; CHECK-NEXT: fcvt.d.s $fa2, $fa2
-; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0
-; CHECK-NEXT: fcvt.d.s $fa1, $fa1
-; CHECK-NEXT: vextrins.d $vr1, $vr2, 16
-; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2
-; CHECK-NEXT: vreplvei.w $vr2, $vr0, 3
-; CHECK-NEXT: fcvt.d.s $fa2, $fa2
-; CHECK-NEXT: vreplvei.w $vr3, $vr0, 2
-; CHECK-NEXT: fcvt.d.s $fa3, $fa3
-; CHECK-NEXT: vextrins.d $vr3, $vr2, 16
-; CHECK-NEXT: vreplvei.w $vr2, $vr0, 1
-; CHECK-NEXT: fcvt.d.s $fa2, $fa2
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
-; CHECK-NEXT: fcvt.d.s $fa0, $fa0
-; CHECK-NEXT: vextrins.d $vr0, $vr2, 16
-; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: xvst $xr1, $a0, 32
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 216
+; CHECK-NEXT: xvfcvtl.d.s $xr1, $xr0
+; CHECK-NEXT: xvfcvth.d.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 32
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <8 x float>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fpext.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fpext.ll
index e2bf8ab71d8e..840f6aa3f530 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fpext.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fpext.ll
@@ -22,21 +22,16 @@ define void @fpext_v2f32_to_v2f64(ptr %res, ptr %a0) nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: fld.s $fa0, $a1, 4
; LA32-NEXT: fld.s $fa1, $a1, 0
-; LA32-NEXT: fcvt.d.s $fa0, $fa0
-; LA32-NEXT: fcvt.d.s $fa1, $fa1
-; LA32-NEXT: vextrins.d $vr1, $vr0, 16
-; LA32-NEXT: vst $vr1, $a0, 0
+; LA32-NEXT: vextrins.w $vr1, $vr0, 16
+; LA32-NEXT: vfcvtl.d.s $vr0, $vr1
+; LA32-NEXT: vst $vr0, $a0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: fpext_v2f32_to_v2f64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: ld.d $a1, $a1, 0
; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0
-; LA64-NEXT: vreplvei.w $vr1, $vr0, 1
-; LA64-NEXT: fcvt.d.s $fa1, $fa1
-; LA64-NEXT: vreplvei.w $vr0, $vr0, 0
-; LA64-NEXT: fcvt.d.s $fa0, $fa0
-; LA64-NEXT: vextrins.d $vr0, $vr1, 16
+; LA64-NEXT: vfcvtl.d.s $vr0, $vr0
; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
entry:
@@ -50,18 +45,10 @@ define void @fpext_v4f32_to_v4f64(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: fpext_v4f32_to_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1
-; CHECK-NEXT: fcvt.d.s $fa1, $fa1
-; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0
-; CHECK-NEXT: fcvt.d.s $fa2, $fa2
-; CHECK-NEXT: vextrins.d $vr2, $vr1, 16
-; CHECK-NEXT: vreplvei.w $vr1, $vr0, 3
-; CHECK-NEXT: fcvt.d.s $fa1, $fa1
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2
-; CHECK-NEXT: fcvt.d.s $fa0, $fa0
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT: vfcvtl.d.s $vr1, $vr0
+; CHECK-NEXT: vfcvth.d.s $vr0, $vr0
; CHECK-NEXT: vst $vr0, $a0, 16
-; CHECK-NEXT: vst $vr2, $a0, 0
+; CHECK-NEXT: vst $vr1, $a0, 0
; CHECK-NEXT: ret
entry:
%v0 = load <4 x float>, ptr %a0