diff options
| author | yangzhaoxin <yangzhaoxin@loongson.cn> | 2025-10-20 17:52:37 +0800 |
|---|---|---|
| committer | yangzhaoxin <yangzhaoxin@loongson.cn> | 2025-10-23 09:19:26 +0800 |
| commit | e52b1d33b98949b883a826716620a7c88ecaf5a1 (patch) | |
| tree | a14b4b687f179d08f446702051ed98f713391e30 | |
| parent | 1014b2a1018b5ded88f5a23c7a1fddf7725e1a63 (diff) | |
[LoongArch] Add support for vector FP_EXTEND from vxf32 to vxf64 (users/ylzsx/opt-vector-fpext)
6 files changed, 101 insertions, 56 deletions
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index ca4a655f0658..54f0db42ed16 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -386,6 +386,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); } + setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); } // Set operations for 'LASX' feature. @@ -448,6 +449,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, VT, Expand); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal); } + setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom); } // Set DAG combine for LA32 and LA64. @@ -592,10 +594,74 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, return lowerVECREDUCE(Op, DAG); case ISD::ConstantFP: return lowerConstantFP(Op, DAG); + case ISD::FP_EXTEND: + return lowerFP_EXTEND(Op, DAG); } return SDValue(); } +SDValue LoongArchTargetLowering::lowerFP_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Src = Op->getOperand(0); + EVT SVT = Src.getValueType(); + + // Check if Op is the high part of vector. 
+ auto CheckVecHighPart = [](SDValue Op) { + Op = peekThroughBitcasts(Op); + if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) { + SDValue SOp = Op.getOperand(0); + EVT SVT = SOp.getValueType(); + if (!SVT.isVector() || (SVT.getVectorNumElements() % 2 != 0)) + return SDValue(); + + const uint64_t Imm = Op.getConstantOperandVal(1); + if (Imm == SVT.getVectorNumElements() / 2) + return SOp; + return SDValue(); + } + return SDValue(); + }; + + unsigned Opcode; + SDValue VFCVTOp; + EVT WideOpVT = SVT.getSimpleVT().getDoubleNumVectorElementsVT(); + SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL); + + // If the operand of ISD::FP_EXTEND comes from the high part of vector, + // generate LoongArchISD::VFCVTH, otherwise LoongArchISD::VFCVTL. + if (SDValue V = CheckVecHighPart(Src)) { + assert(V.getValueSizeInBits() == WideOpVT.getSizeInBits() && + "Unexpected wide vector"); + Opcode = LoongArchISD::VFCVTH; + VFCVTOp = DAG.getBitcast(WideOpVT, V); + } else { + Opcode = LoongArchISD::VFCVTL; + VFCVTOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideOpVT, + DAG.getUNDEF(WideOpVT), Src, ZeroIdx); + } + + // v2f64 = fp_extend v2f32 + if (VT == MVT::v2f64 && SVT == MVT::v2f32 && Subtarget.hasExtLSX()) { + return DAG.getNode(Opcode, DL, VT, VFCVTOp); + } + + // v4f64 = fp_extend v4f32 + if (VT == MVT::v4f64 && SVT == MVT::v4f32 && Subtarget.hasExtLASX()) { + // XVFCVT instruction operators on each 128-bit segament as a lane, so a + // vector_shuffle is required firstly. 
+ SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7}; + SDValue Res = DAG.getVectorShuffle(WideOpVT, DL, VFCVTOp, + DAG.getUNDEF(WideOpVT), Mask); + Res = DAG.getNode(Opcode, DL, VT, Res); + return Res; + } + + return SDValue(); +} + SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); @@ -7515,6 +7581,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VANY_NONZERO) NODE_NAME_CASE(FRECIPE) NODE_NAME_CASE(FRSQRTE) + NODE_NAME_CASE(VFCVTL) + NODE_NAME_CASE(VFCVTH) NODE_NAME_CASE(VSLLI) NODE_NAME_CASE(VSRLI) NODE_NAME_CASE(VBSLL) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 8a4d7748467c..cf6dffa19508 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -167,6 +167,9 @@ enum NodeType : unsigned { FRECIPE, FRSQRTE, + VFCVTL, + VFCVTH, + // Vector logicial left / right shift by immediate VSLLI, VSRLI, @@ -415,6 +418,7 @@ private: SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 613dea6093f5..2d518bb7c4e9 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -2403,6 +2403,11 @@ def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), (XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>; +def : Pat<(v4f64 (loongarch_vfcvtl_d_s (v8f32 LASX256:$xj))), + (XVFCVTL_D_S LASX256:$xj)>; 
+def : Pat<(v4f64 (loongarch_vfcvth_d_s (v8f32 LASX256:$xj))), + (XVFCVTH_D_S LASX256:$xj)>; + // load def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), (XVLD GPR:$rj, (to_valid_timm timm:$imm))>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 4619c6bd248a..8477125481c7 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -32,6 +32,8 @@ def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTC def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>; def SDT_LoongArchVLDREPL : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisPtrTy<1>]>; def SDT_LoongArchVMSKCOND : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; +def SDT_LoongArchVFCVTLH_D_S : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisFP<0>, + SDTCisVec<1>, SDTCisFP<1>]>; // Target nodes. def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>; @@ -82,6 +84,9 @@ def loongarch_vmskgez: SDNode<"LoongArchISD::VMSKGEZ", SDT_LoongArchVMSKCOND>; def loongarch_vmskeqz: SDNode<"LoongArchISD::VMSKEQZ", SDT_LoongArchVMSKCOND>; def loongarch_vmsknez: SDNode<"LoongArchISD::VMSKNEZ", SDT_LoongArchVMSKCOND>; +def loongarch_vfcvtl_d_s: SDNode<"LoongArchISD::VFCVTL", SDT_LoongArchVFCVTLH_D_S>; +def loongarch_vfcvth_d_s: SDNode<"LoongArchISD::VFCVTH", SDT_LoongArchVFCVTLH_D_S>; + def immZExt1 : ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]>; def immZExt2 : ImmLeaf<GRLenVT, [{return isUInt<2>(Imm);}]>; def immZExt3 : ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]>; @@ -2519,6 +2524,11 @@ def : Pat<(f64 (froundeven FPR64:$fj)), (f64 (EXTRACT_SUBREG (VFRINTRNE_D (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>; +def : Pat<(v2f64 (loongarch_vfcvtl_d_s (v4f32 LSX128:$vj))), + (VFCVTL_D_S LSX128:$vj)>; +def : Pat<(v2f64 (loongarch_vfcvth_d_s (v4f32 LSX128:$vj))), + (VFCVTH_D_S LSX128:$vj)>; + 
// load def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), (VLD GPR:$rj, (to_valid_timm timm:$imm))>; diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fpext.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fpext.ll index 9c370dd78fad..619c7d00b54b 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fpext.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fpext.ll @@ -6,17 +6,8 @@ define void @fpext_v4f32_to_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: fpext_v4f32_to_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 3 -; CHECK-NEXT: fcvt.d.s $fa1, $fa1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 2 -; CHECK-NEXT: fcvt.d.s $fa2, $fa2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: fcvt.d.s $fa1, $fa1 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: fcvt.d.s $fa0, $fa0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 216 +; CHECK-NEXT: xvfcvtl.d.s $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -30,31 +21,11 @@ define void @fpext_v8f32_to_v8f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: fpext_v8f32_to_v8f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr2, $vr1, 3 -; CHECK-NEXT: fcvt.d.s $fa2, $fa2 -; CHECK-NEXT: vreplvei.w $vr3, $vr1, 2 -; CHECK-NEXT: fcvt.d.s $fa3, $fa3 -; CHECK-NEXT: vextrins.d $vr3, $vr2, 16 -; CHECK-NEXT: vreplvei.w $vr2, $vr1, 1 -; CHECK-NEXT: fcvt.d.s $fa2, $fa2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: fcvt.d.s $fa1, $fa1 -; CHECK-NEXT: vextrins.d $vr1, $vr2, 16 -; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 3 -; CHECK-NEXT: fcvt.d.s $fa2, $fa2 -; CHECK-NEXT: vreplvei.w $vr3, $vr0, 2 -; CHECK-NEXT: fcvt.d.s $fa3, $fa3 -; CHECK-NEXT: vextrins.d $vr3, $vr2, 16 -; CHECK-NEXT: vreplvei.w $vr2, 
$vr0, 1 -; CHECK-NEXT: fcvt.d.s $fa2, $fa2 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: fcvt.d.s $fa0, $fa0 -; CHECK-NEXT: vextrins.d $vr0, $vr2, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: xvst $xr1, $a0, 32 +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 216 +; CHECK-NEXT: xvfcvtl.d.s $xr1, $xr0 +; CHECK-NEXT: xvfcvth.d.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 32 +; CHECK-NEXT: xvst $xr1, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fpext.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fpext.ll index e2bf8ab71d8e..840f6aa3f530 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fpext.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fpext.ll @@ -22,21 +22,16 @@ define void @fpext_v2f32_to_v2f64(ptr %res, ptr %a0) nounwind { ; LA32: # %bb.0: # %entry ; LA32-NEXT: fld.s $fa0, $a1, 4 ; LA32-NEXT: fld.s $fa1, $a1, 0 -; LA32-NEXT: fcvt.d.s $fa0, $fa0 -; LA32-NEXT: fcvt.d.s $fa1, $fa1 -; LA32-NEXT: vextrins.d $vr1, $vr0, 16 -; LA32-NEXT: vst $vr1, $a0, 0 +; LA32-NEXT: vextrins.w $vr1, $vr0, 16 +; LA32-NEXT: vfcvtl.d.s $vr0, $vr1 +; LA32-NEXT: vst $vr0, $a0, 0 ; LA32-NEXT: ret ; ; LA64-LABEL: fpext_v2f32_to_v2f64: ; LA64: # %bb.0: # %entry ; LA64-NEXT: ld.d $a1, $a1, 0 ; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 0 -; LA64-NEXT: vreplvei.w $vr1, $vr0, 1 -; LA64-NEXT: fcvt.d.s $fa1, $fa1 -; LA64-NEXT: vreplvei.w $vr0, $vr0, 0 -; LA64-NEXT: fcvt.d.s $fa0, $fa0 -; LA64-NEXT: vextrins.d $vr0, $vr1, 16 +; LA64-NEXT: vfcvtl.d.s $vr0, $vr0 ; LA64-NEXT: vst $vr0, $a0, 0 ; LA64-NEXT: ret entry: @@ -50,18 +45,10 @@ define void @fpext_v4f32_to_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: fpext_v4f32_to_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: fcvt.d.s $fa1, $fa1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: fcvt.d.s $fa2, $fa2 -; CHECK-NEXT: vextrins.d 
$vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 3 -; CHECK-NEXT: fcvt.d.s $fa1, $fa1 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2 -; CHECK-NEXT: fcvt.d.s $fa0, $fa0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 +; CHECK-NEXT: vfcvtl.d.s $vr1, $vr0 +; CHECK-NEXT: vfcvth.d.s $vr0, $vr0 ; CHECK-NEXT: vst $vr0, $a0, 16 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr1, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 |
