diff options
Diffstat (limited to 'llvm/lib/Target/RISCV/RISCVISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 424 |
1 files changed, 335 insertions, 89 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index a33224845e2b..a68a3c14dc41 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2173,7 +2173,7 @@ bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial( // on the basis that it's possible the sinking+duplication of the AND in // CodeGenPrepare triggered by this hook wouldn't decrease the instruction // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ). - if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs()) + if (!Subtarget.hasBEXTILike()) return false; ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1)); if (!Mask) @@ -3744,9 +3744,11 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, // different // FIXME: Support i1 vectors, maybe by promoting to i8? MVT EltTy = VT.getVectorElementType(); + if (EltTy == MVT::i1 || + !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType())) + return SDValue(); MVT SrcVT = Src.getSimpleValueType(); - if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType() || - !DAG.getTargetLoweringInfo().isTypeLegal(SrcVT)) + if (EltTy != SrcVT.getVectorElementType()) return SDValue(); SDValue Idx = SplatVal.getOperand(1); // The index must be a legal type. @@ -4518,41 +4520,104 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC; + // General case: splat the first operand and slide other operands down one + // by one to form a vector. Alternatively, if every operand is an + // extraction from element 0 of a vector, we use that vector from the last + // extraction as the start value and slide up instead of slide down. 
Such that + // (1) we can avoid the initial splat (2) we can turn those vslide1up into + // vslideup of 1 later and eliminate the vector to scalar movement, which is + // something we cannot do with vslide1down/vslidedown. + // Of course, using vslide1up/vslideup might increase the register pressure, + // and that's why we conservatively limit to cases where every operand is an + // extraction from the first element. + SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end()); + SDValue EVec; + bool SlideUp = false; + auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec, + SDValue Offset, SDValue Mask, SDValue VL) -> SDValue { + if (SlideUp) + return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset, + Mask, VL, Policy); + return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset, + Mask, VL, Policy); + }; + + // The reason we don't use all_of here is because we're also capturing EVec + // from the last non-undef operand. If the std::execution_policy of the + // underlying std::all_of is anything but std::sequenced_policy we might + // capture the wrong EVec. + for (SDValue V : Operands) { + using namespace SDPatternMatch; + SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero())); + if (!SlideUp) + break; + } + + if (SlideUp) { + MVT EVecContainerVT = EVec.getSimpleValueType(); + // Make sure the original vector has scalable vector type. + if (EVecContainerVT.isFixedLengthVector()) { + EVecContainerVT = + getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget); + EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget); + } + + // Adapt EVec's type into ContainerVT. + if (EVecContainerVT.getVectorMinNumElements() < + ContainerVT.getVectorMinNumElements()) + EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0); + else + EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0); + + // Reverse the elements as we're going to slide up from the last element. 
+ std::reverse(Operands.begin(), Operands.end()); + } + SDValue Vec; UndefCount = 0; - for (SDValue V : Op->ops()) { + for (SDValue V : Operands) { if (V.isUndef()) { UndefCount++; continue; } - // Start our sequence with a TA splat in the hopes that hardware is able to - // recognize there's no dependency on the prior value of our temporary - // register. + // Start our sequence with either a TA splat or extract source in the + // hopes that hardware is able to recognize there's no dependency on the + // prior value of our temporary register. if (!Vec) { - Vec = DAG.getSplatVector(VT, DL, V); - Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); + if (SlideUp) { + Vec = EVec; + } else { + Vec = DAG.getSplatVector(VT, DL, V); + Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); + } + UndefCount = 0; continue; } if (UndefCount) { const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); - Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), - Vec, Offset, Mask, VL, Policy); + Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask, + VL); UndefCount = 0; } - auto OpCode = - VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; + + unsigned Opcode; + if (VT.isFloatingPoint()) + Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL; + else + Opcode = SlideUp ? 
RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL; + if (!VT.isFloatingPoint()) V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V); - Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, + Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, V, Mask, VL); } if (UndefCount) { const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); - Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), - Vec, Offset, Mask, VL, Policy); + Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask, + VL); } return convertFromScalableVector(VT, Vec, DAG, Subtarget); } @@ -8193,6 +8258,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal); return DAG.getLogicalNOT(DL, SetCC, VT); } + // Lower (setugt X, 2047) as (setne (srl X, 11), 0). + if (CCVal == ISD::SETUGT && Imm == 2047) { + SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS, + DAG.getShiftAmountConstant(11, OpVT, DL)); + return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT), + ISD::SETNE); + } } // Not a constant we could handle, swap the operands and condition code to @@ -8815,7 +8887,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, reportFatalUsageError("Unsupported code model for lowering"); case CodeModel::Small: { // Generate a sequence for accessing addresses within the first 2 GiB of - // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). + // address space. + if (Subtarget.hasVendorXqcili()) { + // Use QC.E.LI to generate the address, as this is easier to relax than + // LUI/ADDI. + SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); + return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr); + } + + // This generates the pattern (addi (lui %hi(sym)) %lo(sym)). 
SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); @@ -9036,8 +9116,12 @@ static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS, return std::nullopt; } -static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { +static bool isSimm12Constant(SDValue V) { + return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12); +} + +static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { SDValue CondV = N->getOperand(0); SDValue TrueV = N->getOperand(1); SDValue FalseV = N->getOperand(2); @@ -9057,14 +9141,16 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV)); } + const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike(); + // (select c, 0, y) -> (c-1) & y - if (isNullConstant(TrueV)) { - SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV, - DAG.getAllOnesConstant(DL, VT)); + if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) { + SDValue Neg = + DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT)); return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV)); } // (select c, y, 0) -> -c & y - if (isNullConstant(FalseV)) { + if (isNullConstant(FalseV) && (!HasCZero || isSimm12Constant(TrueV))) { SDValue Neg = DAG.getNegative(CondV, DL, VT); return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV)); } @@ -9185,12 +9271,16 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV); } + // Try some other optimizations before falling back to generic lowering. 
+ if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget)) + return V; + // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ // nodes to implement the SELECT. Performing the lowering here allows for // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless // sequence or RISCVISD::SELECT_CC node (branch-based select). - if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) && - VT.isScalarInteger()) { + if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) { + // (select c, t, 0) -> (czero_eqz t, c) if (isNullConstant(FalseV)) return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV); @@ -9244,10 +9334,6 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV)); } - // Try some other optimizations before falling back to generic lowering. - if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) - return V; - // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1) // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2) if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) { @@ -9280,19 +9366,38 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } - const int TrueValCost = RISCVMatInt::getIntMatCost( - TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); - const int FalseValCost = RISCVMatInt::getIntMatCost( - FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); - bool IsCZERO_NEZ = TrueValCost <= FalseValCost; + // Use SHL/ADDI (and possible XORI) to avoid having to materialize + // a constant in register + if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) { + SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT); + SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2); + return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff); + } + if ((FalseVal - TrueVal).isPowerOf2() && 
TrueVal.isSignedIntN(12)) { + SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT); + CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0)); + SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2); + return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff); + } + + auto getCost = [&](const APInt &Delta, const APInt &Addend) { + const int DeltaCost = RISCVMatInt::getIntMatCost( + Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); + // Does the addend fold into an ADDI + if (Addend.isSignedIntN(12)) + return DeltaCost; + const int AddendCost = RISCVMatInt::getIntMatCost( + Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); + return AddendCost + DeltaCost; + }; + bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <= + getCost(TrueVal - FalseVal, FalseVal); SDValue LHSVal = DAG.getConstant( IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT); - SDValue RHSVal = - DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT); SDValue CMOV = DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ, DL, VT, LHSVal, CondV); - return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal); + return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? 
TrueV : FalseV); } // (select c, c1, t) -> (add (czero_nez t - c1, c), c1) @@ -9327,12 +9432,10 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode( ISD::OR, DL, VT, DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV), - DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); + DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV), + SDNodeFlags::Disjoint); } - if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) - return V; - if (Op.hasOneUse()) { unsigned UseOpc = Op->user_begin()->getOpcode(); if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) { @@ -10738,11 +10841,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); } case Intrinsic::riscv_mopr: - return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1), + return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); case Intrinsic::riscv_moprr: { - return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1), + return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } case Intrinsic::riscv_clmul: @@ -14877,7 +14980,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); SDValue Res = DAG.getNode( - RISCVISD::MOPR, DL, MVT::i64, NewOp, + RISCVISD::MOP_R, DL, MVT::i64, NewOp, DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); return; @@ -14890,7 +14993,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); SDValue Res = DAG.getNode( - RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1, + RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1, DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64)); 
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); return; @@ -15381,9 +15484,7 @@ static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, if (!Subtarget.hasConditionalMoveFusion()) { // (select cond, x, (and x, c)) has custom lowering with Zicond. - if ((!Subtarget.hasStdExtZicond() && - !Subtarget.hasVendorXVentanaCondOps()) || - N->getOpcode() != ISD::AND) + if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND) return SDValue(); // Maybe harmful when condition code has multiple use. @@ -16059,12 +16160,55 @@ static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0), Cond); - SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), - Cond); - SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1); + SDValue NewN1 = + DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond); + SDValue NewOr = + DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint); return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1)); } +// (xor X, (xor (and X, C2), Y)) +// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt) +// where C2 is a shifted mask with width = Width and shift = ShAmt +// qc_insb might become qc.insb or qc.insbi depending on the operands. 
+static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (!Subtarget.hasVendorXqcibm()) + return SDValue(); + + using namespace SDPatternMatch; + + SDValue Base, Inserted; + APInt CMask; + if (!sd_match(N, m_Xor(m_Value(Base), + m_OneUse(m_Xor(m_OneUse(m_And(m_Deferred(Base), + m_ConstInt(CMask))), + m_Value(Inserted)))))) + return SDValue(); + + if (N->getValueType(0) != MVT::i32) + return SDValue(); + + unsigned Width, ShAmt; + if (!CMask.isShiftedMask(ShAmt, Width)) + return SDValue(); + + // Check if all zero bits in CMask are also zero in Inserted + if (!DAG.MaskedValueIsZero(Inserted, ~CMask)) + return SDValue(); + + SDLoc DL(N); + + // `Inserted` needs to be right shifted before it is put into the + // instruction. + Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted, + DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL)); + + SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32), + DAG.getConstant(ShAmt, DL, MVT::i32)}; + return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops); +} + static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { SelectionDAG &DAG = DCI.DAG; @@ -16108,8 +16252,8 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1); - SDValue And = DAG.getNOT(DL, Shl, MVT::i64); - return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); + SDValue Not = DAG.getNOT(DL, Shl, MVT::i64); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not); } // fold (xor (sllw 1, x), -1) -> (rolw ~1, x) @@ -16137,6 +16281,9 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, } } + if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget)) + return V; + if (SDValue V = combineBinOpToReduce(N, DAG, 
Subtarget)) return V; if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) @@ -16590,10 +16737,6 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, DAG.getConstant(0, DL, XLenVT), CC); } -// Replace (seteq (i64 (and X, 0xffffffff)), C1) with -// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from -// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg -// can become a sext.w instead of a shift pair. static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { @@ -16613,20 +16756,44 @@ static SDValue performSETCCCombine(SDNode *N, combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget)) return V; - // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI. - if (DCI.isAfterLegalizeDAG() && isNullConstant(N1) && + if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) && N0.getOpcode() == ISD::AND && N0.hasOneUse() && isa<ConstantSDNode>(N0.getOperand(1))) { - const APInt &AndRHSC = - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - if (!isInt<12>(AndRHSC.getSExtValue()) && AndRHSC.isNegatedPowerOf2()) { + const APInt &AndRHSC = N0.getConstantOperandAPInt(1); + // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI. + if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) && + AndRHSC.isNegatedPowerOf2()) { unsigned ShiftBits = AndRHSC.countr_zero(); - SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0), - DAG.getConstant(ShiftBits, dl, VT)); + SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0), + DAG.getConstant(ShiftBits, dl, OpVT)); return DAG.getSetCC(dl, VT, Shift, N1, Cond); } + + // Similar to above but handling the lower 32 bits by using sraiw. Allow + // comparing with constants other than 0 if the constant can be folded into + // addi or xori after shifting. 
+ uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue(); + uint64_t AndRHSInt = AndRHSC.getZExtValue(); + if (OpVT == MVT::i64 && AndRHSInt <= 0xffffffff && + isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) { + unsigned ShiftBits = llvm::countr_zero(AndRHSInt); + int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits; + if (NewC >= -2048 && NewC <= 2048) { + SDValue SExt = + DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0), + DAG.getValueType(MVT::i32)); + SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt, + DAG.getConstant(ShiftBits, dl, OpVT)); + return DAG.getSetCC(dl, VT, Shift, + DAG.getSignedConstant(NewC, dl, OpVT), Cond); + } + } } + // Replace (seteq (i64 (and X, 0xffffffff)), C1) with + // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from + // bit 31. Same for setne. C1' may be cheaper to materialize and the + // sext_inreg can become a sext.w instead of a shift pair. if (OpVT != MVT::i64 || !Subtarget.is64Bit()) return SDValue(); @@ -18674,7 +18841,7 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, break; } - if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal)) + if (!TrueVal.hasOneUse()) return SDValue(); unsigned OpToFold; @@ -18746,6 +18913,10 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) { if (Cond->getOperand(0) != CountZeroesArgument) return SDValue(); + unsigned BitWidth = CountZeroes.getValueSizeInBits(); + if (!isPowerOf2_32(BitWidth)) + return SDValue(); + if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes), CountZeroes.getValueType(), CountZeroesArgument); @@ -18754,7 +18925,6 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) { CountZeroes.getValueType(), CountZeroesArgument); } - unsigned BitWidth = CountZeroes.getValueSizeInBits(); SDValue BitWidthMinusOne = DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType()); @@ -18778,7 +18948,7 @@ 
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate // BEXTI, where C is power of 2. if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() && - (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) { + (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) { SDValue LHS = Cond.getOperand(0); SDValue RHS = Cond.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); @@ -18953,6 +19123,7 @@ static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI) { + using namespace SDPatternMatch; // Note: We intentionally do not check the legality of the reduction type. // We want to handle the m4/m8 *src* types, and thus need to let illegal // intermediate types flow through here. @@ -18960,11 +19131,10 @@ static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, !InVec.getValueType().getVectorElementCount().isKnownMultipleOf(4)) return SDValue(); - // Recurse through adds (since generic dag canonicalizes to that - // form). TODO: Handle disjoint or here. - if (InVec->getOpcode() == ISD::ADD) { - SDValue A = InVec.getOperand(0); - SDValue B = InVec.getOperand(1); + // Recurse through adds/disjoint ors (since generic dag canonicalizes to that + // form). 
+ SDValue A, B; + if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) { SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI); SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI); if (AOpt || BOpt) { @@ -19001,12 +19171,9 @@ static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, // mul (zext a, zext b) -> partial_reduce_umla 0, a, b // mul (sext a, zext b) -> partial_reduce_ssmla 0, a, b // mul (zext a, sext b) -> partial_reduce_smla 0, b, a (swapped) - if (InVec.getOpcode() != ISD::MUL) + if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B)))) return SDValue(); - SDValue A = InVec.getOperand(0); - SDValue B = InVec.getOperand(1); - if (!ISD::isExtOpcode(A.getOpcode())) return SDValue(); @@ -20081,6 +20248,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return V; break; case ISD::FMUL: { + using namespace SDPatternMatch; + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue X, Y; + // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see + // hoistFNegAboveFMulFDiv. + // Undo this and sink the fneg so we match more fmsub/fnmadd patterns. 
+ if (sd_match(N, m_FMul(m_Value(X), m_OneUse(m_FNeg(m_Value(Y)))))) + return DAG.getNode(ISD::FNEG, DL, VT, + DAG.getNode(ISD::FMUL, DL, VT, X, Y)); + // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -20091,13 +20269,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0)); if (!C || !C->getValueAPF().isExactlyValue(+1.0)) return SDValue(); - EVT VT = N->getValueType(0); if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT)) return SDValue(); SDValue Sign = N0->getOperand(1); if (Sign.getValueType() != VT) return SDValue(); - return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1)); + return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1)); } case ISD::FADD: case ISD::UMAX: @@ -20381,9 +20558,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, VT, DL, MGN->getChain(), BasePtr, DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(), EVL, MGN->getMemOperand()); - SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(), - StridedLoad, MGN->getPassThru(), EVL); - return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)}, + SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad, + MGN->getPassThru()); + return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)}, DL); } } @@ -21060,6 +21237,38 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return N->getOperand(0); break; } + case RISCVISD::VSLIDE1UP_VL: + case RISCVISD::VFSLIDE1UP_VL: { + using namespace SDPatternMatch; + SDValue SrcVec; + SDLoc DL(N); + MVT VT = N->getSimpleValueType(0); + // If the scalar we're sliding in was extracted from the first element of a + // vector, we can use that vector as the passthru in a normal slideup of 1. + // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s). 
+ if (!N->getOperand(0).isUndef() || + !sd_match(N->getOperand(2), + m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()), + m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec))))) + break; + + MVT SrcVecVT = SrcVec.getSimpleValueType(); + if (SrcVecVT.getVectorElementType() != VT.getVectorElementType()) + break; + // Adapt the value type of source vector. + if (SrcVecVT.isFixedLengthVector()) { + SrcVecVT = getContainerForFixedLengthVector(SrcVecVT); + SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget); + } + if (SrcVecVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) + SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0); + else + SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0); + + return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1), + DAG.getConstant(1, DL, XLenVT), N->getOperand(3), + N->getOperand(4)); + } } return SDValue(); @@ -21120,9 +21329,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift( auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); - // Bail if we might break a sh{1,2,3}add pattern. - if ((Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 && - C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3 && N->hasOneUse() && + bool IsShXAdd = + (Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 && + C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3; + bool IsQCShlAdd = Subtarget.hasVendorXqciac() && C2 && + C2->getZExtValue() >= 4 && C2->getZExtValue() <= 31; + + // Bail if we might break a sh{1,2,3}add/qc.shladd pattern. 
+ if ((IsShXAdd || IsQCShlAdd) && N->hasOneUse() && N->user_begin()->getOpcode() == ISD::ADD && !isUsedByLdSt(*N->user_begin(), nullptr) && !isa<ConstantSDNode>(N->user_begin()->getOperand(1))) @@ -21346,6 +21560,24 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known = Known.sext(BitWidth); break; } + case RISCVISD::SRLW: { + KnownBits Known2; + Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32)); + // Restore the original width by sign extending. + Known = Known.sext(BitWidth); + break; + } + case RISCVISD::SRAW: { + KnownBits Known2; + Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32)); + // Restore the original width by sign extending. + Known = Known.sext(BitWidth); + break; + } case RISCVISD::CTZW: { KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros(); @@ -21451,8 +21683,16 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( if (Tmp < 33) return 1; return 33; } + case RISCVISD::SRAW: { + unsigned Tmp = + DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + // sraw produces at least 33 sign bits. If the input already has more than + // 33 sign bits, sraw will preserve them. + // TODO: A more precise answer could be calculated depending on known bits + // in the shift amount. 
+ return std::max(Tmp, 33U); + } case RISCVISD::SLLW: - case RISCVISD::SRAW: case RISCVISD::SRLW: case RISCVISD::DIVW: case RISCVISD::DIVUW: @@ -21463,9 +21703,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( case RISCVISD::FCVT_WU_RV64: case RISCVISD::STRICT_FCVT_W_RV64: case RISCVISD::STRICT_FCVT_WU_RV64: - // TODO: As the result is sign-extended, this is conservatively correct. A - // more precise answer could be calculated for SRAW depending on known - // bits in the shift amount. + // TODO: As the result is sign-extended, this is conservatively correct. return 33; case RISCVISD::VMV_X_S: { // The number of sign bits of the scalar result is computed by obtaining the @@ -21548,6 +21786,14 @@ bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode( // TODO: Add more target nodes. switch (Op.getOpcode()) { + case RISCVISD::SLLW: + case RISCVISD::SRAW: + case RISCVISD::SRLW: + case RISCVISD::RORW: + case RISCVISD::ROLW: + // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift + // amount is in bounds. + return false; case RISCVISD::SELECT_CC: // Integer comparisons cannot create poison. assert(Op.getOperand(0).getValueType().isInteger() && @@ -24683,7 +24929,7 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest( EVT VT, const APInt &AndMask) const { - if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) + if (Subtarget.hasCZEROLike()) return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024); return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask); } |
