summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/RISCV/RISCVISelLowering.cpp')
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp424
1 files changed, 335 insertions, 89 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a33224845e2b..a68a3c14dc41 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2173,7 +2173,7 @@ bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
// on the basis that it's possible the sinking+duplication of the AND in
// CodeGenPrepare triggered by this hook wouldn't decrease the instruction
// count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
- if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
+ if (!Subtarget.hasBEXTILike())
return false;
ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
@@ -3744,9 +3744,11 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
// different
// FIXME: Support i1 vectors, maybe by promoting to i8?
MVT EltTy = VT.getVectorElementType();
+ if (EltTy == MVT::i1 ||
+ !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
+ return SDValue();
MVT SrcVT = Src.getSimpleValueType();
- if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType() ||
- !DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
+ if (EltTy != SrcVT.getVectorElementType())
return SDValue();
SDValue Idx = SplatVal.getOperand(1);
// The index must be a legal type.
@@ -4518,41 +4520,104 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
+ // General case: splat the first operand and slide other operands down one
+ // by one to form a vector. Alternatively, if every operand is an
+ // extraction from element 0 of a vector, we use that vector from the last
+ // extraction as the start value and slide up instead of slide down. Such that
+ // (1) we can avoid the initial splat (2) we can turn those vslide1up into
+ // vslideup of 1 later and eliminate the vector to scalar movement, which is
+ // something we cannot do with vslide1down/vslidedown.
+ // Of course, using vslide1up/vslideup might increase the register pressure,
+ // and that's why we conservatively limit to cases where every operand is an
+ // extraction from the first element.
+ SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
+ SDValue EVec;
+ bool SlideUp = false;
+ auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
+ SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
+ if (SlideUp)
+ return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
+ Mask, VL, Policy);
+ return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
+ Mask, VL, Policy);
+ };
+
+ // The reason we don't use all_of here is because we're also capturing EVec
+ // from the last non-undef operand. If the std::execution_policy of the
+ // underlying std::all_of is anything but std::sequenced_policy we might
+ // capture the wrong EVec.
+ for (SDValue V : Operands) {
+ using namespace SDPatternMatch;
+ SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
+ if (!SlideUp)
+ break;
+ }
+
+ if (SlideUp) {
+ MVT EVecContainerVT = EVec.getSimpleValueType();
+ // Make sure the original vector has scalable vector type.
+ if (EVecContainerVT.isFixedLengthVector()) {
+ EVecContainerVT =
+ getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
+ EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
+ }
+
+ // Adapt EVec's type into ContainerVT.
+ if (EVecContainerVT.getVectorMinNumElements() <
+ ContainerVT.getVectorMinNumElements())
+ EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
+ else
+ EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
+
+ // Reverse the elements as we're going to slide up from the last element.
+ std::reverse(Operands.begin(), Operands.end());
+ }
+
SDValue Vec;
UndefCount = 0;
- for (SDValue V : Op->ops()) {
+ for (SDValue V : Operands) {
if (V.isUndef()) {
UndefCount++;
continue;
}
- // Start our sequence with a TA splat in the hopes that hardware is able to
- // recognize there's no dependency on the prior value of our temporary
- // register.
+ // Start our sequence with either a TA splat or extract source in the
+ // hopes that hardware is able to recognize there's no dependency on the
+ // prior value of our temporary register.
if (!Vec) {
- Vec = DAG.getSplatVector(VT, DL, V);
- Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ if (SlideUp) {
+ Vec = EVec;
+ } else {
+ Vec = DAG.getSplatVector(VT, DL, V);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ }
+
UndefCount = 0;
continue;
}
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
- Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
- Vec, Offset, Mask, VL, Policy);
+ Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
+ VL);
UndefCount = 0;
}
- auto OpCode =
- VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+
+ unsigned Opcode;
+ if (VT.isFloatingPoint())
+ Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
+ else
+ Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
+
if (!VT.isFloatingPoint())
V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
- Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+ Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
V, Mask, VL);
}
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
- Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
- Vec, Offset, Mask, VL, Policy);
+ Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
+ VL);
}
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
@@ -8193,6 +8258,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
return DAG.getLogicalNOT(DL, SetCC, VT);
}
+ // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
+ if (CCVal == ISD::SETUGT && Imm == 2047) {
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
+ DAG.getShiftAmountConstant(11, OpVT, DL));
+ return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
+ ISD::SETNE);
+ }
}
// Not a constant we could handle, swap the operands and condition code to
@@ -8815,7 +8887,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
reportFatalUsageError("Unsupported code model for lowering");
case CodeModel::Small: {
// Generate a sequence for accessing addresses within the first 2 GiB of
- // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
+ // address space.
+ if (Subtarget.hasVendorXqcili()) {
+ // Use QC.E.LI to generate the address, as this is easier to relax than
+ // LUI/ADDI.
+ SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
+ return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
+ }
+
+ // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
@@ -9036,8 +9116,12 @@ static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
return std::nullopt;
}
-static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+static bool isSimm12Constant(SDValue V) {
+ return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
+}
+
+static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
SDValue CondV = N->getOperand(0);
SDValue TrueV = N->getOperand(1);
SDValue FalseV = N->getOperand(2);
@@ -9057,14 +9141,16 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
}
+ const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
+
// (select c, 0, y) -> (c-1) & y
- if (isNullConstant(TrueV)) {
- SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
- DAG.getAllOnesConstant(DL, VT));
+ if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
+ SDValue Neg =
+ DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
}
// (select c, y, 0) -> -c & y
- if (isNullConstant(FalseV)) {
+ if (isNullConstant(FalseV) && (!HasCZero || isSimm12Constant(TrueV))) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
}
@@ -9185,12 +9271,16 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
}
+ // Try some other optimizations before falling back to generic lowering.
+ if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
+ return V;
+
// When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
// nodes to implement the SELECT. Performing the lowering here allows for
// greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
// sequence or RISCVISD::SELECT_CC node (branch-based select).
- if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
- VT.isScalarInteger()) {
+ if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
+
// (select c, t, 0) -> (czero_eqz t, c)
if (isNullConstant(FalseV))
return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
@@ -9244,10 +9334,6 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
}
- // Try some other optimizations before falling back to generic lowering.
- if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
- return V;
-
// (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
// (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
@@ -9280,19 +9366,38 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
- const int TrueValCost = RISCVMatInt::getIntMatCost(
- TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
- const int FalseValCost = RISCVMatInt::getIntMatCost(
- FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
- bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
+ // Use SHL/ADDI (and possible XORI) to avoid having to materialize
+ // a constant in register
+ if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
+ SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
+ SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
+ return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
+ }
+ if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
+ SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
+ CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
+ SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
+ return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
+ }
+
+ auto getCost = [&](const APInt &Delta, const APInt &Addend) {
+ const int DeltaCost = RISCVMatInt::getIntMatCost(
+ Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
+ // Does the addend fold into an ADDI
+ if (Addend.isSignedIntN(12))
+ return DeltaCost;
+ const int AddendCost = RISCVMatInt::getIntMatCost(
+ Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
+ return AddendCost + DeltaCost;
+ };
+ bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
+ getCost(TrueVal - FalseVal, FalseVal);
SDValue LHSVal = DAG.getConstant(
IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
- SDValue RHSVal =
- DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
SDValue CMOV =
DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
DL, VT, LHSVal, CondV);
- return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
+ return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
}
// (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
@@ -9327,12 +9432,10 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(
ISD::OR, DL, VT,
DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
- DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
+ DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
+ SDNodeFlags::Disjoint);
}
- if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
- return V;
-
if (Op.hasOneUse()) {
unsigned UseOpc = Op->user_begin()->getOpcode();
if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
@@ -10738,11 +10841,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
}
case Intrinsic::riscv_mopr:
- return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
+ return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2));
case Intrinsic::riscv_moprr: {
- return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
+ return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
case Intrinsic::riscv_clmul:
@@ -14877,7 +14980,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue NewOp =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue Res = DAG.getNode(
- RISCVISD::MOPR, DL, MVT::i64, NewOp,
+ RISCVISD::MOP_R, DL, MVT::i64, NewOp,
DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
@@ -14890,7 +14993,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue NewOp1 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
SDValue Res = DAG.getNode(
- RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
+ RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
@@ -15381,9 +15484,7 @@ static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
if (!Subtarget.hasConditionalMoveFusion()) {
// (select cond, x, (and x, c)) has custom lowering with Zicond.
- if ((!Subtarget.hasStdExtZicond() &&
- !Subtarget.hasVendorXVentanaCondOps()) ||
- N->getOpcode() != ISD::AND)
+ if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
return SDValue();
// Maybe harmful when condition code has multiple use.
@@ -16059,12 +16160,55 @@ static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
Cond);
- SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
- Cond);
- SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
+ SDValue NewN1 =
+ DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
+ SDValue NewOr =
+ DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
}
+// (xor X, (xor (and X, C2), Y))
+// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
+// where C2 is a shifted mask with width = Width and shift = ShAmt
+// qc_insb might become qc.insb or qc.insbi depending on the operands.
+static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (!Subtarget.hasVendorXqcibm())
+ return SDValue();
+
+ using namespace SDPatternMatch;
+
+ SDValue Base, Inserted;
+ APInt CMask;
+ if (!sd_match(N, m_Xor(m_Value(Base),
+ m_OneUse(m_Xor(m_OneUse(m_And(m_Deferred(Base),
+ m_ConstInt(CMask))),
+ m_Value(Inserted))))))
+ return SDValue();
+
+ if (N->getValueType(0) != MVT::i32)
+ return SDValue();
+
+ unsigned Width, ShAmt;
+ if (!CMask.isShiftedMask(ShAmt, Width))
+ return SDValue();
+
+ // Check if all zero bits in CMask are also zero in Inserted
+ if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // `Inserted` needs to be right shifted before it is put into the
+ // instruction.
+ Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
+ DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
+
+ SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
+ DAG.getConstant(ShAmt, DL, MVT::i32)};
+ return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
+}
+
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
@@ -16108,8 +16252,8 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
- SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
- return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
+ SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
}
// fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
@@ -16137,6 +16281,9 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
+ return V;
+
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
@@ -16590,10 +16737,6 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
DAG.getConstant(0, DL, XLenVT), CC);
}
-// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
-// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
-// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
-// can become a sext.w instead of a shift pair.
static SDValue performSETCCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
@@ -16613,20 +16756,44 @@ static SDValue performSETCCCombine(SDNode *N,
combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
return V;
- // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI.
- if (DCI.isAfterLegalizeDAG() && isNullConstant(N1) &&
+ if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
- const APInt &AndRHSC =
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- if (!isInt<12>(AndRHSC.getSExtValue()) && AndRHSC.isNegatedPowerOf2()) {
+ const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
+ // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
+ if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
+ AndRHSC.isNegatedPowerOf2()) {
unsigned ShiftBits = AndRHSC.countr_zero();
- SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0),
- DAG.getConstant(ShiftBits, dl, VT));
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, dl, OpVT));
return DAG.getSetCC(dl, VT, Shift, N1, Cond);
}
+
+ // Similar to above but handling the lower 32 bits by using sraiw. Allow
+ // comparing with constants other than 0 if the constant can be folded into
+ // addi or xori after shifting.
+ uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
+ uint64_t AndRHSInt = AndRHSC.getZExtValue();
+ if (OpVT == MVT::i64 && AndRHSInt <= 0xffffffff &&
+ isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
+ unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
+ int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
+ if (NewC >= -2048 && NewC <= 2048) {
+ SDValue SExt =
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
+ DAG.getValueType(MVT::i32));
+ SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
+ DAG.getConstant(ShiftBits, dl, OpVT));
+ return DAG.getSetCC(dl, VT, Shift,
+ DAG.getSignedConstant(NewC, dl, OpVT), Cond);
+ }
+ }
}
+ // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
+ // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
+ // bit 31. Same for setne. C1' may be cheaper to materialize and the
+ // sext_inreg can become a sext.w instead of a shift pair.
if (OpVT != MVT::i64 || !Subtarget.is64Bit())
return SDValue();
@@ -18674,7 +18841,7 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
break;
}
- if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
+ if (!TrueVal.hasOneUse())
return SDValue();
unsigned OpToFold;
@@ -18746,6 +18913,10 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
if (Cond->getOperand(0) != CountZeroesArgument)
return SDValue();
+ unsigned BitWidth = CountZeroes.getValueSizeInBits();
+ if (!isPowerOf2_32(BitWidth))
+ return SDValue();
+
if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
CountZeroes.getValueType(), CountZeroesArgument);
@@ -18754,7 +18925,6 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
CountZeroes.getValueType(), CountZeroesArgument);
}
- unsigned BitWidth = CountZeroes.getValueSizeInBits();
SDValue BitWidthMinusOne =
DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
@@ -18778,7 +18948,7 @@ static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
// Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
// BEXTI, where C is power of 2.
if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
- (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
+ (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -18953,6 +19123,7 @@ static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
SelectionDAG &DAG,
const RISCVSubtarget &Subtarget,
const RISCVTargetLowering &TLI) {
+ using namespace SDPatternMatch;
// Note: We intentionally do not check the legality of the reduction type.
// We want to handle the m4/m8 *src* types, and thus need to let illegal
// intermediate types flow through here.
@@ -18960,11 +19131,10 @@ static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
!InVec.getValueType().getVectorElementCount().isKnownMultipleOf(4))
return SDValue();
- // Recurse through adds (since generic dag canonicalizes to that
- // form). TODO: Handle disjoint or here.
- if (InVec->getOpcode() == ISD::ADD) {
- SDValue A = InVec.getOperand(0);
- SDValue B = InVec.getOperand(1);
+ // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
+ // form).
+ SDValue A, B;
+ if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
if (AOpt || BOpt) {
@@ -19001,12 +19171,9 @@ static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
// mul (zext a, zext b) -> partial_reduce_umla 0, a, b
// mul (sext a, zext b) -> partial_reduce_ssmla 0, a, b
// mul (zext a, sext b) -> partial_reduce_smla 0, b, a (swapped)
- if (InVec.getOpcode() != ISD::MUL)
+ if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
return SDValue();
- SDValue A = InVec.getOperand(0);
- SDValue B = InVec.getOperand(1);
-
if (!ISD::isExtOpcode(A.getOpcode()))
return SDValue();
@@ -20081,6 +20248,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return V;
break;
case ISD::FMUL: {
+ using namespace SDPatternMatch;
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue X, Y;
+ // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
+ // hoistFNegAboveFMulFDiv.
+ // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
+ if (sd_match(N, m_FMul(m_Value(X), m_OneUse(m_FNeg(m_Value(Y))))))
+ return DAG.getNode(ISD::FNEG, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, X, Y));
+
// fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -20091,13 +20269,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
if (!C || !C->getValueAPF().isExactlyValue(+1.0))
return SDValue();
- EVT VT = N->getValueType(0);
if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
return SDValue();
SDValue Sign = N0->getOperand(1);
if (Sign.getValueType() != VT)
return SDValue();
- return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
+ return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
}
case ISD::FADD:
case ISD::UMAX:
@@ -20381,9 +20558,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
VT, DL, MGN->getChain(), BasePtr,
DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
EVL, MGN->getMemOperand());
- SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
- StridedLoad, MGN->getPassThru(), EVL);
- return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
+ SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
+ MGN->getPassThru());
+ return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
DL);
}
}
@@ -21060,6 +21237,38 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
break;
}
+ case RISCVISD::VSLIDE1UP_VL:
+ case RISCVISD::VFSLIDE1UP_VL: {
+ using namespace SDPatternMatch;
+ SDValue SrcVec;
+ SDLoc DL(N);
+ MVT VT = N->getSimpleValueType(0);
+ // If the scalar we're sliding in was extracted from the first element of a
+ // vector, we can use that vector as the passthru in a normal slideup of 1.
+ // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
+ if (!N->getOperand(0).isUndef() ||
+ !sd_match(N->getOperand(2),
+ m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
+ m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
+ break;
+
+ MVT SrcVecVT = SrcVec.getSimpleValueType();
+ if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
+ break;
+ // Adapt the value type of source vector.
+ if (SrcVecVT.isFixedLengthVector()) {
+ SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
+ SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
+ }
+ if (SrcVecVT.getVectorMinNumElements() < VT.getVectorMinNumElements())
+ SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
+ else
+ SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
+
+ return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
+ DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
+ N->getOperand(4));
+ }
}
return SDValue();
@@ -21120,9 +21329,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
- // Bail if we might break a sh{1,2,3}add pattern.
- if ((Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
- C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3 && N->hasOneUse() &&
+ bool IsShXAdd =
+ (Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
+ C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3;
+ bool IsQCShlAdd = Subtarget.hasVendorXqciac() && C2 &&
+ C2->getZExtValue() >= 4 && C2->getZExtValue() <= 31;
+
+ // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
+ if ((IsShXAdd || IsQCShlAdd) && N->hasOneUse() &&
N->user_begin()->getOpcode() == ISD::ADD &&
!isUsedByLdSt(*N->user_begin(), nullptr) &&
!isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
@@ -21346,6 +21560,24 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known = Known.sext(BitWidth);
break;
}
+ case RISCVISD::SRLW: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32));
+ // Restore the original width by sign extending.
+ Known = Known.sext(BitWidth);
+ break;
+ }
+ case RISCVISD::SRAW: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
+ // Restore the original width by sign extending.
+ Known = Known.sext(BitWidth);
+ break;
+ }
case RISCVISD::CTZW: {
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
@@ -21451,8 +21683,16 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
if (Tmp < 33) return 1;
return 33;
}
+ case RISCVISD::SRAW: {
+ unsigned Tmp =
+ DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // sraw produces at least 33 sign bits. If the input already has more than
+ // 33 sign bits sraw, will preserve them.
+ // TODO: A more precise answer could be calculated depending on known bits
+ // in the shift amount.
+ return std::max(Tmp, 33U);
+ }
case RISCVISD::SLLW:
- case RISCVISD::SRAW:
case RISCVISD::SRLW:
case RISCVISD::DIVW:
case RISCVISD::DIVUW:
@@ -21463,9 +21703,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::FCVT_WU_RV64:
case RISCVISD::STRICT_FCVT_W_RV64:
case RISCVISD::STRICT_FCVT_WU_RV64:
- // TODO: As the result is sign-extended, this is conservatively correct. A
- // more precise answer could be calculated for SRAW depending on known
- // bits in the shift amount.
+ // TODO: As the result is sign-extended, this is conservatively correct.
return 33;
case RISCVISD::VMV_X_S: {
// The number of sign bits of the scalar result is computed by obtaining the
@@ -21548,6 +21786,14 @@ bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
// TODO: Add more target nodes.
switch (Op.getOpcode()) {
+ case RISCVISD::SLLW:
+ case RISCVISD::SRAW:
+ case RISCVISD::SRLW:
+ case RISCVISD::RORW:
+ case RISCVISD::ROLW:
+ // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
+ // amount is bounds.
+ return false;
case RISCVISD::SELECT_CC:
// Integer comparisons cannot create poison.
assert(Op.getOperand(0).getValueType().isInteger() &&
@@ -24683,7 +24929,7 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
EVT VT, const APInt &AndMask) const {
- if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
+ if (Subtarget.hasCZEROLike())
return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
}