summary | refs | log | tree | commit | diff
path: root/llvm/lib/CodeGen/SelectionDAG
diff options
context:
space:
mode:
author: Mingming Liu <mingmingl@google.com> 2025-09-10 15:25:31 -0700
committer: GitHub <noreply@github.com> 2025-09-10 15:25:31 -0700
commit: 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch)
tree: 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/lib/CodeGen/SelectionDAG
parent: 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff)
parent: b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff)
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format [branch: users/mingmingl-llvm/samplefdo-profile-format]
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 113
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp 11
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 5
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp 51
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 66
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp 2
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 60
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 102
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h 4
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp 4
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 10
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp 93
13 files changed, 357 insertions, 166 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 27b5a0d37b67..d130efe96b56 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4710,7 +4710,10 @@ template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
- return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap());
+ // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1.
+ return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags);
}
}
@@ -9998,13 +10001,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
}
- // fold (not (neg x)) -> (add X, -1)
- // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
- // Y is a constant or the subtract has a single use.
- if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
- isNullConstant(N0.getOperand(0))) {
- return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
- DAG.getAllOnesConstant(DL, VT));
+ // fold (not (sub Y, X)) -> (add X, ~Y) if Y is a constant
+ if (N0.getOpcode() == ISD::SUB && isAllOnesConstant(N1)) {
+ SDValue Y = N0.getOperand(0);
+ SDValue X = N0.getOperand(1);
+
+ if (auto *YConst = dyn_cast<ConstantSDNode>(Y)) {
+ APInt NotYValue = ~YConst->getAPIntValue();
+ SDValue NotY = DAG.getConstant(NotYValue, DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, X, NotY, N->getFlags());
+ }
}
// fold (not (add X, -1)) -> (neg X)
@@ -11089,38 +11095,43 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
}
- // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
- // (and (srl x, (sub c2, c1), MASK)
- if (N0.getOpcode() == ISD::SHL &&
- (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
- TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
- auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
- ConstantSDNode *RHS) {
- const APInt &LHSC = LHS->getAPIntValue();
- const APInt &RHSC = RHS->getAPIntValue();
- return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
- LHSC.getZExtValue() <= RHSC.getZExtValue();
- };
- if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
- /*AllowUndefs*/ false,
- /*AllowTypeMismatch*/ true)) {
- SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
- SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
- SDValue Mask = DAG.getAllOnesConstant(DL, VT);
- Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
- Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
- SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
- return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
- }
- if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
- /*AllowUndefs*/ false,
- /*AllowTypeMismatch*/ true)) {
- SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
- SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
- SDValue Mask = DAG.getAllOnesConstant(DL, VT);
- Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
- SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
- return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ if (N0.getOpcode() == ISD::SHL) {
+ // fold (srl (shl nuw x, c), c) -> x
+ if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap())
+ return N0.getOperand(0);
+
+ // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or
+ // (and (srl x, (sub c2, c1), MASK)
+ if ((N0.getOperand(1) == N1 || N0->hasOneUse()) &&
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+ auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ const APInt &LHSC = LHS->getAPIntValue();
+ const APInt &RHSC = RHS->getAPIntValue();
+ return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
+ LHSC.getZExtValue() <= RHSC.getZExtValue();
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
+ Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ }
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ }
}
}
@@ -15137,7 +15148,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return foldedExt;
} else if (ISD::isNON_EXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
bool DoXform = true;
SmallVector<SDNode *, 4> SetCCs;
if (!N0.hasOneUse())
@@ -16309,7 +16320,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
- return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
+ SDNodeFlags Flags;
+ // Propagate nuw for sub.
+ if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() &&
+ DAG.MaskedValueIsZero(
+ N0->getOperand(0),
+ APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
+ VT.getScalarSizeInBits())))
+ Flags.setNoUnsignedWrap(true);
+ return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags);
}
}
break;
@@ -16788,6 +16807,8 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
// If we have frozen and unfrozen users of N0, update so everything uses N.
if (!N0.isUndef() && !N0.hasOneUse()) {
SDValue FrozenN0(N, 0);
+ // Unfreeze all uses of N to avoid double deleting N from the CSE map.
+ DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
// ReplaceAllUsesOfValueWith will have also updated the use in N, thus
// creating a cycle in a DAG. Let's undo that by mutating the freeze.
@@ -19346,13 +19367,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// MachineBasicBlock CFG, which is awkward.
// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
- // on the target.
+ // on the target, also copy fast math flags.
if (N1.getOpcode() == ISD::SETCC &&
TLI.isOperationLegalOrCustom(ISD::BR_CC,
N1.getOperand(0).getValueType())) {
- return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
- Chain, N1.getOperand(2),
- N1.getOperand(0), N1.getOperand(1), N2);
+ return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain,
+ N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2,
+ N1->getFlags());
}
if (N1.hasOneUse()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 1a63518ab37a..861f76e93f2c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -238,7 +238,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
// Create the result registers for this node and add the result regs to
// the machine instruction.
- if (VRBase == 0) {
+ if (!VRBase) {
assert(RC && "Isn't a register operand!");
VRBase = MRI->createVirtualRegister(RC);
MIB.addReg(VRBase, RegState::Define);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 90d62e6da8e9..9e85f08abb76 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -324,6 +324,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_VP_REDUCE(N);
break;
+ case ISD::LOOP_DEPENDENCE_WAR_MASK:
+ case ISD::LOOP_DEPENDENCE_RAW_MASK:
+ Res = PromoteIntRes_LOOP_DEPENDENCE_MASK(N);
+ break;
+
case ISD::FREEZE:
Res = PromoteIntRes_FREEZE(N);
break;
@@ -374,6 +379,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N,
return GetPromotedInteger(Op);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NewVT, N->ops());
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
// Sign-extend the new bits, and continue the assertion.
SDValue Op = SExtPromotedInteger(N->getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 65fd863e55ac..586c3411791f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -382,6 +382,7 @@ private:
SDValue PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(SDNode *N);
SDValue PromoteIntRes_GET_ACTIVE_LANE_MASK(SDNode *N);
SDValue PromoteIntRes_PARTIAL_REDUCE_MLA(SDNode *N);
+ SDValue PromoteIntRes_LOOP_DEPENDENCE_MASK(SDNode *N);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -436,6 +437,7 @@ private:
SDValue PromoteIntOp_VECTOR_FIND_LAST_ACTIVE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_GET_ACTIVE_LANE_MASK(SDNode *N);
SDValue PromoteIntOp_PARTIAL_REDUCE_MLA(SDNode *N);
+ SDValue PromoteIntOp_LOOP_DEPENDENCE_MASK(SDNode *N, unsigned OpNo);
void SExtOrZExtPromotedOperands(SDValue &LHS, SDValue &RHS);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -868,6 +870,7 @@ private:
// Vector Result Scalarization: <1 x ty> -> ty.
void ScalarizeVectorResult(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue ScalarizeVecRes_LOOP_DEPENDENCE_MASK(SDNode *N);
SDValue ScalarizeVecRes_BinOp(SDNode *N);
SDValue ScalarizeVecRes_CMP(SDNode *N);
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
@@ -964,6 +967,7 @@ private:
void SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOOP_DEPENDENCE_MASK(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -1070,6 +1074,7 @@ private:
SDValue WidenVecRes_ADDRSPACECAST(SDNode *N);
SDValue WidenVecRes_AssertZext(SDNode* N);
SDValue WidenVecRes_BITCAST(SDNode* N);
+ SDValue WidenVecRes_LOOP_DEPENDENCE_MASK(SDNode *N);
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2ca98958fde0..8e423c4f83b3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -138,6 +138,7 @@ class VectorLegalizer {
SDValue ExpandVP_FNEG(SDNode *Node);
SDValue ExpandVP_FABS(SDNode *Node);
SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
+ SDValue ExpandLOOP_DEPENDENCE_MASK(SDNode *N);
SDValue ExpandSELECT(SDNode *Node);
std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
SDValue ExpandStore(SDNode *N);
@@ -475,6 +476,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::VECTOR_COMPRESS:
case ISD::SCMP:
case ISD::UCMP:
+ case ISD::LOOP_DEPENDENCE_WAR_MASK:
+ case ISD::LOOP_DEPENDENCE_RAW_MASK:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
case ISD::SMULFIX:
@@ -1291,6 +1294,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::UCMP:
Results.push_back(TLI.expandCMP(Node, DAG));
return;
+ case ISD::LOOP_DEPENDENCE_WAR_MASK:
+ case ISD::LOOP_DEPENDENCE_RAW_MASK:
+ Results.push_back(ExpandLOOP_DEPENDENCE_MASK(Node));
+ return;
case ISD::FADD:
case ISD::FMUL:
@@ -1796,6 +1803,50 @@ SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
}
+SDValue VectorLegalizer::ExpandLOOP_DEPENDENCE_MASK(SDNode *N) {
+ SDLoc DL(N);
+ SDValue SourceValue = N->getOperand(0);
+ SDValue SinkValue = N->getOperand(1);
+ SDValue EltSize = N->getOperand(2);
+
+ bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;
+ EVT VT = N->getValueType(0);
+ EVT PtrVT = SourceValue->getValueType(0);
+
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue);
+ if (IsReadAfterWrite)
+ Diff = DAG.getNode(ISD::ABS, DL, PtrVT, Diff);
+
+ Diff = DAG.getNode(ISD::SDIV, DL, PtrVT, Diff, EltSize);
+
+ // If the difference is positive then some elements may alias
+ EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ Diff.getValueType());
+ SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT);
+ SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
+ IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
+
+ // Create the lane mask
+ EVT SplatVT = VT.changeElementType(PtrVT);
+ SDValue DiffSplat = DAG.getSplat(SplatVT, DL, Diff);
+ SDValue VectorStep = DAG.getStepVector(DL, SplatVT);
+ EVT MaskVT = VT.changeElementType(MVT::i1);
+ SDValue DiffMask =
+ DAG.getSetCC(DL, MaskVT, VectorStep, DiffSplat, ISD::CondCode::SETULT);
+
+ EVT EltVT = VT.getVectorElementType();
+ // Extend the diff setcc in case the intrinsic has been promoted to a vector
+ // type with elements larger than i1
+ if (EltVT.getScalarSizeInBits() > MaskVT.getScalarSizeInBits())
+ DiffMask = DAG.getNode(ISD::ANY_EXTEND, DL, VT, DiffMask);
+
+ // Splat the compare result then OR it with the lane mask
+ if (CmpVT.getScalarSizeInBits() < EltVT.getScalarSizeInBits())
+ Cmp = DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Cmp);
+ SDValue Splat = DAG.getSplat(VT, DL, Cmp);
+ return DAG.getNode(ISD::OR, DL, VT, DiffMask, Splat);
+}
+
void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
// Attempt to expand using TargetLowering.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 10e3a5149a5d..118fd8418f78 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -53,6 +53,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
report_fatal_error("Do not know how to scalarize the result of this "
"operator!\n");
+ case ISD::LOOP_DEPENDENCE_WAR_MASK:
+ case ISD::LOOP_DEPENDENCE_RAW_MASK:
+ R = ScalarizeVecRes_LOOP_DEPENDENCE_MASK(N);
+ break;
case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
@@ -396,6 +400,22 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
return GetScalarizedVector(Op);
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
+ SDValue SourceValue = N->getOperand(0);
+ SDValue SinkValue = N->getOperand(1);
+ SDValue EltSize = N->getOperand(2);
+ EVT PtrVT = SourceValue->getValueType(0);
+ SDLoc DL(N);
+
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue);
+ EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ Diff.getValueType());
+ SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT);
+ return DAG.getNode(ISD::OR, DL, CmpVT,
+ DAG.getSetCC(DL, CmpVT, Diff, EltSize, ISD::SETGE),
+ DAG.getSetCC(DL, CmpVT, Diff, Zero, ISD::SETEQ));
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
SDValue Op = N->getOperand(0);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypeScalarizeVector)
@@ -1159,6 +1179,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
report_fatal_error("Do not know how to split the result of this "
"operator!\n");
+ case ISD::LOOP_DEPENDENCE_RAW_MASK:
+ case ISD::LOOP_DEPENDENCE_WAR_MASK:
+ SplitVecRes_LOOP_DEPENDENCE_MASK(N, Lo, Hi);
+ break;
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::AssertZext: SplitVecRes_AssertZext(N, Lo, Hi); break;
case ISD::VSELECT:
@@ -1652,6 +1676,25 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
}
+void DAGTypeLegalizer::SplitVecRes_LOOP_DEPENDENCE_MASK(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc DL(N);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ SDValue PtrA = N->getOperand(0);
+ SDValue PtrB = N->getOperand(1);
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, PtrA, PtrB, N->getOperand(2));
+
+ unsigned EltSize = N->getConstantOperandVal(2);
+ unsigned Offset = EltSize * HiVT.getVectorMinNumElements();
+ SDValue Addend = HiVT.isScalableVT()
+ ? DAG.getVScale(DL, MVT::i64, APInt(64, Offset))
+ : DAG.getConstant(Offset, DL, MVT::i64);
+
+ PtrA = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrA, Addend);
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, PtrA, PtrB, N->getOperand(2));
+}
+
void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT LoVT, HiVT;
@@ -2517,10 +2560,10 @@ void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl);
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo(), MachineMemOperand::MOLoad,
- LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
- N->getRanges());
+ N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ Alignment, N->getAAInfo(), N->getRanges());
if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
SDValue PassThru = MGT->getPassThru();
@@ -4321,10 +4364,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL);
SDValue Lo;
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo(), MachineMemOperand::MOStore,
- LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
- N->getRanges());
+ N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ Alignment, N->getAAInfo(), N->getRanges());
if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
@@ -4784,6 +4827,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
#endif
report_fatal_error("Do not know how to widen the result of this operator!");
+ case ISD::LOOP_DEPENDENCE_RAW_MASK:
+ case ISD::LOOP_DEPENDENCE_WAR_MASK:
+ Res = WidenVecRes_LOOP_DEPENDENCE_MASK(N);
+ break;
case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
case ISD::ADDRSPACECAST:
Res = WidenVecRes_ADDRSPACECAST(N);
@@ -5986,6 +6033,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
return CreateStackStoreLoad(InOp, WidenVT);
}
+SDValue DAGTypeLegalizer::WidenVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
+ return DAG.getNode(
+ N->getOpcode(), SDLoc(N),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
+ N->getOperand(0), N->getOperand(1), N->getOperand(2));
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
SDLoc dl(N);
// Build a vector with undefined for the new nodes.
diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 0a449fd011e6..72ea0898f975 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -63,6 +63,8 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
HorizontalVerticalBalance = 0;
}
+ResourcePriorityQueue::~ResourcePriorityQueue() = default;
+
unsigned
ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
unsigned NumberDeps = 0;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3672a91e33a3..bcf25958d098 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3299,7 +3299,7 @@ SelectionDAG::getValidShiftAmountRange(SDValue V, const APInt &DemandedElts,
return std::nullopt;
}
-std::optional<uint64_t>
+std::optional<unsigned>
SelectionDAG::getValidShiftAmount(SDValue V, const APInt &DemandedElts,
unsigned Depth) const {
assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
@@ -3312,7 +3312,7 @@ SelectionDAG::getValidShiftAmount(SDValue V, const APInt &DemandedElts,
return std::nullopt;
}
-std::optional<uint64_t>
+std::optional<unsigned>
SelectionDAG::getValidShiftAmount(SDValue V, unsigned Depth) const {
EVT VT = V.getValueType();
APInt DemandedElts = VT.isFixedLengthVector()
@@ -3321,7 +3321,7 @@ SelectionDAG::getValidShiftAmount(SDValue V, unsigned Depth) const {
return getValidShiftAmount(V, DemandedElts, Depth);
}
-std::optional<uint64_t>
+std::optional<unsigned>
SelectionDAG::getValidMinimumShiftAmount(SDValue V, const APInt &DemandedElts,
unsigned Depth) const {
assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
@@ -3333,7 +3333,7 @@ SelectionDAG::getValidMinimumShiftAmount(SDValue V, const APInt &DemandedElts,
return std::nullopt;
}
-std::optional<uint64_t>
+std::optional<unsigned>
SelectionDAG::getValidMinimumShiftAmount(SDValue V, unsigned Depth) const {
EVT VT = V.getValueType();
APInt DemandedElts = VT.isFixedLengthVector()
@@ -3342,7 +3342,7 @@ SelectionDAG::getValidMinimumShiftAmount(SDValue V, unsigned Depth) const {
return getValidMinimumShiftAmount(V, DemandedElts, Depth);
}
-std::optional<uint64_t>
+std::optional<unsigned>
SelectionDAG::getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts,
unsigned Depth) const {
assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL ||
@@ -3354,7 +3354,7 @@ SelectionDAG::getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts,
return std::nullopt;
}
-std::optional<uint64_t>
+std::optional<unsigned>
SelectionDAG::getValidMaximumShiftAmount(SDValue V, unsigned Depth) const {
EVT VT = V.getValueType();
APInt DemandedElts = VT.isFixedLengthVector()
@@ -3828,7 +3828,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::shl(Known, Known2, NUW, NSW, ShAmtNonZero);
// Minimum shift low bits are known zero.
- if (std::optional<uint64_t> ShMinAmt =
+ if (std::optional<unsigned> ShMinAmt =
getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1))
Known.Zero.setLowBits(*ShMinAmt);
break;
@@ -3840,7 +3840,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Op->getFlags().hasExact());
// Minimum shift high bits are known zero.
- if (std::optional<uint64_t> ShMinAmt =
+ if (std::optional<unsigned> ShMinAmt =
getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1))
Known.Zero.setHighBits(*ShMinAmt);
break;
@@ -3850,6 +3850,22 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::ashr(Known, Known2, /*ShAmtNonZero=*/false,
Op->getFlags().hasExact());
break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C =
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
+ unsigned Amt = C->getAPIntValue().urem(BitWidth);
+
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // Canonicalize to ROTR.
+ if (Opcode == ISD::ROTL && Amt != 0)
+ Amt = BitWidth - Amt;
+
+ Known.Zero = Known.Zero.rotr(Amt);
+ Known.One = Known.One.rotr(Amt);
+ }
+ break;
case ISD::FSHL:
case ISD::FSHR:
if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) {
@@ -3868,15 +3884,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
if (Opcode == ISD::FSHL) {
- Known.One <<= Amt;
- Known.Zero <<= Amt;
- Known2.One.lshrInPlace(BitWidth - Amt);
- Known2.Zero.lshrInPlace(BitWidth - Amt);
+ Known <<= Amt;
+ Known2 >>= BitWidth - Amt;
} else {
- Known.One <<= BitWidth - Amt;
- Known.Zero <<= BitWidth - Amt;
- Known2.One.lshrInPlace(Amt);
- Known2.Zero.lshrInPlace(Amt);
+ Known <<= BitWidth - Amt;
+ Known2 >>= Amt;
}
Known = Known.unionWith(Known2);
}
@@ -4875,15 +4887,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::SRA:
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
// SRA X, C -> adds C sign bits.
- if (std::optional<uint64_t> ShAmt =
+ if (std::optional<unsigned> ShAmt =
getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1))
- Tmp = std::min<uint64_t>(Tmp + *ShAmt, VTBits);
+ Tmp = std::min(Tmp + *ShAmt, VTBits);
return Tmp;
case ISD::SHL:
if (std::optional<ConstantRange> ShAmtRange =
getValidShiftAmountRange(Op, DemandedElts, Depth + 1)) {
- uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue();
- uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue();
+ unsigned MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue();
+ unsigned MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue();
// Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are
// shifted out, then we can compute the number of sign bits for the
// operand being extended. A future improvement could be to pass along the
@@ -4894,7 +4906,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
EVT ExtVT = Ext.getValueType();
SDValue Extendee = Ext.getOperand(0);
EVT ExtendeeVT = Extendee.getValueType();
- uint64_t SizeDifference =
+ unsigned SizeDifference =
ExtVT.getScalarSizeInBits() - ExtendeeVT.getScalarSizeInBits();
if (SizeDifference <= MinShAmt) {
Tmp = SizeDifference +
@@ -5127,7 +5139,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If the sign portion ends in our element the subtraction gives correct
// result. Otherwise it gives either negative or > bitwidth result
- return std::clamp(KnownSign - rIndex * BitWidth, 0, BitWidth);
+ return std::clamp(KnownSign - rIndex * BitWidth, 1, BitWidth);
}
case ISD::INSERT_VECTOR_ELT: {
if (VT.isScalableVector())
@@ -5660,6 +5672,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::USUBSAT:
case ISD::MULHU:
case ISD::MULHS:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
case ISD::ABDU:
case ISD::ABDS:
case ISD::SMIN:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 62ba801f6992..430e47451fd4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7974,12 +7974,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::amdgcn_call_whole_wave: {
TargetLowering::ArgListTy Args;
+ bool isTailCall = I.isTailCall();
// The first argument is the callee. Skip it when assembling the call args.
for (unsigned Idx = 1; Idx < I.arg_size(); ++Idx) {
TargetLowering::ArgListEntry Arg(getValue(I.getArgOperand(Idx)),
I.getArgOperand(Idx)->getType());
Arg.setAttributes(&I, Idx);
+
+ // If we have an explicit sret argument that is an Instruction, (i.e., it
+ // might point to function-local memory), we can't meaningfully tail-call.
+ if (Arg.IsSRet && isa<Instruction>(I.getArgOperand(Idx)))
+ isTailCall = false;
+
Args.push_back(Arg);
}
@@ -7994,7 +8001,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
.setChain(getRoot())
.setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(),
getValue(I.getArgOperand(0)), std::move(Args))
- .setTailCall(false)
+ .setTailCall(isTailCall && canTailCall(I))
.setIsPreallocated(
I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
.setConvergent(I.isConvergent())
@@ -8295,6 +8302,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
visitVectorExtractLastActive(I, Intrinsic);
return;
}
+ case Intrinsic::loop_dependence_war_mask:
+ setValue(&I,
+ DAG.getNode(ISD::LOOP_DEPENDENCE_WAR_MASK, sdl,
+ EVT::getEVT(I.getType()), getValue(I.getOperand(0)),
+ getValue(I.getOperand(1)), getValue(I.getOperand(2))));
+ return;
+ case Intrinsic::loop_dependence_raw_mask:
+ setValue(&I,
+ DAG.getNode(ISD::LOOP_DEPENDENCE_RAW_MASK, sdl,
+ EVT::getEVT(I.getType()), getValue(I.getOperand(0)),
+ getValue(I.getOperand(1)), getValue(I.getOperand(2))));
+ return;
}
}
@@ -8456,8 +8475,11 @@ void SelectionDAGBuilder::visitVPLoad(
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags =
+ TLI.getVPIntrinsicMemOperandFlags(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ MachinePointerInfo(PtrOperand), MMOFlags,
LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
MMO, false /*IsExpanding */);
@@ -8508,9 +8530,11 @@ void SelectionDAGBuilder::visitVPGather(
Alignment = DAG.getEVTAlign(VT.getScalarType());
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand::Flags MMOFlags =
+ TLI.getVPIntrinsicMemOperandFlags(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOLoad,
- LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
+ MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ *Alignment, AAInfo, Ranges);
SDValue Base, Index, Scale;
bool UniformBase =
getUniformBase(PtrOperand, Base, Index, Scale, this, VPIntrin.getParent(),
@@ -8546,8 +8570,11 @@ void SelectionDAGBuilder::visitVPStore(
Alignment = DAG.getEVTAlign(VT);
SDValue Ptr = OpValues[1];
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags =
+ TLI.getVPIntrinsicMemOperandFlags(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ MachinePointerInfo(PtrOperand), MMOFlags,
LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
@@ -8569,9 +8596,11 @@ void SelectionDAGBuilder::visitVPScatter(
Alignment = DAG.getEVTAlign(VT.getScalarType());
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand::Flags MMOFlags =
+ TLI.getVPIntrinsicMemOperandFlags(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOStore,
- LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
+ MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ *Alignment, AAInfo);
SDValue Base, Index, Scale;
bool UniformBase =
getUniformBase(PtrOperand, Base, Index, Scale, this, VPIntrin.getParent(),
@@ -8609,9 +8638,12 @@ void SelectionDAGBuilder::visitVPStridedLoad(
bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
unsigned AS = PtrOperand->getType()->getPointerAddressSpace();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags =
+ TLI.getVPIntrinsicMemOperandFlags(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOLoad,
- LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
+ MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ *Alignment, AAInfo, Ranges);
SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1],
OpValues[2], OpValues[3], MMO,
@@ -8632,9 +8664,12 @@ void SelectionDAGBuilder::visitVPStridedStore(
Alignment = DAG.getEVTAlign(VT.getScalarType());
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
unsigned AS = PtrOperand->getType()->getPointerAddressSpace();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineMemOperand::Flags MMOFlags =
+ TLI.getVPIntrinsicMemOperandFlags(VPIntrin);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MachinePointerInfo(AS), MachineMemOperand::MOStore,
- LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo);
+ MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(),
+ *Alignment, AAInfo);
SDValue ST = DAG.getStridedStoreVP(
getMemoryRoot(), DL, OpValues[0], OpValues[1],
@@ -8901,6 +8936,29 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
return Result;
}
+bool SelectionDAGBuilder::canTailCall(const CallBase &CB) const {
+ bool isMustTailCall = CB.isMustTailCall();
+
+ // Avoid emitting tail calls in functions with the disable-tail-calls
+ // attribute.
+ const Function *Caller = CB.getParent()->getParent();
+ if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
+ "true" &&
+ !isMustTailCall)
+ return false;
+
+ // We can't tail call inside a function with a swifterror argument. Lowering
+ // does not support this yet. It would have to move into the swifterror
+ // register before the call.
+ if (DAG.getTargetLoweringInfo().supportSwiftError() &&
+ Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ return false;
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within TLI->LowerCallTo.
+ return isInTailCallPosition(CB, DAG.getTarget());
+}
+
void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
bool isTailCall, bool isMustTailCall,
const BasicBlock *EHPadBB,
@@ -8915,21 +8973,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
const Value *SwiftErrorVal = nullptr;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (isTailCall) {
- // Avoid emitting tail calls in functions with the disable-tail-calls
- // attribute.
- auto *Caller = CB.getParent()->getParent();
- if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
- "true" && !isMustTailCall)
- isTailCall = false;
-
- // We can't tail call inside a function with a swifterror argument. Lowering
- // does not support this yet. It would have to move into the swifterror
- // register before the call.
- if (TLI.supportSwiftError() &&
- Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
- isTailCall = false;
- }
+ if (isTailCall)
+ isTailCall = canTailCall(CB);
for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
const Value *V = *I;
@@ -8969,11 +9014,6 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
Args.push_back(Entry);
}
- // Check if target-independent constraints permit a tail call here.
- // Target-dependent constraints are checked within TLI->LowerCallTo.
- if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget()))
- isTailCall = false;
-
// Disable tail calls if there is an swifterror argument. Targets have not
// been updated to support tail calls.
if (TLI.supportSwiftError() && SwiftErrorVal)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index e0835e631035..c7577fa335fe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -408,6 +408,10 @@ public:
bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr,
const TargetLowering::PtrAuthInfo *PAI = nullptr);
+ // Check some of the target-independent constraints for tail calls. This does
+ // not iterate over the call arguments.
+ bool canTailCall(const CallBase &CB) const;
+
// Lower range metadata from 0 to N to assert zext to an integer of nearest
// floor power of two.
SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 900da7645504..4b2a00c2e2cf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -587,6 +587,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
return "partial_reduce_smla";
case ISD::PARTIAL_REDUCE_SUMLA:
return "partial_reduce_sumla";
+ case ISD::LOOP_DEPENDENCE_WAR_MASK:
+ return "loop_dep_war";
+ case ISD::LOOP_DEPENDENCE_RAW_MASK:
+ return "loop_dep_raw";
// Vector Predication
#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index ece50ed95fc4..e61558c59bf0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1729,10 +1729,18 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = Register();
FuncInfo->ExceptionSelectorVirtReg = Register();
- if (LLVMBB->isEHPad())
+ if (LLVMBB->isEHPad()) {
if (!PrepareEHLandingPad())
continue;
+ if (!FastIS) {
+ SDValue NewRoot = TLI->lowerEHPadEntry(CurDAG->getRoot(),
+ SDB->getCurSDLoc(), *CurDAG);
+ if (NewRoot && NewRoot != CurDAG->getRoot())
+ CurDAG->setRoot(NewRoot);
+ }
+ }
+
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
if (LLVMBB != &Fn.getEntryBlock())
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 402a012e8e55..fd6d20e146bb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -832,7 +832,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
case ISD::SHL: {
// If we are only demanding sign bits then we can use the shift source
// directly.
- if (std::optional<uint64_t> MaxSA =
+ if (std::optional<unsigned> MaxSA =
DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
SDValue Op0 = Op.getOperand(0);
unsigned ShAmt = *MaxSA;
@@ -847,7 +847,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
case ISD::SRL: {
// If we are only demanding sign bits then we can use the shift source
// directly.
- if (std::optional<uint64_t> MaxSA =
+ if (std::optional<unsigned> MaxSA =
DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
SDValue Op0 = Op.getOperand(0);
unsigned ShAmt = *MaxSA;
@@ -1780,7 +1780,7 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
- if (std::optional<uint64_t> KnownSA =
+ if (std::optional<unsigned> KnownSA =
TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
unsigned ShAmt = *KnownSA;
if (ShAmt == 0)
@@ -1792,7 +1792,7 @@ bool TargetLowering::SimplifyDemandedBits(
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SRL) {
if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
- if (std::optional<uint64_t> InnerSA =
+ if (std::optional<unsigned> InnerSA =
TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
unsigned C1 = *InnerSA;
unsigned Opc = ISD::SHL;
@@ -1832,7 +1832,7 @@ bool TargetLowering::SimplifyDemandedBits(
// TODO - support non-uniform vector amounts.
if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
InnerOp.hasOneUse()) {
- if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
+ if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
InnerOp, DemandedElts, Depth + 2)) {
unsigned InnerShAmt = *SA2;
if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
@@ -1858,8 +1858,7 @@ bool TargetLowering::SimplifyDemandedBits(
Op->dropFlags(SDNodeFlags::NoWrap);
return true;
}
- Known.Zero <<= ShAmt;
- Known.One <<= ShAmt;
+ Known <<= ShAmt;
// low bits known zero.
Known.Zero.setLowBits(ShAmt);
@@ -1950,7 +1949,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If we are only demanding sign bits then we can use the shift source
// directly.
- if (std::optional<uint64_t> MaxSA =
+ if (std::optional<unsigned> MaxSA =
TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
unsigned ShAmt = *MaxSA;
unsigned NumSignBits =
@@ -1966,7 +1965,7 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
- if (std::optional<uint64_t> KnownSA =
+ if (std::optional<unsigned> KnownSA =
TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
unsigned ShAmt = *KnownSA;
if (ShAmt == 0)
@@ -1978,7 +1977,7 @@ bool TargetLowering::SimplifyDemandedBits(
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SHL) {
if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
- if (std::optional<uint64_t> InnerSA =
+ if (std::optional<unsigned> InnerSA =
TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
unsigned C1 = *InnerSA;
unsigned Opc = ISD::SRL;
@@ -1998,7 +1997,7 @@ bool TargetLowering::SimplifyDemandedBits(
// single sra. We can do this if the top bits are never demanded.
if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
- if (std::optional<uint64_t> InnerSA =
+ if (std::optional<unsigned> InnerSA =
TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
unsigned C1 = *InnerSA;
// Clamp the combined shift amount if it exceeds the bit width.
@@ -2042,8 +2041,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
- Known.Zero.lshrInPlace(ShAmt);
- Known.One.lshrInPlace(ShAmt);
+ Known >>= ShAmt;
// High bits known zero.
Known.Zero.setHighBits(ShAmt);
@@ -2064,7 +2062,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If we are only demanding sign bits then we can use the shift source
// directly.
- if (std::optional<uint64_t> MaxSA =
+ if (std::optional<unsigned> MaxSA =
TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
unsigned ShAmt = *MaxSA;
// Must already be signbits in DemandedBits bounds, and can't demand any
@@ -2103,7 +2101,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isOne())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
- if (std::optional<uint64_t> KnownSA =
+ if (std::optional<unsigned> KnownSA =
TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
unsigned ShAmt = *KnownSA;
if (ShAmt == 0)
@@ -2112,7 +2110,7 @@ bool TargetLowering::SimplifyDemandedBits(
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
// supports sext_inreg.
if (Op0.getOpcode() == ISD::SHL) {
- if (std::optional<uint64_t> InnerSA =
+ if (std::optional<unsigned> InnerSA =
TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
unsigned LowBits = BitWidth - ShAmt;
EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
@@ -2153,8 +2151,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
return true;
- Known.Zero.lshrInPlace(ShAmt);
- Known.One.lshrInPlace(ShAmt);
+ Known >>= ShAmt;
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
@@ -2225,10 +2222,8 @@ bool TargetLowering::SimplifyDemandedBits(
Depth + 1))
return true;
- Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
- Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
- Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
- Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
+ Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
+ Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
Known = Known.unionWith(Known2);
// Attempt to avoid multi-use ops if we don't need anything from them.
@@ -2363,8 +2358,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
- Known.One = Known2.One.reverseBits();
- Known.Zero = Known2.Zero.reverseBits();
+ Known = Known2.reverseBits();
break;
}
case ISD::BSWAP: {
@@ -2397,8 +2391,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
Depth + 1))
return true;
- Known.One = Known2.One.byteSwap();
- Known.Zero = Known2.Zero.byteSwap();
+ Known = Known2.byteSwap();
break;
}
case ISD::CTPOP: {
@@ -2664,11 +2657,11 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
- std::optional<uint64_t> ShAmtC =
+ std::optional<unsigned> ShAmtC =
TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
if (!ShAmtC || *ShAmtC >= BitWidth)
break;
- uint64_t ShVal = *ShAmtC;
+ unsigned ShVal = *ShAmtC;
APInt HighBits =
APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
@@ -3234,27 +3227,6 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownUndef.setAllBits();
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
}
- SDValue ScalarSrc = Op.getOperand(0);
- if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
- SDValue Src = ScalarSrc.getOperand(0);
- SDValue Idx = ScalarSrc.getOperand(1);
- EVT SrcVT = Src.getValueType();
-
- ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
-
- if (SrcEltCnt.isScalable())
- return false;
-
- unsigned NumSrcElts = SrcEltCnt.getFixedValue();
- if (isNullConstant(Idx)) {
- APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
- APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
- APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
- if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
- TLO, Depth + 1))
- return true;
- }
- }
KnownUndef.setHighBits(NumElts - 1);
break;
}
@@ -9740,8 +9712,8 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
- SDValue LHS = DAG.getFreeze(N->getOperand(0));
- SDValue RHS = DAG.getFreeze(N->getOperand(1));
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
bool IsSigned = N->getOpcode() == ISD::ABDS;
// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
@@ -9749,34 +9721,37 @@ SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
+ LHS = DAG.getFreeze(LHS);
+ RHS = DAG.getFreeze(RHS);
SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
}
// abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
- if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
+ if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
+ LHS = DAG.getFreeze(LHS);
+ RHS = DAG.getFreeze(RHS);
return DAG.getNode(ISD::OR, dl, VT,
DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
+ }
// If the subtract doesn't overflow then just use abs(sub())
- // NOTE: don't use frozen operands for value tracking.
- bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
- DAG.SignBitIsZero(N->getOperand(0));
+ bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
- if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
- N->getOperand(1)))
+ if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
return DAG.getNode(ISD::ABS, dl, VT,
DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
- if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
- N->getOperand(0)))
+ if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
return DAG.getNode(ISD::ABS, dl, VT,
DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
+ LHS = DAG.getFreeze(LHS);
+ RHS = DAG.getFreeze(RHS);
SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
// Branchless expansion iff cmp result is allbits: