diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/lib/CodeGen/SelectionDAG | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format (branch: users/mingmingl-llvm/samplefdo-profile-format)
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 113 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 5 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 51 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 66 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 60 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 102 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 4 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 93 |
13 files changed, 357 insertions, 166 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 27b5a0d37b67..d130efe96b56 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4710,7 +4710,10 @@ template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) { if (SDValue LogBase2 = BuildLogBase2(N1, DL)) { EVT ShiftVT = getShiftAmountTy(N0.getValueType()); SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); - return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc); + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap()); + // TODO: Preserve setNoSignedWrap if LogBase2 isn't BitWidth - 1. + return Matcher.getNode(ISD::SHL, DL, VT, N0, Trunc, Flags); } } @@ -9998,13 +10001,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } } - // fold (not (neg x)) -> (add X, -1) - // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if - // Y is a constant or the subtract has a single use. 
- if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB && - isNullConstant(N0.getOperand(0))) { - return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), - DAG.getAllOnesConstant(DL, VT)); + // fold (not (sub Y, X)) -> (add X, ~Y) if Y is a constant + if (N0.getOpcode() == ISD::SUB && isAllOnesConstant(N1)) { + SDValue Y = N0.getOperand(0); + SDValue X = N0.getOperand(1); + + if (auto *YConst = dyn_cast<ConstantSDNode>(Y)) { + APInt NotYValue = ~YConst->getAPIntValue(); + SDValue NotY = DAG.getConstant(NotYValue, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, X, NotY, N->getFlags()); + } } // fold (not (add X, -1)) -> (neg X) @@ -11089,38 +11095,43 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } } - // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or - // (and (srl x, (sub c2, c1), MASK) - if (N0.getOpcode() == ISD::SHL && - (N0.getOperand(1) == N1 || N0->hasOneUse()) && - TLI.shouldFoldConstantShiftPairToMask(N, Level)) { - auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, - ConstantSDNode *RHS) { - const APInt &LHSC = LHS->getAPIntValue(); - const APInt &RHSC = RHS->getAPIntValue(); - return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) && - LHSC.getZExtValue() <= RHSC.getZExtValue(); - }; - if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, - /*AllowUndefs*/ false, - /*AllowTypeMismatch*/ true)) { - SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); - SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); - SDValue Mask = DAG.getAllOnesConstant(DL, VT); - Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01); - Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff); - SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); - return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); - } - if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, - /*AllowUndefs*/ false, - /*AllowTypeMismatch*/ true)) { - SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); - SDValue Diff = 
DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); - SDValue Mask = DAG.getAllOnesConstant(DL, VT); - Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1); - SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff); - return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + if (N0.getOpcode() == ISD::SHL) { + // fold (srl (shl nuw x, c), c) -> x + if (N0.getOperand(1) == N1 && N0->getFlags().hasNoUnsignedWrap()) + return N0.getOperand(0); + + // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or + // (and (srl x, (sub c2, c1), MASK) + if ((N0.getOperand(1) == N1 || N0->hasOneUse()) && + TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + const APInt &LHSC = LHS->getAPIntValue(); + const APInt &RHSC = RHS->getAPIntValue(); + return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) && + LHSC.getZExtValue() <= RHSC.getZExtValue(); + }; + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); + Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01); + Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff); + SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } + if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); + Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } } } @@ -15137,7 +15148,7 
@@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return foldedExt; } else if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { + TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) { bool DoXform = true; SmallVector<SDNode *, 4> SetCCs; if (!N0.hasOneUse()) @@ -16309,7 +16320,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) { SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1)); - return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR); + SDNodeFlags Flags; + // Propagate nuw for sub. + if (N0->getOpcode() == ISD::SUB && N0->getFlags().hasNoUnsignedWrap() && + DAG.MaskedValueIsZero( + N0->getOperand(0), + APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(), + VT.getScalarSizeInBits()))) + Flags.setNoUnsignedWrap(true); + return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR, Flags); } } break; @@ -16788,6 +16807,8 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { // If we have frozen and unfrozen users of N0, update so everything uses N. if (!N0.isUndef() && !N0.hasOneUse()) { SDValue FrozenN0(N, 0); + // Unfreeze all uses of N to avoid double deleting N from the CSE map. + DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0); DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0); // ReplaceAllUsesOfValueWith will have also updated the use in N, thus // creating a cycle in a DAG. Let's undo that by mutating the freeze. @@ -19346,13 +19367,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // MachineBasicBlock CFG, which is awkward. // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal - // on the target. + // on the target, also copy fast math flags. 
if (N1.getOpcode() == ISD::SETCC && TLI.isOperationLegalOrCustom(ISD::BR_CC, N1.getOperand(0).getValueType())) { - return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, - Chain, N1.getOperand(2), - N1.getOperand(0), N1.getOperand(1), N2); + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain, + N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2, + N1->getFlags()); } if (N1.hasOneUse()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 1a63518ab37a..861f76e93f2c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -238,7 +238,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, // Create the result registers for this node and add the result regs to // the machine instruction. - if (VRBase == 0) { + if (!VRBase) { assert(RC && "Isn't a register operand!"); VRBase = MRI->createVirtualRegister(RC); MIB.addReg(VRBase, RegState::Define); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 90d62e6da8e9..9e85f08abb76 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -324,6 +324,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_VP_REDUCE(N); break; + case ISD::LOOP_DEPENDENCE_WAR_MASK: + case ISD::LOOP_DEPENDENCE_RAW_MASK: + Res = PromoteIntRes_LOOP_DEPENDENCE_MASK(N); + break; + case ISD::FREEZE: Res = PromoteIntRes_FREEZE(N); break; @@ -374,6 +379,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N, return GetPromotedInteger(Op); } +SDValue DAGTypeLegalizer::PromoteIntRes_LOOP_DEPENDENCE_MASK(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(N->getOpcode(), SDLoc(N), NewVT, N->ops()); +} + SDValue 
DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) { // Sign-extend the new bits, and continue the assertion. SDValue Op = SExtPromotedInteger(N->getOperand(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 65fd863e55ac..586c3411791f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -382,6 +382,7 @@ private: SDValue PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(SDNode *N); SDValue PromoteIntRes_GET_ACTIVE_LANE_MASK(SDNode *N); SDValue PromoteIntRes_PARTIAL_REDUCE_MLA(SDNode *N); + SDValue PromoteIntRes_LOOP_DEPENDENCE_MASK(SDNode *N); // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); @@ -436,6 +437,7 @@ private: SDValue PromoteIntOp_VECTOR_FIND_LAST_ACTIVE(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_GET_ACTIVE_LANE_MASK(SDNode *N); SDValue PromoteIntOp_PARTIAL_REDUCE_MLA(SDNode *N); + SDValue PromoteIntOp_LOOP_DEPENDENCE_MASK(SDNode *N, unsigned OpNo); void SExtOrZExtPromotedOperands(SDValue &LHS, SDValue &RHS); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -868,6 +870,7 @@ private: // Vector Result Scalarization: <1 x ty> -> ty. 
void ScalarizeVectorResult(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue ScalarizeVecRes_LOOP_DEPENDENCE_MASK(SDNode *N); SDValue ScalarizeVecRes_BinOp(SDNode *N); SDValue ScalarizeVecRes_CMP(SDNode *N); SDValue ScalarizeVecRes_TernaryOp(SDNode *N); @@ -964,6 +967,7 @@ private: void SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_LOOP_DEPENDENCE_MASK(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -1070,6 +1074,7 @@ private: SDValue WidenVecRes_ADDRSPACECAST(SDNode *N); SDValue WidenVecRes_AssertZext(SDNode* N); SDValue WidenVecRes_BITCAST(SDNode* N); + SDValue WidenVecRes_LOOP_DEPENDENCE_MASK(SDNode *N); SDValue WidenVecRes_BUILD_VECTOR(SDNode* N); SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 2ca98958fde0..8e423c4f83b3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -138,6 +138,7 @@ class VectorLegalizer { SDValue ExpandVP_FNEG(SDNode *Node); SDValue ExpandVP_FABS(SDNode *Node); SDValue ExpandVP_FCOPYSIGN(SDNode *Node); + SDValue ExpandLOOP_DEPENDENCE_MASK(SDNode *N); SDValue ExpandSELECT(SDNode *Node); std::pair<SDValue, SDValue> ExpandLoad(SDNode *N); SDValue ExpandStore(SDNode *N); @@ -475,6 +476,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::VECTOR_COMPRESS: case ISD::SCMP: case ISD::UCMP: + case ISD::LOOP_DEPENDENCE_WAR_MASK: + case ISD::LOOP_DEPENDENCE_RAW_MASK: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; 
case ISD::SMULFIX: @@ -1291,6 +1294,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::UCMP: Results.push_back(TLI.expandCMP(Node, DAG)); return; + case ISD::LOOP_DEPENDENCE_WAR_MASK: + case ISD::LOOP_DEPENDENCE_RAW_MASK: + Results.push_back(ExpandLOOP_DEPENDENCE_MASK(Node)); + return; case ISD::FADD: case ISD::FMUL: @@ -1796,6 +1803,50 @@ SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) { return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign); } +SDValue VectorLegalizer::ExpandLOOP_DEPENDENCE_MASK(SDNode *N) { + SDLoc DL(N); + SDValue SourceValue = N->getOperand(0); + SDValue SinkValue = N->getOperand(1); + SDValue EltSize = N->getOperand(2); + + bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK; + EVT VT = N->getValueType(0); + EVT PtrVT = SourceValue->getValueType(0); + + SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue); + if (IsReadAfterWrite) + Diff = DAG.getNode(ISD::ABS, DL, PtrVT, Diff); + + Diff = DAG.getNode(ISD::SDIV, DL, PtrVT, Diff, EltSize); + + // If the difference is positive then some elements may alias + EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + Diff.getValueType()); + SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT); + SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero, + IsReadAfterWrite ? 
ISD::SETEQ : ISD::SETLE); + + // Create the lane mask + EVT SplatVT = VT.changeElementType(PtrVT); + SDValue DiffSplat = DAG.getSplat(SplatVT, DL, Diff); + SDValue VectorStep = DAG.getStepVector(DL, SplatVT); + EVT MaskVT = VT.changeElementType(MVT::i1); + SDValue DiffMask = + DAG.getSetCC(DL, MaskVT, VectorStep, DiffSplat, ISD::CondCode::SETULT); + + EVT EltVT = VT.getVectorElementType(); + // Extend the diff setcc in case the intrinsic has been promoted to a vector + // type with elements larger than i1 + if (EltVT.getScalarSizeInBits() > MaskVT.getScalarSizeInBits()) + DiffMask = DAG.getNode(ISD::ANY_EXTEND, DL, VT, DiffMask); + + // Splat the compare result then OR it with the lane mask + if (CmpVT.getScalarSizeInBits() < EltVT.getScalarSizeInBits()) + Cmp = DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Cmp); + SDValue Splat = DAG.getSplat(VT, DL, Cmp); + return DAG.getNode(ISD::OR, DL, VT, DiffMask, Splat); +} + void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results) { // Attempt to expand using TargetLowering. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 10e3a5149a5d..118fd8418f78 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -53,6 +53,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { report_fatal_error("Do not know how to scalarize the result of this " "operator!\n"); + case ISD::LOOP_DEPENDENCE_WAR_MASK: + case ISD::LOOP_DEPENDENCE_RAW_MASK: + R = ScalarizeVecRes_LOOP_DEPENDENCE_MASK(N); + break; case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; @@ -396,6 +400,22 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, return GetScalarizedVector(Op); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) { + SDValue SourceValue = N->getOperand(0); + SDValue SinkValue = N->getOperand(1); + SDValue EltSize = N->getOperand(2); + EVT PtrVT = SourceValue->getValueType(0); + SDLoc DL(N); + + SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue); + EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + Diff.getValueType()); + SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT); + return DAG.getNode(ISD::OR, DL, CmpVT, + DAG.getSetCC(DL, CmpVT, Diff, EltSize, ISD::SETGE), + DAG.getSetCC(DL, CmpVT, Diff, Zero, ISD::SETEQ)); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { SDValue Op = N->getOperand(0); if (getTypeAction(Op.getValueType()) == TargetLowering::TypeScalarizeVector) @@ -1159,6 +1179,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { report_fatal_error("Do not know how to split the result of this " "operator!\n"); + case ISD::LOOP_DEPENDENCE_RAW_MASK: + case ISD::LOOP_DEPENDENCE_WAR_MASK: + 
SplitVecRes_LOOP_DEPENDENCE_MASK(N, Lo, Hi); + break; case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; case ISD::AssertZext: SplitVecRes_AssertZext(N, Lo, Hi); break; case ISD::VSELECT: @@ -1652,6 +1676,25 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); } +void DAGTypeLegalizer::SplitVecRes_LOOP_DEPENDENCE_MASK(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc DL(N); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + SDValue PtrA = N->getOperand(0); + SDValue PtrB = N->getOperand(1); + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, PtrA, PtrB, N->getOperand(2)); + + unsigned EltSize = N->getConstantOperandVal(2); + unsigned Offset = EltSize * HiVT.getVectorMinNumElements(); + SDValue Addend = HiVT.isScalableVT() + ? DAG.getVScale(DL, MVT::i64, APInt(64, Offset)) + : DAG.getConstant(Offset, DL, MVT::i64); + + PtrA = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrA, Addend); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, PtrA, PtrB, N->getOperand(2)); +} + void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; @@ -2517,10 +2560,10 @@ void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo, else std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl); + MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo(), MachineMemOperand::MOLoad, - LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(), - N->getRanges()); + N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(), + Alignment, N->getAAInfo(), N->getRanges()); if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) { SDValue PassThru = MGT->getPassThru(); @@ -4321,10 +4364,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) { std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL); SDValue Lo; 
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo(), MachineMemOperand::MOStore, - LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(), - N->getRanges()); + N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(), + Alignment, N->getAAInfo(), N->getRanges()); if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) { SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale}; @@ -4784,6 +4827,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { #endif report_fatal_error("Do not know how to widen the result of this operator!"); + case ISD::LOOP_DEPENDENCE_RAW_MASK: + case ISD::LOOP_DEPENDENCE_WAR_MASK: + Res = WidenVecRes_LOOP_DEPENDENCE_MASK(N); + break; case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break; case ISD::ADDRSPACECAST: Res = WidenVecRes_ADDRSPACECAST(N); @@ -5986,6 +6033,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { return CreateStackStoreLoad(InOp, WidenVT); } +SDValue DAGTypeLegalizer::WidenVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) { + return DAG.getNode( + N->getOpcode(), SDLoc(N), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), + N->getOperand(0), N->getOperand(1), N->getOperand(2)); +} + SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { SDLoc dl(N); // Build a vector with undefined for the new nodes. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 0a449fd011e6..72ea0898f975 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -63,6 +63,8 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) HorizontalVerticalBalance = 0; } +ResourcePriorityQueue::~ResourcePriorityQueue() = default; + unsigned ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { unsigned NumberDeps = 0; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 3672a91e33a3..bcf25958d098 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3299,7 +3299,7 @@ SelectionDAG::getValidShiftAmountRange(SDValue V, const APInt &DemandedElts, return std::nullopt; } -std::optional<uint64_t> +std::optional<unsigned> SelectionDAG::getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth) const { assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || @@ -3312,7 +3312,7 @@ SelectionDAG::getValidShiftAmount(SDValue V, const APInt &DemandedElts, return std::nullopt; } -std::optional<uint64_t> +std::optional<unsigned> SelectionDAG::getValidShiftAmount(SDValue V, unsigned Depth) const { EVT VT = V.getValueType(); APInt DemandedElts = VT.isFixedLengthVector() @@ -3321,7 +3321,7 @@ SelectionDAG::getValidShiftAmount(SDValue V, unsigned Depth) const { return getValidShiftAmount(V, DemandedElts, Depth); } -std::optional<uint64_t> +std::optional<unsigned> SelectionDAG::getValidMinimumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth) const { assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || @@ -3333,7 +3333,7 @@ SelectionDAG::getValidMinimumShiftAmount(SDValue V, const APInt &DemandedElts, return std::nullopt; } -std::optional<uint64_t> 
+std::optional<unsigned> SelectionDAG::getValidMinimumShiftAmount(SDValue V, unsigned Depth) const { EVT VT = V.getValueType(); APInt DemandedElts = VT.isFixedLengthVector() @@ -3342,7 +3342,7 @@ SelectionDAG::getValidMinimumShiftAmount(SDValue V, unsigned Depth) const { return getValidMinimumShiftAmount(V, DemandedElts, Depth); } -std::optional<uint64_t> +std::optional<unsigned> SelectionDAG::getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth) const { assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || @@ -3354,7 +3354,7 @@ SelectionDAG::getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, return std::nullopt; } -std::optional<uint64_t> +std::optional<unsigned> SelectionDAG::getValidMaximumShiftAmount(SDValue V, unsigned Depth) const { EVT VT = V.getValueType(); APInt DemandedElts = VT.isFixedLengthVector() @@ -3828,7 +3828,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::shl(Known, Known2, NUW, NSW, ShAmtNonZero); // Minimum shift low bits are known zero. - if (std::optional<uint64_t> ShMinAmt = + if (std::optional<unsigned> ShMinAmt = getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) Known.Zero.setLowBits(*ShMinAmt); break; @@ -3840,7 +3840,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Op->getFlags().hasExact()); // Minimum shift high bits are known zero. 
- if (std::optional<uint64_t> ShMinAmt = + if (std::optional<unsigned> ShMinAmt = getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) Known.Zero.setHighBits(*ShMinAmt); break; @@ -3850,6 +3850,22 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::ashr(Known, Known2, /*ShAmtNonZero=*/false, Op->getFlags().hasExact()); break; + case ISD::ROTL: + case ISD::ROTR: + if (ConstantSDNode *C = + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { + unsigned Amt = C->getAPIntValue().urem(BitWidth); + + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + // Canonicalize to ROTR. + if (Opcode == ISD::ROTL && Amt != 0) + Amt = BitWidth - Amt; + + Known.Zero = Known.Zero.rotr(Amt); + Known.One = Known.One.rotr(Amt); + } + break; case ISD::FSHL: case ISD::FSHR: if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) { @@ -3868,15 +3884,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); if (Opcode == ISD::FSHL) { - Known.One <<= Amt; - Known.Zero <<= Amt; - Known2.One.lshrInPlace(BitWidth - Amt); - Known2.Zero.lshrInPlace(BitWidth - Amt); + Known <<= Amt; + Known2 >>= BitWidth - Amt; } else { - Known.One <<= BitWidth - Amt; - Known.Zero <<= BitWidth - Amt; - Known2.One.lshrInPlace(Amt); - Known2.Zero.lshrInPlace(Amt); + Known <<= BitWidth - Amt; + Known2 >>= Amt; } Known = Known.unionWith(Known2); } @@ -4875,15 +4887,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::SRA: Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); // SRA X, C -> adds C sign bits. 
- if (std::optional<uint64_t> ShAmt = + if (std::optional<unsigned> ShAmt = getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) - Tmp = std::min<uint64_t>(Tmp + *ShAmt, VTBits); + Tmp = std::min(Tmp + *ShAmt, VTBits); return Tmp; case ISD::SHL: if (std::optional<ConstantRange> ShAmtRange = getValidShiftAmountRange(Op, DemandedElts, Depth + 1)) { - uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue(); - uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue(); + unsigned MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue(); + unsigned MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue(); // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are // shifted out, then we can compute the number of sign bits for the // operand being extended. A future improvement could be to pass along the @@ -4894,7 +4906,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, EVT ExtVT = Ext.getValueType(); SDValue Extendee = Ext.getOperand(0); EVT ExtendeeVT = Extendee.getValueType(); - uint64_t SizeDifference = + unsigned SizeDifference = ExtVT.getScalarSizeInBits() - ExtendeeVT.getScalarSizeInBits(); if (SizeDifference <= MinShAmt) { Tmp = SizeDifference + @@ -5127,7 +5139,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // If the sign portion ends in our element the subtraction gives correct // result. 
Otherwise it gives either negative or > bitwidth result - return std::clamp(KnownSign - rIndex * BitWidth, 0, BitWidth); + return std::clamp(KnownSign - rIndex * BitWidth, 1, BitWidth); } case ISD::INSERT_VECTOR_ELT: { if (VT.isScalableVector()) @@ -5660,6 +5672,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::USUBSAT: case ISD::MULHU: case ISD::MULHS: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: + case ISD::AVGCEILS: + case ISD::AVGCEILU: case ISD::ABDU: case ISD::ABDS: case ISD::SMIN: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 62ba801f6992..430e47451fd4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7974,12 +7974,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } case Intrinsic::amdgcn_call_whole_wave: { TargetLowering::ArgListTy Args; + bool isTailCall = I.isTailCall(); // The first argument is the callee. Skip it when assembling the call args. for (unsigned Idx = 1; Idx < I.arg_size(); ++Idx) { TargetLowering::ArgListEntry Arg(getValue(I.getArgOperand(Idx)), I.getArgOperand(Idx)->getType()); Arg.setAttributes(&I, Idx); + + // If we have an explicit sret argument that is an Instruction, (i.e., it + // might point to function-local memory), we can't meaningfully tail-call. 
+ if (Arg.IsSRet && isa<Instruction>(I.getArgOperand(Idx))) + isTailCall = false; + Args.push_back(Arg); } @@ -7994,7 +8001,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, .setChain(getRoot()) .setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(), getValue(I.getArgOperand(0)), std::move(Args)) - .setTailCall(false) + .setTailCall(isTailCall && canTailCall(I)) .setIsPreallocated( I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0) .setConvergent(I.isConvergent()) @@ -8295,6 +8302,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, visitVectorExtractLastActive(I, Intrinsic); return; } + case Intrinsic::loop_dependence_war_mask: + setValue(&I, + DAG.getNode(ISD::LOOP_DEPENDENCE_WAR_MASK, sdl, + EVT::getEVT(I.getType()), getValue(I.getOperand(0)), + getValue(I.getOperand(1)), getValue(I.getOperand(2)))); + return; + case Intrinsic::loop_dependence_raw_mask: + setValue(&I, + DAG.getNode(ISD::LOOP_DEPENDENCE_RAW_MASK, sdl, + EVT::getEVT(I.getType()), getValue(I.getOperand(0)), + getValue(I.getOperand(1)), getValue(I.getOperand(2)))); + return; } } @@ -8456,8 +8475,11 @@ void SelectionDAGBuilder::visitVPLoad( MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MachineMemOperand::Flags MMOFlags = + TLI.getVPIntrinsicMemOperandFlags(VPIntrin); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + MachinePointerInfo(PtrOperand), MMOFlags, LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], MMO, false /*IsExpanding */); @@ -8508,9 +8530,11 @@ void SelectionDAGBuilder::visitVPGather( Alignment = DAG.getEVTAlign(VT.getScalarType()); unsigned AS = PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand::Flags MMOFlags = + TLI.getVPIntrinsicMemOperandFlags(VPIntrin); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOLoad, - LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); + MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(), + *Alignment, AAInfo, Ranges); SDValue Base, Index, Scale; bool UniformBase = getUniformBase(PtrOperand, Base, Index, Scale, this, VPIntrin.getParent(), @@ -8546,8 +8570,11 @@ void SelectionDAGBuilder::visitVPStore( Alignment = DAG.getEVTAlign(VT); SDValue Ptr = OpValues[1]; SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MachineMemOperand::Flags MMOFlags = + TLI.getVPIntrinsicMemOperandFlags(VPIntrin); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + MachinePointerInfo(PtrOperand), MMOFlags, LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo); ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset, OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED, @@ -8569,9 +8596,11 @@ void SelectionDAGBuilder::visitVPScatter( Alignment = 
DAG.getEVTAlign(VT.getScalarType()); unsigned AS = PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand::Flags MMOFlags = + TLI.getVPIntrinsicMemOperandFlags(VPIntrin); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOStore, - LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo); + MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(), + *Alignment, AAInfo); SDValue Base, Index, Scale; bool UniformBase = getUniformBase(PtrOperand, Base, Index, Scale, this, VPIntrin.getParent(), @@ -8609,9 +8638,12 @@ void SelectionDAGBuilder::visitVPStridedLoad( bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); unsigned AS = PtrOperand->getType()->getPointerAddressSpace(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MachineMemOperand::Flags MMOFlags = + TLI.getVPIntrinsicMemOperandFlags(VPIntrin); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOLoad, - LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); + MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(), + *Alignment, AAInfo, Ranges); SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], OpValues[3], MMO, @@ -8632,9 +8664,12 @@ void SelectionDAGBuilder::visitVPStridedStore( Alignment = DAG.getEVTAlign(VT.getScalarType()); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); unsigned AS = PtrOperand->getType()->getPointerAddressSpace(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MachineMemOperand::Flags MMOFlags = + TLI.getVPIntrinsicMemOperandFlags(VPIntrin); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOStore, - LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo); + 
MachinePointerInfo(AS), MMOFlags, LocationSize::beforeOrAfterPointer(), + *Alignment, AAInfo); SDValue ST = DAG.getStridedStoreVP( getMemoryRoot(), DL, OpValues[0], OpValues[1], @@ -8901,6 +8936,29 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, return Result; } +bool SelectionDAGBuilder::canTailCall(const CallBase &CB) const { + bool isMustTailCall = CB.isMustTailCall(); + + // Avoid emitting tail calls in functions with the disable-tail-calls + // attribute. + const Function *Caller = CB.getParent()->getParent(); + if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == + "true" && + !isMustTailCall) + return false; + + // We can't tail call inside a function with a swifterror argument. Lowering + // does not support this yet. It would have to move into the swifterror + // register before the call. + if (DAG.getTargetLoweringInfo().supportSwiftError() && + Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + return false; + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI->LowerCallTo. + return isInTailCallPosition(CB, DAG.getTarget()); +} + void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, bool isTailCall, bool isMustTailCall, const BasicBlock *EHPadBB, @@ -8915,21 +8973,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (isTailCall) { - // Avoid emitting tail calls in functions with the disable-tail-calls - // attribute. - auto *Caller = CB.getParent()->getParent(); - if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == - "true" && !isMustTailCall) - isTailCall = false; - - // We can't tail call inside a function with a swifterror argument. Lowering - // does not support this yet. It would have to move into the swifterror - // register before the call. 
- if (TLI.supportSwiftError() && - Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) - isTailCall = false; - } + if (isTailCall) + isTailCall = canTailCall(CB); for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { const Value *V = *I; @@ -8969,11 +9014,6 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, Args.push_back(Entry); } - // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within TLI->LowerCallTo. - if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget())) - isTailCall = false; - // Disable tail calls if there is an swifterror argument. Targets have not // been updated to support tail calls. if (TLI.supportSwiftError() && SwiftErrorVal) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index e0835e631035..c7577fa335fe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -408,6 +408,10 @@ public: bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr, const TargetLowering::PtrAuthInfo *PAI = nullptr); + // Check some of the target-independent constraints for tail calls. This does + // not iterate over the call arguments. + bool canTailCall(const CallBase &CB) const; + // Lower range metadata from 0 to N to assert zext to an integer of nearest // floor power of two. 
SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 900da7645504..4b2a00c2e2cf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -587,6 +587,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { return "partial_reduce_smla"; case ISD::PARTIAL_REDUCE_SUMLA: return "partial_reduce_sumla"; + case ISD::LOOP_DEPENDENCE_WAR_MASK: + return "loop_dep_war"; + case ISD::LOOP_DEPENDENCE_RAW_MASK: + return "loop_dep_raw"; // Vector Predication #define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \ diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index ece50ed95fc4..e61558c59bf0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1729,10 +1729,18 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Setup an EH landing-pad block. FuncInfo->ExceptionPointerVirtReg = Register(); FuncInfo->ExceptionSelectorVirtReg = Register(); - if (LLVMBB->isEHPad()) + if (LLVMBB->isEHPad()) { if (!PrepareEHLandingPad()) continue; + if (!FastIS) { + SDValue NewRoot = TLI->lowerEHPadEntry(CurDAG->getRoot(), + SDB->getCurSDLoc(), *CurDAG); + if (NewRoot && NewRoot != CurDAG->getRoot()) + CurDAG->setRoot(NewRoot); + } + } + // Before doing SelectionDAG ISel, see if FastISel has been requested. 
if (FastIS) { if (LLVMBB != &Fn.getEntryBlock()) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 402a012e8e55..fd6d20e146bb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -832,7 +832,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( case ISD::SHL: { // If we are only demanding sign bits then we can use the shift source // directly. - if (std::optional<uint64_t> MaxSA = + if (std::optional<unsigned> MaxSA = DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) { SDValue Op0 = Op.getOperand(0); unsigned ShAmt = *MaxSA; @@ -847,7 +847,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( case ISD::SRL: { // If we are only demanding sign bits then we can use the shift source // directly. - if (std::optional<uint64_t> MaxSA = + if (std::optional<unsigned> MaxSA = DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) { SDValue Op0 = Op.getOperand(0); unsigned ShAmt = *MaxSA; @@ -1780,7 +1780,7 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op1 = Op.getOperand(1); EVT ShiftVT = Op1.getValueType(); - if (std::optional<uint64_t> KnownSA = + if (std::optional<unsigned> KnownSA = TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) { unsigned ShAmt = *KnownSA; if (ShAmt == 0) @@ -1792,7 +1792,7 @@ bool TargetLowering::SimplifyDemandedBits( // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SRL) { if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) { - if (std::optional<uint64_t> InnerSA = + if (std::optional<unsigned> InnerSA = TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) { unsigned C1 = *InnerSA; unsigned Opc = ISD::SHL; @@ -1832,7 +1832,7 @@ bool TargetLowering::SimplifyDemandedBits( // TODO - support non-uniform vector amounts. 
if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() && InnerOp.hasOneUse()) { - if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount( + if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount( InnerOp, DemandedElts, Depth + 2)) { unsigned InnerShAmt = *SA2; if (InnerShAmt < ShAmt && InnerShAmt < InnerBits && @@ -1858,8 +1858,7 @@ bool TargetLowering::SimplifyDemandedBits( Op->dropFlags(SDNodeFlags::NoWrap); return true; } - Known.Zero <<= ShAmt; - Known.One <<= ShAmt; + Known <<= ShAmt; // low bits known zero. Known.Zero.setLowBits(ShAmt); @@ -1950,7 +1949,7 @@ bool TargetLowering::SimplifyDemandedBits( // If we are only demanding sign bits then we can use the shift source // directly. - if (std::optional<uint64_t> MaxSA = + if (std::optional<unsigned> MaxSA = TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) { unsigned ShAmt = *MaxSA; unsigned NumSignBits = @@ -1966,7 +1965,7 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op1 = Op.getOperand(1); EVT ShiftVT = Op1.getValueType(); - if (std::optional<uint64_t> KnownSA = + if (std::optional<unsigned> KnownSA = TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) { unsigned ShAmt = *KnownSA; if (ShAmt == 0) @@ -1978,7 +1977,7 @@ bool TargetLowering::SimplifyDemandedBits( // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SHL) { if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) { - if (std::optional<uint64_t> InnerSA = + if (std::optional<unsigned> InnerSA = TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) { unsigned C1 = *InnerSA; unsigned Opc = ISD::SRL; @@ -1998,7 +1997,7 @@ bool TargetLowering::SimplifyDemandedBits( // single sra. We can do this if the top bits are never demanded. 
if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) { if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) { - if (std::optional<uint64_t> InnerSA = + if (std::optional<unsigned> InnerSA = TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) { unsigned C1 = *InnerSA; // Clamp the combined shift amount if it exceeds the bit width. @@ -2042,8 +2041,7 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) return true; - Known.Zero.lshrInPlace(ShAmt); - Known.One.lshrInPlace(ShAmt); + Known >>= ShAmt; // High bits known zero. Known.Zero.setHighBits(ShAmt); @@ -2064,7 +2062,7 @@ bool TargetLowering::SimplifyDemandedBits( // If we are only demanding sign bits then we can use the shift source // directly. - if (std::optional<uint64_t> MaxSA = + if (std::optional<unsigned> MaxSA = TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) { unsigned ShAmt = *MaxSA; // Must already be signbits in DemandedBits bounds, and can't demand any @@ -2103,7 +2101,7 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isOne()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); - if (std::optional<uint64_t> KnownSA = + if (std::optional<unsigned> KnownSA = TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) { unsigned ShAmt = *KnownSA; if (ShAmt == 0) @@ -2112,7 +2110,7 @@ bool TargetLowering::SimplifyDemandedBits( // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target // supports sext_inreg. 
if (Op0.getOpcode() == ISD::SHL) { - if (std::optional<uint64_t> InnerSA = + if (std::optional<unsigned> InnerSA = TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) { unsigned LowBits = BitWidth - ShAmt; EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits); @@ -2153,8 +2151,7 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) return true; - Known.Zero.lshrInPlace(ShAmt); - Known.One.lshrInPlace(ShAmt); + Known >>= ShAmt; // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. @@ -2225,10 +2222,8 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1)) return true; - Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt)); - Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt)); - Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); - Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); + Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt)); + Known >>= (IsFSHL ? (BitWidth - Amt) : Amt); Known = Known.unionWith(Known2); // Attempt to avoid multi-use ops if we don't need anything from them. 
@@ -2363,8 +2358,7 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, Depth + 1)) return true; - Known.One = Known2.One.reverseBits(); - Known.Zero = Known2.Zero.reverseBits(); + Known = Known2.reverseBits(); break; } case ISD::BSWAP: { @@ -2397,8 +2391,7 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, Depth + 1)) return true; - Known.One = Known2.One.byteSwap(); - Known.Zero = Known2.Zero.byteSwap(); + Known = Known2.byteSwap(); break; } case ISD::CTPOP: { @@ -2664,11 +2657,11 @@ bool TargetLowering::SimplifyDemandedBits( break; } - std::optional<uint64_t> ShAmtC = + std::optional<unsigned> ShAmtC = TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2); if (!ShAmtC || *ShAmtC >= BitWidth) break; - uint64_t ShVal = *ShAmtC; + unsigned ShVal = *ShAmtC; APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); @@ -3234,27 +3227,6 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef.setAllBits(); return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); } - SDValue ScalarSrc = Op.getOperand(0); - if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { - SDValue Src = ScalarSrc.getOperand(0); - SDValue Idx = ScalarSrc.getOperand(1); - EVT SrcVT = Src.getValueType(); - - ElementCount SrcEltCnt = SrcVT.getVectorElementCount(); - - if (SrcEltCnt.isScalable()) - return false; - - unsigned NumSrcElts = SrcEltCnt.getFixedValue(); - if (isNullConstant(Idx)) { - APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0); - APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts); - APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts); - if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, - TLO, Depth + 1)) - return true; - } - } KnownUndef.setHighBits(NumElts - 1); break; } @@ -9740,8 +9712,8 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, SDValue 
TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const { SDLoc dl(N); EVT VT = N->getValueType(0); - SDValue LHS = DAG.getFreeze(N->getOperand(0)); - SDValue RHS = DAG.getFreeze(N->getOperand(1)); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); bool IsSigned = N->getOpcode() == ISD::ABDS; // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs)) @@ -9749,34 +9721,37 @@ SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const { unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX; unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN; if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) { + LHS = DAG.getFreeze(LHS); + RHS = DAG.getFreeze(RHS); SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS); SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS); return DAG.getNode(ISD::SUB, dl, VT, Max, Min); } // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs)) - if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) + if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) { + LHS = DAG.getFreeze(LHS); + RHS = DAG.getFreeze(RHS); return DAG.getNode(ISD::OR, dl, VT, DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS), DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS)); + } // If the subtract doesn't overflow then just use abs(sub()) - // NOTE: don't use frozen operands for value tracking. 
- bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) && - DAG.SignBitIsZero(N->getOperand(0)); + bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS); - if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0), - N->getOperand(1))) + if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS)) return DAG.getNode(ISD::ABS, dl, VT, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS)); - if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1), - N->getOperand(0))) + if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS)) return DAG.getNode(ISD::ABS, dl, VT, DAG.getNode(ISD::SUB, dl, VT, RHS, LHS)); EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT; + LHS = DAG.getFreeze(LHS); + RHS = DAG.getFreeze(RHS); SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC); // Branchless expansion iff cmp result is allbits: |
