diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 230 |
1 files changed, 129 insertions, 101 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index efeddd7c9bd4..b5f8ee50cba3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28,7 +28,6 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/VectorUtils.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -3625,6 +3624,16 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc, match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) && match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value()))) BaseCost += 1; + + // For OR conditions with EQ comparisons, prefer splitting into branches + // (unless CCMP is available). OR+EQ cannot be optimized via bitwise ops, + // unlike OR+NE which becomes (P|Q)!=0. Similarly, don't split signed + // comparisons (SLT, SGT) that can be optimized. + if (BaseCost >= 0 && !Subtarget.hasCCMP() && Opc == Instruction::Or && + match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) && + match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value()))) + return {-1, -1, -1}; + return {BaseCost, BrMergingLikelyBias.getValue(), BrMergingUnlikelyBias.getValue()}; } @@ -3634,7 +3643,7 @@ bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const { } bool X86TargetLowering::shouldFoldConstantShiftPairToMask( - const SDNode *N, CombineLevel Level) const { + const SDNode *N) const { assert(((N->getOpcode() == ISD::SHL && N->getOperand(0).getOpcode() == ISD::SRL) || (N->getOpcode() == ISD::SRL && @@ -3649,7 +3658,7 @@ bool X86TargetLowering::shouldFoldConstantShiftPairToMask( // the fold for non-splats yet. return N->getOperand(1) == N->getOperand(0).getOperand(1); } - return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level); + return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N); } bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const { @@ -3659,11 +3668,8 @@ bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const { if (VT.isVector()) return false; - // 64-bit shifts on 32-bit targets produce really bad bloated code. - if (VT == MVT::i64 && !Subtarget.is64Bit()) - return false; - - return true; + unsigned MaxWidth = Subtarget.is64Bit() ? 64 : 32; + return VT.getScalarSizeInBits() <= MaxWidth; } TargetLowering::ShiftLegalizationStrategy @@ -3791,7 +3797,7 @@ static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) { /// Return true if every element in Mask, is an in-place blend/select mask or is /// undef. -LLVM_ATTRIBUTE_UNUSED static bool isBlendOrUndef(ArrayRef<int> Mask) { +[[maybe_unused]] static bool isBlendOrUndef(ArrayRef<int> Mask) { unsigned NumElts = Mask.size(); for (auto [I, M] : enumerate(Mask)) if (!isUndefOrEqual(M, I) && !isUndefOrEqual(M, I + NumElts)) @@ -4456,8 +4462,8 @@ SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget, bool AllowAVX512 = true) { assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2"); unsigned NumSubs = 1; - if ((CheckBWI && Subtarget.useBWIRegs()) || - (!CheckBWI && AllowAVX512 && Subtarget.useAVX512Regs())) { + if (AllowAVX512 && ((CheckBWI && Subtarget.useBWIRegs()) || + (!CheckBWI && Subtarget.useAVX512Regs()))) { if (VT.getSizeInBits() > 512) { NumSubs = VT.getSizeInBits() / 512; assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size"); @@ -8100,7 +8106,7 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl, return DstVec; } -LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) { +[[maybe_unused]] static bool isHorizOp(unsigned Opcode) { switch (Opcode) { case X86ISD::PACKSS: case X86ISD::PACKUS: @@ -13783,10 +13789,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, // so prevents folding a load into this instruction or making a copy. const int UnpackLoMask[] = {0, 0, 1, 1}; const int UnpackHiMask[] = {2, 2, 3, 3}; - if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) - Mask = UnpackLoMask; - else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) - Mask = UnpackHiMask; + if (!isSingleElementRepeatedMask(Mask)) { + if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) + Mask = UnpackLoMask; + else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) + Mask = UnpackHiMask; + } return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); @@ -20815,7 +20823,7 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, // for DAG type consistency we have to match the FP operand type. APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000)); - LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK; + [[maybe_unused]] APFloat::opStatus Status = APFloat::opOK; bool LosesInfo = false; if (TheVT == MVT::f64) // The rounding mode is irrelevant as the conversion should be exact. @@ -22858,7 +22866,7 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, // be generated by the memcmp expansion pass with oversized integer compares // (see PR33325). bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X); - if (isNullConstant(Y) && !IsOrXorXorTreeCCZero) + if (isNullConstant(Y) && OpSize == 128 && !IsOrXorXorTreeCCZero) return SDValue(); // Don't perform this combine if constructing the vector will be expensive. @@ -29747,65 +29755,30 @@ static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl, const X86Subtarget &Subtarget, SelectionDAG &DAG, SDValue *Low = nullptr) { - unsigned NumElts = VT.getVectorNumElements(); - // For vXi8 we will unpack the low and high half of each 128 bit lane to widen // to a vXi16 type. Do the multiplies, shift the results and pack the half // lane results back together. // We'll take different approaches for signed and unsigned. - // For unsigned we'll use punpcklbw/punpckhbw to put zero extend the bytes - // and use pmullw to calculate the full 16-bit product. + // For unsigned we'll use punpcklbw/punpckhbw to zero extend the bytes to + // words and use pmullw to calculate the full 16-bit product. // For signed we'll use punpcklbw/punpckbw to extend the bytes to words and // shift them left into the upper byte of each word. This allows us to use // pmulhw to calculate the full 16-bit product. This trick means we don't // need to sign extend the bytes to use pmullw. - - MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2); + MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); SDValue Zero = DAG.getConstant(0, dl, VT); - SDValue ALo, AHi; + SDValue ALo, AHi, BLo, BHi; if (IsSigned) { ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, A)); - AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A)); - } else { - ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero)); - AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero)); - } - - SDValue BLo, BHi; - if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) { - // If the RHS is a constant, manually unpackl/unpackh and extend. - SmallVector<SDValue, 16> LoOps, HiOps; - for (unsigned i = 0; i != NumElts; i += 16) { - for (unsigned j = 0; j != 8; ++j) { - SDValue LoOp = B.getOperand(i + j); - SDValue HiOp = B.getOperand(i + j + 8); - - if (IsSigned) { - LoOp = DAG.getAnyExtOrTrunc(LoOp, dl, MVT::i16); - HiOp = DAG.getAnyExtOrTrunc(HiOp, dl, MVT::i16); - LoOp = DAG.getNode(ISD::SHL, dl, MVT::i16, LoOp, - DAG.getConstant(8, dl, MVT::i16)); - HiOp = DAG.getNode(ISD::SHL, dl, MVT::i16, HiOp, - DAG.getConstant(8, dl, MVT::i16)); - } else { - LoOp = DAG.getZExtOrTrunc(LoOp, dl, MVT::i16); - HiOp = DAG.getZExtOrTrunc(HiOp, dl, MVT::i16); - } - - LoOps.push_back(LoOp); - HiOps.push_back(HiOp); - } - } - - BLo = DAG.getBuildVector(ExVT, dl, LoOps); - BHi = DAG.getBuildVector(ExVT, dl, HiOps); - } else if (IsSigned) { BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, B)); + AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A)); BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, B)); } else { + ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero)); BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Zero)); + AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero)); BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Zero)); } @@ -29818,7 +29791,7 @@ static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl, if (Low) *Low = getPack(DAG, Subtarget, dl, VT, RLo, RHi); - return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf*/ true); + return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf=*/true); } static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, @@ -30313,22 +30286,8 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG, uint64_t ShiftAmt = APIntShiftAmt.getZExtValue(); - if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) { - // Hardware support for vector shifts is sparse which makes us scalarize the - // vector operations in many cases. Also, on sandybridge ADD is faster than - // shl: (shl V, 1) -> (add (freeze V), (freeze V)) - if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) { - // R may be undef at run-time, but (shl R, 1) must be an even number (LSB - // must be 0). (add undef, undef) however can be any value. To make this - // safe, we must freeze R to ensure that register allocation uses the same - // register for an undefined value. This ensures that the result will - // still be even and preserves the original semantics. - R = DAG.getFreeze(R); - return DAG.getNode(ISD::ADD, dl, VT, R, R); - } - + if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); - } // i64 SRA needs to be performed as partial shifts. if (((!Subtarget.hasXOP() && VT == MVT::v2i64) || @@ -31229,16 +31188,16 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, unsigned NumElts = VT.getVectorNumElements(); if (Subtarget.hasVBMI2() && EltSizeInBits > 8) { - if (IsFSHR) - std::swap(Op0, Op1); if (IsCstSplat) { + if (IsFSHR) + std::swap(Op0, Op1); uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits); SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8); return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, {Op0, Op1, Imm}, DAG, Subtarget); } - return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT, + return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT, {Op0, Op1, Amt}, DAG, Subtarget); } assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 || @@ -35153,8 +35112,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VALIGN) NODE_NAME_CASE(VSHLD) NODE_NAME_CASE(VSHRD) - NODE_NAME_CASE(VSHLDV) - NODE_NAME_CASE(VSHRDV) NODE_NAME_CASE(PSHUFD) NODE_NAME_CASE(PSHUFHW) NODE_NAME_CASE(PSHUFLW) @@ -41889,7 +41846,7 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL, if (!X86::mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) || X86::mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget)) return SDValue(); - Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4); + Imm = llvm::rotl<uint8_t>(Imm, 4); return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0, DAG.getTargetConstant(Imm, DL, MVT::i8)); }; @@ -44631,8 +44588,11 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( APInt DemandedMask = OriginalDemandedBits << ShAmt; - // If we just want the sign bit then we don't need to shift it. - if (OriginalDemandedBits.isSignMask()) + // If we only want bits that already match the signbit then we don't need + // to shift. + unsigned NumHiDemandedBits = BitWidth - OriginalDemandedBits.countr_zero(); + if (TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1) >= + NumHiDemandedBits) return TLO.CombineTo(Op, Op0); // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1 @@ -44853,10 +44813,16 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( } case X86ISD::PCMPGT: // icmp sgt(0, R) == ashr(R, BitWidth-1). - // iff we only need the sign bit then we can use R directly. - if (OriginalDemandedBits.isSignMask() && - ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) - return TLO.CombineTo(Op, Op.getOperand(1)); + if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) { + // iff we only need the signbit then we can use R directly. + if (OriginalDemandedBits.isSignMask()) + return TLO.CombineTo(Op, Op.getOperand(1)); + // otherwise we just need R's signbit for the comparison. + APInt SignMask = APInt::getSignMask(BitWidth); + if (SimplifyDemandedBits(Op.getOperand(1), SignMask, OriginalDemandedElts, + Known, TLO, Depth + 1)) + return true; + } break; case X86ISD::MOVMSK: { SDValue Src = Op.getOperand(0); @@ -45185,6 +45151,19 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode( case X86ISD::Wrapper: case X86ISD::WrapperRIP: return true; + case X86ISD::PACKSS: + case X86ISD::PACKUS: { + APInt DemandedLHS, DemandedRHS; + getPackDemandedElts(Op.getSimpleValueType(), DemandedElts, DemandedLHS, + DemandedRHS); + return (!DemandedLHS || + DAG.isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedLHS, + PoisonOnly, Depth + 1)) && + (!DemandedRHS || + DAG.isGuaranteedNotToBeUndefOrPoison(Op.getOperand(1), DemandedRHS, + PoisonOnly, Depth + 1)); + } + case X86ISD::INSERTPS: case X86ISD::BLENDI: case X86ISD::PSHUFB: case X86ISD::PSHUFD: @@ -45254,7 +45233,12 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode( case X86ISD::BLENDI: case X86ISD::BLENDV: return false; + // SSE packs. + case X86ISD::PACKSS: + case X86ISD::PACKUS: + return false; // SSE target shuffles. + case X86ISD::INSERTPS: case X86ISD::PSHUFB: case X86ISD::PSHUFD: case X86ISD::UNPCKL: @@ -45452,7 +45436,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, const SDLoc &DL, const X86Subtarget &Subtarget) { EVT SrcVT = Src.getValueType(); - if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1) + if (Subtarget.useSoftFloat() || !SrcVT.isSimple() || + SrcVT.getScalarType() != MVT::i1) return SDValue(); // Recognize the IR pattern for the movmsk intrinsic under SSE1 before type @@ -46211,7 +46196,7 @@ static SDValue createVPDPBUSD(SelectionDAG &DAG, SDValue LHS, SDValue RHS, SDValue Zero = DAG.getConstant(0, DL, DpVT); return SplitOpsAndApply(DAG, Subtarget, DL, DpVT, {Zero, DpOp0, DpOp1}, - DpBuilder, false); + DpBuilder, /*CheckBWI=*/false, Subtarget.hasVNNI()); } // Create a PSADBW given two sources representable as zexts of vXi8. @@ -47747,6 +47732,15 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, DL, DAG, Subtarget)) return V; + // If the sign bit is known then BLENDV can be folded away. + if (N->getOpcode() == X86ISD::BLENDV) { + KnownBits KnownCond = DAG.computeKnownBits(Cond); + if (KnownCond.isNegative()) + return LHS; + if (KnownCond.isNonNegative()) + return RHS; + } + if (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::BLENDV) { SmallVector<int, 64> CondMask; if (createShuffleMaskFromVSELECT(CondMask, Cond, @@ -52383,16 +52377,41 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT, // Do not flip "e > c", where "c" is a constant, because Cmp instruction // cannot take an immediate as its first operand. // - if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() && - EFLAGS.getValueType().isInteger() && - !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { - SDValue NewSub = - DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), - EFLAGS.getOperand(1), EFLAGS.getOperand(0)); - SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo()); + // If EFLAGS is from a CMP that compares the same operands as the earlier + // SUB producing X (i.e. CMP X, Y), we can directly use the carry flag with + // SBB/ADC without creating a flipped SUB. + if (EFLAGS.getOpcode() == X86ISD::CMP && + EFLAGS.getValueType().isInteger() && X == EFLAGS.getOperand(0)) { return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, DAG.getVTList(VT, MVT::i32), X, - DAG.getConstant(0, DL, VT), NewEFLAGS); + DAG.getConstant(0, DL, VT), EFLAGS); + } + + if (EFLAGS.getOpcode() == X86ISD::SUB && + EFLAGS.getValueType().isInteger() && + !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { + // Only create NewSub if we know one of the folds will succeed to avoid + // introducing a temporary node that may persist and affect one-use checks + // below. + if (EFLAGS.getNode()->hasOneUse()) { + SDValue NewSub = DAG.getNode( + X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo()); + return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, + DAG.getVTList(VT, MVT::i32), X, + DAG.getConstant(0, DL, VT), NewEFLAGS); + } + + if (IsSub && X == EFLAGS.getValue(0)) { + SDValue NewSub = DAG.getNode( + X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo()); + return DAG.getNode(X86ISD::SBB, DL, DAG.getVTList(VT, MVT::i32), + EFLAGS.getOperand(0), EFLAGS.getOperand(1), + NewEFLAGS); + } } } @@ -58104,6 +58123,14 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget)) return V; + // Prefer VSHLI to reduce uses, X86FixupInstTunings may revert this depending + // on the scheduler model. Limit multiple users to AVX+ targets to prevent + // introducing extra register moves. + if (Op0 == Op1 && supportedVectorShiftWithImm(VT, Subtarget, ISD::SHL)) + if (Subtarget.hasAVX() || N->isOnlyUserOf(Op0.getNode())) + return getTargetVShiftByConstNode(X86ISD::VSHLI, DL, VT.getSimpleVT(), + Op0, 1, DAG); + // Canonicalize hidden LEA pattern: // Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y) // iff c < 4 @@ -58295,11 +58322,12 @@ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) { } else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) { SDValue Src = Op1; SDValue Op10 = Op1.getOperand(0); - if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1))) { - // res, flags2 = sub 0, (and (xor X, -1), Y) + if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1)) && + llvm::isOneConstant(Op1.getOperand(1))) { + // res, flags2 = sub 0, (and (xor X, -1), 1) // cload/cstore ..., cond_ne, flag2 // -> - // res, flags2 = sub 0, (and X, Y) + // res, flags2 = sub 0, (and X, 1) // cload/cstore ..., cond_e, flag2 Src = DAG.getNode(ISD::AND, DL, Op1.getValueType(), Op10.getOperand(0), Op1.getOperand(1)); |
