diff options
Diffstat (limited to 'llvm/lib/CodeGen')
24 files changed, 558 insertions, 424 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index bc74daf983e4..8abeb56adeac 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1318,8 +1318,10 @@ void CodeViewDebug::collectVariableInfoFromMFTable( TFI->getFrameIndexReference(*Asm->MF, VI.getStackSlot(), FrameReg); uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg); - assert(!FrameOffset.getScalable() && - "Frame offsets with a scalable component are not supported"); + if (FrameOffset.getScalable()) { + // No encoding currently exists for scalable offsets; bail out. + continue; + } // Calculate the label ranges. LocalVarDef DefRange = @@ -1410,6 +1412,11 @@ void CodeViewDebug::calculateRanges( if (Location->FragmentInfo->OffsetInBits % 8) continue; + if (TRI->isIgnoredCVReg(Location->Register)) { + // No encoding currently exists for this register; bail out. + continue; + } + LocalVarDef DR; DR.CVRegister = TRI->getCodeViewRegNum(Location->Register); DR.InMemory = !Location->LoadChain.empty(); diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index e8d1aba63afb..1ab521e38358 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -23,7 +23,6 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/Casting.h" #include "llvm/Support/LEB128.h" #include "llvm/Target/TargetLoweringObjectFile.h" diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index b9b394909bb4..c3b4077b27dd 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -110,7 +110,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePostRAHazardRecognizerLegacyPass(Registry); initializePostRASchedulerLegacyPass(Registry); 
initializePreISelIntrinsicLoweringLegacyPassPass(Registry); - initializeProcessImplicitDefsPass(Registry); + initializeProcessImplicitDefsLegacyPass(Registry); initializeRABasicPass(Registry); initializeRAGreedyLegacyPass(Registry); initializeRegAllocFastPass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 9bbb89e37865..dc8184394f74 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -445,7 +445,6 @@ private: bool optimizeSwitchInst(SwitchInst *SI); bool optimizeExtractElementInst(Instruction *Inst); bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT); - bool fixupDbgValue(Instruction *I); bool fixupDbgVariableRecord(DbgVariableRecord &I); bool fixupDbgVariableRecordsOnInst(Instruction &I); bool placeDbgValues(Function &F); @@ -2762,9 +2761,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) { case Intrinsic::fshl: case Intrinsic::fshr: return optimizeFunnelShift(II); - case Intrinsic::dbg_assign: - case Intrinsic::dbg_value: - return fixupDbgValue(II); case Intrinsic::masked_gather: return optimizeGatherScatterInst(II, II->getArgOperand(0)); case Intrinsic::masked_scatter: @@ -3015,7 +3011,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ] if (PredBB && PredBB->getSingleSuccessor() == BB) CI = dyn_cast_or_null<CallInst>( - PredBB->getTerminator()->getPrevNonDebugInstruction(true)); + PredBB->getTerminator()->getPrevNode()); if (CI && CI->use_empty() && isIntrinsicOrLFToBeTailCalled(TLInfo, CI) && @@ -3032,7 +3028,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, for (BasicBlock *Pred : predecessors(BB)) { if (!VisitedBBs.insert(Pred).second) continue; - if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) { + if (Instruction *I = Pred->rbegin()->getPrevNode()) { CallInst *CI = dyn_cast<CallInst>(I); if (CI && 
CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && attributesPermitTailCall(F, CI, RetI, *TLI)) { @@ -3554,8 +3550,6 @@ class TypePromotionTransaction { /// Keep track of the original uses (pair Instruction, Index). SmallVector<InstructionAndIdx, 4> OriginalUses; /// Keep track of the debug users. - SmallVector<DbgValueInst *, 1> DbgValues; - /// And non-instruction debug-users too. SmallVector<DbgVariableRecord *, 1> DbgVariableRecords; /// Keep track of the new value so that we can undo it by replacing @@ -3577,7 +3571,9 @@ class TypePromotionTransaction { } // Record the debug uses separately. They are not in the instruction's // use list, but they are replaced by RAUW. + SmallVector<DbgValueInst *> DbgValues; findDbgValues(DbgValues, Inst, &DbgVariableRecords); + assert(DbgValues.empty()); // Now, we can replace the uses. Inst->replaceAllUsesWith(New); @@ -3591,11 +3587,7 @@ class TypePromotionTransaction { // RAUW has replaced all original uses with references to the new value, // including the debug uses. Since we are undoing the replacements, // the original debug uses must also be reinstated to maintain the - // correctness and utility of debug value instructions. - for (auto *DVI : DbgValues) - DVI->replaceVariableLocationOp(New, Inst); - // Similar story with DbgVariableRecords, the non-instruction - // representation of dbg.values. + // correctness and utility of debug value records. 
for (DbgVariableRecord *DVR : DbgVariableRecords) DVR->replaceVariableLocationOp(New, Inst); } @@ -7328,7 +7320,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT)) return false; - IRBuilder<> Builder(Load->getNextNonDebugInstruction()); + IRBuilder<> Builder(Load->getNextNode()); auto *NewAnd = cast<Instruction>( Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); // Mark this instruction as "inserted by CGP", so that other @@ -8933,32 +8925,6 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) { return MadeChange; } -// Some CGP optimizations may move or alter what's computed in a block. Check -// whether a dbg.value intrinsic could be pointed at a more appropriate operand. -bool CodeGenPrepare::fixupDbgValue(Instruction *I) { - assert(isa<DbgValueInst>(I)); - DbgValueInst &DVI = *cast<DbgValueInst>(I); - - // Does this dbg.value refer to a sunk address calculation? - bool AnyChange = false; - SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(), - DVI.location_ops().end()); - for (Value *Location : LocationOps) { - WeakTrackingVH SunkAddrVH = SunkAddrs[Location]; - Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; - if (SunkAddr) { - // Point dbg.value at locally computed address, which should give the best - // opportunity to be accurately lowered. This update may change the type - // of pointer being referred to; however this makes no difference to - // debugging information, and we can't generate bitcasts that may affect - // codegen. 
- DVI.replaceVariableLocationOp(Location, SunkAddr); - AnyChange = true; - } - } - return AnyChange; -} - bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) { bool AnyChange = false; for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) @@ -8993,14 +8959,6 @@ bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) { return AnyChange; } -static void DbgInserterHelper(DbgValueInst *DVI, BasicBlock::iterator VI) { - DVI->removeFromParent(); - if (isa<PHINode>(VI)) - DVI->insertBefore(VI->getParent()->getFirstInsertionPt()); - else - DVI->insertAfter(VI); -} - static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI) { DVR->removeFromParent(); BasicBlock *VIBB = VI->getParent(); @@ -9065,15 +9023,8 @@ bool CodeGenPrepare::placeDbgValues(Function &F) { for (BasicBlock &BB : F) { for (Instruction &Insn : llvm::make_early_inc_range(BB)) { - // Process dbg.value intrinsics. - DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn); - if (DVI) { - DbgProcessor(DVI, DVI); - continue; - } - - // If this isn't a dbg.value, process any attached DbgVariableRecord - // records attached to this instruction. + // Process any DbgVariableRecord records attached to this + // instruction. 
for (DbgVariableRecord &DVR : llvm::make_early_inc_range( filterDbgVars(Insn.getDbgRecordRange()))) { if (DVR.Type != DbgVariableRecord::LocationType::Value) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 3b11d0848d30..e8f513ad5a7a 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -162,6 +162,11 @@ bool CombinerHelper::isLegalOrBeforeLegalizer( return isPreLegalize() || isLegal(Query); } +bool CombinerHelper::isLegalOrHasWidenScalar(const LegalityQuery &Query) const { + return isLegal(Query) || + LI->getAction(Query).Action == LegalizeActions::WidenScalar; +} + bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const { if (!Ty.isVector()) return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}}); @@ -5295,7 +5300,7 @@ bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI, return false; } -MachineInstr *CombinerHelper::buildUDivorURemUsingMul(MachineInstr &MI) const { +MachineInstr *CombinerHelper::buildUDivOrURemUsingMul(MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM); auto &UDivorRem = cast<GenericMachineInstr>(MI); @@ -5463,7 +5468,7 @@ MachineInstr *CombinerHelper::buildUDivorURemUsingMul(MachineInstr &MI) const { return ret; } -bool CombinerHelper::matchUDivorURemByConst(MachineInstr &MI) const { +bool CombinerHelper::matchUDivOrURemByConst(MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM); Register Dst = MI.getOperand(0).getReg(); @@ -5512,16 +5517,19 @@ bool CombinerHelper::matchUDivorURemByConst(MachineInstr &MI) const { MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); }); } -void CombinerHelper::applyUDivorURemByConst(MachineInstr &MI) const { - auto *NewMI = buildUDivorURemUsingMul(MI); +void 
CombinerHelper::applyUDivOrURemByConst(MachineInstr &MI) const { + auto *NewMI = buildUDivOrURemUsingMul(MI); replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg()); } -bool CombinerHelper::matchSDivByConst(MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV"); +bool CombinerHelper::matchSDivOrSRemByConst(MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM); Register Dst = MI.getOperand(0).getReg(); Register RHS = MI.getOperand(2).getReg(); LLT DstTy = MRI.getType(Dst); + auto SizeInBits = DstTy.getScalarSizeInBits(); + LLT WideTy = DstTy.changeElementSize(SizeInBits * 2); auto &MF = *MI.getMF(); AttributeList Attr = MF.getFunction().getAttributes(); @@ -5536,43 +5544,63 @@ bool CombinerHelper::matchSDivByConst(MachineInstr &MI) const { return false; // If the sdiv has an 'exact' flag we can use a simpler lowering. - if (MI.getFlag(MachineInstr::MIFlag::IsExact)) { + if (Opcode == TargetOpcode::G_SDIV && + MI.getFlag(MachineInstr::MIFlag::IsExact)) { return matchUnaryPredicate( MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); }); } - // Don't support the general case for now. - return false; + auto *RHSDef = MRI.getVRegDef(RHS); + if (!isConstantOrConstantVector(*RHSDef, MRI)) + return false; + + // Don't do this if the types are not going to be legal. 
+ if (LI) { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}})) + return false; + if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) && + !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}})) + return false; + if (Opcode == TargetOpcode::G_SREM && + !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}})) + return false; + } + + return matchUnaryPredicate( + MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); }); } -void CombinerHelper::applySDivByConst(MachineInstr &MI) const { - auto *NewMI = buildSDivUsingMul(MI); +void CombinerHelper::applySDivOrSRemByConst(MachineInstr &MI) const { + auto *NewMI = buildSDivOrSRemUsingMul(MI); replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg()); } -MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV"); - auto &SDiv = cast<GenericMachineInstr>(MI); - Register Dst = SDiv.getReg(0); - Register LHS = SDiv.getReg(1); - Register RHS = SDiv.getReg(2); +MachineInstr *CombinerHelper::buildSDivOrSRemUsingMul(MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + assert(MI.getOpcode() == TargetOpcode::G_SDIV || + Opcode == TargetOpcode::G_SREM); + auto &SDivorRem = cast<GenericMachineInstr>(MI); + Register Dst = SDivorRem.getReg(0); + Register LHS = SDivorRem.getReg(1); + Register RHS = SDivorRem.getReg(2); LLT Ty = MRI.getType(Dst); LLT ScalarTy = Ty.getScalarType(); + const unsigned EltBits = ScalarTy.getScalarSizeInBits(); LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty); LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType(); auto &MIB = Builder; bool UseSRA = false; - SmallVector<Register, 16> Shifts, Factors; + SmallVector<Register, 16> ExactShifts, ExactFactors; - auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI)); - bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value(); + auto *RHSDefInstr = 
cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI)); + bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value(); - auto BuildSDIVPattern = [&](const Constant *C) { + auto BuildExactSDIVPattern = [&](const Constant *C) { // Don't recompute inverses for each splat element. - if (IsSplat && !Factors.empty()) { - Shifts.push_back(Shifts[0]); - Factors.push_back(Factors[0]); + if (IsSplat && !ExactFactors.empty()) { + ExactShifts.push_back(ExactShifts[0]); + ExactFactors.push_back(ExactFactors[0]); return true; } @@ -5587,31 +5615,110 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) const { // Calculate the multiplicative inverse modulo BW. // 2^W requires W + 1 bits, so we have to extend and then truncate. APInt Factor = Divisor.multiplicativeInverse(); - Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0)); - Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0)); + ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0)); + ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0)); + return true; + }; + + if (MI.getFlag(MachineInstr::MIFlag::IsExact)) { + // Collect all magic values from the build vector. 
+ bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern); + (void)Matched; + assert(Matched && "Expected unary predicate match to succeed"); + + Register Shift, Factor; + if (Ty.isVector()) { + Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0); + Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0); + } else { + Shift = ExactShifts[0]; + Factor = ExactFactors[0]; + } + + Register Res = LHS; + + if (UseSRA) + Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0); + + return MIB.buildMul(Ty, Res, Factor); + } + + SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks; + + auto BuildSDIVPattern = [&](const Constant *C) { + auto *CI = cast<ConstantInt>(C); + const APInt &Divisor = CI->getValue(); + + SignedDivisionByConstantInfo Magics = + SignedDivisionByConstantInfo::get(Divisor); + int NumeratorFactor = 0; + int ShiftMask = -1; + + if (Divisor.isOne() || Divisor.isAllOnes()) { + // If d is +1/-1, we just multiply the numerator by +1/-1. + NumeratorFactor = Divisor.getSExtValue(); + Magics.Magic = 0; + Magics.ShiftAmount = 0; + ShiftMask = 0; + } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) { + // If d > 0 and m < 0, add the numerator. + NumeratorFactor = 1; + } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) { + // If d < 0 and m > 0, subtract the numerator. + NumeratorFactor = -1; + } + + MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0)); + Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0)); + Shifts.push_back( + MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0)); + ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0)); + return true; }; - // Collect all magic values from the build vector. + // Collect the shifts/magic values from each element. 
bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern); (void)Matched; assert(Matched && "Expected unary predicate match to succeed"); - Register Shift, Factor; - if (Ty.isVector()) { - Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0); + Register MagicFactor, Factor, Shift, ShiftMask; + auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI); + if (RHSDef) { + MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0); Factor = MIB.buildBuildVector(Ty, Factors).getReg(0); + Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0); + ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0); } else { - Shift = Shifts[0]; + assert(MRI.getType(RHS).isScalar() && + "Non-build_vector operation should have been a scalar"); + MagicFactor = MagicFactors[0]; Factor = Factors[0]; + Shift = Shifts[0]; + ShiftMask = ShiftMasks[0]; } - Register Res = LHS; + Register Q = LHS; + Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0); + + // (Optionally) Add/subtract the numerator using Factor. + Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0); + Q = MIB.buildAdd(Ty, Q, Factor).getReg(0); - if (UseSRA) - Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0); + // Shift right algebraic by shift value. + Q = MIB.buildAShr(Ty, Q, Shift).getReg(0); - return MIB.buildMul(Ty, Res, Factor); + // Extract the sign bit, mask it and add it to the quotient. 
+ auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1); + auto T = MIB.buildLShr(Ty, Q, SignShift); + T = MIB.buildAnd(Ty, T, ShiftMask); + auto ret = MIB.buildAdd(Ty, Q, T); + + if (Opcode == TargetOpcode::G_SREM) { + auto Prod = MIB.buildMul(Ty, ret, RHS); + return MIB.buildSub(Ty, LHS, Prod); + } + return ret; } bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) const { diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp index 92ecfadf97c9..73f11c1345da 100644 --- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp +++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp @@ -95,6 +95,10 @@ bool isEligibleFunction(Function *F) { if (F->getCallingConv() == CallingConv::SwiftTail) return false; + // Unnamed functions are skipped for simplicity. + if (!F->hasName()) + return false; + // If function contains callsites with musttail, if we merge // it, the merged function will have the musttail callsite, but // the number of parameters can change, thus the parameter count diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 7343eed0372e..d2b2edf2ebc8 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -48,6 +48,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/InterleavedAccess.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -366,34 +367,23 @@ bool InterleavedAccessImpl::lowerInterleavedLoad( bool BinOpShuffleChanged = replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load); + Value *Mask = nullptr; if (auto *VPLoad = dyn_cast<VPIntrinsic>(Load)) { - Value *LaneMask = - getMask(VPLoad->getMaskParam(), Factor, cast<VectorType>(VecTy)); - if (!LaneMask) + Mask = getMask(VPLoad->getMaskParam(), Factor, cast<VectorType>(VecTy)); + if (!Mask) 
return false; - LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load: " << *Load << "\n"); - - // Sometimes the number of Shuffles might be less than Factor, we have to - // fill the gaps with null. Also, lowerInterleavedVPLoad - // expects them to be sorted. - SmallVector<Value *, 4> ShuffleValues(Factor, nullptr); - for (auto [Idx, ShuffleMaskIdx] : enumerate(Indices)) - ShuffleValues[ShuffleMaskIdx] = Shuffles[Idx]; - if (!TLI->lowerInterleavedVPLoad(VPLoad, LaneMask, ShuffleValues)) - // If Extracts is not empty, tryReplaceExtracts made changes earlier. - return !Extracts.empty() || BinOpShuffleChanged; } else { LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n"); - - // Try to create target specific intrinsics to replace the load and - // shuffles. - if (!TLI->lowerInterleavedLoad(cast<LoadInst>(Load), Shuffles, Indices, - Factor)) - // If Extracts is not empty, tryReplaceExtracts made changes earlier. - return !Extracts.empty() || BinOpShuffleChanged; } + // Try to create target specific intrinsics to replace the load and + // shuffles. + if (!TLI->lowerInterleavedLoad(cast<Instruction>(Load), Mask, Shuffles, + Indices, Factor)) + // If Extracts is not empty, tryReplaceExtracts made changes earlier. 
+ return !Extracts.empty() || BinOpShuffleChanged; + DeadInsts.insert_range(Shuffles); DeadInsts.insert(Load); @@ -574,69 +564,11 @@ bool InterleavedAccessImpl::lowerInterleavedStore( return true; } -static bool isInterleaveIntrinsic(Intrinsic::ID IID) { - switch (IID) { - case Intrinsic::vector_interleave2: - case Intrinsic::vector_interleave3: - case Intrinsic::vector_interleave4: - case Intrinsic::vector_interleave5: - case Intrinsic::vector_interleave6: - case Intrinsic::vector_interleave7: - case Intrinsic::vector_interleave8: - return true; - default: - return false; - } -} - -static bool isDeinterleaveIntrinsic(Intrinsic::ID IID) { - switch (IID) { - case Intrinsic::vector_deinterleave2: - case Intrinsic::vector_deinterleave3: - case Intrinsic::vector_deinterleave4: - case Intrinsic::vector_deinterleave5: - case Intrinsic::vector_deinterleave6: - case Intrinsic::vector_deinterleave7: - case Intrinsic::vector_deinterleave8: - return true; - default: - return false; - } -} - -static unsigned getIntrinsicFactor(const IntrinsicInst *II) { - switch (II->getIntrinsicID()) { - case Intrinsic::vector_deinterleave2: - case Intrinsic::vector_interleave2: - return 2; - case Intrinsic::vector_deinterleave3: - case Intrinsic::vector_interleave3: - return 3; - case Intrinsic::vector_deinterleave4: - case Intrinsic::vector_interleave4: - return 4; - case Intrinsic::vector_deinterleave5: - case Intrinsic::vector_interleave5: - return 5; - case Intrinsic::vector_deinterleave6: - case Intrinsic::vector_interleave6: - return 6; - case Intrinsic::vector_deinterleave7: - case Intrinsic::vector_interleave7: - return 7; - case Intrinsic::vector_deinterleave8: - case Intrinsic::vector_interleave8: - return 8; - default: - llvm_unreachable("Unexpected intrinsic"); - } -} - static Value *getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC) { if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) { - if (isInterleaveIntrinsic(IMI->getIntrinsicID()) && - 
getIntrinsicFactor(IMI) == Factor && llvm::all_equal(IMI->args())) { + if (unsigned F = getInterleaveIntrinsicFactor(IMI->getIntrinsicID()); + F && F == Factor && llvm::all_equal(IMI->args())) { return IMI->getArgOperand(0); } } @@ -672,41 +604,21 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal)) return false; - const unsigned Factor = getIntrinsicFactor(DI); - SmallVector<Value *, 8> DeinterleaveValues(Factor, nullptr); - Value *LastFactor = nullptr; - for (auto *User : DI->users()) { - auto *Extract = dyn_cast<ExtractValueInst>(User); - if (!Extract || Extract->getNumIndices() != 1) - return false; - unsigned Idx = Extract->getIndices()[0]; - if (DeinterleaveValues[Idx]) - return false; - DeinterleaveValues[Idx] = Extract; - LastFactor = Extract; - } - - if (!LastFactor) - return false; + const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); + assert(Factor && "unexpected deinterleave intrinsic"); + Value *Mask = nullptr; if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) { if (VPLoad->getIntrinsicID() != Intrinsic::vp_load) return false; // Check mask operand. Handle both all-true/false and interleaved mask. Value *WideMask = VPLoad->getOperand(1); - Value *Mask = - getMask(WideMask, Factor, cast<VectorType>(LastFactor->getType())); + Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI)); if (!Mask) return false; LLVM_DEBUG(dbgs() << "IA: Found a vp.load with deinterleave intrinsic " << *DI << " and factor = " << Factor << "\n"); - - // Since lowerInterleaveLoad expects Shuffles and LoadInst, use special - // TLI function to emit target-specific interleaved instruction. 
- if (!TLI->lowerInterleavedVPLoad(VPLoad, Mask, DeinterleaveValues)) - return false; - } else { auto *LI = cast<LoadInst>(LoadedVal); if (!LI->isSimple()) @@ -714,15 +626,13 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI << " and factor = " << Factor << "\n"); - - // Try and match this with target specific intrinsics. - if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues)) - return false; } - for (Value *V : DeinterleaveValues) - if (V) - DeadInsts.insert(cast<Instruction>(V)); + // Try and match this with target specific intrinsics. + if (!TLI->lowerDeinterleaveIntrinsicToLoad(cast<Instruction>(LoadedVal), Mask, + DI)) + return false; + DeadInsts.insert(DI); // We now have a target-specific load, so delete the old one. DeadInsts.insert(cast<Instruction>(LoadedVal)); @@ -738,25 +648,22 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic( return false; SmallVector<Value *, 8> InterleaveValues(II->args()); - const unsigned Factor = getIntrinsicFactor(II); + const unsigned Factor = getInterleaveIntrinsicFactor(II->getIntrinsicID()); + assert(Factor && "unexpected interleave intrinsic"); + Value *Mask = nullptr; if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) { if (VPStore->getIntrinsicID() != Intrinsic::vp_store) return false; Value *WideMask = VPStore->getOperand(2); - Value *Mask = getMask(WideMask, Factor, - cast<VectorType>(InterleaveValues[0]->getType())); + Mask = getMask(WideMask, Factor, + cast<VectorType>(InterleaveValues[0]->getType())); if (!Mask) return false; LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic " << *II << " and factor = " << Factor << "\n"); - - // Since lowerInterleavedStore expects Shuffle and StoreInst, use special - // TLI function to emit target-specific interleaved instruction. 
- if (!TLI->lowerInterleavedVPStore(VPStore, Mask, InterleaveValues)) - return false; } else { auto *SI = cast<StoreInst>(StoredBy); if (!SI->isSimple()) @@ -764,12 +671,13 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic( LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " << *II << " and factor = " << Factor << "\n"); - - // Try and match this with target specific intrinsics. - if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues)) - return false; } + // Try and match this with target specific intrinsics. + if (!TLI->lowerInterleaveIntrinsicToStore(cast<Instruction>(StoredBy), Mask, + InterleaveValues)) + return false; + // We now have a target-specific store, so delete the old one. DeadInsts.insert(cast<Instruction>(StoredBy)); DeadInsts.insert(II); @@ -792,9 +700,9 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) { Changed |= lowerInterleavedStore(&I, DeadInsts); if (auto *II = dyn_cast<IntrinsicInst>(&I)) { - if (isDeinterleaveIntrinsic(II->getIntrinsicID())) + if (getDeinterleaveIntrinsicFactor(II->getIntrinsicID())) Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts); - else if (isInterleaveIntrinsic(II->getIntrinsicID())) + else if (getInterleaveIntrinsicFactor(II->getIntrinsicID())) Changed |= lowerInterleaveIntrinsic(II, DeadInsts); } } diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 9daacfd39978..e7fa0824fd98 100644 --- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -202,8 +202,8 @@ MachineBlockFrequencyInfo::MachineBlockFrequencyInfo( MachineBlockFrequencyInfo &&) = default; MachineBlockFrequencyInfo::MachineBlockFrequencyInfo( - MachineFunction &F, MachineBranchProbabilityInfo &MBPI, - MachineLoopInfo &MLI) { + const MachineFunction &F, const MachineBranchProbabilityInfo &MBPI, + const MachineLoopInfo &MLI) { calculate(F, MBPI, MLI); } diff --git a/llvm/lib/CodeGen/MachineDebugify.cpp 
b/llvm/lib/CodeGen/MachineDebugify.cpp index 9b9cebc74054..1a20fe586e95 100644 --- a/llvm/lib/CodeGen/MachineDebugify.cpp +++ b/llvm/lib/CodeGen/MachineDebugify.cpp @@ -63,24 +63,9 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, // which cover a wide range of lines can help stress the debug info passes: // if we can't do that, fall back to using the local variable which precedes // all the others. - Function *DbgValF = M.getFunction("llvm.dbg.value"); - DbgValueInst *EarliestDVI = nullptr; DbgVariableRecord *EarliestDVR = nullptr; DenseMap<unsigned, DILocalVariable *> Line2Var; DIExpression *Expr = nullptr; - if (DbgValF) { - for (const Use &U : DbgValF->uses()) { - auto *DVI = dyn_cast<DbgValueInst>(U.getUser()); - if (!DVI || DVI->getFunction() != &F) - continue; - unsigned Line = DVI->getDebugLoc().getLine(); - assert(Line != 0 && "debugify should not insert line 0 locations"); - Line2Var[Line] = DVI->getVariable(); - if (!EarliestDVI || Line < EarliestDVI->getDebugLoc().getLine()) - EarliestDVI = DVI; - Expr = DVI->getExpression(); - } - } for (BasicBlock &BB : F) { for (Instruction &I : BB) { for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { @@ -125,8 +110,7 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI, unsigned Line = MI.getDebugLoc().getLine(); auto It = Line2Var.find(Line); if (It == Line2Var.end()) { - Line = EarliestDVI ? 
EarliestDVI->getDebugLoc().getLine() - : EarliestDVR->getDebugLoc().getLine(); + Line = EarliestDVR->getDebugLoc().getLine(); It = Line2Var.find(Line); assert(It != Line2Var.end()); } diff --git a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp index e7a4d6d61e21..116a919585d7 100644 --- a/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -45,3 +45,9 @@ MachineFunctionAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { return Result(std::move(MF)); } + +PreservedAnalyses FreeMachineFunctionPass::run(Function &F, + FunctionAnalysisManager &FAM) { + FAM.clearAnalysis<MachineFunctionAnalysis>(F); + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index a0e067a6323b..9ec5151a039b 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -2102,8 +2102,7 @@ public: } MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); + return MachineFunctionProperties().setNoVRegs(); } }; diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index 6c84cc2b6432..37a9e6203af7 100644 --- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/ProcessImplicitDefs.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -26,24 +27,15 @@ using namespace llvm; namespace { /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def /// for each use. Add isUndef marker to implicit_def defs and their uses. 
-class ProcessImplicitDefs : public MachineFunctionPass { - const TargetInstrInfo *TII = nullptr; - const TargetRegisterInfo *TRI = nullptr; - MachineRegisterInfo *MRI = nullptr; - - SmallSetVector<MachineInstr*, 16> WorkList; - - void processImplicitDef(MachineInstr *MI); - bool canTurnIntoImplicitDef(MachineInstr *MI); - +class ProcessImplicitDefsLegacy : public MachineFunctionPass { public: static char ID; - ProcessImplicitDefs() : MachineFunctionPass(ID) { - initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry()); + ProcessImplicitDefsLegacy() : MachineFunctionPass(ID) { + initializeProcessImplicitDefsLegacyPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &au) const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; @@ -51,15 +43,29 @@ public: return MachineFunctionProperties().setIsSSA(); } }; + +class ProcessImplicitDefs { + const TargetInstrInfo *TII = nullptr; + const TargetRegisterInfo *TRI = nullptr; + MachineRegisterInfo *MRI = nullptr; + + SmallSetVector<MachineInstr *, 16> WorkList; + + void processImplicitDef(MachineInstr *MI); + bool canTurnIntoImplicitDef(MachineInstr *MI); + +public: + bool run(MachineFunction &MF); +}; } // end anonymous namespace -char ProcessImplicitDefs::ID = 0; -char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID; +char ProcessImplicitDefsLegacy::ID = 0; +char &llvm::ProcessImplicitDefsID = ProcessImplicitDefsLegacy::ID; -INITIALIZE_PASS(ProcessImplicitDefs, DEBUG_TYPE, +INITIALIZE_PASS(ProcessImplicitDefsLegacy, DEBUG_TYPE, "Process Implicit Definitions", false, false) -void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { +void ProcessImplicitDefsLegacy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<AAResultsWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -132,9 +138,24 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { 
LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI); } +bool ProcessImplicitDefsLegacy::runOnMachineFunction(MachineFunction &MF) { + return ProcessImplicitDefs().run(MF); +} + +PreservedAnalyses +ProcessImplicitDefsPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + if (!ProcessImplicitDefs().run(MF)) + return PreservedAnalyses::all(); + + return getMachineFunctionPassPreservedAnalyses() + .preserveSet<CFGAnalyses>() + .preserve<AAManager>(); +} + /// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into /// <undef> operands. -bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { +bool ProcessImplicitDefs::run(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" << "********** Function: " << MF.getName() << '\n'); diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp index 30523611977f..9b23a6aac629 100644 --- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp +++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Module.h" -#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index da229f86f24c..996207034d07 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -475,8 +475,16 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, Instruction &RI, SplitBlockAndInsertIfThen(Cmp, &RI, /* Unreachable */ true, Weights, DTU); IRBuilder<> IRBFail(CheckTerm); // FIXME: respect -fsanitize-trap / -ftrap-function here? 
+ const char *StackChkFailName = + TL.getLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL); + if (!StackChkFailName) { + F.getContext().emitError( + "no libcall available for stackprotector check fail"); + return; + } + FunctionCallee StackChkFail = - F.getParent()->getOrInsertFunction("__stack_chk_fail", IRB.getVoidTy()); + F.getParent()->getOrInsertFunction(StackChkFailName, IRB.getVoidTy()); IRBFail.CreateCall(StackChkFail, {}); } @@ -791,8 +799,16 @@ bool SafeStack::run() { IRB.SetCurrentDebugLocation( DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP)); if (SafeStackUsePointerAddress) { + const char *SafestackPointerAddressName = + TL.getLibcallName(RTLIB::SAFESTACK_POINTER_ADDRESS); + if (!SafestackPointerAddressName) { + F.getContext().emitError( + "no libcall available for safestack pointer address"); + return false; + } + FunctionCallee Fn = F.getParent()->getOrInsertFunction( - "__safestack_pointer_address", IRB.getPtrTy(0)); + SafestackPointerAddressName, IRB.getPtrTy(0)); UnsafeStackPtr = IRB.CreateCall(Fn); } else { UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 231184587d68..fed5e7238433 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -609,6 +609,8 @@ namespace { SDValue foldABSToABD(SDNode *N, const SDLoc &DL); SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const SDLoc &DL); + SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True, + SDValue False, ISD::CondCode CC, const SDLoc &DL); SDValue unfoldMaskedMerge(SDNode *N); SDValue unfoldExtremeBitClearingToShifts(SDNode *N); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, @@ -859,7 +861,7 @@ namespace { auto LK = TLI.getTypeConversion(*DAG.getContext(), VT); return (LK.first == TargetLoweringBase::TypeLegal || LK.first == 
TargetLoweringBase::TypePromoteInteger) && - TLI.isOperationLegal(ISD::UMIN, LK.second); + TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second); } public: @@ -2606,9 +2608,7 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { return SDValue(); } - SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); - SelectOp->setFlags(BO->getFlags()); - return SelectOp; + return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF, BO->getFlags()); } static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, @@ -4095,6 +4095,26 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return N0; } + // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y)) + // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y)) + if (N1.hasOneUse() && hasUMin(VT)) { + SDValue Y; + if (sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y), + m_SpecificCondCode(ISD::SETULT)), + m_Zero(), m_Deferred(Y))) || + sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y), + m_SpecificCondCode(ISD::SETUGE)), + m_Deferred(Y), m_Zero())) || + sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y), + m_SpecificCondCode(ISD::SETULT)), + m_Zero(), m_Deferred(Y))) || + sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y), + m_SpecificCondCode(ISD::SETUGE)), + m_Deferred(Y), m_Zero()))) + return DAG.getNode(ISD::UMIN, DL, VT, N0, + DAG.getNode(ISD::SUB, DL, VT, N0, Y)); + } + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -4444,20 +4464,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B)))) return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT); - // (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y)) - // (sub x, (select (uge x, y), y, 0)) -> (umin x, (sub x, y)) - if (hasUMin(VT)) { - SDValue Y; - if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETULT)), - m_Zero(), m_Deferred(Y)))) || - sd_match(N1, 
m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETUGE)), - m_Deferred(Y), m_Zero())))) - return DAG.getNode(ISD::UMIN, DL, VT, N0, - DAG.getNode(ISD::SUB, DL, VT, N0, Y)); - } - return SDValue(); } @@ -7635,7 +7641,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue(GN0, 0).hasOneUse() && isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) && - TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) { + TLI.isVectorLoadExtDesirable(SDValue(N, 0))) { SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(), GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()}; @@ -9149,7 +9155,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value()) return std::nullopt; - unsigned BitWidth = Op.getValueSizeInBits(); + unsigned BitWidth = Op.getScalarValueSizeInBits(); if (BitWidth % 8 != 0) return std::nullopt; unsigned ByteWidth = BitWidth / 8; @@ -9248,7 +9254,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, if (!L->isSimple() || L->isIndexed()) return std::nullopt; - unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); + unsigned NarrowBitWidth = L->getMemoryVT().getScalarSizeInBits(); if (NarrowBitWidth % 8 != 0) return std::nullopt; uint64_t NarrowByteWidth = NarrowBitWidth / 8; @@ -12175,6 +12181,30 @@ SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True, return SDValue(); } +// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x) +// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C)) +SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True, + SDValue False, ISD::CondCode CC, + const SDLoc &DL) { + APInt C; + EVT VT = True.getValueType(); + if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) { + if (CC == ISD::SETUGT && LHS == False && + sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) { + SDValue AddC = DAG.getConstant(~C, DL, VT); + SDValue 
Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC); + return DAG.getNode(ISD::UMIN, DL, VT, Add, False); + } + if (CC == ISD::SETULT && LHS == True && + sd_match(False, m_Add(m_Specific(True), m_SpecificInt(-C)))) { + SDValue AddC = DAG.getConstant(-C, DL, VT); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC); + return DAG.getNode(ISD::UMIN, DL, VT, True, Add); + } + } + return SDValue(); +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12191,11 +12221,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return V; // select (not Cond), N1, N2 -> select Cond, N2, N1 - if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { - SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1); - SelectOp->setFlags(Flags); - return SelectOp; - } + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) + return DAG.getSelect(DL, VT, F, N2, N1, Flags); if (SDValue V = foldSelectOfConstants(N)) return V; @@ -12363,24 +12390,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x) // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C)) - APInt C; - if (sd_match(Cond1, m_ConstInt(C)) && hasUMin(VT)) { - if (CC == ISD::SETUGT && Cond0 == N2 && - sd_match(N1, m_Add(m_Specific(N2), m_SpecificInt(~C)))) { - // The resulting code relies on an unsigned wrap in ADD. - // Recreating ADD to drop possible nuw/nsw flags. - SDValue AddC = DAG.getConstant(~C, DL, VT); - SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N2, AddC); - return DAG.getNode(ISD::UMIN, DL, VT, Add, N2); - } - if (CC == ISD::SETULT && Cond0 == N1 && - sd_match(N2, m_Add(m_Specific(N1), m_SpecificInt(-C)))) { - // Ditto. 
- SDValue AddC = DAG.getConstant(-C, DL, VT); - SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, AddC); - return DAG.getNode(ISD::UMIN, DL, VT, N1, Add); - } - } + if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL)) + return UMin; } if (!VT.isVector()) @@ -13417,6 +13428,11 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { } } } + + // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x) + // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C)) + if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL)) + return UMin; } if (SimplifySelectOps(N, N1, N2)) @@ -13490,11 +13506,9 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { // Fold to a simpler select_cc if (SCC.getOpcode() == ISD::SETCC) { - SDValue SelectOp = - DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(), SCC.getOperand(0), - SCC.getOperand(1), N2, N3, SCC.getOperand(2)); - SelectOp->setFlags(SCC->getFlags()); - return SelectOp; + return DAG.getNode(ISD::SELECT_CC, DL, N2.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), N2, N3, + SCC.getOperand(2), SCC->getFlags()); } } @@ -15731,7 +15745,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x) if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) { if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() && - TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) { + TLI.isVectorLoadExtDesirable(SDValue(N, 0))) { SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(), GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()}; @@ -16758,12 +16772,8 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false, /*Depth*/ 1)) continue; - bool HadMaybePoisonOperands = !MaybePoisonOperands.empty(); - bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second; - if (IsNewMaybePoisonOperand) + if (MaybePoisonOperands.insert(Op).second) 
MaybePoisonOperandNumbers.push_back(OpNo); - if (!HadMaybePoisonOperands) - continue; } // NOTE: the whole op may be not guaranteed to not be undef or poison because // it could create undef or poison due to it's poison-generating flags. @@ -18713,6 +18723,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI)) return FTrunc; + // fold (sint_to_fp (trunc nsw x)) -> (sint_to_fp x) + if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoSignedWrap() && + TLI.isTypeDesirableForOp(ISD::SINT_TO_FP, + N0.getOperand(0).getValueType())) + return DAG.getNode(ISD::SINT_TO_FP, DL, VT, N0.getOperand(0)); + return SDValue(); } @@ -18750,6 +18766,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { if (SDValue FTrunc = foldFPToIntToFP(N, DL, DAG, TLI)) return FTrunc; + // fold (uint_to_fp (trunc nuw x)) -> (uint_to_fp x) + if (N0.getOpcode() == ISD::TRUNCATE && N0->getFlags().hasNoUnsignedWrap() && + TLI.isTypeDesirableForOp(ISD::UINT_TO_FP, + N0.getOperand(0).getValueType())) + return DAG.getNode(ISD::UINT_TO_FP, DL, VT, N0.getOperand(0)); + return SDValue(); } @@ -28194,14 +28216,16 @@ SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) { TLI.preferScalarizeSplat(N)) { EVT SrcVT = N0.getValueType(); EVT SrcEltVT = SrcVT.getVectorElementType(); - SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL); - SDValue Elt = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC); - SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags()); - if (VT.isScalableVector()) - return DAG.getSplatVector(VT, DL, ScalarBO); - SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO); - return DAG.getBuildVector(VT, DL, Ops); + if (!LegalTypes || TLI.isTypeLegal(SrcEltVT)) { + SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL); + SDValue Elt = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC); + SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags()); + if 
(VT.isScalableVector()) + return DAG.getSplatVector(VT, DL, ScalarBO); + SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO); + return DAG.getBuildVector(VT, DL, Ops); + } } return SDValue(); @@ -28343,10 +28367,8 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4), Flags); AddToWorklist(SETCC.getNode()); - SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, - SCC.getOperand(2), SCC.getOperand(3)); - SelectNode->setFlags(Flags); - return SelectNode; + return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, + SCC.getOperand(2), SCC.getOperand(3), Flags); } return SCC; @@ -28647,9 +28669,9 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { SDValue N10 = N1.getOperand(0); SDValue N20 = N2.getOperand(0); SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20); - SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1)); - NewBinOp->setFlags(N1->getFlags()); - NewBinOp->intersectFlagsWith(N2->getFlags()); + SDNodeFlags Flags = N1->getFlags() & N2->getFlags(); + SDValue NewBinOp = + DAG.getNode(BinOpc, DL, OpVTs, {NewSel, N1.getOperand(1)}, Flags); return SDValue(NewBinOp.getNode(), N1.getResNo()); } @@ -28661,10 +28683,9 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { // Second op VT might be different (e.g. 
shift amount type) if (N11.getValueType() == N21.getValueType()) { SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21); + SDNodeFlags Flags = N1->getFlags() & N2->getFlags(); SDValue NewBinOp = - DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel); - NewBinOp->setFlags(N1->getFlags()); - NewBinOp->intersectFlagsWith(N2->getFlags()); + DAG.getNode(BinOpc, DL, OpVTs, {N1.getOperand(0), NewSel}, Flags); return SDValue(NewBinOp.getNode(), N1.getResNo()); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 03d3e8eab35d..85efb1bd8aed 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -784,7 +784,7 @@ MachineInstr * InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, VRBaseMapType &VRBaseMap) { MDNode *Var = SD->getVariable(); - const DIExpression *Expr = (DIExpression *)SD->getExpression(); + const DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 528136a55f14..7266940c94bf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -140,12 +140,19 @@ private: RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results); - SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, - RTLIB::Libcall Call_I8, - RTLIB::Libcall Call_I16, - RTLIB::Libcall Call_I32, - RTLIB::Libcall Call_I64, - RTLIB::Libcall Call_I128); + + void + ExpandFastFPLibCall(SDNode *Node, bool IsFast, + std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32, + std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64, + std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80, + std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128, + std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128, + 
SmallVectorImpl<SDValue> &Results); + + SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); void ExpandArgFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, @@ -2228,6 +2235,37 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, ExpandFPLibCall(Node, LC, Results); } +void SelectionDAGLegalize::ExpandFastFPLibCall( + SDNode *Node, bool IsFast, + std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32, + std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64, + std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80, + std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128, + std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128, + SmallVectorImpl<SDValue> &Results) { + + EVT VT = Node->getSimpleValueType(0); + + RTLIB::Libcall LC; + + // FIXME: Probably should define fast to respect nan/inf and only be + // approximate functions. + + if (IsFast) { + LC = RTLIB::getFPLibCall(VT, Call_F32.first, Call_F64.first, Call_F80.first, + Call_F128.first, Call_PPCF128.first); + } + + if (!IsFast || TLI.getLibcallImpl(LC) == RTLIB::Unsupported) { + // Fall back if we don't have a fast implementation. + LC = RTLIB::getFPLibCall(VT, Call_F32.second, Call_F64.second, + Call_F80.second, Call_F128.second, + Call_PPCF128.second); + } + + ExpandFPLibCall(Node, LC, Results); +} + SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, @@ -4514,6 +4552,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { return true; } +/// Return if we can use the FAST_* variant of a math libcall for the node. +/// FIXME: This is just guessing, we probably should have unique specific sets +/// flags required per libcall. 
+static bool canUseFastMathLibcall(const SDNode *Node) { + // FIXME: Probably should define fast to respect nan/inf and only be + // approximate functions. + + SDNodeFlags Flags = Node->getFlags(); + return Flags.hasApproximateFuncs() && Flags.hasNoNaNs() && + Flags.hasNoInfs() && Flags.hasNoSignedZeros(); +} + void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n"); SmallVector<SDValue, 8> Results; @@ -4634,11 +4684,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::FMAXIMUM_NUM_PPCF128, Results); break; case ISD::FSQRT: - case ISD::STRICT_FSQRT: - ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128, Results); + case ISD::STRICT_FSQRT: { + // FIXME: Probably should define fast to respect nan/inf and only be + // approximate functions. + ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node), + {RTLIB::FAST_SQRT_F32, RTLIB::SQRT_F32}, + {RTLIB::FAST_SQRT_F64, RTLIB::SQRT_F64}, + {RTLIB::FAST_SQRT_F80, RTLIB::SQRT_F80}, + {RTLIB::FAST_SQRT_F128, RTLIB::SQRT_F128}, + {RTLIB::FAST_SQRT_PPCF128, RTLIB::SQRT_PPCF128}, + Results); break; + } case ISD::FCBRT: ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64, RTLIB::CBRT_F80, RTLIB::CBRT_F128, @@ -4875,11 +4932,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::LLRINT_PPCF128, Results); break; case ISD::FDIV: - case ISD::STRICT_FDIV: - ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, - RTLIB::DIV_F80, RTLIB::DIV_F128, - RTLIB::DIV_PPCF128, Results); + case ISD::STRICT_FDIV: { + ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node), + {RTLIB::FAST_DIV_F32, RTLIB::DIV_F32}, + {RTLIB::FAST_DIV_F64, RTLIB::DIV_F64}, + {RTLIB::FAST_DIV_F80, RTLIB::DIV_F80}, + {RTLIB::FAST_DIV_F128, RTLIB::DIV_F128}, + {RTLIB::FAST_DIV_PPCF128, RTLIB::DIV_PPCF128}, Results); break; + } case ISD::FREM: case ISD::STRICT_FREM: ExpandFPLibCall(Node, 
RTLIB::REM_F32, RTLIB::REM_F64, @@ -4893,17 +4954,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::FMA_PPCF128, Results); break; case ISD::FADD: - case ISD::STRICT_FADD: - ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_F128, - RTLIB::ADD_PPCF128, Results); + case ISD::STRICT_FADD: { + ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node), + {RTLIB::FAST_ADD_F32, RTLIB::ADD_F32}, + {RTLIB::FAST_ADD_F64, RTLIB::ADD_F64}, + {RTLIB::FAST_ADD_F80, RTLIB::ADD_F80}, + {RTLIB::FAST_ADD_F128, RTLIB::ADD_F128}, + {RTLIB::FAST_ADD_PPCF128, RTLIB::ADD_PPCF128}, Results); break; + } case ISD::FMUL: - case ISD::STRICT_FMUL: - ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, - RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128, Results); + case ISD::STRICT_FMUL: { + ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node), + {RTLIB::FAST_MUL_F32, RTLIB::MUL_F32}, + {RTLIB::FAST_MUL_F64, RTLIB::MUL_F64}, + {RTLIB::FAST_MUL_F80, RTLIB::MUL_F80}, + {RTLIB::FAST_MUL_F128, RTLIB::MUL_F128}, + {RTLIB::FAST_MUL_PPCF128, RTLIB::MUL_PPCF128}, Results); break; + } case ISD::FP16_TO_FP: if (Node->getValueType(0) == MVT::f32) { Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first); @@ -5076,11 +5145,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; } case ISD::FSUB: - case ISD::STRICT_FSUB: - ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, - RTLIB::SUB_F80, RTLIB::SUB_F128, - RTLIB::SUB_PPCF128, Results); + case ISD::STRICT_FSUB: { + ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node), + {RTLIB::FAST_SUB_F32, RTLIB::SUB_F32}, + {RTLIB::FAST_SUB_F64, RTLIB::SUB_F64}, + {RTLIB::FAST_SUB_F80, RTLIB::SUB_F80}, + {RTLIB::FAST_SUB_F128, RTLIB::SUB_F128}, + {RTLIB::FAST_SUB_PPCF128, RTLIB::SUB_PPCF128}, Results); break; + } case ISD::SREM: Results.push_back(ExpandIntLibCall(Node, true, RTLIB::SREM_I8, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 58be4fb7e833..245811587e3b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5544,6 +5544,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::USUBSAT: case ISD::MULHU: case ISD::MULHS: + case ISD::ABDU: + case ISD::ABDS: case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -5569,6 +5571,12 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::BUILD_VECTOR: case ISD::BUILD_PAIR: case ISD::SPLAT_VECTOR: + case ISD::FABS: + return false; + + case ISD::ABS: + // ISD::ABS defines abs(INT_MIN) -> INT_MIN and never generates poison. + // Different to Intrinsic::abs. return false; case ISD::ADDC: @@ -5620,6 +5628,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: + case ISD::FCOPYSIGN: // No poison except from flags (which is handled above) return false; @@ -6744,7 +6753,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, return SDValue(); int64_t Offset = C2->getSExtValue(); switch (Opcode) { - case ISD::ADD: break; + case ISD::ADD: + case ISD::PTRADD: + break; case ISD::SUB: Offset = -uint64_t(Offset); break; default: return SDValue(); } @@ -13866,6 +13877,8 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) { return; } + const SDNode *EntrySDN = getEntryNode().getNode(); + // We need to copy NodeExtraInfo to all _new_ nodes that are being introduced // through the replacement of From with To. 
Otherwise, replacements of a node // (From) with more complex nodes (To and its operands) may result in lost @@ -13897,9 +13910,14 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) { return true; if (!Visited.insert(N).second) return true; - if (getEntryNode().getNode() == N) + if (EntrySDN == N) return false; for (const SDValue &Op : N->op_values()) { + if (N == To && Op.getNode() == EntrySDN) { + // Special case: New node's operand is the entry node; just need to + // copy extra info to new node. + break; + } if (!Self(Self, Op.getNode())) return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ecd1ff87e7fb..01e53123ea7e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -842,6 +843,23 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } } +static void failForInvalidBundles(const CallBase &I, StringRef Name, + ArrayRef<uint32_t> AllowedBundles) { + if (I.hasOperandBundlesOtherThan(AllowedBundles)) { + ListSeparator LS; + std::string Error; + raw_string_ostream OS(Error); + for (unsigned i = 0, e = I.getNumOperandBundles(); i != e; ++i) { + OperandBundleUse U = I.getOperandBundleAt(i); + if (!is_contained(AllowedBundles, U.getTagID())) + OS << LS << U.getTagName(); + } + reportFatalUsageError( + Twine("cannot lower ", Name) + .concat(Twine(" with arbitrary operand bundles: ", Error))); + } +} + RegsForValue::RegsForValue(const SmallVector<Register, 4> &regs, MVT regvt, EVT valuevt, std::optional<CallingConv::ID> CC) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), @@ -3351,13 +3369,12 @@ void 
SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { // Deopt and ptrauth bundles are lowered in helper functions, and we don't // have to do anything here to lower funclet bundles. - if (I.hasOperandBundlesOtherThan( - {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition, - LLVMContext::OB_gc_live, LLVMContext::OB_funclet, - LLVMContext::OB_cfguardtarget, LLVMContext::OB_ptrauth, - LLVMContext::OB_clang_arc_attachedcall})) - reportFatalUsageError( - "cannot lower invokes with arbitrary operand bundles!"); + failForInvalidBundles(I, "invokes", + {LLVMContext::OB_deopt, LLVMContext::OB_gc_transition, + LLVMContext::OB_gc_live, LLVMContext::OB_funclet, + LLVMContext::OB_cfguardtarget, LLVMContext::OB_ptrauth, + LLVMContext::OB_clang_arc_attachedcall, + LLVMContext::OB_kcfi}); const Value *Callee(I.getCalledOperand()); const Function *Fn = dyn_cast<Function>(Callee); @@ -3457,10 +3474,8 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. - if (I.hasOperandBundlesOtherThan( - {LLVMContext::OB_deopt, LLVMContext::OB_funclet})) - reportFatalUsageError( - "cannot lower callbrs with arbitrary operand bundles!"); + failForInvalidBundles(I, "callbrs", + {LLVMContext::OB_deopt, LLVMContext::OB_funclet}); assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr"); visitInlineAsm(I); @@ -9568,12 +9583,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. // CFGuardTarget bundles are lowered in LowerCallTo. 
- if (I.hasOperandBundlesOtherThan( - {LLVMContext::OB_deopt, LLVMContext::OB_funclet, - LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated, - LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi, - LLVMContext::OB_convergencectrl})) - reportFatalUsageError("cannot lower calls with arbitrary operand bundles!"); + failForInvalidBundles( + I, "calls", + {LLVMContext::OB_deopt, LLVMContext::OB_funclet, + LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated, + LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi, + LLVMContext::OB_convergencectrl}); SDValue Callee = getValue(I.getCalledOperand()); diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 5f866eea7d4e..b79911bcf3c4 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -76,7 +76,7 @@ static bool InsertStackProtectors(const TargetMachine *TM, Function *F, /// CreateFailBB - Create a basic block to jump to when the stack protector /// check fails. -static BasicBlock *CreateFailBB(Function *F, const Triple &Trip); +static BasicBlock *CreateFailBB(Function *F, const TargetLowering &TLI); bool SSPLayoutInfo::shouldEmitSDCheck(const BasicBlock &BB) const { return HasPrologue && !HasIRCheck && isa<ReturnInst>(BB.getTerminator()); @@ -626,7 +626,7 @@ bool InsertStackProtectors(const TargetMachine *TM, Function *F, // If we're instrumenting a block with a tail call, the check has to be // inserted before the call rather than between it and the return. - Instruction *Prev = CheckLoc->getPrevNonDebugInstruction(); + Instruction *Prev = CheckLoc->getPrevNode(); if (auto *CI = dyn_cast_if_present<CallInst>(Prev)) if (CI->isTailCall() && isInTailCallPosition(*CI, *TM)) CheckLoc = Prev; @@ -673,7 +673,7 @@ bool InsertStackProtectors(const TargetMachine *TM, Function *F, // merge pass will merge together all of the various BB into one including // fail BB generated by the stack protector pseudo instruction. 
if (!FailBB) - FailBB = CreateFailBB(F, TM->getTargetTriple()); + FailBB = CreateFailBB(F, *TLI); IRBuilder<> B(CheckLoc); Value *Guard = getStackGuard(TLI, M, B); @@ -706,7 +706,7 @@ bool InsertStackProtectors(const TargetMachine *TM, Function *F, return HasPrologue; } -BasicBlock *CreateFailBB(Function *F, const Triple &Trip) { +BasicBlock *CreateFailBB(Function *F, const TargetLowering &TLI) { auto *M = F->getParent(); LLVMContext &Context = F->getContext(); BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F); @@ -716,17 +716,25 @@ BasicBlock *CreateFailBB(Function *F, const Triple &Trip) { DILocation::get(Context, 0, 0, F->getSubprogram())); FunctionCallee StackChkFail; SmallVector<Value *, 1> Args; - if (Trip.isOSOpenBSD()) { - StackChkFail = M->getOrInsertFunction("__stack_smash_handler", - Type::getVoidTy(Context), + + if (const char *ChkFailName = + TLI.getLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL)) { + StackChkFail = + M->getOrInsertFunction(ChkFailName, Type::getVoidTy(Context)); + } else if (const char *SSHName = + TLI.getLibcallName(RTLIB::STACK_SMASH_HANDLER)) { + StackChkFail = M->getOrInsertFunction(SSHName, Type::getVoidTy(Context), PointerType::getUnqual(Context)); Args.push_back(B.CreateGlobalString(F->getName(), "SSH")); } else { - StackChkFail = - M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context)); + Context.emitError("no libcall available for stack protector"); } - cast<Function>(StackChkFail.getCallee())->addFnAttr(Attribute::NoReturn); - B.CreateCall(StackChkFail, Args); + + if (StackChkFail) { + CallInst *Call = B.CreateCall(StackChkFail, Args); + Call->addFnAttr(Attribute::NoReturn); + } + B.CreateUnreachable(); return FailBB; } diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 518a9339d8d1..18d6bbc0ff2b 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -792,12 +792,18 @@ MachineInstr 
*TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, const MachineOperand &MO = MI.getOperand(1 - Ops[0]); MachineBasicBlock::iterator Pos = MI; - - if (Flags == MachineMemOperand::MOStore) - storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI, - Register()); - else + if (Flags == MachineMemOperand::MOStore) { + if (MO.isUndef()) { + // If this is an undef copy, we do not need to bother with inserting spill + // code. + BuildMI(*MBB, Pos, MI.getDebugLoc(), get(TargetOpcode::KILL)).add(MO); + } else { + storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI, + Register()); + } + } else loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI, Register()); + return &*--Pos; } diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 6feeb19bb858..d4a34555ed82 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1965,15 +1965,26 @@ TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB, Value * TargetLoweringBase::getSafeStackPointerLocation(IRBuilderBase &IRB) const { + // FIXME: Can this triple check be replaced with SAFESTACK_POINTER_ADDRESS + // being available? if (!TM.getTargetTriple().isAndroid()) return getDefaultSafeStackPointerLocation(IRB, true); - // Android provides a libc function to retrieve the address of the current - // thread's unsafe stack pointer. Module *M = IRB.GetInsertBlock()->getParent()->getParent(); auto *PtrTy = PointerType::getUnqual(M->getContext()); + + const char *SafestackPointerAddressName = + getLibcallName(RTLIB::SAFESTACK_POINTER_ADDRESS); + if (!SafestackPointerAddressName) { + M->getContext().emitError( + "no libcall available for safestack pointer address"); + return PoisonValue::get(PtrTy); + } + + // Android provides a libc function to retrieve the address of the current + // thread's unsafe stack pointer. 
FunctionCallee Fn = - M->getOrInsertFunction("__safestack_pointer_address", PtrTy); + M->getOrInsertFunction(SafestackPointerAddressName, PtrTy); return IRB.CreateCall(Fn); } diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 5454cd475f5e..7e501a9e2aa4 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -322,28 +322,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, } } - if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) { - // Emit a descriptor for every function including functions that have an - // available external linkage. We may not want this for imported functions - // that has code in another thinLTO module but we don't have a good way to - // tell them apart from inline functions defined in header files. Therefore - // we put each descriptor in a separate comdat section and rely on the - // linker to deduplicate. - for (const auto *Operand : FuncInfo->operands()) { - const auto *MD = cast<MDNode>(Operand); - auto *GUID = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0)); - auto *Hash = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1)); - auto *Name = cast<MDString>(MD->getOperand(2)); - auto *S = C.getObjectFileInfo()->getPseudoProbeDescSection( - TM->getFunctionSections() ? 
Name->getString() : StringRef()); - - Streamer.switchSection(S); - Streamer.emitInt64(GUID->getZExtValue()); - Streamer.emitInt64(Hash->getZExtValue()); - Streamer.emitULEB128IntValue(Name->getString().size()); - Streamer.emitBytes(Name->getString()); - } - } + emitPseudoProbeDescMetadata(Streamer, M); if (NamedMDNode *LLVMStats = M.getNamedMetadata("llvm.stats")) { // Emit the metadata for llvm statistics into .llvm_stats section, which is diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp index 3d6ccba894e9..99ba893d6f09 100644 --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -301,6 +301,8 @@ bool VirtRegRewriterLegacy::runOnMachineFunction(MachineFunction &MF) { PreservedAnalyses VirtRegRewriterPass::run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { + MFPropsModifier _(*this, MF); + VirtRegMap &VRM = MFAM.getResult<VirtRegMapAnalysis>(MF); LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF); LiveRegMatrix &LRM = MFAM.getResult<LiveRegMatrixAnalysis>(MF); |
