diff options
| author | Michael Kruse <llvm-project@meinersbur.de> | 2025-01-03 10:22:51 +0100 |
|---|---|---|
| committer | Michael Kruse <llvm-project@meinersbur.de> | 2025-01-03 10:22:51 +0100 |
| commit | 38500d63e14ce340236840f60d356cdefb56a52c (patch) | |
| tree | 17edbec446ce9b50d2f215a483b83afb293a635d /llvm/lib/CodeGen | |
| parent | 1a3d5daaef7a6a63448a497da3eff7fc9e23df26 (diff) | |
| parent | 27f30029741ecf023baece7b3dde1ff9011ffefc (diff) | |
Merge branch 'main' into users/meinersbur/flang_runtime_split-headers (branch: users/meinersbur/flang_runtime_split-headers)
Diffstat (limited to 'llvm/lib/CodeGen')
98 files changed, 2110 insertions, 1474 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3072edc5088e..7bd3fb33b47d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -503,13 +503,7 @@ bool AsmPrinter::doInitialization(Module &M) { // don't, this at least helps the user find where a global came from. if (MAI->hasSingleParameterDotFile()) { // .file "foo.c" - - SmallString<128> FileName; - if (MAI->hasBasenameOnlyForFileDirective()) - FileName = llvm::sys::path::filename(M.getSourceFileName()); - else - FileName = M.getSourceFileName(); - if (MAI->hasFourStringsDotFile()) { + if (MAI->isAIX()) { const char VerStr[] = #ifdef PACKAGE_VENDOR PACKAGE_VENDOR " " @@ -520,9 +514,10 @@ bool AsmPrinter::doInitialization(Module &M) { #endif ; // TODO: Add timestamp and description. - OutStreamer->emitFileDirective(FileName, VerStr, "", ""); + OutStreamer->emitFileDirective(M.getSourceFileName(), VerStr, "", ""); } else { - OutStreamer->emitFileDirective(FileName); + OutStreamer->emitFileDirective( + llvm::sys::path::filename(M.getSourceFileName())); } } @@ -531,7 +526,8 @@ bool AsmPrinter::doInitialization(Module &M) { if (TM.getTargetTriple().isOSBinFormatXCOFF()) { emitModuleCommandLines(M); // Now we can generate section information. - OutStreamer->initSections(false, *TM.getMCSubtargetInfo()); + OutStreamer->switchSection( + OutContext.getObjectFileInfo()->getTextSection()); // To work around an AIX assembler and/or linker bug, generate // a rename for the default text-section symbol name. 
This call has @@ -966,11 +962,10 @@ void AsmPrinter::emitFunctionHeader() { MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM)); OutStreamer->switchSection(MF->getSection()); - if (!MAI->hasVisibilityOnlyWithLinkage()) - emitVisibility(CurrentFnSym, F.getVisibility()); - - if (MAI->needsFunctionDescriptors()) + if (MAI->isAIX()) emitLinkage(&F, CurrentFnDescSym); + else + emitVisibility(CurrentFnSym, F.getVisibility()); emitLinkage(&F, CurrentFnSym); if (MAI->hasFunctionAlignment()) @@ -1030,7 +1025,7 @@ void AsmPrinter::emitFunctionHeader() { // to emit their specific function descriptor. Right now it is only used by // the AIX target. The PowerPC 64-bit V1 ELF target also uses function // descriptors and should be converted to use this hook as well. - if (MAI->needsFunctionDescriptors()) + if (MAI->isAIX()) emitFunctionDescriptor(); // Emit the CurrentFnSym. This is a virtual function to allow targets to do @@ -1791,7 +1786,7 @@ void AsmPrinter::emitFunctionBody() { MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr; if (!MDT) { OwnedMDT = std::make_unique<MachineDominatorTree>(); - OwnedMDT->getBase().recalculate(*MF); + OwnedMDT->recalculate(*MF); MDT = OwnedMDT.get(); } @@ -1800,7 +1795,7 @@ void AsmPrinter::emitFunctionBody() { MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr; if (!MLI) { OwnedMLI = std::make_unique<MachineLoopInfo>(); - OwnedMLI->analyze(MDT->getBase()); + OwnedMLI->analyze(*MDT); MLI = OwnedMLI.get(); } } @@ -2233,9 +2228,6 @@ void AsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) { // point, all the extra label is emitted, we just have to emit linkage for // those labels. if (TM.getTargetTriple().isOSBinFormatXCOFF()) { - assert(MAI->hasVisibilityOnlyWithLinkage() && - "Visibility should be handled with emitLinkage() on AIX."); - // Linkage for alias of global variable has been emitted. 
if (isa<GlobalVariable>(GA.getAliaseeObject())) return; @@ -2406,12 +2398,53 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) { OutStreamer->emitBinaryData(Buf); } +static void tagGlobalDefinition(Module &M, GlobalVariable *G) { + Constant *Initializer = G->getInitializer(); + uint64_t SizeInBytes = + M.getDataLayout().getTypeAllocSize(Initializer->getType()); + + uint64_t NewSize = alignTo(SizeInBytes, 16); + if (SizeInBytes != NewSize) { + // Pad the initializer out to the next multiple of 16 bytes. + llvm::SmallVector<uint8_t> Init(NewSize - SizeInBytes, 0); + Constant *Padding = ConstantDataArray::get(M.getContext(), Init); + Initializer = ConstantStruct::getAnon({Initializer, Padding}); + auto *NewGV = new GlobalVariable( + M, Initializer->getType(), G->isConstant(), G->getLinkage(), + Initializer, "", G, G->getThreadLocalMode(), G->getAddressSpace()); + NewGV->copyAttributesFrom(G); + NewGV->setComdat(G->getComdat()); + NewGV->copyMetadata(G, 0); + + NewGV->takeName(G); + G->replaceAllUsesWith(NewGV); + G->eraseFromParent(); + G = NewGV; + } + + if (G->getAlign().valueOrOne() < 16) + G->setAlignment(Align(16)); + + // Ensure that tagged globals don't get merged by ICF - as they should have + // different tags at runtime. + G->setUnnamedAddr(GlobalValue::UnnamedAddr::None); +} + bool AsmPrinter::doFinalization(Module &M) { // Set the MachineFunction to nullptr so that we can catch attempted // accesses to MF specific features at the module level and so that // we can conditionalize accesses based on whether or not it is nullptr. MF = nullptr; + std::vector<GlobalVariable *> GlobalsToTag; + for (GlobalVariable &G : M.globals()) { + if (G.isDeclaration() || !G.isTagged()) + continue; + GlobalsToTag.push_back(&G); + } + for (GlobalVariable *G : GlobalsToTag) + tagGlobalDefinition(M, G); + // Gather all GOT equivalent globals in the module. 
We really need two // passes over the globals: one to compute and another to avoid its emission // in EmitGlobalVariable, otherwise we would not be able to handle cases @@ -2688,7 +2721,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { HasNoSplitStack = true; // Get the function symbol. - if (!MAI->needsFunctionDescriptors()) { + if (!MAI->isAIX()) { CurrentFnSym = getSymbol(&MF.getFunction()); } else { assert(TM.getTargetTriple().isOSAIX() && @@ -3602,10 +3635,11 @@ static void emitGlobalConstantArray(const DataLayout &DL, static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP); -static void emitGlobalConstantVector(const DataLayout &DL, - const ConstantVector *CV, AsmPrinter &AP, +static void emitGlobalConstantVector(const DataLayout &DL, const Constant *CV, + AsmPrinter &AP, AsmPrinter::AliasMapTy *AliasList) { - Type *ElementType = CV->getType()->getElementType(); + auto *VTy = cast<FixedVectorType>(CV->getType()); + Type *ElementType = VTy->getElementType(); uint64_t ElementSizeInBits = DL.getTypeSizeInBits(ElementType); uint64_t ElementAllocSizeInBits = DL.getTypeAllocSizeInBits(ElementType); uint64_t EmittedSize; @@ -3618,7 +3652,7 @@ static void emitGlobalConstantVector(const DataLayout &DL, Type *IntT = IntegerType::get(CV->getContext(), DL.getTypeSizeInBits(CV->getType())); ConstantInt *CI = dyn_cast_or_null<ConstantInt>(ConstantFoldConstant( - ConstantExpr::getBitCast(const_cast<ConstantVector *>(CV), IntT), DL)); + ConstantExpr::getBitCast(const_cast<Constant *>(CV), IntT), DL)); if (!CI) { report_fatal_error( "Cannot lower vector global with unusual element type"); @@ -3627,12 +3661,11 @@ static void emitGlobalConstantVector(const DataLayout &DL, emitGlobalConstantLargeInt(CI, AP); EmittedSize = DL.getTypeStoreSize(CV->getType()); } else { - for (unsigned I = 0, E = CV->getType()->getNumElements(); I != E; ++I) { + for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) { emitGlobalAliasInline(AP, 
DL.getTypeAllocSize(CV->getType()) * I, AliasList); - emitGlobalConstantImpl(DL, CV->getOperand(I), AP); + emitGlobalConstantImpl(DL, CV->getAggregateElement(I), AP); } - EmittedSize = - DL.getTypeAllocSize(ElementType) * CV->getType()->getNumElements(); + EmittedSize = DL.getTypeAllocSize(ElementType) * VTy->getNumElements(); } unsigned Size = DL.getTypeAllocSize(CV->getType()); @@ -3902,8 +3935,10 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, return AP.OutStreamer->emitZeros(Size); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - const uint64_t StoreSize = DL.getTypeStoreSize(CV->getType()); + if (isa<VectorType>(CV->getType())) + return emitGlobalConstantVector(DL, CV, AP, AliasList); + const uint64_t StoreSize = DL.getTypeStoreSize(CV->getType()); if (StoreSize <= 8) { if (AP.isVerbose()) AP.OutStreamer->getCommentOS() @@ -3920,8 +3955,12 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, return; } - if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) - return emitGlobalConstantFP(CFP, AP); + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { + if (isa<VectorType>(CV->getType())) + return emitGlobalConstantVector(DL, CV, AP, AliasList); + else + return emitGlobalConstantFP(CFP, AP); + } if (isa<ConstantPointerNull>(CV)) { AP.OutStreamer->emitIntValue(0, Size); @@ -3953,8 +3992,8 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, } } - if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return emitGlobalConstantVector(DL, V, AP, AliasList); + if (isa<ConstantVector>(CV)) + return emitGlobalConstantVector(DL, CV, AP, AliasList); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 6fe8d0e0af99..59fc4cfc23e1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -153,7 +153,7 @@ static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI, AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant(); // FIXME: Should this happen for `asm inteldialect` as well? - if (!InputIsIntelDialect && MAI->getEmitGNUAsmStartIndentationMarker()) + if (!InputIsIntelDialect && !MAI->isHLASM()) OS << '\t'; while (*LastEmitted) { @@ -312,10 +312,10 @@ static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } } if (Error) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "invalid operand in inline asm: '" << AsmStr << "'"; - MMI->getModule()->getContext().emitError(LocCookie, msg); + const Function &Fn = MI->getMF()->getFunction(); + Fn.getContext().diagnose(DiagnosticInfoInlineAsm( + LocCookie, + "invalid operand in inline asm: '" + Twine(AsmStr) + "'")); } } break; @@ -347,20 +347,11 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { // enabled, so we use emitRawComment. OutStreamer->emitRawComment(MAI->getInlineAsmStart()); - // Get the !srcloc metadata node if we have it, and decode the loc cookie from - // it. - uint64_t LocCookie = 0; - const MDNode *LocMD = nullptr; - for (const MachineOperand &MO : llvm::reverse(MI->operands())) { - if (MO.isMetadata() && (LocMD = MO.getMetadata()) && - LocMD->getNumOperands() != 0) { - if (const ConstantInt *CI = - mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) { - LocCookie = CI->getZExtValue(); - break; - } - } - } + const MDNode *LocMD = MI->getLocCookieMD(); + uint64_t LocCookie = + LocMD + ? mdconst::extract<ConstantInt>(LocMD->getOperand(0))->getZExtValue() + : 0; // Emit the inline asm to a temporary string so we can emit it through // EmitInlineAsm. 
@@ -397,20 +388,23 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { Msg += LS; Msg += TRI->getRegAsmName(RR); } + + const Function &Fn = MF->getFunction(); const char *Note = "Reserved registers on the clobber list may not be " "preserved across the asm statement, and clobbering them may " "lead to undefined behaviour."; - MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm( - LocCookie, Msg, DiagnosticSeverity::DS_Warning)); - MMI->getModule()->getContext().diagnose( + LLVMContext &Ctx = Fn.getContext(); + Ctx.diagnose(DiagnosticInfoInlineAsm(LocCookie, Msg, + DiagnosticSeverity::DS_Warning)); + Ctx.diagnose( DiagnosticInfoInlineAsm(LocCookie, Note, DiagnosticSeverity::DS_Note)); for (const Register RR : RestrRegs) { if (std::optional<std::string> reason = TRI->explainReservedReg(*MF, RR)) { - MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm( - LocCookie, *reason, DiagnosticSeverity::DS_Note)); + Ctx.diagnose(DiagnosticInfoInlineAsm(LocCookie, *reason, + DiagnosticSeverity::DS_Note)); } } } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 087ee02a7f2b..4fac4bbc9847 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -50,7 +50,8 @@ void DwarfCFIException::endModule() { // Emit indirect reference table for all used personality functions for (const GlobalValue *Personality : Personalities) { MCSymbol *Sym = Asm->getSymbol(Personality); - TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym); + TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym, + Asm->MMI); } Personalities.clear(); } diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index f8de13650680..1dd7cccd9011 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -891,19 +891,21 @@ bool 
BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, unsigned MinCommonTailLength) { bool MadeChange = false; - LLVM_DEBUG( - dbgs() << "\nTryTailMergeBlocks: "; - for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs() - << printMBBReference(*MergePotentials[i].getBlock()) - << (i == e - 1 ? "" : ", "); - dbgs() << "\n"; if (SuccBB) { - dbgs() << " with successor " << printMBBReference(*SuccBB) << '\n'; - if (PredBB) - dbgs() << " which has fall-through from " - << printMBBReference(*PredBB) << "\n"; - } dbgs() << "Looking for common tails of at least " - << MinCommonTailLength << " instruction" - << (MinCommonTailLength == 1 ? "" : "s") << '\n';); + LLVM_DEBUG({ + dbgs() << "\nTryTailMergeBlocks: "; + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + dbgs() << printMBBReference(*MergePotentials[i].getBlock()) + << (i == e - 1 ? "" : ", "); + dbgs() << "\n"; + if (SuccBB) { + dbgs() << " with successor " << printMBBReference(*SuccBB) << '\n'; + if (PredBB) + dbgs() << " which has fall-through from " << printMBBReference(*PredBB) + << "\n"; + } + dbgs() << "Looking for common tails of at least " << MinCommonTailLength + << " instruction" << (MinCommonTailLength == 1 ? "" : "s") << '\n'; + }); // Sort by hash value so that blocks with identical end sequences sort // together. 
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 7b47c0e6f75d..145fd2fac8b5 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -263,6 +263,7 @@ add_llvm_component_library(LLVMCodeGen DEPENDS intrinsics_gen + vt_gen ${MLDeps} LINK_COMPONENTS diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 59428818c1ee..8efe54077091 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -62,7 +62,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveDebugVariablesWrapperLegacyPass(Registry); initializeLiveIntervalsWrapperPassPass(Registry); initializeLiveRangeShrinkPass(Registry); - initializeLiveStacksPass(Registry); + initializeLiveStacksWrapperLegacyPass(Registry); initializeLiveVariablesWrapperPassPass(Registry); initializeLocalStackSlotPassPass(Registry); initializeLowerGlobalDtorsLegacyPassPass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 83c6ecd40103..5c712e4f007d 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1885,7 +1885,7 @@ static bool foldICmpWithDominatingICmp(CmpInst *Cmp, return false; Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1); - ICmpInst::Predicate DomPred; + CmpPredicate DomPred; if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1)))) return false; if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT) @@ -2155,7 +2155,7 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) { static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI, const TargetTransformInfo &TTI, const DataLayout &DL) { - ICmpInst::Predicate Pred; + CmpPredicate Pred; if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One()))) return false; if (!ICmpInst::isEquality(Pred)) diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index 
a1acb4ef3683..f8ca7e370f6e 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -668,7 +668,7 @@ Value *MemCmpExpansion::getMemCmpOneBlock() { // We can generate more optimal code with a smaller number of operations if (CI->hasOneUser()) { auto *UI = cast<Instruction>(*CI->user_begin()); - ICmpInst::Predicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE; + CmpPredicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE; uint64_t Shift; bool NeedsZExt = false; // This is a special case because instead of checking if the result is less diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp index 3bb9da5f1a37..0ebe845e473f 100644 --- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp +++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp @@ -381,8 +381,6 @@ public: EndIdx = MI.getNumOperands(); Idx < EndIdx; ++Idx) { MachineOperand &MO = MI.getOperand(Idx); - // Leave `undef` operands as is, StackMaps will rewrite them - // into a constant. 
if (!MO.isReg() || MO.isImplicit() || MO.isUndef()) continue; Register Reg = MO.getReg(); diff --git a/llvm/lib/CodeGen/GCMetadata.cpp b/llvm/lib/CodeGen/GCMetadata.cpp index 6d1cc1a58e27..f33008c9e0f2 100644 --- a/llvm/lib/CodeGen/GCMetadata.cpp +++ b/llvm/lib/CodeGen/GCMetadata.cpp @@ -66,7 +66,7 @@ GCFunctionAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { } INITIALIZE_PASS(GCModuleInfo, "collector-metadata", - "Create Garbage Collector Module Metadata", false, false) + "Create Garbage Collector Module Metadata", false, true) // ----------------------------------------------------------------------------- diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index cfb4ae85aa4f..0ac4a8a0aa91 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -163,7 +163,7 @@ MachineInstr *GISelCSEInfo::getMachineInstrIfExists(FoldingSetNodeID &ID, void *&InsertPos) { handleRecordedInsts(); if (auto *Inst = getNodeIfExists(ID, MBB, InsertPos)) { - LLVM_DEBUG(dbgs() << "CSEInfo::Found Instr " << *Inst->MI;); + LLVM_DEBUG(dbgs() << "CSEInfo::Found Instr " << *Inst->MI); return const_cast<MachineInstr *>(Inst->MI); } return nullptr; @@ -313,11 +313,11 @@ Error GISelCSEInfo::verify() { } void GISelCSEInfo::print() { - LLVM_DEBUG(for (auto &It - : OpcodeHitTable) { - dbgs() << "CSEInfo::CSE Hit for Opc " << It.first << " : " << It.second - << "\n"; - };); + LLVM_DEBUG({ + for (auto &It : OpcodeHitTable) + dbgs() << "CSEInfo::CSE Hit for Opc " << It.first << " : " << It.second + << "\n"; + }); } /// ----------------------------------------- // ---- Profiling methods for FoldingSetNode --- // diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 5126aba3658c..d17b20d977ce 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -258,7 +258,7 @@ void 
CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, else if ((ParamAlign = FuncInfo.getParamAlign(ParamIdx))) MemAlign = *ParamAlign; else - MemAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL)); + MemAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); } else if (OpIdx >= AttributeList::FirstArgIndex) { if (auto ParamAlign = FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex)) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index d95fc8cfbcf5..4e3aaf5da719 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -207,26 +207,27 @@ const RegisterBank *CombinerHelper::getRegBank(Register Reg) const { return RBI->getRegBank(Reg, MRI, *TRI); } -void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) { +void CombinerHelper::setRegBank(Register Reg, + const RegisterBank *RegBank) const { if (RegBank) MRI.setRegBank(Reg, *RegBank); } -bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { +bool CombinerHelper::tryCombineCopy(MachineInstr &MI) const { if (matchCombineCopy(MI)) { applyCombineCopy(MI); return true; } return false; } -bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { +bool CombinerHelper::matchCombineCopy(MachineInstr &MI) const { if (MI.getOpcode() != TargetOpcode::COPY) return false; Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); return canReplaceReg(DstReg, SrcReg, MRI); } -void CombinerHelper::applyCombineCopy(MachineInstr &MI) { +void CombinerHelper::applyCombineCopy(MachineInstr &MI) const { Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); replaceRegWith(MRI, DstReg, SrcReg); @@ -234,7 +235,7 @@ void CombinerHelper::applyCombineCopy(MachineInstr &MI) { } bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand( - MachineInstr &MI, BuildFnTy &MatchInfo) { + MachineInstr &MI, 
BuildFnTy &MatchInfo) const { // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating. Register DstOp = MI.getOperand(0).getReg(); Register OrigOp = MI.getOperand(1).getReg(); @@ -303,8 +304,8 @@ bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand( return true; } -bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, - SmallVector<Register> &Ops) { +bool CombinerHelper::matchCombineConcatVectors( + MachineInstr &MI, SmallVector<Register> &Ops) const { assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && "Invalid instruction"); bool IsUndef = true; @@ -361,8 +362,8 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, return true; } -void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI, - SmallVector<Register> &Ops) { +void CombinerHelper::applyCombineConcatVectors( + MachineInstr &MI, SmallVector<Register> &Ops) const { // We determined that the concat_vectors can be flatten. // Generate the flattened build_vector. Register DstReg = MI.getOperand(0).getReg(); @@ -383,8 +384,8 @@ void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI, MI.eraseFromParent(); } -bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI, - SmallVector<Register> &Ops) { +bool CombinerHelper::matchCombineShuffleConcat( + MachineInstr &MI, SmallVector<Register> &Ops) const { ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); auto ConcatMI1 = dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg())); @@ -443,8 +444,8 @@ bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI, return !Ops.empty(); } -void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI, - SmallVector<Register> &Ops) { +void CombinerHelper::applyCombineShuffleConcat( + MachineInstr &MI, SmallVector<Register> &Ops) const { LLT SrcTy; for (Register &Reg : Ops) { if (Reg != 0) @@ -469,7 +470,7 @@ void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI, MI.eraseFromParent(); } -bool 
CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) { +bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) const { SmallVector<Register, 4> Ops; if (matchCombineShuffleVector(MI, Ops)) { applyCombineShuffleVector(MI, Ops); @@ -478,8 +479,8 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) { return false; } -bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, - SmallVectorImpl<Register> &Ops) { +bool CombinerHelper::matchCombineShuffleVector( + MachineInstr &MI, SmallVectorImpl<Register> &Ops) const { assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && "Invalid instruction kind"); LLT DstType = MRI.getType(MI.getOperand(0).getReg()); @@ -554,8 +555,8 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, return true; } -void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, - const ArrayRef<Register> Ops) { +void CombinerHelper::applyCombineShuffleVector( + MachineInstr &MI, const ArrayRef<Register> Ops) const { Register DstReg = MI.getOperand(0).getReg(); Builder.setInsertPt(*MI.getParent(), MI); Register NewDstReg = MRI.cloneVirtualRegister(DstReg); @@ -569,7 +570,7 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, MI.eraseFromParent(); } -bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) { +bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && "Invalid instruction kind"); @@ -577,7 +578,7 @@ bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) { return Mask.size() == 1; } -void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) { +void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) const { Register DstReg = MI.getOperand(0).getReg(); Builder.setInsertPt(*MI.getParent(), MI); @@ -690,7 +691,7 @@ static void InsertInsnsWithoutSideEffectsBeforeUse( } } // end anonymous namespace -bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) { +bool 
CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) const { PreferredTuple Preferred; if (matchCombineExtendingLoads(MI, Preferred)) { applyCombineExtendingLoads(MI, Preferred); @@ -717,8 +718,8 @@ static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) { return CandidateLoadOpc; } -bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, - PreferredTuple &Preferred) { +bool CombinerHelper::matchCombineExtendingLoads( + MachineInstr &MI, PreferredTuple &Preferred) const { // We match the loads and follow the uses to the extend instead of matching // the extends and following the def to the load. This is because the load // must remain in the same position for correctness (unless we also add code @@ -793,8 +794,8 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, return true; } -void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, - PreferredTuple &Preferred) { +void CombinerHelper::applyCombineExtendingLoads( + MachineInstr &MI, PreferredTuple &Preferred) const { // Rewrite the load to the chosen extending load. 
Register ChosenDstReg = Preferred.MI->getOperand(0).getReg(); @@ -900,7 +901,7 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, } bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_AND); // If we have the following code: @@ -982,7 +983,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI, } bool CombinerHelper::isPredecessor(const MachineInstr &DefMI, - const MachineInstr &UseMI) { + const MachineInstr &UseMI) const { assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() && "shouldn't consider debug uses"); assert(DefMI.getParent() == UseMI.getParent()); @@ -998,7 +999,7 @@ bool CombinerHelper::isPredecessor(const MachineInstr &DefMI, } bool CombinerHelper::dominates(const MachineInstr &DefMI, - const MachineInstr &UseMI) { + const MachineInstr &UseMI) const { assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() && "shouldn't consider debug uses"); if (MDT) @@ -1009,7 +1010,7 @@ bool CombinerHelper::dominates(const MachineInstr &DefMI, return isPredecessor(DefMI, UseMI); } -bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) { +bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); Register SrcReg = MI.getOperand(1).getReg(); Register LoadUser = SrcReg; @@ -1036,14 +1037,14 @@ bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) { return false; } -void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) { +void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); MI.eraseFromParent(); } bool CombinerHelper::matchSextInRegOfLoad( - MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const { assert(MI.getOpcode() == 
TargetOpcode::G_SEXT_INREG); Register DstReg = MI.getOperand(0).getReg(); @@ -1095,7 +1096,7 @@ bool CombinerHelper::matchSextInRegOfLoad( } void CombinerHelper::applySextInRegOfLoad( - MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) { + MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); Register LoadReg; unsigned ScalarSizeBits; @@ -1185,7 +1186,7 @@ static cl::opt<unsigned> PostIndexUseThreshold( bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr, Register &Base, Register &Offset, - bool &RematOffset) { + bool &RematOffset) const { // We're looking for the following pattern, for either load or store: // %baseptr:_(p0) = ... // G_STORE %val(s64), %baseptr(p0) @@ -1280,7 +1281,8 @@ bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr, } bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr, - Register &Base, Register &Offset) { + Register &Base, + Register &Offset) const { auto &MF = *LdSt.getParent()->getParent(); const auto &TLI = *MF.getSubtarget().getTargetLowering(); @@ -1335,8 +1337,8 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr, return RealUse; } -bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI, - BuildFnTy &MatchInfo) { +bool CombinerHelper::matchCombineExtractedVectorLoad( + MachineInstr &MI, BuildFnTy &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT); // Check if there is a load that defines the vector being extracted from. 
@@ -1442,7 +1444,7 @@ bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI, } bool CombinerHelper::matchCombineIndexedLoadStore( - MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) { + MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const { auto &LdSt = cast<GLoadStore>(MI); if (LdSt.isAtomic()) @@ -1459,7 +1461,7 @@ bool CombinerHelper::matchCombineIndexedLoadStore( } void CombinerHelper::applyCombineIndexedLoadStore( - MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) { + MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const { MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr); unsigned Opcode = MI.getOpcode(); bool IsStore = Opcode == TargetOpcode::G_STORE; @@ -1494,7 +1496,7 @@ void CombinerHelper::applyCombineIndexedLoadStore( } bool CombinerHelper::matchCombineDivRem(MachineInstr &MI, - MachineInstr *&OtherMI) { + MachineInstr *&OtherMI) const { unsigned Opcode = MI.getOpcode(); bool IsDiv, IsSigned; @@ -1557,7 +1559,7 @@ bool CombinerHelper::matchCombineDivRem(MachineInstr &MI, } void CombinerHelper::applyCombineDivRem(MachineInstr &MI, - MachineInstr *&OtherMI) { + MachineInstr *&OtherMI) const { unsigned Opcode = MI.getOpcode(); assert(OtherMI && "OtherMI shouldn't be empty."); @@ -1588,8 +1590,8 @@ void CombinerHelper::applyCombineDivRem(MachineInstr &MI, OtherMI->eraseFromParent(); } -bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI, - MachineInstr *&BrCond) { +bool CombinerHelper::matchOptBrCondByInvertingCond( + MachineInstr &MI, MachineInstr *&BrCond) const { assert(MI.getOpcode() == TargetOpcode::G_BR); // Try to match the following: @@ -1622,8 +1624,8 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI, MBB->isLayoutSuccessor(BrCondTarget); } -void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI, - MachineInstr *&BrCond) { +void CombinerHelper::applyOptBrCondByInvertingCond( + MachineInstr &MI, MachineInstr *&BrCond) const { MachineBasicBlock 
*BrTarget = MI.getOperand(0).getMBB(); Builder.setInstrAndDebugLoc(*BrCond); LLT Ty = MRI.getType(BrCond->getOperand(0).getReg()); @@ -1647,8 +1649,7 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI, Observer.changedInstr(*BrCond); } - -bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) { +bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) const { MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder); @@ -1656,7 +1657,8 @@ bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) { LegalizerHelper::LegalizeResult::Legalized; } -bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { +bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, + unsigned MaxLen) const { MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder); @@ -1709,8 +1711,8 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI, return Result; } -void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, - const ConstantFP *Cst) { +void CombinerHelper::applyCombineConstantFoldFpUnary( + MachineInstr &MI, const ConstantFP *Cst) const { APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue()); const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded); Builder.buildFConstant(MI.getOperand(0), *NewCst); @@ -1718,7 +1720,7 @@ void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI, } bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, - PtrAddChain &MatchInfo) { + PtrAddChain &MatchInfo) const { // We're trying to match the following pattern: // %t1 = G_PTR_ADD %base, G_CONSTANT imm1 // %root = G_PTR_ADD %t1, G_CONSTANT imm2 @@ -1780,7 +1782,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, } void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, - PtrAddChain 
&MatchInfo) { + PtrAddChain &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); MachineIRBuilder MIB(MI); LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg()); @@ -1793,7 +1795,7 @@ void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, } bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, - RegisterImmPair &MatchInfo) { + RegisterImmPair &MatchInfo) const { // We're trying to match the following pattern with any of // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions: // %t1 = SHIFT %base, G_CONSTANT imm1 @@ -1838,7 +1840,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI, } void CombinerHelper::applyShiftImmedChain(MachineInstr &MI, - RegisterImmPair &MatchInfo) { + RegisterImmPair &MatchInfo) const { unsigned Opcode = MI.getOpcode(); assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT || @@ -1869,8 +1871,8 @@ void CombinerHelper::applyShiftImmedChain(MachineInstr &MI, Observer.changedInstr(MI); } -bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, - ShiftOfShiftedLogic &MatchInfo) { +bool CombinerHelper::matchShiftOfShiftedLogic( + MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const { // We're trying to match the following pattern with any of // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination // with any of G_AND/G_OR/G_XOR logic instructions. 
@@ -1950,8 +1952,8 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI, return true; } -void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, - ShiftOfShiftedLogic &MatchInfo) { +void CombinerHelper::applyShiftOfShiftedLogic( + MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const { unsigned Opcode = MI.getOpcode(); assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT || @@ -1989,7 +1991,8 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI, MI.eraseFromParent(); } -bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) { +bool CombinerHelper::matchCommuteShift(MachineInstr &MI, + BuildFnTy &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL"); // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) @@ -2025,7 +2028,7 @@ bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) { } bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, - unsigned &ShiftVal) { + unsigned &ShiftVal) const { assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); auto MaybeImmVal = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI); @@ -2037,7 +2040,7 @@ bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI, } void CombinerHelper::applyCombineMulToShl(MachineInstr &MI, - unsigned &ShiftVal) { + unsigned &ShiftVal) const { assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL"); MachineIRBuilder MIB(MI); LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg()); @@ -2051,7 +2054,7 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI, } bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { GSub &Sub = cast<GSub>(MI); LLT Ty = MRI.getType(Sub.getReg(0)); @@ -2077,7 +2080,7 @@ bool 
CombinerHelper::matchCombineSubToAdd(MachineInstr &MI, // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, - RegisterImmPair &MatchData) { + RegisterImmPair &MatchData) const { assert(MI.getOpcode() == TargetOpcode::G_SHL && KB); if (!getTargetLowering().isDesirableToPullExtFromShl(MI)) return false; @@ -2116,8 +2119,8 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI, return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize; } -void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, - const RegisterImmPair &MatchData) { +void CombinerHelper::applyCombineShlOfExtend( + MachineInstr &MI, const RegisterImmPair &MatchData) const { Register ExtSrcReg = MatchData.Reg; int64_t ShiftAmtVal = MatchData.Imm; @@ -2130,7 +2133,7 @@ void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, } bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI, - Register &MatchInfo) { + Register &MatchInfo) const { GMerge &Merge = cast<GMerge>(MI); SmallVector<Register, 16> MergedValues; for (unsigned I = 0; I < Merge.getNumSources(); ++I) @@ -2157,7 +2160,7 @@ static Register peekThroughBitcast(Register Reg, } bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( - MachineInstr &MI, SmallVectorImpl<Register> &Operands) { + MachineInstr &MI, SmallVectorImpl<Register> &Operands) const { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); auto &Unmerge = cast<GUnmerge>(MI); @@ -2181,7 +2184,7 @@ bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( } void CombinerHelper::applyCombineUnmergeMergeToPlainValues( - MachineInstr &MI, SmallVectorImpl<Register> &Operands) { + MachineInstr &MI, SmallVectorImpl<Register> &Operands) const { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); assert((MI.getNumOperands() - 1 == Operands.size()) && @@ -2211,8 +2214,8 @@ void 
CombinerHelper::applyCombineUnmergeMergeToPlainValues( MI.eraseFromParent(); } -bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, - SmallVectorImpl<APInt> &Csts) { +bool CombinerHelper::matchCombineUnmergeConstant( + MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const { unsigned SrcIdx = MI.getNumOperands() - 1; Register SrcReg = MI.getOperand(SrcIdx).getReg(); MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg); @@ -2236,8 +2239,8 @@ bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI, return true; } -void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, - SmallVectorImpl<APInt> &Csts) { +void CombinerHelper::applyCombineUnmergeConstant( + MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); assert((MI.getNumOperands() - 1 == Csts.size()) && @@ -2252,7 +2255,8 @@ void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI, } bool CombinerHelper::matchCombineUnmergeUndef( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { unsigned SrcIdx = MI.getNumOperands() - 1; Register SrcReg = MI.getOperand(SrcIdx).getReg(); MatchInfo = [&MI](MachineIRBuilder &B) { @@ -2265,7 +2269,8 @@ bool CombinerHelper::matchCombineUnmergeUndef( return isa<GImplicitDef>(MRI.getVRegDef(SrcReg)); } -bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { +bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc( + MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); if (MRI.getType(MI.getOperand(0).getReg()).isVector() || @@ -2279,14 +2284,15 @@ bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { return true; } -void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) { +void 
CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc( + MachineInstr &MI) const { Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg(); Register Dst0Reg = MI.getOperand(0).getReg(); Builder.buildTrunc(Dst0Reg, SrcReg); MI.eraseFromParent(); } -bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { +bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); Register Dst0Reg = MI.getOperand(0).getReg(); @@ -2312,7 +2318,7 @@ bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) { return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits(); } -void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { +void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && "Expected an unmerge"); @@ -2346,7 +2352,7 @@ void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) { bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, - unsigned &ShiftVal) { + unsigned &ShiftVal) const { assert((MI.getOpcode() == TargetOpcode::G_SHL || MI.getOpcode() == TargetOpcode::G_LSHR || MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift"); @@ -2369,8 +2375,8 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI, return ShiftVal >= Size / 2 && ShiftVal < Size; } -void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, - const unsigned &ShiftVal) { +void CombinerHelper::applyCombineShiftToUnmerge( + MachineInstr &MI, const unsigned &ShiftVal) const { Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(SrcReg); @@ -2441,8 +2447,8 @@ void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI, MI.eraseFromParent(); } -bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI, - unsigned TargetShiftAmount) { +bool 
CombinerHelper::tryCombineShiftToUnmerge( + MachineInstr &MI, unsigned TargetShiftAmount) const { unsigned ShiftAmt; if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) { applyCombineShiftToUnmerge(MI, ShiftAmt); @@ -2452,7 +2458,8 @@ bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI, return false; } -bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { +bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, + Register &Reg) const { assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); @@ -2461,14 +2468,16 @@ bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg)))); } -void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { +void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, + Register &Reg) const { assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); Register DstReg = MI.getOperand(0).getReg(); Builder.buildCopy(DstReg, Reg); MI.eraseFromParent(); } -void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { +void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, + Register &Reg) const { assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); Register DstReg = MI.getOperand(0).getReg(); Builder.buildZExtOrTrunc(DstReg, Reg); @@ -2476,7 +2485,7 @@ void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { } bool CombinerHelper::matchCombineAddP2IToPtrAdd( - MachineInstr &MI, std::pair<Register, bool> &PtrReg) { + MachineInstr &MI, std::pair<Register, bool> &PtrReg) const { assert(MI.getOpcode() == TargetOpcode::G_ADD); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -2501,7 +2510,7 @@ bool CombinerHelper::matchCombineAddP2IToPtrAdd( } void 
CombinerHelper::applyCombineAddP2IToPtrAdd( - MachineInstr &MI, std::pair<Register, bool> &PtrReg) { + MachineInstr &MI, std::pair<Register, bool> &PtrReg) const { Register Dst = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -2519,7 +2528,7 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd( } bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, - APInt &NewCst) { + APInt &NewCst) const { auto &PtrAdd = cast<GPtrAdd>(MI); Register LHS = PtrAdd.getBaseReg(); Register RHS = PtrAdd.getOffsetReg(); @@ -2540,7 +2549,7 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI, } void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, - APInt &NewCst) { + APInt &NewCst) const { auto &PtrAdd = cast<GPtrAdd>(MI); Register Dst = PtrAdd.getReg(0); @@ -2548,7 +2557,8 @@ void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI, PtrAdd.eraseFromParent(); } -bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { +bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, + Register &Reg) const { assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT"); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -2560,7 +2570,8 @@ bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) { m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))); } -bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) { +bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, + Register &Reg) const { assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT"); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -2592,7 +2603,7 @@ static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) { } bool CombinerHelper::matchCombineTruncOfShift( - MachineInstr &MI, std::pair<MachineInstr *, LLT> 
&MatchInfo) { + MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC"); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -2653,7 +2664,7 @@ bool CombinerHelper::matchCombineTruncOfShift( } void CombinerHelper::applyCombineTruncOfShift( - MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) { + MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const { MachineInstr *ShiftMI = MatchInfo.first; LLT NewShiftTy = MatchInfo.second; @@ -2677,39 +2688,40 @@ void CombinerHelper::applyCombineTruncOfShift( eraseInst(MI); } -bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { +bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) const { return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { return MO.isReg() && getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); }); } -bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) { +bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) const { return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) { return !MO.isReg() || getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); }); } -bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) { +bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); return all_of(Mask, [](int Elt) { return Elt < 0; }); } -bool CombinerHelper::matchUndefStore(MachineInstr &MI) { +bool CombinerHelper::matchUndefStore(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_STORE); return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(), MRI); } -bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) { +bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) const { assert(MI.getOpcode() 
== TargetOpcode::G_SELECT); return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(), MRI); } -bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) { +bool CombinerHelper::matchInsertExtractVecEltOutOfBounds( + MachineInstr &MI) const { assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT || MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) && "Expected an insert/extract element op"); @@ -2725,7 +2737,8 @@ bool CombinerHelper::matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) { return Idx->getZExtValue() >= VecTy.getNumElements(); } -bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { +bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, + unsigned &OpIdx) const { GSelect &SelMI = cast<GSelect>(MI); auto Cst = isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI); @@ -2735,10 +2748,10 @@ bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) { return true; } -void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); } +void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); } bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, - const MachineOperand &MOP2) { + const MachineOperand &MOP2) const { if (!MOP1.isReg() || !MOP2.isReg()) return false; auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI); @@ -2834,7 +2847,8 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, return false; } -bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) { +bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, + int64_t C) const { if (!MOP.isReg()) return false; auto *MI = MRI.getVRegDef(MOP.getReg()); @@ -2843,7 +2857,8 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) { MaybeCst->getSExtValue() == C; } -bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) { +bool 
CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, + double C) const { if (!MOP.isReg()) return false; std::optional<FPValueAndVReg> MaybeCst; @@ -2854,7 +2869,7 @@ bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) { } void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, - unsigned OpIdx) { + unsigned OpIdx) const { assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); Register OldReg = MI.getOperand(0).getReg(); Register Replacement = MI.getOperand(OpIdx).getReg(); @@ -2864,7 +2879,7 @@ void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI, } void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI, - Register Replacement) { + Register Replacement) const { assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?"); Register OldReg = MI.getOperand(0).getReg(); assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?"); @@ -2873,7 +2888,7 @@ void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI, } bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI, - unsigned ConstIdx) { + unsigned ConstIdx) const { Register ConstReg = MI.getOperand(ConstIdx).getReg(); LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); @@ -2886,7 +2901,7 @@ bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI, return (VRegAndVal->Value.uge(DstTy.getSizeInBits())); } -void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) { +void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) const { assert((MI.getOpcode() == TargetOpcode::G_FSHL || MI.getOpcode() == TargetOpcode::G_FSHR) && "This is not a funnel shift operation"); @@ -2910,7 +2925,7 @@ void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) { MI.eraseFromParent(); } -bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) { +bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_SELECT); 
// Match (cond ? x : x) return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) && @@ -2918,63 +2933,67 @@ bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) { MRI); } -bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) { +bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) const { return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) && canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), MRI); } -bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) { +bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, + unsigned OpIdx) const { return matchConstantOp(MI.getOperand(OpIdx), 0) && canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(), MRI); } -bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) { +bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, + unsigned OpIdx) const { MachineOperand &MO = MI.getOperand(OpIdx); return MO.isReg() && getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); } bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, - unsigned OpIdx) { + unsigned OpIdx) const { MachineOperand &MO = MI.getOperand(OpIdx); return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB); } -void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) { +void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, + double C) const { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.buildFConstant(MI.getOperand(0), C); MI.eraseFromParent(); } -void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) { +void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, + int64_t C) const { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.buildConstant(MI.getOperand(0), C); MI.eraseFromParent(); } -void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) { +void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) const { assert(MI.getNumDefs() 
== 1 && "Expected only one def?"); Builder.buildConstant(MI.getOperand(0), C); MI.eraseFromParent(); } void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, - ConstantFP *CFP) { + ConstantFP *CFP) const { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF()); MI.eraseFromParent(); } -void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) { +void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) const { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.buildUndef(MI.getOperand(0)); MI.eraseFromParent(); } bool CombinerHelper::matchSimplifyAddToSub( - MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { + MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const { Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); Register &NewLHS = std::get<0>(MatchInfo); @@ -2994,7 +3013,7 @@ bool CombinerHelper::matchSimplifyAddToSub( } bool CombinerHelper::matchCombineInsertVecElts( - MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { + MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT && "Invalid opcode"); Register DstReg = MI.getOperand(0).getReg(); @@ -3041,7 +3060,7 @@ bool CombinerHelper::matchCombineInsertVecElts( } void CombinerHelper::applyCombineInsertVecElts( - MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) { + MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const { Register UndefReg; auto GetUndef = [&]() { if (UndefReg) @@ -3059,7 +3078,7 @@ void CombinerHelper::applyCombineInsertVecElts( } void CombinerHelper::applySimplifyAddToSub( - MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) { + MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const { Register SubLHS, SubRHS; std::tie(SubLHS, SubRHS) = MatchInfo; Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS); @@ -3067,7 +3086,7 @@ void 
CombinerHelper::applySimplifyAddToSub( } bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( - MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { + MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const { // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ... // // Creates the new hand + logic instruction (but does not insert them.) @@ -3095,7 +3114,9 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( unsigned HandOpcode = LeftHandInst->getOpcode(); if (HandOpcode != RightHandInst->getOpcode()) return false; - if (!LeftHandInst->getOperand(1).isReg() || + if (LeftHandInst->getNumOperands() < 2 || + !LeftHandInst->getOperand(1).isReg() || + RightHandInst->getNumOperands() < 2 || !RightHandInst->getOperand(1).isReg()) return false; @@ -3122,7 +3143,6 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( case TargetOpcode::G_TRUNC: { // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y) const MachineFunction *MF = MI.getMF(); - const DataLayout &DL = MF->getDataLayout(); LLVMContext &Ctx = MF->getFunction().getContext(); LLT DstTy = MRI.getType(Dst); @@ -3130,8 +3150,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( // Be extra careful sinking truncate. If it's free, there's no benefit in // widening a binop. 
- if (TLI.isZExtFree(DstTy, XTy, DL, Ctx) && - TLI.isTruncateFree(XTy, DstTy, DL, Ctx)) + if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx)) return false; break; } @@ -3175,7 +3194,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands( } void CombinerHelper::applyBuildInstructionSteps( - MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) { + MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const { assert(MatchInfo.InstrsToBuild.size() && "Expected at least one instr to build?"); for (auto &InstrToBuild : MatchInfo.InstrsToBuild) { @@ -3189,7 +3208,7 @@ void CombinerHelper::applyBuildInstructionSteps( } bool CombinerHelper::matchAshrShlToSextInreg( - MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { + MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_ASHR); int64_t ShlCst, AshrCst; Register Src; @@ -3207,7 +3226,7 @@ bool CombinerHelper::matchAshrShlToSextInreg( } void CombinerHelper::applyAshShlToSextInreg( - MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) { + MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_ASHR); Register Src; int64_t ShiftAmt; @@ -3219,7 +3238,8 @@ void CombinerHelper::applyAshShlToSextInreg( /// and(and(x, C1), C2) -> C1&C2 ? 
and(x, C1&C2) : 0 bool CombinerHelper::matchOverlappingAnd( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_AND); Register Dst = MI.getOperand(0).getReg(); @@ -3245,7 +3265,7 @@ bool CombinerHelper::matchOverlappingAnd( } bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, - Register &Replacement) { + Register &Replacement) const { // Given // // %y:_(sN) = G_SOMETHING @@ -3300,7 +3320,8 @@ bool CombinerHelper::matchRedundantAnd(MachineInstr &MI, return false; } -bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) { +bool CombinerHelper::matchRedundantOr(MachineInstr &MI, + Register &Replacement) const { // Given // // %y:_(sN) = G_SOMETHING @@ -3341,7 +3362,7 @@ bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) { return false; } -bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) { +bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) const { // If the input is already sign extended, just drop the extension. 
Register Src = MI.getOperand(1).getReg(); unsigned ExtBits = MI.getOperand(2).getImm(); @@ -3373,7 +3394,7 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, // // Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs bool CombinerHelper::matchUseVectorTruncate(MachineInstr &MI, - Register &MatchInfo) { + Register &MatchInfo) const { auto BuildMI = cast<GBuildVector>(&MI); unsigned NumOperands = BuildMI->getNumSources(); LLT DstTy = MRI.getType(BuildMI->getReg(0)); @@ -3436,7 +3457,7 @@ bool CombinerHelper::matchUseVectorTruncate(MachineInstr &MI, } void CombinerHelper::applyUseVectorTruncate(MachineInstr &MI, - Register &MatchInfo) { + Register &MatchInfo) const { Register MidReg; auto BuildMI = cast<GBuildVector>(&MI); Register DstReg = BuildMI->getReg(0); @@ -3462,8 +3483,8 @@ void CombinerHelper::applyUseVectorTruncate(MachineInstr &MI, MI.eraseFromParent(); } -bool CombinerHelper::matchNotCmp(MachineInstr &MI, - SmallVectorImpl<Register> &RegsToNegate) { +bool CombinerHelper::matchNotCmp( + MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const { assert(MI.getOpcode() == TargetOpcode::G_XOR); LLT Ty = MRI.getType(MI.getOperand(0).getReg()); const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering(); @@ -3539,8 +3560,8 @@ bool CombinerHelper::matchNotCmp(MachineInstr &MI, return true; } -void CombinerHelper::applyNotCmp(MachineInstr &MI, - SmallVectorImpl<Register> &RegsToNegate) { +void CombinerHelper::applyNotCmp( + MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const { for (Register Reg : RegsToNegate) { MachineInstr *Def = MRI.getVRegDef(Reg); Observer.changingInstr(*Def); @@ -3572,7 +3593,7 @@ void CombinerHelper::applyNotCmp(MachineInstr &MI, } bool CombinerHelper::matchXorOfAndWithSameReg( - MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const { // Match (xor (and x, y), y) (or any of its commuted 
cases) assert(MI.getOpcode() == TargetOpcode::G_XOR); Register &X = MatchInfo.first; @@ -3603,7 +3624,7 @@ bool CombinerHelper::matchXorOfAndWithSameReg( } void CombinerHelper::applyXorOfAndWithSameReg( - MachineInstr &MI, std::pair<Register, Register> &MatchInfo) { + MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const { // Fold (xor (and x, y), y) -> (and (not x), y) Register X, Y; std::tie(X, Y) = MatchInfo; @@ -3615,7 +3636,7 @@ void CombinerHelper::applyXorOfAndWithSameReg( Observer.changedInstr(MI); } -bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { +bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) const { auto &PtrAdd = cast<GPtrAdd>(MI); Register DstReg = PtrAdd.getReg(0); LLT Ty = MRI.getType(DstReg); @@ -3634,14 +3655,14 @@ bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) { return isBuildVectorAllZeros(*VecMI, MRI); } -void CombinerHelper::applyPtrAddZero(MachineInstr &MI) { +void CombinerHelper::applyPtrAddZero(MachineInstr &MI) const { auto &PtrAdd = cast<GPtrAdd>(MI); Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg()); PtrAdd.eraseFromParent(); } /// The second source operand is known to be a power of 2. -void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { +void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) const { Register DstReg = MI.getOperand(0).getReg(); Register Src0 = MI.getOperand(1).getReg(); Register Pow2Src1 = MI.getOperand(2).getReg(); @@ -3655,7 +3676,7 @@ void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) { } bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI, - unsigned &SelectOpNo) { + unsigned &SelectOpNo) const { Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -3708,8 +3729,8 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI, /// \p SelectOperand is the operand in binary operator \p MI that is the select /// to fold. 
-void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI, - const unsigned &SelectOperand) { +void CombinerHelper::applyFoldBinOpIntoSelect( + MachineInstr &MI, const unsigned &SelectOperand) const { Register Dst = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -3845,7 +3866,8 @@ matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>> CombinerHelper::findLoadOffsetsForLoadOrCombine( SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, - const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) { + const SmallVector<Register, 8> &RegsToVisit, + const unsigned MemSizeInBits) const { // Each load found for the pattern. There should be one for each RegsToVisit. SmallSetVector<const MachineInstr *, 8> Loads; @@ -3977,7 +3999,8 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine( } bool CombinerHelper::matchLoadOrCombine( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_OR); MachineFunction &MF = *MI.getMF(); // Assuming a little-endian target, transform: @@ -4090,7 +4113,7 @@ bool CombinerHelper::matchLoadOrCombine( } bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI, - MachineInstr *&ExtMI) { + MachineInstr *&ExtMI) const { auto &PHI = cast<GPhi>(MI); Register DstReg = PHI.getReg(0); @@ -4144,7 +4167,7 @@ bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI, } void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI, - MachineInstr *&ExtMI) { + MachineInstr *&ExtMI) const { auto &PHI = cast<GPhi>(MI); Register DstReg = ExtMI->getOperand(0).getReg(); LLT ExtTy = MRI.getType(DstReg); @@ -4189,7 +4212,7 @@ void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI, } bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI, - Register &Reg) 
{ + Register &Reg) const { assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT); // If we have a constant index, look for a G_BUILD_VECTOR source // and find the source register that the index maps to. @@ -4225,7 +4248,7 @@ bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI, } void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI, - Register &Reg) { + Register &Reg) const { // Check the type of the register, since it may have come from a // G_BUILD_VECTOR_TRUNC. LLT ScalarTy = MRI.getType(Reg); @@ -4243,7 +4266,7 @@ void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI, bool CombinerHelper::matchExtractAllEltsFromBuildVector( MachineInstr &MI, - SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) { + SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const { assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); // This combine tries to find build_vector's which have every source element // extracted using G_EXTRACT_VECTOR_ELT. 
This can happen when transforms like @@ -4285,7 +4308,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector( void CombinerHelper::applyExtractAllEltsFromBuildVector( MachineInstr &MI, - SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) { + SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const { assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); for (auto &Pair : SrcDstPairs) { auto *ExtMI = Pair.second; @@ -4296,18 +4319,20 @@ void CombinerHelper::applyExtractAllEltsFromBuildVector( } void CombinerHelper::applyBuildFn( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { applyBuildFnNoErase(MI, MatchInfo); MI.eraseFromParent(); } void CombinerHelper::applyBuildFnNoErase( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { MatchInfo(Builder); } bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_OR); Register Dst = MI.getOperand(0).getReg(); @@ -4360,7 +4385,7 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI, } /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate. 
-bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) { +bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR); Register X = MI.getOperand(1).getReg(); @@ -4372,7 +4397,7 @@ bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) { return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}}); } -void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) { +void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR); bool IsFSHL = Opc == TargetOpcode::G_FSHL; @@ -4384,7 +4409,7 @@ void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) { } // Fold (rot x, c) -> (rot x, c % BitSize) -bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) { +bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_ROTL || MI.getOpcode() == TargetOpcode::G_ROTR); unsigned Bitsize = @@ -4399,7 +4424,7 @@ bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) { return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange; } -void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) { +void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_ROTL || MI.getOpcode() == TargetOpcode::G_ROTR); unsigned Bitsize = @@ -4414,7 +4439,7 @@ void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) { } bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI, - int64_t &MatchInfo) { + int64_t &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_ICMP); auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); @@ -4458,7 +4483,8 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI, } bool CombinerHelper::matchICmpToLHSKnownBits( - MachineInstr 
&MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_ICMP); // Given: // @@ -4501,7 +4527,8 @@ bool CombinerHelper::matchICmpToLHSKnownBits( // Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0 bool CombinerHelper::matchAndOrDisjointMask( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_AND); // Ignore vector types to simplify matching the two constants. @@ -4536,7 +4563,8 @@ bool CombinerHelper::matchAndOrDisjointMask( /// Form a G_SBFX from a G_SEXT_INREG fed by a right shift. bool CombinerHelper::matchBitfieldExtractFromSExtInReg( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -4565,7 +4593,7 @@ bool CombinerHelper::matchBitfieldExtractFromSExtInReg( /// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants. 
bool CombinerHelper::matchBitfieldExtractFromAnd(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { GAnd *And = cast<GAnd>(&MI); Register Dst = And->getReg(0); LLT Ty = MRI.getType(Dst); @@ -4602,7 +4630,8 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(MachineInstr &MI, } bool CombinerHelper::matchBitfieldExtractFromShr( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { const unsigned Opcode = MI.getOpcode(); assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR); @@ -4651,7 +4680,8 @@ bool CombinerHelper::matchBitfieldExtractFromShr( } bool CombinerHelper::matchBitfieldExtractFromShrAnd( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { const unsigned Opcode = MI.getOpcode(); assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR); @@ -4708,7 +4738,7 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd( } bool CombinerHelper::reassociationCanBreakAddressingModePattern( - MachineInstr &MI) { + MachineInstr &MI) const { auto &PtrAdd = cast<GPtrAdd>(MI); Register Src1Reg = PtrAdd.getBaseReg(); @@ -4774,7 +4804,7 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern( bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C) Register Src1Reg = MI.getOperand(1).getReg(); if (RHS->getOpcode() != TargetOpcode::G_ADD) @@ -4799,7 +4829,7 @@ bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI, bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C) // if and only 
if (G_PTR_ADD X, C) has one use. Register LHSBase; @@ -4827,10 +4857,9 @@ bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI, return !reassociationCanBreakAddressingModePattern(MI); } -bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI, - MachineInstr *LHS, - MachineInstr *RHS, - BuildFnTy &MatchInfo) { +bool CombinerHelper::matchReassocFoldConstantsInSubTree( + GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, + BuildFnTy &MatchInfo) const { // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2) auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS); if (!LHSPtrAdd) @@ -4857,7 +4886,7 @@ bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI, } bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { auto &PtrAdd = cast<GPtrAdd>(MI); // We're trying to match a few pointer computation patterns here for // re-association opportunities. @@ -4890,7 +4919,7 @@ bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI, } bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg, Register OpLHS, Register OpRHS, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { LLT OpRHSTy = MRI.getType(OpRHS); MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS); @@ -4930,7 +4959,7 @@ bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg, } bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { // We don't check if the reassociation will break a legal addressing mode // here since pointer arithmetic is handled by G_PTR_ADD. 
unsigned Opc = MI.getOpcode(); @@ -4945,7 +4974,8 @@ bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI, return false; } -bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) { +bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI, + APInt &MatchInfo) const { LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); Register SrcOp = MI.getOperand(1).getReg(); @@ -4957,7 +4987,8 @@ bool CombinerHelper::matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) return false; } -bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) { +bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI, + APInt &MatchInfo) const { Register Op1 = MI.getOperand(1).getReg(); Register Op2 = MI.getOperand(2).getReg(); auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI); @@ -4967,7 +4998,8 @@ bool CombinerHelper::matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) return true; } -bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &MatchInfo) { +bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI, + ConstantFP *&MatchInfo) const { Register Op1 = MI.getOperand(1).getReg(); Register Op2 = MI.getOperand(2).getReg(); auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI); @@ -4979,7 +5011,7 @@ bool CombinerHelper::matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP* &Mat } bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI, - ConstantFP *&MatchInfo) { + ConstantFP *&MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_FMA || MI.getOpcode() == TargetOpcode::G_FMAD); auto [_, Op1, Op2, Op3] = MI.getFirst4Regs(); @@ -5004,7 +5036,8 @@ bool CombinerHelper::matchConstantFoldFMA(MachineInstr &MI, } bool CombinerHelper::matchNarrowBinopFeedingAnd( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { // Look for a binop feeding into an AND with 
a mask: // // %add = G_ADD %lhs, %rhs @@ -5072,9 +5105,8 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd( auto &MF = *MI.getMF(); const auto &TLI = getTargetLowering(); LLVMContext &Ctx = MF.getFunction().getContext(); - auto &DL = MF.getDataLayout(); - if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) || - !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx)) + if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) || + !TLI.isZExtFree(NarrowTy, WideTy, Ctx)) return false; if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) || !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}})) @@ -5094,7 +5126,8 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd( return true; } -bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) { +bool CombinerHelper::matchMulOBy2(MachineInstr &MI, + BuildFnTy &MatchInfo) const { unsigned Opc = MI.getOpcode(); assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO); @@ -5112,7 +5145,8 @@ bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) { return true; } -bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) { +bool CombinerHelper::matchMulOBy0(MachineInstr &MI, + BuildFnTy &MatchInfo) const { // (G_*MULO x, 0) -> 0 + no carry out assert(MI.getOpcode() == TargetOpcode::G_UMULO || MI.getOpcode() == TargetOpcode::G_SMULO); @@ -5130,7 +5164,8 @@ bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) { return true; } -bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) { +bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, + BuildFnTy &MatchInfo) const { // (G_*ADDE x, y, 0) -> (G_*ADDO x, y) // (G_*SUBE x, y, 0) -> (G_*SUBO x, y) assert(MI.getOpcode() == TargetOpcode::G_UADDE || @@ -5164,7 +5199,7 @@ bool CombinerHelper::matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) { } bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { 
assert(MI.getOpcode() == TargetOpcode::G_SUB); Register Dst = MI.getOperand(0).getReg(); // (x + y) - z -> x (if y == z) @@ -5207,7 +5242,7 @@ bool CombinerHelper::matchSubAddSameReg(MachineInstr &MI, return false; } -MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { +MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_UDIV); auto &UDiv = cast<GenericMachineInstr>(MI); Register Dst = UDiv.getReg(0); @@ -5368,7 +5403,7 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { return MIB.buildSelect(Ty, IsOne, LHS, Q); } -bool CombinerHelper::matchUDivByConst(MachineInstr &MI) { +bool CombinerHelper::matchUDivByConst(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_UDIV); Register Dst = MI.getOperand(0).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -5378,8 +5413,7 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) { AttributeList Attr = MF.getFunction().getAttributes(); const auto &TLI = getTargetLowering(); LLVMContext &Ctx = MF.getFunction().getContext(); - auto &DL = MF.getDataLayout(); - if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr)) + if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr)) return false; // Don't do this for minsize because the instruction sequence is usually @@ -5413,12 +5447,12 @@ bool CombinerHelper::matchUDivByConst(MachineInstr &MI) { MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); }); } -void CombinerHelper::applyUDivByConst(MachineInstr &MI) { +void CombinerHelper::applyUDivByConst(MachineInstr &MI) const { auto *NewMI = buildUDivUsingMul(MI); replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg()); } -bool CombinerHelper::matchSDivByConst(MachineInstr &MI) { +bool CombinerHelper::matchSDivByConst(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV"); Register Dst = MI.getOperand(0).getReg(); Register RHS = 
MI.getOperand(2).getReg(); @@ -5428,8 +5462,7 @@ bool CombinerHelper::matchSDivByConst(MachineInstr &MI) { AttributeList Attr = MF.getFunction().getAttributes(); const auto &TLI = getTargetLowering(); LLVMContext &Ctx = MF.getFunction().getContext(); - auto &DL = MF.getDataLayout(); - if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr)) + if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr)) return false; // Don't do this for minsize because the instruction sequence is usually @@ -5447,12 +5480,12 @@ bool CombinerHelper::matchSDivByConst(MachineInstr &MI) { return false; } -void CombinerHelper::applySDivByConst(MachineInstr &MI) { +void CombinerHelper::applySDivByConst(MachineInstr &MI) const { auto *NewMI = buildSDivUsingMul(MI); replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg()); } -MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) { +MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV"); auto &SDiv = cast<GenericMachineInstr>(MI); Register Dst = SDiv.getReg(0); @@ -5516,7 +5549,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) { return MIB.buildMul(Ty, Res, Factor); } -bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) { +bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) const { assert((MI.getOpcode() == TargetOpcode::G_SDIV || MI.getOpcode() == TargetOpcode::G_UDIV) && "Expected SDIV or UDIV"); @@ -5530,7 +5563,7 @@ bool CombinerHelper::matchDivByPow2(MachineInstr &MI, bool IsSigned) { return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false); } -void CombinerHelper::applySDivByPow2(MachineInstr &MI) { +void CombinerHelper::applySDivByPow2(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV"); auto &SDiv = cast<GenericMachineInstr>(MI); Register Dst = SDiv.getReg(0); @@ -5589,7 +5622,7 @@ void 
CombinerHelper::applySDivByPow2(MachineInstr &MI) { MI.eraseFromParent(); } -void CombinerHelper::applyUDivByPow2(MachineInstr &MI) { +void CombinerHelper::applyUDivByPow2(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV"); auto &UDiv = cast<GenericMachineInstr>(MI); Register Dst = UDiv.getReg(0); @@ -5603,7 +5636,7 @@ void CombinerHelper::applyUDivByPow2(MachineInstr &MI) { MI.eraseFromParent(); } -bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) { +bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_UMULH); Register RHS = MI.getOperand(2).getReg(); Register Dst = MI.getOperand(0).getReg(); @@ -5619,7 +5652,7 @@ bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) { return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}); } -void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) { +void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) const { Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); Register Dst = MI.getOperand(0).getReg(); @@ -5636,7 +5669,7 @@ void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) { } bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { unsigned Opc = MI.getOpcode(); assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB || Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV || @@ -5681,7 +5714,8 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI, return true; } -bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) { +bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, + Register &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_FSUB); Register LHS = MI.getOperand(1).getReg(); @@ -5705,7 +5739,8 @@ bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) { return false; } -void CombinerHelper::applyFsubToFneg(MachineInstr 
&MI, Register &MatchInfo) { +void CombinerHelper::applyFsubToFneg(MachineInstr &MI, + Register &MatchInfo) const { Register Dst = MI.getOperand(0).getReg(); Builder.buildFNeg( Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0)); @@ -5731,7 +5766,7 @@ static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, - bool CanReassociate) { + bool CanReassociate) const { auto *MF = MI.getMF(); const auto &TLI = *MF->getSubtarget().getTargetLowering(); @@ -5762,7 +5797,8 @@ bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI, } bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_FADD); bool AllowFusionGlobally, HasFMAD, Aggressive; @@ -5810,7 +5846,8 @@ bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA( } bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_FADD); bool AllowFusionGlobally, HasFMAD, Aggressive; @@ -5869,7 +5906,8 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA( } bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_FADD); bool AllowFusionGlobally, HasFMAD, Aggressive; @@ -5934,7 +5972,8 @@ bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA( } bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + 
MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_FADD); bool AllowFusionGlobally, HasFMAD, Aggressive; @@ -6060,7 +6099,8 @@ bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive( } bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_FSUB); bool AllowFusionGlobally, HasFMAD, Aggressive; @@ -6112,7 +6152,8 @@ bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA( } bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_FSUB); bool AllowFusionGlobally, HasFMAD, Aggressive; @@ -6159,7 +6200,8 @@ bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA( } bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_FSUB); bool AllowFusionGlobally, HasFMAD, Aggressive; @@ -6210,7 +6252,8 @@ bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA( } bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_FSUB); bool AllowFusionGlobally, HasFMAD, Aggressive; @@ -6269,7 +6312,7 @@ bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA( } bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI, - unsigned &IdxToPropagate) { + unsigned &IdxToPropagate) const { bool PropagateNaN; switch 
(MI.getOpcode()) { default: @@ -6296,7 +6339,7 @@ bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI, return MatchNaN(1) || MatchNaN(2); } -bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) { +bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) const { assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD"); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -6313,7 +6356,7 @@ bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) { } bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI, - Register &MatchInfo) { + Register &MatchInfo) const { // This combine folds the following patterns: // // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k)) @@ -6359,7 +6402,7 @@ bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI, } bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI, - Register &MatchInfo) { + Register &MatchInfo) const { // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x // if type(x) == type(G_TRUNC) if (!mi_match(MI.getOperand(1).getReg(), MRI, @@ -6370,7 +6413,7 @@ bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI, } bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI, - Register &MatchInfo) { + Register &MatchInfo) const { // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with // y if K == size of vector element type std::optional<ValueAndVReg> ShiftAmt; @@ -6446,7 +6489,7 @@ CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS, bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond, Register TrueVal, Register FalseVal, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { // Match: select (fcmp cond x, y) x, y // select (fcmp cond x, y) y, x // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition. 
@@ -6501,7 +6544,7 @@ bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond, } bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { // TODO: Handle integer cases. assert(MI.getOpcode() == TargetOpcode::G_SELECT); // Condition may be fed by a truncated compare. @@ -6516,7 +6559,7 @@ bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI, } bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { assert(MI.getOpcode() == TargetOpcode::G_ICMP); // (X + Y) == X --> Y == 0 // (X + Y) != X --> Y != 0 @@ -6547,7 +6590,7 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI, return CmpInst::isEquality(Pred) && Y.isValid(); } -bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) { +bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) const { Register ShiftReg = MI.getOperand(2).getReg(); LLT ResTy = MRI.getType(MI.getOperand(0).getReg()); auto IsShiftTooBig = [&](const Constant *C) { @@ -6557,7 +6600,7 @@ bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) { return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig); } -bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) { +bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) const { unsigned LHSOpndIdx = 1; unsigned RHSOpndIdx = 2; switch (MI.getOpcode()) { @@ -6587,7 +6630,7 @@ bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) { !getIConstantVRegVal(RHS, MRI); } -bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) { +bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) const { Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); std::optional<FPValueAndVReg> ValAndVReg; @@ -6596,7 +6639,7 @@ bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) { return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg)); } -void 
CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) { +void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) const { Observer.changingInstr(MI); unsigned LHSOpndIdx = 1; unsigned RHSOpndIdx = 2; @@ -6618,7 +6661,7 @@ void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) { Observer.changedInstr(MI); } -bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) { +bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const { LLT SrcTy = MRI.getType(Src); if (SrcTy.isFixedVector()) return isConstantSplatVector(Src, 1, AllowUndefs); @@ -6631,7 +6674,7 @@ bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) { return false; // scalable vector } -bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) { +bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const { LLT SrcTy = MRI.getType(Src); if (SrcTy.isFixedVector()) return isConstantSplatVector(Src, 0, AllowUndefs); @@ -6647,7 +6690,7 @@ bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) { // Ignores COPYs during conformance checks. // FIXME scalable vectors. bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue, - bool AllowUndefs) { + bool AllowUndefs) const { GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI); if (!BuildVector) return false; @@ -6672,7 +6715,7 @@ bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue, // Ignores COPYs during lookups. 
// FIXME scalable vectors std::optional<APInt> -CombinerHelper::getConstantOrConstantSplatVector(Register Src) { +CombinerHelper::getConstantOrConstantSplatVector(Register Src) const { auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI); if (IConstant) return IConstant->Value; @@ -6718,7 +6761,7 @@ bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const { // TODO: use knownbits to determine zeros bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { uint32_t Flags = Select->getFlags(); Register Dest = Select->getReg(0); Register Cond = Select->getCondReg(); @@ -6821,6 +6864,23 @@ bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, }; return true; } + + // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2) + if (FalseValue.isPowerOf2() && TrueValue.isZero()) { + MatchInfo = [=](MachineIRBuilder &B) { + B.setInstrAndDebugLoc(*Select); + Register Not = MRI.createGenericVirtualRegister(CondTy); + B.buildNot(Not, Cond); + Register Inner = MRI.createGenericVirtualRegister(TrueTy); + B.buildZExtOrTrunc(Inner, Not); + // The shift amount must be scalar. + LLT ShiftTy = TrueTy.isVector() ? 
TrueTy.getElementType() : TrueTy; + auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2()); + B.buildShl(Dest, Inner, ShAmtC, Flags); + }; + return true; + } + // select Cond, -1, C --> or (sext Cond), C if (TrueValue.isAllOnes()) { MatchInfo = [=](MachineIRBuilder &B) { @@ -6850,7 +6910,7 @@ bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select, // TODO: use knownbits to determine zeros bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { uint32_t Flags = Select->getFlags(); Register DstReg = Select->getReg(0); Register Cond = Select->getCondReg(); @@ -6931,7 +6991,7 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, } bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg())); GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg())); @@ -7002,7 +7062,35 @@ bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO, } } -bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { +// (neg (min/max x, (neg x))) --> (max/min x, (neg x)) +bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI, + BuildFnTy &MatchInfo) const { + assert(MI.getOpcode() == TargetOpcode::G_SUB); + Register DestReg = MI.getOperand(0).getReg(); + LLT DestTy = MRI.getType(DestReg); + + Register X; + Register Sub0; + auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0)); + if (mi_match(DestReg, MRI, + m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern), + m_GSMax(m_Reg(X), NegPattern), + m_GUMin(m_Reg(X), NegPattern), + m_GUMax(m_Reg(X), NegPattern)))))) { + MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg()); + unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode()); + if (isLegal({NewOpc, {DestTy}})) { + MatchInfo = [=](MachineIRBuilder &B) { + B.buildInstr(NewOpc, {DestReg}, {X, Sub0}); + }; + 
return true; + } + } + + return false; +} + +bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const { GSelect *Select = cast<GSelect>(&MI); if (tryFoldSelectOfConstants(Select, MatchInfo)) @@ -7018,8 +7106,8 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { /// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2) /// into a single comparison using range-based reasoning. /// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges. -bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic, - BuildFnTy &MatchInfo) { +bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges( + GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const { assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor"); bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND; Register DstReg = Logic->getReg(0); @@ -7178,7 +7266,7 @@ bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic, } bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpecte xor"); Register DestReg = Logic->getReg(0); Register LHS = Logic->getLHSReg(); @@ -7252,7 +7340,7 @@ bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic, return false; } -bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) { +bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const { GAnd *And = cast<GAnd>(&MI); if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo)) @@ -7264,7 +7352,7 @@ bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) { return false; } -bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) { +bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const { GOr *Or = cast<GOr>(&MI); if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo)) @@ -7276,7 +7364,8 @@ bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) { return false; } -bool 
CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) { +bool CombinerHelper::matchAddOverflow(MachineInstr &MI, + BuildFnTy &MatchInfo) const { GAddCarryOut *Add = cast<GAddCarryOut>(&MI); // Addo has no flags @@ -7444,18 +7533,20 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) { } void CombinerHelper::applyBuildFnMO(const MachineOperand &MO, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); MatchInfo(Builder); Root->eraseFromParent(); } -bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) { +bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI, + int64_t Exponent) const { bool OptForSize = MI.getMF()->getFunction().hasOptSize(); return getTargetLowering().isBeneficialToExpandPowI(Exponent, OptForSize); } -void CombinerHelper::applyExpandFPowI(MachineInstr &MI, int64_t Exponent) { +void CombinerHelper::applyExpandFPowI(MachineInstr &MI, + int64_t Exponent) const { auto [Dst, Base] = MI.getFirst2Regs(); LLT Ty = MRI.getType(Dst); int64_t ExpVal = Exponent; @@ -7499,7 +7590,7 @@ void CombinerHelper::applyExpandFPowI(MachineInstr &MI, int64_t Exponent) { } bool CombinerHelper::matchFoldAPlusC1MinusC2(const MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { // fold (A+C1)-C2 -> A+(C1-C2) const GSub *Sub = cast<GSub>(&MI); GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg())); @@ -7522,7 +7613,7 @@ bool CombinerHelper::matchFoldAPlusC1MinusC2(const MachineInstr &MI, } bool CombinerHelper::matchFoldC2MinusAPlusC1(const MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { // fold C2-(A+C1) -> (C2-C1)-A const GSub *Sub = cast<GSub>(&MI); GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg())); @@ -7545,7 +7636,7 @@ bool CombinerHelper::matchFoldC2MinusAPlusC1(const MachineInstr &MI, } bool CombinerHelper::matchFoldAMinusC1MinusC2(const MachineInstr &MI, - BuildFnTy 
&MatchInfo) { + BuildFnTy &MatchInfo) const { // fold (A-C1)-C2 -> A-(C1+C2) const GSub *Sub1 = cast<GSub>(&MI); GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg())); @@ -7568,7 +7659,7 @@ bool CombinerHelper::matchFoldAMinusC1MinusC2(const MachineInstr &MI, } bool CombinerHelper::matchFoldC1Minus2MinusC2(const MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { // fold (C1-A)-C2 -> (C1-C2)-A const GSub *Sub1 = cast<GSub>(&MI); GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg())); @@ -7591,7 +7682,7 @@ bool CombinerHelper::matchFoldC1Minus2MinusC2(const MachineInstr &MI, } bool CombinerHelper::matchFoldAMinusC1PlusC2(const MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { // fold ((A-C1)+C2) -> (A+(C2-C1)) const GAdd *Add = cast<GAdd>(&MI); GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg())); @@ -7613,8 +7704,8 @@ bool CombinerHelper::matchFoldAMinusC1PlusC2(const MachineInstr &MI, return true; } -bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, - BuildFnTy &MatchInfo) { +bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector( + const MachineInstr &MI, BuildFnTy &MatchInfo) const { const GUnmerge *Unmerge = cast<GUnmerge>(&MI); if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg())) @@ -7696,7 +7787,7 @@ bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, } bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { bool Changed = false; auto &Shuffle = cast<GShuffleVector>(MI); @@ -7717,9 +7808,9 @@ bool CombinerHelper::matchShuffleUndefRHS(MachineInstr &MI, if (!Changed) return false; - MatchInfo = [&, NewMask](MachineIRBuilder &B) { + MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) { B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2), - NewMask); + std::move(NewMask)); }; return true; @@ -7740,7 +7831,7 @@ static void 
commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) { } bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { auto &Shuffle = cast<GShuffleVector>(MI); // If any of the two inputs is already undef, don't check the mask again to @@ -7792,7 +7883,7 @@ bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI, } bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI); Register Dst = Subo->getReg(0); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp index 797a1e84e21e..24d2d9ddaeeb 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp @@ -29,7 +29,7 @@ using namespace llvm; bool CombinerHelper::matchMergeXAndUndef(const MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { const GMerge *Merge = cast<GMerge>(&MI); Register Dst = Merge->getReg(0); @@ -58,7 +58,7 @@ bool CombinerHelper::matchMergeXAndUndef(const MachineInstr &MI, } bool CombinerHelper::matchMergeXAndZero(const MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { const GMerge *Merge = cast<GMerge>(&MI); Register Dst = Merge->getReg(0); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp index 30557e6a2304..7b4c427a9c50 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp @@ -26,7 +26,7 @@ using namespace llvm; bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { GSext *Sext = cast<GSext>(getDefIgnoringCopies(MO.getReg(), MRI)); GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Sext->getSrcReg(), MRI)); @@ -59,7 
+59,7 @@ bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO, } bool CombinerHelper::matchZextOfTrunc(const MachineOperand &MO, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { GZext *Zext = cast<GZext>(getDefIgnoringCopies(MO.getReg(), MRI)); GTrunc *Trunc = cast<GTrunc>(getDefIgnoringCopies(Zext->getSrcReg(), MRI)); @@ -94,7 +94,7 @@ bool CombinerHelper::matchZextOfTrunc(const MachineOperand &MO, } bool CombinerHelper::matchNonNegZext(const MachineOperand &MO, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { GZext *Zext = cast<GZext>(MRI.getVRegDef(MO.getReg())); Register Dst = Zext->getReg(0); @@ -116,7 +116,7 @@ bool CombinerHelper::matchNonNegZext(const MachineOperand &MO, bool CombinerHelper::matchTruncateOfExt(const MachineInstr &Root, const MachineInstr &ExtMI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { const GTrunc *Trunc = cast<GTrunc>(&Root); const GExtOp *Ext = cast<GExtOp>(&ExtMI); @@ -164,15 +164,14 @@ bool CombinerHelper::matchTruncateOfExt(const MachineInstr &Root, bool CombinerHelper::isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const { const TargetLowering &TLI = getTargetLowering(); - const DataLayout &DL = getDataLayout(); LLVMContext &Ctx = getContext(); switch (Opcode) { case TargetOpcode::G_ANYEXT: case TargetOpcode::G_ZEXT: - return TLI.isZExtFree(FromTy, ToTy, DL, Ctx); + return TLI.isZExtFree(FromTy, ToTy, Ctx); case TargetOpcode::G_TRUNC: - return TLI.isTruncateFree(FromTy, ToTy, DL, Ctx); + return TLI.isTruncateFree(FromTy, ToTy, Ctx); default: return false; } @@ -180,7 +179,7 @@ bool CombinerHelper::isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const { bool CombinerHelper::matchCastOfSelect(const MachineInstr &CastMI, const MachineInstr &SelectMI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI); const GSelect *Select = cast<GSelect>(&SelectMI); @@ -212,7 +211,7 @@ bool CombinerHelper::matchCastOfSelect(const 
MachineInstr &CastMI, bool CombinerHelper::matchExtOfExt(const MachineInstr &FirstMI, const MachineInstr &SecondMI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { const GExtOp *First = cast<GExtOp>(&FirstMI); const GExtOp *Second = cast<GExtOp>(&SecondMI); @@ -276,7 +275,7 @@ bool CombinerHelper::matchExtOfExt(const MachineInstr &FirstMI, bool CombinerHelper::matchCastOfBuildVector(const MachineInstr &CastMI, const MachineInstr &BVMI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI); const GBuildVector *BV = cast<GBuildVector>(&BVMI); @@ -316,7 +315,7 @@ bool CombinerHelper::matchCastOfBuildVector(const MachineInstr &CastMI, bool CombinerHelper::matchNarrowBinop(const MachineInstr &TruncMI, const MachineInstr &BinopMI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { const GTrunc *Trunc = cast<GTrunc>(&TruncMI); const GBinOp *BinOp = cast<GBinOp>(&BinopMI); @@ -340,7 +339,7 @@ bool CombinerHelper::matchNarrowBinop(const MachineInstr &TruncMI, } bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI, - APInt &MatchInfo) { + APInt &MatchInfo) const { const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI); APInt Input = getIConstantFromReg(Cast->getSrcReg(), MRI); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp index 872b5fed11c6..fc40533cf3dc 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCompares.cpp @@ -29,7 +29,7 @@ using namespace llvm; bool CombinerHelper::constantFoldICmp(const GICmp &ICmp, const GIConstant &LHSCst, const GIConstant &RHSCst, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { if (LHSCst.getKind() != GIConstant::GIConstantKind::Scalar) return false; @@ -60,7 +60,7 @@ bool CombinerHelper::constantFoldICmp(const GICmp &ICmp, bool CombinerHelper::constantFoldFCmp(const GFCmp &FCmp, const GFConstant 
&LHSCst, const GFConstant &RHSCst, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { if (LHSCst.getKind() != GFConstant::GFConstantKind::Scalar) return false; @@ -89,7 +89,7 @@ bool CombinerHelper::constantFoldFCmp(const GFCmp &FCmp, } bool CombinerHelper::matchCanonicalizeICmp(const MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { const GICmp *Cmp = cast<GICmp>(&MI); Register Dst = Cmp->getReg(0); @@ -114,7 +114,7 @@ bool CombinerHelper::matchCanonicalizeICmp(const MachineInstr &MI, } bool CombinerHelper::matchCanonicalizeFCmp(const MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { const GFCmp *Cmp = cast<GFCmp>(&MI); Register Dst = Cmp->getReg(0); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp index 84fb3b596589..229076bbdeaa 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp @@ -31,7 +31,7 @@ using namespace llvm; using namespace MIPatternMatch; bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { GExtractVectorElement *Extract = cast<GExtractVectorElement>(&MI); Register Dst = Extract->getReg(0); @@ -89,7 +89,7 @@ bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI, } bool CombinerHelper::matchExtractVectorElementWithDifferentIndices( - const MachineOperand &MO, BuildFnTy &MatchInfo) { + const MachineOperand &MO, BuildFnTy &MatchInfo) const { MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root); @@ -146,7 +146,8 @@ bool CombinerHelper::matchExtractVectorElementWithDifferentIndices( } bool CombinerHelper::matchExtractVectorElementWithBuildVector( - const MachineInstr &MI, const MachineInstr &MI2, BuildFnTy &MatchInfo) { + const MachineInstr &MI, const MachineInstr &MI2, + BuildFnTy 
&MatchInfo) const { const GExtractVectorElement *Extract = cast<GExtractVectorElement>(&MI); const GBuildVector *Build = cast<GBuildVector>(&MI2); @@ -185,7 +186,7 @@ bool CombinerHelper::matchExtractVectorElementWithBuildVector( } bool CombinerHelper::matchExtractVectorElementWithBuildVectorTrunc( - const MachineOperand &MO, BuildFnTy &MatchInfo) { + const MachineOperand &MO, BuildFnTy &MatchInfo) const { MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root); @@ -252,7 +253,8 @@ bool CombinerHelper::matchExtractVectorElementWithBuildVectorTrunc( } bool CombinerHelper::matchExtractVectorElementWithShuffleVector( - const MachineInstr &MI, const MachineInstr &MI2, BuildFnTy &MatchInfo) { + const MachineInstr &MI, const MachineInstr &MI2, + BuildFnTy &MatchInfo) const { const GExtractVectorElement *Extract = cast<GExtractVectorElement>(&MI); const GShuffleVector *Shuffle = cast<GShuffleVector>(&MI2); @@ -338,7 +340,7 @@ bool CombinerHelper::matchExtractVectorElementWithShuffleVector( } bool CombinerHelper::matchInsertVectorElementOOB(MachineInstr &MI, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { GInsertVectorElement *Insert = cast<GInsertVectorElement>(&MI); Register Dst = Insert->getReg(0); @@ -361,7 +363,7 @@ bool CombinerHelper::matchInsertVectorElementOOB(MachineInstr &MI, } bool CombinerHelper::matchAddOfVScale(const MachineOperand &MO, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { GAdd *Add = cast<GAdd>(MRI.getVRegDef(MO.getReg())); GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Add->getLHSReg())); GVScale *RHSVScale = cast<GVScale>(MRI.getVRegDef(Add->getRHSReg())); @@ -380,7 +382,7 @@ bool CombinerHelper::matchAddOfVScale(const MachineOperand &MO, } bool CombinerHelper::matchMulOfVScale(const MachineOperand &MO, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { GMul *Mul = cast<GMul>(MRI.getVRegDef(MO.getReg())); GVScale *LHSVScale = 
cast<GVScale>(MRI.getVRegDef(Mul->getLHSReg())); @@ -401,7 +403,7 @@ bool CombinerHelper::matchMulOfVScale(const MachineOperand &MO, } bool CombinerHelper::matchSubOfVScale(const MachineOperand &MO, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { GSub *Sub = cast<GSub>(MRI.getVRegDef(MO.getReg())); GVScale *RHSVScale = cast<GVScale>(MRI.getVRegDef(Sub->getRHSReg())); @@ -421,7 +423,7 @@ bool CombinerHelper::matchSubOfVScale(const MachineOperand &MO, } bool CombinerHelper::matchShlOfVScale(const MachineOperand &MO, - BuildFnTy &MatchInfo) { + BuildFnTy &MatchInfo) const { GShl *Shl = cast<GShl>(MRI.getVRegDef(MO.getReg())); GVScale *LHSVScale = cast<GVScale>(MRI.getVRegDef(Shl->getSrcReg())); diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 30cd3ce3baa5..6c15ed3423d3 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -253,6 +253,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, // For COPYs we don't do anything, don't increase the depth. computeKnownBitsImpl(SrcReg, Known2, DemandedElts, Depth + (Opcode != TargetOpcode::COPY)); + Known2 = Known2.anyextOrTrunc(BitWidth); Known = Known.intersectWith(Known2); // If we reach a point where we don't know anything // just stop looking through the operands. diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index cf835ad187f8..e2247f76098e 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -182,9 +182,8 @@ void LegalizerHelper::insertParts(Register DstReg, // Merge sub-vectors with different number of elements and insert into DstReg. 
if (ResultTy.isVector()) { assert(LeftoverRegs.size() == 1 && "Expected one leftover register"); - SmallVector<Register, 8> AllRegs; - for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs)) - AllRegs.push_back(Reg); + SmallVector<Register, 8> AllRegs(PartRegs.begin(), PartRegs.end()); + AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end()); return mergeMixedSubvectors(DstReg, AllRegs); } @@ -1717,14 +1716,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, case TargetOpcode::G_ICMP: { Register LHS = MI.getOperand(2).getReg(); LLT SrcTy = MRI.getType(LHS); - uint64_t SrcSize = SrcTy.getSizeInBits(); CmpInst::Predicate Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); - // TODO: Handle the non-equality case for weird sizes. - if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred)) - return UnableToLegalize; - LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover) SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs; if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs, @@ -1776,19 +1770,59 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]); MIRBuilder.buildICmp(Pred, Dst, Or, Zero); } else { - // TODO: Handle non-power-of-two types. 
- assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?"); - assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?"); - Register LHSL = LHSPartRegs[0]; - Register LHSH = LHSPartRegs[1]; - Register RHSL = RHSPartRegs[0]; - Register RHSH = RHSPartRegs[1]; - MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH); - MachineInstrBuilder CmpHEQ = - MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH); - MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( - ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL); - MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH); + Register CmpIn; + for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) { + Register CmpOut; + CmpInst::Predicate PartPred; + + if (I == E - 1 && LHSLeftoverRegs.empty()) { + PartPred = Pred; + CmpOut = Dst; + } else { + PartPred = ICmpInst::getUnsignedPredicate(Pred); + CmpOut = MRI.createGenericVirtualRegister(ResTy); + } + + if (!CmpIn) { + MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I], + RHSPartRegs[I]); + } else { + auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I], + RHSPartRegs[I]); + auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, + LHSPartRegs[I], RHSPartRegs[I]); + MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp); + } + + CmpIn = CmpOut; + } + + for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) { + Register CmpOut; + CmpInst::Predicate PartPred; + + if (I == E - 1 && LHSLeftoverRegs.empty()) { + PartPred = Pred; + CmpOut = Dst; + } else { + PartPred = ICmpInst::getUnsignedPredicate(Pred); + CmpOut = MRI.createGenericVirtualRegister(ResTy); + } + + if (!CmpIn) { + MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I], + RHSLeftoverRegs[I]); + } else { + auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I], + RHSLeftoverRegs[I]); + auto CmpEq = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, + LHSLeftoverRegs[I], RHSLeftoverRegs[I]); + 
MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp); + } + + CmpIn = CmpOut; + } } MI.eraseFromParent(); return Legalized; @@ -2850,15 +2884,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; case TargetOpcode::G_UDIV: case TargetOpcode::G_UREM: - case TargetOpcode::G_UMIN: - case TargetOpcode::G_UMAX: Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); widenScalarDst(MI, WideTy); Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_UDIVREM: Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); @@ -2867,6 +2898,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarDst(MI, WideTy, 1); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_UMIN: + case TargetOpcode::G_UMAX: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); + unsigned ExtOpc = + TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx), + getApproximateEVTForLLT(WideTy, Ctx)) + ? TargetOpcode::G_SEXT + : TargetOpcode::G_ZEXT; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, ExtOpc); + widenScalarSrc(MI, WideTy, 2, ExtOpc); + widenScalarDst(MI, WideTy); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_SELECT: Observer.changingInstr(MI); @@ -3043,10 +3092,17 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { if (TypeIdx == 0) widenScalarDst(MI, WideTy); else { - unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>( - MI.getOperand(1).getPredicate())) - ? 
TargetOpcode::G_SEXT - : TargetOpcode::G_ZEXT; + LLT SrcTy = MRI.getType(MI.getOperand(2).getReg()); + CmpInst::Predicate Pred = + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); + unsigned ExtOpcode = + (CmpInst::isSigned(Pred) || + TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx), + getApproximateEVTForLLT(WideTy, Ctx))) + ? TargetOpcode::G_SEXT + : TargetOpcode::G_ZEXT; widenScalarSrc(MI, WideTy, 2, ExtOpcode); widenScalarSrc(MI, WideTy, 3, ExtOpcode); } @@ -5348,9 +5404,9 @@ LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx, auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); - unsigned SrcScalSize = SrcTy.getScalarSizeInBits(); - LLT SrcNarrowTy = - LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize); + unsigned NewElemCount = + NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits(); + LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType()); // Split the Src and Dst Reg into smaller registers SmallVector<Register> SrcVRegs, BitcastVRegs; @@ -5390,7 +5446,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( // Further legalization attempts will be needed to do split further. NarrowTy = DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2)); - unsigned NewElts = NarrowTy.getNumElements(); + unsigned NewElts = NarrowTy.isVector() ? 
NarrowTy.getNumElements() : 1; SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs; extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI); @@ -5501,7 +5557,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( Ops.clear(); } - MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi}); + MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi}); MI.eraseFromParent(); return Legalized; } @@ -6173,9 +6229,8 @@ LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) { if (MaskNumElts < SrcNumElts) { // Extend mask to match new destination vector size with // undef values. - SmallVector<int, 16> NewMask(Mask); - for (unsigned I = MaskNumElts; I < SrcNumElts; ++I) - NewMask.push_back(-1); + SmallVector<int, 16> NewMask(SrcNumElts, -1); + llvm::copy(Mask, NewMask.begin()); moreElementsVectorDst(MI, SrcTy, 0); MIRBuilder.setInstrAndDebugLoc(MI); @@ -6255,16 +6310,14 @@ LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI, moreElementsVectorSrc(MI, MoreTy, 2); // Adjust mask based on new input vector length. 
- SmallVector<int, 16> NewMask; + SmallVector<int, 16> NewMask(WidenNumElts, -1); for (unsigned I = 0; I != NumElts; ++I) { int Idx = Mask[I]; if (Idx < static_cast<int>(NumElts)) - NewMask.push_back(Idx); + NewMask[I] = Idx; else - NewMask.push_back(Idx - NumElts + WidenNumElts); + NewMask[I] = Idx - NumElts + WidenNumElts; } - for (unsigned I = NumElts; I != WidenNumElts; ++I) - NewMask.push_back(-1); moreElementsVectorDst(MI, MoreTy, 0); MIRBuilder.setInstrAndDebugLoc(MI); MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(), @@ -6397,19 +6450,19 @@ LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, MRI); int NarrowParts = Src1Regs.size(); - for (int I = 0, E = Src1Left.size(); I != E; ++I) { - Src1Regs.push_back(Src1Left[I]); - Src2Regs.push_back(Src2Left[I]); - } + Src1Regs.append(Src1Left); + Src2Regs.append(Src2Left); DstRegs.reserve(Src1Regs.size()); for (int i = 0, e = Src1Regs.size(); i != e; ++i) { Register DstReg = MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i])); - Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); + Register CarryOut; // Forward the final carry-out to the destination register if (i == e - 1 && CarryDst) CarryOut = CarryDst; + else + CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); if (!CarryIn) { MIRBuilder.buildInstr(OpO, {DstReg, CarryOut}, @@ -6570,8 +6623,7 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs, LeftoverRegs, MIRBuilder, MRI); - for (Register Reg : LeftoverRegs) - SrcRegs.push_back(Reg); + SrcRegs.append(LeftoverRegs); uint64_t NarrowSize = NarrowTy.getSizeInBits(); Register OpReg = MI.getOperand(2).getReg(); @@ -7920,6 +7972,7 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) { Register Dst = Cmp->getReg(0); LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Cmp->getReg(1)); LLT CmpTy = DstTy.changeElementSize(1); CmpInst::Predicate 
LTPredicate = Cmp->isSigned() @@ -7929,16 +7982,32 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) { ? CmpInst::Predicate::ICMP_SGT : CmpInst::Predicate::ICMP_UGT; - auto One = MIRBuilder.buildConstant(DstTy, 1); auto Zero = MIRBuilder.buildConstant(DstTy, 0); auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(), Cmp->getRHSReg()); - auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero); - - auto MinusOne = MIRBuilder.buildConstant(DstTy, -1); auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(), Cmp->getRHSReg()); - MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne); + + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); + auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false); + if (TLI.shouldExpandCmpUsingSelects(getApproximateEVTForLLT(SrcTy, Ctx)) || + BC == TargetLowering::UndefinedBooleanContent) { + auto One = MIRBuilder.buildConstant(DstTy, 1); + auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero); + + auto MinusOne = MIRBuilder.buildConstant(DstTy, -1); + MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne); + } else { + if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent) + std::swap(IsGT, IsLT); + // Extend boolean results to DstTy, which is at least i2, before subtracting + // them. 
+ unsigned BoolExtOp = + MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false); + IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT}); + IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT}); + MIRBuilder.buildSub(Dst, IsGT, IsLT); + } MI.eraseFromParent(); return Legalized; diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index e411e73dbe73..4557b3fbed61 100644 --- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -317,7 +317,6 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) { assert(MRI->getType(StoreMI->getValueReg()) == OrigTy); #endif - const auto &DL = MF->getFunction().getDataLayout(); bool AnyMerged = false; do { unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size()); @@ -327,7 +326,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) { for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) { LLT StoreTy = LLT::scalar(MergeSizeBits); EVT StoreEVT = - getApproximateEVTForLLT(StoreTy, DL, MF->getFunction().getContext()); + getApproximateEVTForLLT(StoreTy, MF->getFunction().getContext()); if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] && TLI->canMergeStoresTo(AS, StoreEVT, *MF) && (TLI->isTypeLegal(StoreEVT))) diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 45807a6818ee..625d556e3ff5 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -276,6 +276,21 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, reportGISelFailure(MF, TPC, MORE, R); } +unsigned llvm::getInverseGMinMaxOpcode(unsigned MinMaxOpc) { + switch (MinMaxOpc) { + case TargetOpcode::G_SMIN: + return TargetOpcode::G_SMAX; + case TargetOpcode::G_SMAX: + return TargetOpcode::G_SMIN; + case TargetOpcode::G_UMIN: + return TargetOpcode::G_UMAX; + case TargetOpcode::G_UMAX: 
+ return TargetOpcode::G_UMIN; + default: + llvm_unreachable("unrecognized opcode"); + } +} + std::optional<APInt> llvm::getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI) { std::optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough( @@ -525,8 +540,7 @@ bool llvm::extractParts(Register Reg, LLT RegTy, LLT MainTy, LLT &LeftoverTy, RegNumElts % LeftoverNumElts == 0 && RegTy.getScalarSizeInBits() == MainTy.getScalarSizeInBits() && LeftoverNumElts > 1) { - LeftoverTy = - LLT::fixed_vector(LeftoverNumElts, RegTy.getScalarSizeInBits()); + LeftoverTy = LLT::fixed_vector(LeftoverNumElts, RegTy.getElementType()); // Unmerge the SrcReg to LeftoverTy vectors SmallVector<Register, 4> UnmergeValues; @@ -1518,6 +1532,18 @@ llvm::isConstantOrConstantSplatVector(MachineInstr &MI, return APInt(ScalarSize, *MaybeCst, true); } +std::optional<APFloat> +llvm::isConstantOrConstantSplatVectorFP(MachineInstr &MI, + const MachineRegisterInfo &MRI) { + Register Def = MI.getOperand(0).getReg(); + if (auto FpConst = getFConstantVRegValWithLookThrough(Def, MRI)) + return FpConst->Value; + auto MaybeCstFP = getFConstantSplat(Def, MRI, /*allowUndef=*/false); + if (!MaybeCstFP) + return std::nullopt; + return MaybeCstFP->Value; +} + bool llvm::isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs) { switch (MI.getOpcode()) { diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp index 470582885fab..e920b1be6822 100644 --- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp +++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp @@ -60,11 +60,17 @@ static bool canParameterizeCallOperand(const CallBase *CI, unsigned OpIdx) { if (Name.starts_with("__dtrace")) return false; } - if (isCalleeOperand(CI, OpIdx) && - CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value()) { + if (isCalleeOperand(CI, OpIdx)) { // The operand is the callee and it has already been signed. 
Ignore this // because we cannot add another ptrauth bundle to the call instruction. - return false; + if (CI->getOperandBundle(LLVMContext::OB_ptrauth).has_value()) + return false; + } else { + // The target of the arc-attached call must be a constant and cannot be + // parameterized. + if (CI->isOperandBundleOfType(LLVMContext::OB_clang_arc_attachedcall, + OpIdx)) + return false; } return true; } @@ -154,7 +160,7 @@ static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) { auto *DestAT = dyn_cast<ArrayType>(DestTy); assert(DestAT); assert(SrcAT->getNumElements() == DestAT->getNumElements()); - Value *Result = UndefValue::get(DestTy); + Value *Result = PoisonValue::get(DestTy); for (unsigned int I = 0, E = SrcAT->getNumElements(); I < E; ++I) { Value *Element = createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)), diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index e8f7c6850a50..64f290f5930a 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -131,7 +131,7 @@ public: HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) : MF(mf), LIS(pass.getAnalysis<LiveIntervalsWrapperPass>().getLIS()), - LSS(pass.getAnalysis<LiveStacks>()), + LSS(pass.getAnalysis<LiveStacksWrapperLegacy>().getLS()), MDT(pass.getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()), VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), TRI(*mf.getSubtarget().getRegisterInfo()), @@ -193,7 +193,7 @@ public: InlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI) : MF(MF), LIS(Pass.getAnalysis<LiveIntervalsWrapperPass>().getLIS()), - LSS(Pass.getAnalysis<LiveStacks>()), + LSS(Pass.getAnalysis<LiveStacksWrapperLegacy>().getLS()), MDT(Pass.getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()), VRM(VRM), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()), 
TRI(*MF.getSubtarget().getRegisterInfo()), diff --git a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp index 2561f2e5c9bb..6fd84646009b 100644 --- a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp @@ -74,13 +74,13 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const { if (!MDT) { LLVM_DEBUG(dbgs() << "Building DominatorTree on the fly\n"); OwnedMDT = std::make_unique<MachineDominatorTree>(); - OwnedMDT->getBase().recalculate(*MF); + OwnedMDT->recalculate(*MF); MDT = OwnedMDT.get(); } // Generate LoopInfo from it. OwnedMLI = std::make_unique<MachineLoopInfo>(); - OwnedMLI->analyze(MDT->getBase()); + OwnedMLI->analyze(*MDT); MLI = OwnedMLI.get(); } diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 748dd0ca9858..ade67bb545d1 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -2782,7 +2782,7 @@ void InstrRefBasedLDV::BlockPHIPlacement( // Apply IDF calculator to the designated set of location defs, storing // required PHIs into PHIBlocks. Uses the dominator tree stored in the // InstrRefBasedLDV object. 
- IDFCalculatorBase<MachineBasicBlock, false> IDF(DomTree->getBase()); + IDFCalculatorBase<MachineBasicBlock, false> IDF(*DomTree); IDF.setLiveInBlocks(AllBlocks); IDF.setDefiningBlocks(DefBlocks); diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp index a2b166227194..a7f089928f84 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp @@ -112,7 +112,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { MachineDominatorTree *DomTree = nullptr; if (InstrRefBased) { DomTree = &MDT; - MDT.calculate(MF); + MDT.recalculate(MF); TheImpl = &*InstrRefImpl; } diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 317d3401f000..79085e587ebc 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -82,7 +82,7 @@ INITIALIZE_PASS_BEGIN(LiveDebugVariablesWrapperLegacy, DEBUG_TYPE, INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_END(LiveDebugVariablesWrapperLegacy, DEBUG_TYPE, - "Debug Variable Analysis", false, false) + "Debug Variable Analysis", false, true) void LiveDebugVariablesWrapperLegacy::getAnalysisUsage( AnalysisUsage &AU) const { diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index f9ee6e4563f8..f38527a3ce6a 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -83,7 +83,7 @@ INITIALIZE_PASS_BEGIN(LiveIntervalsWrapperPass, "liveintervals", INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_END(LiveIntervalsWrapperPass, "liveintervals", - "Live Interval Analysis", false, false) + "Live Interval Analysis", false, true) bool LiveIntervalsWrapperPass::runOnMachineFunction(MachineFunction &MF) { 
LIS.Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI(); diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp index bc8c59381a40..3367171a1566 100644 --- a/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -41,7 +41,7 @@ INITIALIZE_PASS_BEGIN(LiveRegMatrixWrapperLegacy, "liveregmatrix", INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) INITIALIZE_PASS_END(LiveRegMatrixWrapperLegacy, "liveregmatrix", - "Live Register Matrix", false, false) + "Live Register Matrix", false, true) void LiveRegMatrixWrapperLegacy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -66,7 +66,7 @@ void LiveRegMatrix::init(MachineFunction &MF, LiveIntervals &pLIS, unsigned NumRegUnits = TRI->getNumRegUnits(); if (NumRegUnits != Matrix.size()) Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]); - Matrix.init(LIUAlloc, NumRegUnits); + Matrix.init(*LIUAlloc, NumRegUnits); // Make sure no stale queries get reused. 
invalidateVirtRegs(); diff --git a/llvm/lib/CodeGen/LiveStacks.cpp b/llvm/lib/CodeGen/LiveStacks.cpp index 6228a4dd2ad3..d615caf48c0a 100644 --- a/llvm/lib/CodeGen/LiveStacks.cpp +++ b/llvm/lib/CodeGen/LiveStacks.cpp @@ -15,20 +15,21 @@ #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Function.h" using namespace llvm; #define DEBUG_TYPE "livestacks" -char LiveStacks::ID = 0; -INITIALIZE_PASS_BEGIN(LiveStacks, DEBUG_TYPE, - "Live Stack Slot Analysis", false, false) +char LiveStacksWrapperLegacy::ID = 0; +INITIALIZE_PASS_BEGIN(LiveStacksWrapperLegacy, DEBUG_TYPE, + "Live Stack Slot Analysis", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) -INITIALIZE_PASS_END(LiveStacks, DEBUG_TYPE, - "Live Stack Slot Analysis", false, false) +INITIALIZE_PASS_END(LiveStacksWrapperLegacy, DEBUG_TYPE, + "Live Stack Slot Analysis", false, true) -char &llvm::LiveStacksID = LiveStacks::ID; +char &llvm::LiveStacksID = LiveStacksWrapperLegacy::ID; -void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const { +void LiveStacksWrapperLegacy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addPreserved<SlotIndexesWrapperPass>(); AU.addRequiredTransitive<SlotIndexesWrapperPass>(); @@ -42,11 +43,10 @@ void LiveStacks::releaseMemory() { S2RCMap.clear(); } -bool LiveStacks::runOnMachineFunction(MachineFunction &MF) { +void LiveStacks::init(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); // FIXME: No analysis is being done right now. We are relying on the // register allocators to provide the information. 
- return false; } LiveInterval & @@ -68,6 +68,33 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) { return I->second; } +AnalysisKey LiveStacksAnalysis::Key; + +LiveStacks LiveStacksAnalysis::run(MachineFunction &MF, + MachineFunctionAnalysisManager &) { + LiveStacks Impl; + Impl.init(MF); + return Impl; +} +PreservedAnalyses +LiveStacksPrinterPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &AM) { + AM.getResult<LiveStacksAnalysis>(MF).print(OS, MF.getFunction().getParent()); + return PreservedAnalyses::all(); +} + +bool LiveStacksWrapperLegacy::runOnMachineFunction(MachineFunction &MF) { + Impl = LiveStacks(); + Impl.init(MF); + return false; +} + +void LiveStacksWrapperLegacy::releaseMemory() { Impl = LiveStacks(); } + +void LiveStacksWrapperLegacy::print(raw_ostream &OS, const Module *) const { + Impl.print(OS); +} + /// print - Implement the dump method. void LiveStacks::print(raw_ostream &OS, const Module*) const { diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp index f17d60dc22dd..55428ab7832d 100644 --- a/llvm/lib/CodeGen/LiveVariables.cpp +++ b/llvm/lib/CodeGen/LiveVariables.cpp @@ -217,8 +217,8 @@ void LiveVariables::HandleVirtRegDef(Register Reg, MachineInstr &MI) { /// Also returns the sub-registers that're defined by the instruction. MachineInstr * LiveVariables::FindLastPartialDef(Register Reg, - SmallSet<unsigned, 4> &PartDefRegs) { - unsigned LastDefReg = 0; + SmallSet<Register, 4> &PartDefRegs) { + Register LastDefReg = 0; unsigned LastDefDist = 0; MachineInstr *LastDef = nullptr; for (MCPhysReg SubReg : TRI->subregs(Reg)) { @@ -264,14 +264,14 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) { // ... // = EAX // All of the sub-registers must have been defined before the use of Reg! 
- SmallSet<unsigned, 4> PartDefRegs; + SmallSet<Register, 4> PartDefRegs; MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs); // If LastPartialDef is NULL, it must be using a livein register. if (LastPartialDef) { LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/, true/*IsImp*/)); PhysRegDef[Reg] = LastPartialDef; - SmallSet<unsigned, 8> Processed; + SmallSet<MCPhysReg, 8> Processed; for (MCPhysReg SubReg : TRI->subregs(Reg)) { if (Processed.count(SubReg)) continue; @@ -460,7 +460,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO, unsigned NumRegs) { } void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI, - SmallVectorImpl<unsigned> &Defs) { + SmallVectorImpl<Register> &Defs) { // What parts of the register are previously defined? SmallSet<unsigned, 32> Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { @@ -499,7 +499,7 @@ void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI, } void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI, - SmallVectorImpl<unsigned> &Defs) { + SmallVectorImpl<Register> &Defs) { while (!Defs.empty()) { Register Reg = Defs.pop_back_val(); for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) { @@ -510,7 +510,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI, } void LiveVariables::runOnInstr(MachineInstr &MI, - SmallVectorImpl<unsigned> &Defs, + SmallVectorImpl<Register> &Defs, unsigned NumRegs) { assert(!MI.isDebugOrPseudoInstr()); // Process all of the operands of the instruction... @@ -522,8 +522,8 @@ void LiveVariables::runOnInstr(MachineInstr &MI, NumOperandsToProcess = 1; // Clear kill and dead markers. LV will recompute them. 
- SmallVector<unsigned, 4> UseRegs; - SmallVector<unsigned, 4> DefRegs; + SmallVector<Register, 4> UseRegs; + SmallVector<Register, 4> DefRegs; SmallVector<unsigned, 1> RegMasks; for (unsigned i = 0; i != NumOperandsToProcess; ++i) { MachineOperand &MO = MI.getOperand(i); @@ -531,7 +531,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI, RegMasks.push_back(i); continue; } - if (!MO.isReg() || MO.getReg() == 0) + if (!MO.isReg() || !MO.getReg()) continue; Register MOReg = MO.getReg(); if (MO.isUse()) { @@ -551,8 +551,8 @@ void LiveVariables::runOnInstr(MachineInstr &MI, MachineBasicBlock *MBB = MI.getParent(); // Process all uses. - for (unsigned MOReg : UseRegs) { - if (Register::isVirtualRegister(MOReg)) + for (Register MOReg : UseRegs) { + if (MOReg.isVirtual()) HandleVirtRegUse(MOReg, MBB, MI); else if (!MRI->isReserved(MOReg)) HandlePhysRegUse(MOReg, MI); @@ -563,8 +563,8 @@ void LiveVariables::runOnInstr(MachineInstr &MI, HandleRegMask(MI.getOperand(Mask), NumRegs); // Process all defs. - for (unsigned MOReg : DefRegs) { - if (Register::isVirtualRegister(MOReg)) + for (Register MOReg : DefRegs) { + if (MOReg.isVirtual()) HandleVirtRegDef(MOReg, MI); else if (!MRI->isReserved(MOReg)) HandlePhysRegDef(MOReg, &MI, Defs); @@ -574,7 +574,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI, void LiveVariables::runOnBlock(MachineBasicBlock *MBB, unsigned NumRegs) { // Mark live-in registers as live-in. 
- SmallVector<unsigned, 4> Defs; + SmallVector<Register, 4> Defs; for (const auto &LI : MBB->liveins()) { assert(Register::isPhysicalRegister(LI.PhysReg) && "Cannot have a live-in virtual register!"); diff --git a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp index 1602cd99c383..936c9fbb2fff 100644 --- a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp +++ b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp @@ -54,10 +54,9 @@ MVT llvm::getMVTForLLT(LLT Ty) { Ty.getElementCount()); } -EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, - LLVMContext &Ctx) { +EVT llvm::getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx) { if (Ty.isVector()) { - EVT EltVT = getApproximateEVTForLLT(Ty.getElementType(), DL, Ctx); + EVT EltVT = getApproximateEVTForLLT(Ty.getElementType(), Ctx); return EVT::getVectorVT(Ctx, EltVT, Ty.getElementCount()); } diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index 21b849244d9b..4be708a6abfa 100644 --- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -271,15 +271,14 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, continue; LLVM_DEBUG( - dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";); + dbgs() << "Rescheduling Multi-Use Instructions Lexographically."); Changed |= rescheduleLexographically( MultiUsers[E.second], MBB, [&]() -> MachineBasicBlock::iterator { return UseI; }); } PseudoIdempotentInstCount = PseudoIdempotentInstructions.size(); - LLVM_DEBUG( - dbgs() << "Rescheduling Idempotent Instructions Lexographically.";); + LLVM_DEBUG(dbgs() << "Rescheduling Idempotent Instructions Lexographically."); Changed |= rescheduleLexographically( PseudoIdempotentInstructions, MBB, [&]() -> MachineBasicBlock::iterator { return MBB->begin(); }); @@ -365,7 +364,7 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, bool Changed = false; - LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << 
MBB->getName() << "\n\n";); + LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n"); LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; MBB->dump();); @@ -384,7 +383,7 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, Changed |= doDefKillClear(MBB); LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); - dbgs() << "\n";); + dbgs() << "\n"); LLVM_DEBUG( dbgs() << "\n\n================================================\n\n"); return Changed; diff --git a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp index 03f015f8c9e3..9c6487b40d60 100644 --- a/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp +++ b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp @@ -63,6 +63,12 @@ static cl::opt<std::string> InteractiveChannelBaseName( "outgoing name should be " "<regalloc-evict-interactive-channel-base>.out")); +static cl::opt<unsigned> + MaxCascade("mlregalloc-max-cascade", cl::Hidden, + cl::desc("The maximum number of times a live range can be " + "evicted before preventing it from being evicted"), + cl::init(20)); + // Options that only make sense in development mode #ifdef LLVM_HAVE_TFLITE #include "RegAllocScore.h" @@ -554,7 +560,7 @@ private: std::unique_ptr<Logger> Log; }; -#endif //#ifdef LLVM_HAVE_TFLITE +#endif // #ifdef LLVM_HAVE_TFLITE } // namespace float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) { @@ -643,8 +649,18 @@ bool MLEvictAdvisor::loadInterferenceFeatures( RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) < RegClassInfo.getNumAllocatableRegs( MRI->getRegClass(Intf->reg()))); - // Only evict older cascades or live ranges without a cascade. + unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg()); + // There is a potential that the model could be adversarial and + // continually evict live ranges over and over again, leading to a + // large amount of compile time being spent in regalloc. 
If we hit the + // threshold, prevent the range from being evicted. We still let the + // range through if it is urgent as we are required to produce an + // eviction if the candidate is not spillable. + if (IntfCascade >= MaxCascade && !Urgent) + return false; + + // Only evict older cascades or live ranges without a cascade. if (Cascade <= IntfCascade) { if (!Urgent) return false; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 5d06af3ebf33..5ac6472a01e9 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -16,11 +16,13 @@ #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDomTreeUpdater.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -1146,7 +1148,7 @@ public: MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( MachineBasicBlock *Succ, Pass *P, MachineFunctionAnalysisManager *MFAM, - std::vector<SparseBitVector<>> *LiveInSets) { + std::vector<SparseBitVector<>> *LiveInSets, MachineDomTreeUpdater *MDTU) { assert((P || MFAM) && "Need a way to get analysis results!"); if (!canSplitCriticalEdge(Succ)) return nullptr; @@ -1346,8 +1348,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs); } - if (auto *MDT = GET_RESULT(MachineDominatorTree, getDomTree, )) - MDT->recordSplitCriticalEdge(this, Succ, NMBB); + if (MDTU) + MDTU->splitCriticalEdge(this, Succ, NMBB); if (MachineLoopInfo *MLI = GET_RESULT(MachineLoop, getLI, Info)) if (MachineLoop 
*TIL = MLI->getLoopFor(this)) { diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index 141cc1f35d66..b8d59214a6ec 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -399,7 +399,7 @@ bool MachineCombiner::improvesCriticalPathLen( << RootSlack << " SlackIsAccurate=" << SlackIsAccurate << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount << "\n\tRootDepth + RootLatency + RootSlack = " - << OldCycleCount;); + << OldCycleCount); LLVM_DEBUG(NewCycleCount <= OldCycleCount ? dbgs() << "\n\t It IMPROVES PathLen because" : dbgs() << "\n\t It DOES NOT improve PathLen because"); @@ -452,7 +452,7 @@ bool MachineCombiner::preservesResourceLen( LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: " << ResLenBeforeCombine - << " and after: " << ResLenAfterCombine << "\n";); + << " and after: " << ResLenAfterCombine << "\n"); LLVM_DEBUG( ResLenAfterCombine <= ResLenBeforeCombine + TII->getExtendResourceLenLimit() diff --git a/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp b/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp index a640fc007903..72e4be0165bf 100644 --- a/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp +++ b/llvm/lib/CodeGen/MachineDomTreeUpdater.cpp @@ -25,6 +25,13 @@ GenericDomTreeUpdater<MachineDomTreeUpdater, MachineDominatorTree, MachinePostDominatorTree>::recalculate(MachineFunction &MF); +template void GenericDomTreeUpdater< + MachineDomTreeUpdater, MachineDominatorTree, + MachinePostDominatorTree>::applyUpdatesImpl</*IsForward=*/true>(); +template void GenericDomTreeUpdater< + MachineDomTreeUpdater, MachineDominatorTree, + MachinePostDominatorTree>::applyUpdatesImpl</*IsForward=*/false>(); + bool MachineDomTreeUpdater::forceFlushDeletedBB() { if (DeletedBBs.empty()) return false; diff --git a/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/llvm/lib/CodeGen/MachineDominanceFrontier.cpp index 6a8ede4feb93..ed69ed931c5c 100644 --- a/llvm/lib/CodeGen/MachineDominanceFrontier.cpp +++ 
b/llvm/lib/CodeGen/MachineDominanceFrontier.cpp @@ -38,8 +38,7 @@ char &llvm::MachineDominanceFrontierID = MachineDominanceFrontier::ID; bool MachineDominanceFrontier::runOnMachineFunction(MachineFunction &) { releaseMemory(); - Base.analyze( - getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree().getBase()); + Base.analyze(getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree()); return false; } diff --git a/llvm/lib/CodeGen/MachineDominators.cpp b/llvm/lib/CodeGen/MachineDominators.cpp index a2cc8fdfa7c9..67a91c87bb1b 100644 --- a/llvm/lib/CodeGen/MachineDominators.cpp +++ b/llvm/lib/CodeGen/MachineDominators.cpp @@ -95,12 +95,6 @@ MachineDominatorTreeWrapperPass::MachineDominatorTreeWrapperPass() *PassRegistry::getPassRegistry()); } -void MachineDominatorTree::calculate(MachineFunction &F) { - CriticalEdgesToSplit.clear(); - NewBBs.clear(); - recalculate(F); -} - char &llvm::MachineDominatorsID = MachineDominatorTreeWrapperPass::ID; bool MachineDominatorTreeWrapperPass::runOnMachineFunction(MachineFunction &F) { @@ -121,71 +115,3 @@ void MachineDominatorTreeWrapperPass::print(raw_ostream &OS, if (DT) DT->print(OS); } - -void MachineDominatorTree::applySplitCriticalEdges() const { - // Bail out early if there is nothing to do. - if (CriticalEdgesToSplit.empty()) - return; - - // For each element in CriticalEdgesToSplit, remember whether or not element - // is the new immediate domminator of its successor. The mapping is done by - // index, i.e., the information for the ith element of CriticalEdgesToSplit is - // the ith element of IsNewIDom. - SmallBitVector IsNewIDom(CriticalEdgesToSplit.size(), true); - size_t Idx = 0; - - // Collect all the dominance properties info, before invalidating - // the underlying DT. - for (CriticalEdge &Edge : CriticalEdgesToSplit) { - // Update dominator information. 
- MachineBasicBlock *Succ = Edge.ToBB; - MachineDomTreeNode *SuccDTNode = Base::getNode(Succ); - - for (MachineBasicBlock *PredBB : Succ->predecessors()) { - if (PredBB == Edge.NewBB) - continue; - // If we are in this situation: - // FromBB1 FromBB2 - // + + - // + + + + - // + + + + - // ... Split1 Split2 ... - // + + - // + + - // + - // Succ - // Instead of checking the domiance property with Split2, we check it with - // FromBB2 since Split2 is still unknown of the underlying DT structure. - if (NewBBs.count(PredBB)) { - assert(PredBB->pred_size() == 1 && "A basic block resulting from a " - "critical edge split has more " - "than one predecessor!"); - PredBB = *PredBB->pred_begin(); - } - if (!Base::dominates(SuccDTNode, Base::getNode(PredBB))) { - IsNewIDom[Idx] = false; - break; - } - } - ++Idx; - } - - // Now, update DT with the collected dominance properties info. - Idx = 0; - for (CriticalEdge &Edge : CriticalEdgesToSplit) { - // We know FromBB dominates NewBB. - MachineDomTreeNode *NewDTNode = - const_cast<MachineDominatorTree *>(this)->Base::addNewBlock( - Edge.NewBB, Edge.FromBB); - - // If all the other predecessors of "Succ" are dominated by "Succ" itself - // then the new block is the new immediate dominator of "Succ". Otherwise, - // the new block doesn't dominate anything. 
- if (IsNewIDom[Idx]) - const_cast<MachineDominatorTree *>(this)->Base::changeImmediateDominator( - Base::getNode(Edge.ToBB), NewDTNode); - ++Idx; - } - NewBBs.clear(); - CriticalEdgesToSplit.clear(); -} diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index a293a77d3fae..e6b9538fe9a0 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -102,6 +102,7 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) { case P::TracksLiveness: return "TracksLiveness"; case P::TiedOpsRewritten: return "TiedOpsRewritten"; case P::FailsVerification: return "FailsVerification"; + case P::FailedRegAlloc: return "FailedRegAlloc"; case P::TracksDebugUserValues: return "TracksDebugUserValues"; } // clang-format on diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 941861da5c56..958efa79d7e9 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -2219,26 +2219,36 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { return hash_combine_range(HashComponents.begin(), HashComponents.end()); } -void MachineInstr::emitError(StringRef Msg) const { +const MDNode *MachineInstr::getLocCookieMD() const { // Find the source location cookie. 
- uint64_t LocCookie = 0; const MDNode *LocMD = nullptr; for (unsigned i = getNumOperands(); i != 0; --i) { if (getOperand(i-1).isMetadata() && (LocMD = getOperand(i-1).getMetadata()) && LocMD->getNumOperands() != 0) { - if (const ConstantInt *CI = - mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) { - LocCookie = CI->getZExtValue(); - break; - } + if (mdconst::hasa<ConstantInt>(LocMD->getOperand(0))) + return LocMD; } } - if (const MachineBasicBlock *MBB = getParent()) - if (const MachineFunction *MF = MBB->getParent()) - return MF->getFunction().getContext().emitError(LocCookie, Msg); - report_fatal_error(Msg); + return nullptr; +} + +void MachineInstr::emitInlineAsmError(const Twine &Msg) const { + assert(isInlineAsm()); + const MDNode *LocMD = getLocCookieMD(); + uint64_t LocCookie = + LocMD + ? mdconst::extract<ConstantInt>(LocMD->getOperand(0))->getZExtValue() + : 0; + LLVMContext &Ctx = getMF()->getFunction().getContext(); + Ctx.diagnose(DiagnosticInfoInlineAsm(LocCookie, Msg)); +} + +void MachineInstr::emitGenericError(const Twine &Msg) const { + const Function &Fn = getMF()->getFunction(); + Fn.getContext().diagnose( + DiagnosticInfoGenericWithLoc(Msg, Fn, getDebugLoc())); } MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL, diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index d21059189b18..d1d5509dc482 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDomTreeUpdater.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -133,7 +134,7 @@ namespace { AliasAnalysis *AA = nullptr; // Alias analysis info. 
MachineBlockFrequencyInfo *MBFI = nullptr; // Machine block frequncy info MachineLoopInfo *MLI = nullptr; // Current MachineLoopInfo - MachineDominatorTree *DT = nullptr; // Machine dominator tree for the cur loop + MachineDomTreeUpdater *MDTU = nullptr; // Wraps current dominator tree // State that is updated as we process loops bool Changed = false; // True if a loop is changed. @@ -375,7 +376,9 @@ bool MachineLICMImpl::run(MachineFunction &MF) { .getManager() .getResult<AAManager>(MF.getFunction()) : &LegacyPass->getAnalysis<AAResultsWrapperPass>().getAAResults(); - DT = GET_RESULT(MachineDominatorTree, getDomTree, ); + MachineDomTreeUpdater DTU(GET_RESULT(MachineDominatorTree, getDomTree, ), + MachineDomTreeUpdater::UpdateStrategy::Lazy); + MDTU = &DTU; MLI = GET_RESULT(MachineLoop, getLI, Info); MBFI = DisableHoistingToHotterBlocks != UseBFI::None ? GET_RESULT(MachineBlockFrequency, getMBFI, Info) @@ -421,7 +424,7 @@ bool MachineLICMImpl::run(MachineFunction &MF) { else { // CSEMap is initialized for loop header when the first instruction is // being hoisted. - MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); + MachineDomTreeNode *N = MDTU->getDomTree().getNode(CurLoop->getHeader()); FirstInLoop = true; HoistOutOfLoop(N, CurLoop, CurPreheader); CSEMap.clear(); @@ -764,7 +767,7 @@ bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB, SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks) - if (!DT->dominates(BB, CurrentLoopExitingBlock)) { + if (!MDTU->getDomTree().dominates(BB, CurrentLoopExitingBlock)) { SpeculationState = SpeculateTrue; return false; } @@ -1600,7 +1603,7 @@ bool MachineLICMImpl::MayCSE(MachineInstr *MI) { unsigned Opcode = MI->getOpcode(); for (auto &Map : CSEMap) { // Check this CSEMap's preheader dominates MI's basic block. 
- if (DT->dominates(Map.first, MI->getParent())) { + if (MDTU->getDomTree().dominates(Map.first, MI->getParent())) { DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI = Map.second.find(Opcode); // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate @@ -1668,7 +1671,7 @@ unsigned MachineLICMImpl::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader, bool HasCSEDone = false; for (auto &Map : CSEMap) { // Check this CSEMap's preheader dominates MI's basic block. - if (DT->dominates(Map.first, MI->getParent())) { + if (MDTU->getDomTree().dominates(Map.first, MI->getParent())) { DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI = Map.second.find(Opcode); if (CI != Map.second.end()) { @@ -1732,7 +1735,7 @@ MachineLICMImpl::getCurPreheader(MachineLoop *CurLoop, } CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), LegacyPass, - MFAM, nullptr); + MFAM, nullptr, MDTU); if (!CurPreheader) { CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); return nullptr; diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp index 6399e8a95236..8827a8327670 100644 --- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp +++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp @@ -189,7 +189,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) { })) { MBBDefs[Reg] = DefMI; LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in " - << printMBBReference(*MBB) << ": " << *DefMI;); + << printMBBReference(*MBB) << ": " << *DefMI); } } @@ -212,7 +212,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) { // Check for an earlier identical and reusable instruction. 
if (IsCandidate && MBBDefs.hasIdentical(DefedReg, &MI)) { LLVM_DEBUG(dbgs() << "Removing redundant instruction in " - << printMBBReference(*MBB) << ": " << MI;); + << printMBBReference(*MBB) << ": " << MI); removeRedundantDef(&MI); Changed = true; continue; @@ -232,7 +232,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) { // Record this MI for potential later reuse. if (IsCandidate) { LLVM_DEBUG(dbgs() << "Found interesting instruction in " - << printMBBReference(*MBB) << ": " << MI;); + << printMBBReference(*MBB) << ": " << MI); MBBDefs[DefedReg] = &MI; assert(!MBBKills.count(DefedReg) && "Should already have been removed."); } diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp index b0d74ecd6a85..d6906bacde0e 100644 --- a/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -77,7 +77,7 @@ bool MachineLoopInfo::invalidate( void MachineLoopInfo::calculate(MachineDominatorTree &MDT) { releaseMemory(); - analyze(MDT.getBase()); + analyze(MDT); } void MachineLoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp index 956317510dc7..5c5f4b6738bb 100644 --- a/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -14,6 +14,8 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCSymbol.h" using namespace llvm; @@ -59,3 +61,10 @@ MachineModuleInfoImpl::ExprStubListTy MachineModuleInfoImpl::getSortedExprStubs( ExprStubs.clear(); return List; } + +MachineModuleInfoELF::MachineModuleInfoELF(const MachineModuleInfo &MMI) { + const Module *M = MMI.getModule(); + const auto *Flag = mdconst::extract_or_null<ConstantInt>( + M->getModuleFlag("ptrauth-sign-personality")); + HasSignedPersonality = 
Flag && Flag->getZExtValue() == 1; +} diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index 19a96e33163e..4c5489434c69 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -1182,7 +1182,7 @@ bool MachineOutliner::outline( } } - LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";); + LLVM_DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n"); return OutlinedSomething; } diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 7a10bd39e269..acd42aa497c6 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -567,6 +567,7 @@ void SwingSchedulerDAG::schedule() { Topo.InitDAGTopologicalSorting(); changeDependences(); postProcessDAG(); + DDG = std::make_unique<SwingSchedulerDDG>(SUnits, &EntrySU, &ExitSU); LLVM_DEBUG(dump()); NodeSetType NodeSets; @@ -1216,7 +1217,6 @@ struct FuncUnitSorter { /// Calculate the maximum register pressure of the scheduled instructions stream class HighRegisterPressureDetector { MachineBasicBlock *OrigMBB; - const MachineFunction &MF; const MachineRegisterInfo &MRI; const TargetRegisterInfo *TRI; @@ -1283,9 +1283,9 @@ private: } } - // Return true if Reg is fixed one, for example, stack pointer - bool isFixedRegister(Register Reg) const { - return Reg.isPhysical() && TRI->isFixedRegister(MF, Reg.asMCReg()); + // Return true if Reg is reserved one, for example, stack pointer + bool isReservedRegister(Register Reg) const { + return Reg.isPhysical() && MRI.isReserved(Reg.asMCReg()); } bool isDefinedInThisLoop(Register Reg) const { @@ -1311,7 +1311,7 @@ private: // because it's used only at the first iteration. 
if (MI.isPHI() && Reg != getLoopPhiReg(MI, OrigMBB)) continue; - if (isFixedRegister(Reg)) + if (isReservedRegister(Reg)) continue; if (isDefinedInThisLoop(Reg)) continue; @@ -1326,48 +1326,7 @@ private: // Calculate the upper limit of each pressure set void computePressureSetLimit(const RegisterClassInfo &RCI) { for (unsigned PSet = 0; PSet < PSetNum; PSet++) - PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet); - - // We assume fixed registers, such as stack pointer, are already in use. - // Therefore subtracting the weight of the fixed registers from the limit of - // each pressure set in advance. - SmallDenseSet<Register, 8> FixedRegs; - for (const TargetRegisterClass *TRC : TRI->regclasses()) { - for (const MCPhysReg Reg : *TRC) - if (isFixedRegister(Reg)) - FixedRegs.insert(Reg); - } - - LLVM_DEBUG({ - for (auto Reg : FixedRegs) { - dbgs() << printReg(Reg, TRI, 0, &MRI) << ": ["; - for (MCRegUnit Unit : TRI->regunits(Reg)) { - const int *Sets = TRI->getRegUnitPressureSets(Unit); - for (; *Sets != -1; Sets++) { - dbgs() << TRI->getRegPressureSetName(*Sets) << ", "; - } - } - dbgs() << "]\n"; - } - }); - - for (auto Reg : FixedRegs) { - LLVM_DEBUG(dbgs() << "fixed register: " << printReg(Reg, TRI, 0, &MRI) - << "\n"); - for (MCRegUnit Unit : TRI->regunits(Reg)) { - auto PSetIter = MRI.getPressureSets(Unit); - unsigned Weight = PSetIter.getWeight(); - for (; PSetIter.isValid(); ++PSetIter) { - unsigned &Limit = PressureSetLimit[*PSetIter]; - assert( - Limit >= Weight && - "register pressure limit must be greater than or equal weight"); - Limit -= Weight; - LLVM_DEBUG(dbgs() << "PSet=" << *PSetIter << " Limit=" << Limit - << " (decreased by " << Weight << ")\n"); - } - } - } + PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet); } // There are two patterns of last-use. 
@@ -1464,7 +1423,7 @@ private: const auto InsertReg = [this, &CurSetPressure](RegSetTy &RegSet, Register Reg) { - if (!Reg.isValid() || isFixedRegister(Reg)) + if (!Reg.isValid() || isReservedRegister(Reg)) return; bool Inserted = RegSet.insert(Reg).second; @@ -1478,7 +1437,7 @@ private: const auto EraseReg = [this, &CurSetPressure](RegSetTy &RegSet, Register Reg) { - if (!Reg.isValid() || isFixedRegister(Reg)) + if (!Reg.isValid() || isReservedRegister(Reg)) return; // live-in register @@ -1530,7 +1489,7 @@ private: public: HighRegisterPressureDetector(MachineBasicBlock *OrigMBB, const MachineFunction &MF) - : OrigMBB(OrigMBB), MF(MF), MRI(MF.getRegInfo()), + : OrigMBB(OrigMBB), MRI(MF.getRegInfo()), TRI(MF.getSubtarget().getRegisterInfo()), PSetNum(TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0), PressureSetLimit(PSetNum, 0) {} @@ -1625,29 +1584,6 @@ unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) { return RecMII; } -/// Swap all the anti dependences in the DAG. That means it is no longer a DAG, -/// but we do this to find the circuits, and then change them back. -static void swapAntiDependences(std::vector<SUnit> &SUnits) { - SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded; - for (SUnit &SU : SUnits) { - for (SDep &Pred : SU.Preds) - if (Pred.getKind() == SDep::Anti) - DepsAdded.push_back(std::make_pair(&SU, Pred)); - } - for (std::pair<SUnit *, SDep> &P : DepsAdded) { - // Remove this anti dependency and add one in the reverse direction. - SUnit *SU = P.first; - SDep &D = P.second; - SUnit *TargetSU = D.getSUnit(); - unsigned Reg = D.getReg(); - unsigned Lat = D.getLatency(); - SU->removePred(D); - SDep Dep(SU, SDep::Anti, Reg); - Dep.setLatency(Lat); - TargetSU->addPred(Dep); - } -} - /// Create the adjacency structure of the nodes in the graph. 
void SwingSchedulerDAG::Circuits::createAdjacencyStructure( SwingSchedulerDAG *DAG) { @@ -1656,11 +1592,11 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure( for (int i = 0, e = SUnits.size(); i != e; ++i) { Added.reset(); // Add any successor to the adjacency matrix and exclude duplicates. - for (auto &SI : SUnits[i].Succs) { + for (auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) { // Only create a back-edge on the first and last nodes of a dependence // chain. This records any chains and adds them later. - if (SI.getKind() == SDep::Output) { - int N = SI.getSUnit()->NodeNum; + if (OE.isOutputDep()) { + int N = OE.getDst()->NodeNum; int BackEdge = i; auto Dep = OutputDeps.find(BackEdge); if (Dep != OutputDeps.end()) { @@ -1670,11 +1606,19 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure( OutputDeps[N] = BackEdge; } // Do not process a boundary node, an artificial node. - // A back-edge is processed only if it goes to a Phi. - if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() || - (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI())) + if (OE.getDst()->isBoundaryNode() || OE.isArtificial()) continue; - int N = SI.getSUnit()->NodeNum; + + // This code is retained o preserve previous behavior and prevent + // regression. This condition means that anti-dependnecies within an + // iteration are ignored when searching circuits. Therefore it's natural + // to consider this dependence as well. + // FIXME: Remove this code if it doesn't have significant impact on + // performance. + if (OE.isAntiDep()) + continue; + + int N = OE.getDst()->NodeNum; if (!Added.test(N)) { AdjK[i].push_back(N); Added.set(N); @@ -1682,12 +1626,13 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure( } // A chain edge between a store and a load is treated as a back-edge in the // adjacency matrix. 
- for (auto &PI : SUnits[i].Preds) { - if (!SUnits[i].getInstr()->mayStore() || - !DAG->isLoopCarriedDep(&SUnits[i], PI, false)) + for (auto &IE : DAG->DDG->getInEdges(&SUnits[i])) { + SUnit *Src = IE.getSrc(); + SUnit *Dst = IE.getDst(); + if (!Dst->getInstr()->mayStore() || !DAG->isLoopCarriedDep(IE)) continue; - if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) { - int N = PI.getSUnit()->NodeNum; + if (IE.isOrderDep() && Src->getInstr()->mayLoad()) { + int N = Src->NodeNum; if (!Added.test(N)) { AdjK[i].push_back(N); Added.set(N); @@ -1762,10 +1707,6 @@ void SwingSchedulerDAG::Circuits::unblock(int U) { /// Identify all the elementary circuits in the dependence graph using /// Johnson's circuit algorithm. void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) { - // Swap all the anti dependences in the DAG. That means it is no longer a DAG, - // but we do this to find the circuits, and then change them back. - swapAntiDependences(SUnits); - Circuits Cir(SUnits, Topo); // Create the adjacency structure. Cir.createAdjacencyStructure(this); @@ -1773,9 +1714,6 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) { Cir.reset(); Cir.circuit(I, I, NodeSets, this); } - - // Change the dependences back so that we've created a DAG again. - swapAntiDependences(SUnits); } // Create artificial dependencies between the source of COPY/REG_SEQUENCE that @@ -1858,15 +1796,6 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) { } } -/// Return true for DAG nodes that we ignore when computing the cost functions. -/// We ignore the back-edge recurrence in order to avoid unbounded recursion -/// in the calculation of the ASAP, ALAP, etc functions. -static bool ignoreDependence(const SDep &D, bool isPred) { - if (D.isArtificial() || D.getSUnit()->isBoundaryNode()) - return true; - return D.getKind() == SDep::Anti && isPred; -} - /// Compute several functions need to order the nodes for scheduling. 
/// ASAP - Earliest time to schedule a node. /// ALAP - Latest time to schedule a node. @@ -1889,15 +1818,15 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { int asap = 0; int zeroLatencyDepth = 0; SUnit *SU = &SUnits[I]; - for (const SDep &P : SU->Preds) { - SUnit *pred = P.getSUnit(); - if (P.getLatency() == 0) + for (const auto &IE : DDG->getInEdges(SU)) { + SUnit *Pred = IE.getSrc(); + if (IE.getLatency() == 0) zeroLatencyDepth = - std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1); - if (ignoreDependence(P, true)) + std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1); + if (IE.ignoreDependence(true)) continue; - asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() - - getDistance(pred, SU, P) * MII)); + asap = std::max(asap, (int)(getASAP(Pred) + IE.getLatency() - + IE.getDistance() * MII)); } maxASAP = std::max(maxASAP, asap); ScheduleInfo[I].ASAP = asap; @@ -1909,17 +1838,17 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { int alap = maxASAP; int zeroLatencyHeight = 0; SUnit *SU = &SUnits[I]; - for (const SDep &S : SU->Succs) { - SUnit *succ = S.getSUnit(); - if (succ->isBoundaryNode()) + for (const auto &OE : DDG->getOutEdges(SU)) { + SUnit *Succ = OE.getDst(); + if (Succ->isBoundaryNode()) continue; - if (S.getLatency() == 0) + if (OE.getLatency() == 0) zeroLatencyHeight = - std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1); - if (ignoreDependence(S, true)) + std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1); + if (OE.ignoreDependence(true)) continue; - alap = std::min(alap, (int)(getALAP(succ) - S.getLatency() + - getDistance(SU, succ, S) * MII)); + alap = std::min(alap, (int)(getALAP(Succ) - OE.getLatency() + + OE.getDistance() * MII)); } ScheduleInfo[I].ALAP = alap; @@ -1948,26 +1877,33 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { /// as the predecessors of the elements of NodeOrder that are not also in /// NodeOrder. 
static bool pred_L(SetVector<SUnit *> &NodeOrder, - SmallSetVector<SUnit *, 8> &Preds, + SmallSetVector<SUnit *, 8> &Preds, SwingSchedulerDDG *DDG, const NodeSet *S = nullptr) { Preds.clear(); - for (const SUnit *SU : NodeOrder) { - for (const SDep &Pred : SU->Preds) { - if (S && S->count(Pred.getSUnit()) == 0) + + for (SUnit *SU : NodeOrder) { + for (const auto &IE : DDG->getInEdges(SU)) { + SUnit *PredSU = IE.getSrc(); + if (S && S->count(PredSU) == 0) continue; - if (ignoreDependence(Pred, true)) + if (IE.ignoreDependence(true)) continue; - if (NodeOrder.count(Pred.getSUnit()) == 0) - Preds.insert(Pred.getSUnit()); + if (NodeOrder.count(PredSU) == 0) + Preds.insert(PredSU); } - // Back-edges are predecessors with an anti-dependence. - for (const SDep &Succ : SU->Succs) { - if (Succ.getKind() != SDep::Anti) + + // FIXME: The following loop-carried dependencies may also need to be + // considered. + // - Physical register dependencies (true-dependence and WAW). + // - Memory dependencies. + for (const auto &OE : DDG->getOutEdges(SU)) { + SUnit *SuccSU = OE.getDst(); + if (!OE.isAntiDep()) continue; - if (S && S->count(Succ.getSUnit()) == 0) + if (S && S->count(SuccSU) == 0) continue; - if (NodeOrder.count(Succ.getSUnit()) == 0) - Preds.insert(Succ.getSUnit()); + if (NodeOrder.count(SuccSU) == 0) + Preds.insert(SuccSU); } } return !Preds.empty(); @@ -1977,25 +1913,33 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder, /// as the successors of the elements of NodeOrder that are not also in /// NodeOrder. 
static bool succ_L(SetVector<SUnit *> &NodeOrder, - SmallSetVector<SUnit *, 8> &Succs, + SmallSetVector<SUnit *, 8> &Succs, SwingSchedulerDDG *DDG, const NodeSet *S = nullptr) { Succs.clear(); - for (const SUnit *SU : NodeOrder) { - for (const SDep &Succ : SU->Succs) { - if (S && S->count(Succ.getSUnit()) == 0) + + for (SUnit *SU : NodeOrder) { + for (const auto &OE : DDG->getOutEdges(SU)) { + SUnit *SuccSU = OE.getDst(); + if (S && S->count(SuccSU) == 0) continue; - if (ignoreDependence(Succ, false)) + if (OE.ignoreDependence(false)) continue; - if (NodeOrder.count(Succ.getSUnit()) == 0) - Succs.insert(Succ.getSUnit()); + if (NodeOrder.count(SuccSU) == 0) + Succs.insert(SuccSU); } - for (const SDep &Pred : SU->Preds) { - if (Pred.getKind() != SDep::Anti) + + // FIXME: The following loop-carried dependencies may also need to be + // considered. + // - Physical register dependnecies (true-dependnece and WAW). + // - Memory dependencies. + for (const auto &IE : DDG->getInEdges(SU)) { + SUnit *PredSU = IE.getSrc(); + if (!IE.isAntiDep()) continue; - if (S && S->count(Pred.getSUnit()) == 0) + if (S && S->count(PredSU) == 0) continue; - if (NodeOrder.count(Pred.getSUnit()) == 0) - Succs.insert(Pred.getSUnit()); + if (NodeOrder.count(PredSU) == 0) + Succs.insert(PredSU); } } return !Succs.empty(); @@ -2006,7 +1950,8 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder, static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path, SetVector<SUnit *> &DestNodes, SetVector<SUnit *> &Exclude, - SmallPtrSet<SUnit *, 8> &Visited) { + SmallPtrSet<SUnit *, 8> &Visited, + SwingSchedulerDDG *DDG) { if (Cur->isBoundaryNode()) return false; if (Exclude.contains(Cur)) @@ -2016,14 +1961,14 @@ static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path, if (!Visited.insert(Cur).second) return Path.contains(Cur); bool FoundPath = false; - for (auto &SI : Cur->Succs) - if (!ignoreDependence(SI, false)) + for (const auto &OE : DDG->getOutEdges(Cur)) + if (!OE.ignoreDependence(false)) 
FoundPath |= - computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited); - for (auto &PI : Cur->Preds) - if (PI.getKind() == SDep::Anti) + computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG); + for (const auto &IE : DDG->getInEdges(Cur)) + if (IE.isAntiDep() && IE.getDistance() == 0) FoundPath |= - computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited); + computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG); if (FoundPath) Path.insert(Cur); return FoundPath; @@ -2120,14 +2065,14 @@ void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) { for (int i = 0, e = NodeSets.size(); i < e; ++i) { NodeSet &N1 = NodeSets[i]; SmallSetVector<SUnit *, 8> S1; - if (N1.empty() || !succ_L(N1, S1)) + if (N1.empty() || !succ_L(N1, S1, DDG.get())) continue; for (int j = i + 1; j < e; ++j) { NodeSet &N2 = NodeSets[j]; if (N1.compareRecMII(N2) != 0) continue; SmallSetVector<SUnit *, 8> S2; - if (N2.empty() || !succ_L(N2, S2)) + if (N2.empty() || !succ_L(N2, S2, DDG.get())) continue; if (llvm::set_is_subset(S1, S2) && S1.size() == S2.size()) { N1.setColocate(++Colocate); @@ -2168,22 +2113,22 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { for (NodeSet &I : NodeSets) { SmallSetVector<SUnit *, 8> N; // Add the nodes from the current node set to the previous node set. - if (succ_L(I, N)) { + if (succ_L(I, N, DDG.get())) { SetVector<SUnit *> Path; for (SUnit *NI : N) { Visited.clear(); - computePath(NI, Path, NodesAdded, I, Visited); + computePath(NI, Path, NodesAdded, I, Visited, DDG.get()); } if (!Path.empty()) I.insert(Path.begin(), Path.end()); } // Add the nodes from the previous node set to the current node set. 
N.clear(); - if (succ_L(NodesAdded, N)) { + if (succ_L(NodesAdded, N, DDG.get())) { SetVector<SUnit *> Path; for (SUnit *NI : N) { Visited.clear(); - computePath(NI, Path, I, NodesAdded, Visited); + computePath(NI, Path, I, NodesAdded, Visited, DDG.get()); } if (!Path.empty()) I.insert(Path.begin(), Path.end()); @@ -2195,7 +2140,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { // in a recurrent set. NodeSet NewSet; SmallSetVector<SUnit *, 8> N; - if (succ_L(NodesAdded, N)) + if (succ_L(NodesAdded, N, DDG.get())) for (SUnit *I : N) addConnectedNodes(I, NewSet, NodesAdded); if (!NewSet.empty()) @@ -2204,7 +2149,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { // Create a new node set with the connected nodes of any predecessor of a node // in a recurrent set. NewSet.clear(); - if (pred_L(NodesAdded, N)) + if (pred_L(NodesAdded, N, DDG.get())) for (SUnit *I : N) addConnectedNodes(I, NewSet, NodesAdded); if (!NewSet.empty()) @@ -2227,15 +2172,15 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet, SetVector<SUnit *> &NodesAdded) { NewSet.insert(SU); NodesAdded.insert(SU); - for (auto &SI : SU->Succs) { - SUnit *Successor = SI.getSUnit(); - if (!SI.isArtificial() && !Successor->isBoundaryNode() && + for (auto &OE : DDG->getOutEdges(SU)) { + SUnit *Successor = OE.getDst(); + if (!OE.isArtificial() && !Successor->isBoundaryNode() && NodesAdded.count(Successor) == 0) addConnectedNodes(Successor, NewSet, NodesAdded); } - for (auto &PI : SU->Preds) { - SUnit *Predecessor = PI.getSUnit(); - if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0) + for (auto &IE : DDG->getInEdges(SU)) { + SUnit *Predecessor = IE.getSrc(); + if (!IE.isArtificial() && NodesAdded.count(Predecessor) == 0) addConnectedNodes(Predecessor, NewSet, NodesAdded); } } @@ -2301,11 +2246,12 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n"); OrderKind 
Order; SmallSetVector<SUnit *, 8> N; - if (pred_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) { + if (pred_L(NodeOrder, N, DDG.get()) && llvm::set_is_subset(N, Nodes)) { R.insert(N.begin(), N.end()); Order = BottomUp; LLVM_DEBUG(dbgs() << " Bottom up (preds) "); - } else if (succ_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) { + } else if (succ_L(NodeOrder, N, DDG.get()) && + llvm::set_is_subset(N, Nodes)) { R.insert(N.begin(), N.end()); Order = TopDown; LLVM_DEBUG(dbgs() << " Top down (succs) "); @@ -2355,30 +2301,36 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { NodeOrder.insert(maxHeight); LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " "); R.remove(maxHeight); - for (const auto &I : maxHeight->Succs) { - if (Nodes.count(I.getSUnit()) == 0) + for (const auto &OE : DDG->getOutEdges(maxHeight)) { + SUnit *SU = OE.getDst(); + if (Nodes.count(SU) == 0) continue; - if (NodeOrder.contains(I.getSUnit())) + if (NodeOrder.contains(SU)) continue; - if (ignoreDependence(I, false)) + if (OE.ignoreDependence(false)) continue; - R.insert(I.getSUnit()); + R.insert(SU); } - // Back-edges are predecessors with an anti-dependence. - for (const auto &I : maxHeight->Preds) { - if (I.getKind() != SDep::Anti) + + // FIXME: The following loop-carried dependencies may also need to be + // considered. + // - Physical register dependnecies (true-dependnece and WAW). + // - Memory dependencies. 
+ for (const auto &IE : DDG->getInEdges(maxHeight)) { + SUnit *SU = IE.getSrc(); + if (!IE.isAntiDep()) continue; - if (Nodes.count(I.getSUnit()) == 0) + if (Nodes.count(SU) == 0) continue; - if (NodeOrder.contains(I.getSUnit())) + if (NodeOrder.contains(SU)) continue; - R.insert(I.getSUnit()); + R.insert(SU); } } Order = BottomUp; LLVM_DEBUG(dbgs() << "\n Switching order to bottom up "); SmallSetVector<SUnit *, 8> N; - if (pred_L(NodeOrder, N, &Nodes)) + if (pred_L(NodeOrder, N, DDG.get(), &Nodes)) R.insert(N.begin(), N.end()); } else { // Choose the node with the maximum depth. If more than one, choose @@ -2406,28 +2358,34 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { R.insert(Nodes.getNode(0)); break; } - for (const auto &I : maxDepth->Preds) { - if (Nodes.count(I.getSUnit()) == 0) + for (const auto &IE : DDG->getInEdges(maxDepth)) { + SUnit *SU = IE.getSrc(); + if (Nodes.count(SU) == 0) continue; - if (NodeOrder.contains(I.getSUnit())) + if (NodeOrder.contains(SU)) continue; - R.insert(I.getSUnit()); + R.insert(SU); } - // Back-edges are predecessors with an anti-dependence. - for (const auto &I : maxDepth->Succs) { - if (I.getKind() != SDep::Anti) + + // FIXME: The following loop-carried dependencies may also need to be + // considered. + // - Physical register dependnecies (true-dependnece and WAW). + // - Memory dependencies. 
+ for (const auto &OE : DDG->getOutEdges(maxDepth)) { + SUnit *SU = OE.getDst(); + if (!OE.isAntiDep()) continue; - if (Nodes.count(I.getSUnit()) == 0) + if (Nodes.count(SU) == 0) continue; - if (NodeOrder.contains(I.getSUnit())) + if (NodeOrder.contains(SU)) continue; - R.insert(I.getSUnit()); + R.insert(SU); } } Order = TopDown; LLVM_DEBUG(dbgs() << "\n Switching order to top down "); SmallSetVector<SUnit *, 8> N; - if (succ_L(NodeOrder, N, &Nodes)) + if (succ_L(NodeOrder, N, DDG.get(), &Nodes)) R.insert(N.begin(), N.end()); } } @@ -2500,7 +2458,7 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { // loop-carried output/order dependencies. Empirically, there are also // cases where scheduling becomes possible with backward search. if (SU->getInstr()->isPHI() || - Schedule.onlyHasLoopCarriedOutputOrOrderPreds(SU, this)) + Schedule.onlyHasLoopCarriedOutputOrOrderPreds(SU, this->getDDG())) scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II); else scheduleFound = Schedule.insert(SU, EarlyStart, LateStart, II); @@ -2720,22 +2678,20 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) { /// Return true for an order or output dependence that is loop carried /// potentially. A dependence is loop carried if the destination defines a value /// that may be used or defined by the source in a subsequent iteration. 
-bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, - bool isSucc) const { - if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) || - Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode()) +bool SwingSchedulerDAG::isLoopCarriedDep( + const SwingSchedulerDDGEdge &Edge) const { + if ((!Edge.isOrderDep() && !Edge.isOutputDep()) || Edge.isArtificial() || + Edge.getDst()->isBoundaryNode()) return false; if (!SwpPruneLoopCarried) return true; - if (Dep.getKind() == SDep::Output) + if (Edge.isOutputDep()) return true; - MachineInstr *SI = Source->getInstr(); - MachineInstr *DI = Dep.getSUnit()->getInstr(); - if (!isSucc) - std::swap(SI, DI); + MachineInstr *SI = Edge.getSrc()->getInstr(); + MachineInstr *DI = Edge.getDst()->getInstr(); assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI."); // Assume ordered loads and stores may have a loop carried dependence. @@ -2857,46 +2813,48 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) { } // Return the cycle of the earliest scheduled instruction in the chain. 
-int SMSchedule::earliestCycleInChain(const SDep &Dep) { +int SMSchedule::earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, + const SwingSchedulerDDG *DDG) { SmallPtrSet<SUnit *, 8> Visited; - SmallVector<SDep, 8> Worklist; + SmallVector<SwingSchedulerDDGEdge, 8> Worklist; Worklist.push_back(Dep); int EarlyCycle = INT_MAX; while (!Worklist.empty()) { - const SDep &Cur = Worklist.pop_back_val(); - SUnit *PrevSU = Cur.getSUnit(); + const SwingSchedulerDDGEdge &Cur = Worklist.pop_back_val(); + SUnit *PrevSU = Cur.getSrc(); if (Visited.count(PrevSU)) continue; std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU); if (it == InstrToCycle.end()) continue; EarlyCycle = std::min(EarlyCycle, it->second); - for (const auto &PI : PrevSU->Preds) - if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output) - Worklist.push_back(PI); + for (const auto &IE : DDG->getInEdges(PrevSU)) + if (IE.isOrderDep() || IE.isOutputDep()) + Worklist.push_back(IE); Visited.insert(PrevSU); } return EarlyCycle; } // Return the cycle of the latest scheduled instruction in the chain. 
-int SMSchedule::latestCycleInChain(const SDep &Dep) { +int SMSchedule::latestCycleInChain(const SwingSchedulerDDGEdge &Dep, + const SwingSchedulerDDG *DDG) { SmallPtrSet<SUnit *, 8> Visited; - SmallVector<SDep, 8> Worklist; + SmallVector<SwingSchedulerDDGEdge, 8> Worklist; Worklist.push_back(Dep); int LateCycle = INT_MIN; while (!Worklist.empty()) { - const SDep &Cur = Worklist.pop_back_val(); - SUnit *SuccSU = Cur.getSUnit(); + const SwingSchedulerDDGEdge &Cur = Worklist.pop_back_val(); + SUnit *SuccSU = Cur.getDst(); if (Visited.count(SuccSU) || SuccSU->isBoundaryNode()) continue; std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU); if (it == InstrToCycle.end()) continue; LateCycle = std::max(LateCycle, it->second); - for (const auto &SI : SuccSU->Succs) - if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output) - Worklist.push_back(SI); + for (const auto &OE : DDG->getOutEdges(SuccSU)) + if (OE.isOrderDep() || OE.isOutputDep()) + Worklist.push_back(OE); Visited.insert(SuccSU); } return LateCycle; @@ -2907,7 +2865,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) { /// to a Phi, which contains a reference to another Phi. static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) { for (auto &P : SU->Preds) - if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI()) + if (P.getKind() == SDep::Anti && P.getSUnit()->getInstr()->isPHI()) for (auto &S : P.getSUnit()->Succs) if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI()) return P.getSUnit(); @@ -2918,57 +2876,47 @@ static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) { /// depends on any predecessor or successor nodes scheduled already. void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG) { + const SwingSchedulerDDG *DDG = DAG->getDDG(); + // Iterate over each instruction that has been scheduled already. 
The start // slot computation depends on whether the previously scheduled instruction // is a predecessor or successor of the specified instruction. for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) { - - // Iterate over each instruction in the current cycle. for (SUnit *I : getInstructions(cycle)) { - // Because we're processing a DAG for the dependences, we recognize - // the back-edge in recurrences by anti dependences. - for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) { - const SDep &Dep = SU->Preds[i]; - if (Dep.getSUnit() == I) { - if (!DAG->isBackedge(SU, Dep)) { - int EarlyStart = cycle + Dep.getLatency() - - DAG->getDistance(Dep.getSUnit(), SU, Dep) * II; - *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); - if (DAG->isLoopCarriedDep(SU, Dep, false)) { - int End = earliestCycleInChain(Dep) + (II - 1); - *MinLateStart = std::min(*MinLateStart, End); - } - } else { - int LateStart = cycle - Dep.getLatency() + - DAG->getDistance(SU, Dep.getSUnit(), Dep) * II; - *MinLateStart = std::min(*MinLateStart, LateStart); + for (const auto &IE : DDG->getInEdges(SU)) { + if (IE.getSrc() == I) { + // FIXME: Add reverse edge to `DDG` instead of calling + // `isLoopCarriedDep` + if (DAG->isLoopCarriedDep(IE)) { + int End = earliestCycleInChain(IE, DDG) + (II - 1); + *MinLateStart = std::min(*MinLateStart, End); + } + int EarlyStart = cycle + IE.getLatency() - IE.getDistance() * II; + *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); + } + } + + for (const auto &OE : DDG->getOutEdges(SU)) { + if (OE.getDst() == I) { + // FIXME: Add reverse edge to `DDG` instead of calling + // `isLoopCarriedDep` + if (DAG->isLoopCarriedDep(OE)) { + int Start = latestCycleInChain(OE, DDG) + 1 - II; + *MaxEarlyStart = std::max(*MaxEarlyStart, Start); } + int LateStart = cycle - OE.getLatency() + OE.getDistance() * II; + *MinLateStart = std::min(*MinLateStart, LateStart); } + } + + SUnit *BE = multipleIterations(I, DAG); + for (const auto &Dep : 
SU->Preds) { // For instruction that requires multiple iterations, make sure that // the dependent instruction is not scheduled past the definition. - SUnit *BE = multipleIterations(I, DAG); if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() && !SU->isPred(I)) *MinLateStart = std::min(*MinLateStart, cycle); } - for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) { - if (SU->Succs[i].getSUnit() == I) { - const SDep &Dep = SU->Succs[i]; - if (!DAG->isBackedge(SU, Dep)) { - int LateStart = cycle - Dep.getLatency() + - DAG->getDistance(SU, Dep.getSUnit(), Dep) * II; - *MinLateStart = std::min(*MinLateStart, LateStart); - if (DAG->isLoopCarriedDep(SU, Dep)) { - int Start = latestCycleInChain(Dep) + 1 - II; - *MaxEarlyStart = std::max(*MaxEarlyStart, Start); - } - } else { - int EarlyStart = cycle + Dep.getLatency() - - DAG->getDistance(Dep.getSUnit(), SU, Dep) * II; - *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart); - } - } - } } } } @@ -2985,6 +2933,7 @@ void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, unsigned MoveDef = 0; unsigned MoveUse = 0; int StageInst1 = stageScheduled(SU); + const SwingSchedulerDDG *DDG = SSD->getDDG(); unsigned Pos = 0; for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E; @@ -3042,10 +2991,10 @@ void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, } // Check for order dependences between instructions. Make sure the source // is ordered before the destination. 
- for (auto &S : SU->Succs) { - if (S.getSUnit() != *I) + for (auto &OE : DDG->getOutEdges(SU)) { + if (OE.getDst() != *I) continue; - if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) { + if (OE.isOrderDep() && stageScheduled(*I) == StageInst1) { OrderBeforeUse = true; if (Pos < MoveUse) MoveUse = Pos; @@ -3053,18 +3002,17 @@ void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, // We did not handle HW dependences in previous for loop, // and we normally set Latency = 0 for Anti/Output deps, // so may have nodes in same cycle with Anti/Output dependent on HW regs. - else if ((S.getKind() == SDep::Anti || S.getKind() == SDep::Output) && + else if ((OE.isAntiDep() || OE.isOutputDep()) && stageScheduled(*I) == StageInst1) { OrderBeforeUse = true; if ((MoveUse == 0) || (Pos < MoveUse)) MoveUse = Pos; } } - for (auto &P : SU->Preds) { - if (P.getSUnit() != *I) + for (auto &IE : DDG->getInEdges(SU)) { + if (IE.getSrc() != *I) continue; - if ((P.getKind() == SDep::Order || P.getKind() == SDep::Anti || - P.getKind() == SDep::Output) && + if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) && stageScheduled(*I) == StageInst1) { OrderAfterDef = true; MoveDef = Pos; @@ -3159,12 +3107,9 @@ bool SMSchedule::isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, /// Return true if all scheduled predecessors are loop-carried output/order /// dependencies. 
bool SMSchedule::onlyHasLoopCarriedOutputOrOrderPreds( - SUnit *SU, SwingSchedulerDAG *DAG) const { - for (const SDep &Pred : SU->Preds) - if (InstrToCycle.count(Pred.getSUnit()) && !DAG->isBackedge(SU, Pred)) - return false; - for (const SDep &Succ : SU->Succs) - if (InstrToCycle.count(Succ.getSUnit()) && DAG->isBackedge(SU, Succ)) + SUnit *SU, const SwingSchedulerDDG *DDG) const { + for (const auto &IE : DDG->getInEdges(SU)) + if (InstrToCycle.count(IE.getSrc())) return false; return true; } @@ -3179,18 +3124,21 @@ SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes( if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr())) Worklist.push_back(&SU); + const SwingSchedulerDDG *DDG = SSD->getDDG(); while (!Worklist.empty()) { auto SU = Worklist.pop_back_val(); if (DoNotPipeline.count(SU)) continue; LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n"); DoNotPipeline.insert(SU); - for (auto &Dep : SU->Preds) - Worklist.push_back(Dep.getSUnit()); - if (SU->getInstr()->isPHI()) - for (auto &Dep : SU->Succs) - if (Dep.getKind() == SDep::Anti) - Worklist.push_back(Dep.getSUnit()); + for (const auto &IE : DDG->getInEdges(SU)) + Worklist.push_back(IE.getSrc()); + + // To preserve previous behavior and prevent regression + // FIXME: Remove if this doesn't have significant impact on + for (const auto &OE : DDG->getOutEdges(SU)) + if (OE.getDistance() == 1) + Worklist.push_back(OE.getDst()); } return DoNotPipeline; } @@ -3212,8 +3160,15 @@ bool SMSchedule::normalizeNonPipelinedInstructions( // Put the non-pipelined instruction as early as possible in the schedule int NewCycle = getFirstCycle(); - for (auto &Dep : SU.Preds) - NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle); + for (const auto &IE : SSD->getDDG()->getInEdges(&SU)) + if (IE.getDistance() == 0) + NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle); + + // To preserve previous behavior and prevent regression + // FIXME: Remove if this doesn't have significant impact 
on performance + for (auto &OE : SSD->getDDG()->getOutEdges(&SU)) + if (OE.getDistance() == 1) + NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle); int OldCycle = InstrToCycle[&SU]; if (OldCycle != NewCycle) { @@ -3246,14 +3201,16 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { int StageDef = stageScheduled(&SU); int CycleDef = InstrToCycle[&SU]; assert(StageDef != -1 && "Instruction should have been scheduled."); - for (auto &SI : SU.Succs) - if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode()) - if (Register::isPhysicalRegister(SI.getReg())) { - if (stageScheduled(SI.getSUnit()) != StageDef) + for (auto &OE : SSD->getDDG()->getOutEdges(&SU)) { + SUnit *Dst = OE.getDst(); + if (OE.isAssignedRegDep() && !Dst->isBoundaryNode()) + if (Register::isPhysicalRegister(OE.getReg())) { + if (stageScheduled(Dst) != StageDef) return false; - if (InstrToCycle[SI.getSUnit()] <= CycleDef) + if (InstrToCycle[Dst] <= CycleDef) return false; } + } } return true; } @@ -3265,7 +3222,7 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { /// The method below checks whether the property is met. /// If not, debug information is printed and statistics information updated. /// Note that we do not use an assert statement. -/// The reason is that although an invalid node oder may prevent +/// The reason is that although an invalid node order may prevent /// the pipeliner from finding a pipelined schedule for arbitrary II, /// it does not lead to the generation of incorrect code. 
void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { @@ -3303,8 +3260,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { (void)Succ; (void)Pred; - for (SDep &PredEdge : SU->Preds) { - SUnit *PredSU = PredEdge.getSUnit(); + for (const auto &IE : DDG->getInEdges(SU)) { + SUnit *PredSU = IE.getSrc(); unsigned PredIndex = std::get<1>( *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey)); if (!PredSU->getInstr()->isPHI() && PredIndex < Index) { @@ -3314,8 +3271,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { } } - for (SDep &SuccEdge : SU->Succs) { - SUnit *SuccSU = SuccEdge.getSUnit(); + for (const auto &OE : DDG->getOutEdges(SU)) { + SUnit *SuccSU = OE.getDst(); // Do not process a boundary node, it was not included in NodeOrder, // hence not in Indices either, call to std::lower_bound() below will // return Indices.end(). @@ -3336,15 +3293,15 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const { bool InCircuit = llvm::any_of( Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); }); if (InCircuit) - LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";); + LLVM_DEBUG(dbgs() << "In a circuit, predecessor "); else { Valid = false; NumNodeOrderIssues++; - LLVM_DEBUG(dbgs() << "Predecessor ";); + LLVM_DEBUG(dbgs() << "Predecessor "); } LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum << " are scheduled before node " << SU->NodeNum - << "\n";); + << "\n"); } } @@ -3573,7 +3530,7 @@ bool ResourceManager::canReserveResources(SUnit &SU, int Cycle) { bool Result = !isOverbooked(); unreserveResources(SCDesc, Cycle); - LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return " << Result << "\n\n";); + LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return " << Result << "\n\n"); return Result; } @@ -3792,3 +3749,72 @@ void ResourceManager::init(int II) { NumScheduledMops.clear(); NumScheduledMops.resize(II); } 
+ +bool SwingSchedulerDDGEdge::ignoreDependence(bool IgnoreAnti) const { + if (Pred.isArtificial() || Dst->isBoundaryNode()) + return true; + // Currently, dependence that is an anti-dependences but not a loop-carried is + // also ignored. This behavior is preserved to prevent regression. + // FIXME: Remove if this doesn't have significant impact on performance + return IgnoreAnti && (Pred.getKind() == SDep::Kind::Anti || Distance != 0); +} + +SwingSchedulerDDG::SwingSchedulerDDGEdges & +SwingSchedulerDDG::getEdges(const SUnit *SU) { + if (SU == EntrySU) + return EntrySUEdges; + if (SU == ExitSU) + return ExitSUEdges; + return EdgesVec[SU->NodeNum]; +} + +const SwingSchedulerDDG::SwingSchedulerDDGEdges & +SwingSchedulerDDG::getEdges(const SUnit *SU) const { + if (SU == EntrySU) + return EntrySUEdges; + if (SU == ExitSU) + return ExitSUEdges; + return EdgesVec[SU->NodeNum]; +} + +void SwingSchedulerDDG::addEdge(const SUnit *SU, + const SwingSchedulerDDGEdge &Edge) { + auto &Edges = getEdges(SU); + if (Edge.getSrc() == SU) + Edges.Succs.push_back(Edge); + else + Edges.Preds.push_back(Edge); +} + +void SwingSchedulerDDG::initEdges(SUnit *SU) { + for (const auto &PI : SU->Preds) { + SwingSchedulerDDGEdge Edge(SU, PI, false); + addEdge(SU, Edge); + } + + for (const auto &SI : SU->Succs) { + SwingSchedulerDDGEdge Edge(SU, SI, true); + addEdge(SU, Edge); + } +} + +SwingSchedulerDDG::SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, + SUnit *ExitSU) + : EntrySU(EntrySU), ExitSU(ExitSU) { + EdgesVec.resize(SUnits.size()); + + initEdges(EntrySU); + initEdges(ExitSU); + for (auto &SU : SUnits) + initEdges(&SU); +} + +const SwingSchedulerDDG::EdgesType & +SwingSchedulerDDG::getInEdges(const SUnit *SU) const { + return getEdges(SU).Preds; +} + +const SwingSchedulerDDG::EdgesType & +SwingSchedulerDDG::getOutEdges(const SUnit *SU) const { + return getEdges(SU).Succs; +} diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp 
index fcedb302d228..394b99b85ddc 100644 --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -407,9 +407,11 @@ void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) { MachineInstr *MachineRegisterInfo::getVRegDef(Register Reg) const { // Since we are in SSA form, we can use the first definition. def_instr_iterator I = def_instr_begin(Reg); - assert((I.atEnd() || std::next(I) == def_instr_end()) && - "getVRegDef assumes a single definition or no definition"); - return !I.atEnd() ? &*I : nullptr; + if (I == def_instr_end()) + return nullptr; + assert(std::next(I) == def_instr_end() && + "getVRegDef assumes at most one definition"); + return &*I; } /// getUniqueVRegDef - Return the unique machine instr that defines the @@ -635,7 +637,13 @@ const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const { if (IsUpdatedCSRsInitialized) return UpdatedCSRs.data(); - return getTargetRegisterInfo()->getCalleeSavedRegs(MF); + const MCPhysReg *Regs = getTargetRegisterInfo()->getCalleeSavedRegs(MF); + + for (unsigned I = 0; Regs[I]; ++I) + if (MF->getSubtarget().isRegisterReservedByUser(Regs[I])) + MF->getRegInfo().disableCalleeSavedRegister(Regs[I]); + + return Regs; } void MachineRegisterInfo::setCalleeSavedRegs(ArrayRef<MCPhysReg> CSRs) { diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 23e5e4a4da6d..91aaeea156c4 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -77,30 +77,30 @@ STATISTIC(NumClustered, "Number of load/store pairs clustered"); namespace llvm { -cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, - cl::desc("Force top-down list scheduling")); -cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, - cl::desc("Force bottom-up list scheduling")); -namespace MISchedPostRASched { -enum Direction { - TopDown, - BottomUp, - Bidirectional, -}; -} // end namespace MISchedPostRASched 
-cl::opt<MISchedPostRASched::Direction> PostRADirection( +cl::opt<MISched::Direction> PreRADirection( + "misched-prera-direction", cl::Hidden, + cl::desc("Pre reg-alloc list scheduling direction"), + cl::init(MISched::Unspecified), + cl::values( + clEnumValN(MISched::TopDown, "topdown", + "Force top-down pre reg-alloc list scheduling"), + clEnumValN(MISched::BottomUp, "bottomup", + "Force bottom-up pre reg-alloc list scheduling"), + clEnumValN(MISched::Bidirectional, "bidirectional", + "Force bidirectional pre reg-alloc list scheduling"))); + +cl::opt<MISched::Direction> PostRADirection( "misched-postra-direction", cl::Hidden, cl::desc("Post reg-alloc list scheduling direction"), - // Default to top-down because it was implemented first and existing targets - // expect that behavior by default. - cl::init(MISchedPostRASched::TopDown), + cl::init(MISched::Unspecified), cl::values( - clEnumValN(MISchedPostRASched::TopDown, "topdown", + clEnumValN(MISched::TopDown, "topdown", "Force top-down post reg-alloc list scheduling"), - clEnumValN(MISchedPostRASched::BottomUp, "bottomup", + clEnumValN(MISched::BottomUp, "bottomup", "Force bottom-up post reg-alloc list scheduling"), - clEnumValN(MISchedPostRASched::Bidirectional, "bidirectional", + clEnumValN(MISched::Bidirectional, "bidirectional", "Force bidirectional post reg-alloc list scheduling"))); + cl::opt<bool> DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout")); @@ -1947,6 +1947,9 @@ void BaseMemOpClusterMutation::collectMemOpRecords( LocationSize Width = 0; if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable, Width, TRI)) { + if (!Width.hasValue()) + continue; + MemOpRecords.push_back( MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width)); @@ -3304,19 +3307,15 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, RegionPolicy.ShouldTrackLaneMasks = false; } - // Check -misched-topdown/bottomup can force or unforce 
scheduling direction. - // e.g. -misched-bottomup=false allows scheduling in both directions. - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); - if (ForceBottomUp.getNumOccurrences() > 0) { - RegionPolicy.OnlyBottomUp = ForceBottomUp; - if (RegionPolicy.OnlyBottomUp) - RegionPolicy.OnlyTopDown = false; - } - if (ForceTopDown.getNumOccurrences() > 0) { - RegionPolicy.OnlyTopDown = ForceTopDown; - if (RegionPolicy.OnlyTopDown) - RegionPolicy.OnlyBottomUp = false; + if (PreRADirection == MISched::TopDown) { + RegionPolicy.OnlyTopDown = true; + RegionPolicy.OnlyBottomUp = false; + } else if (PreRADirection == MISched::BottomUp) { + RegionPolicy.OnlyTopDown = false; + RegionPolicy.OnlyBottomUp = true; + } else if (PreRADirection == MISched::Bidirectional) { + RegionPolicy.OnlyBottomUp = false; + RegionPolicy.OnlyTopDown = false; } } @@ -3908,17 +3907,15 @@ void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, MF.getSubtarget().overridePostRASchedPolicy(RegionPolicy, NumRegionInstrs); // After subtarget overrides, apply command line options. 
- if (PostRADirection.getNumOccurrences() > 0) { - if (PostRADirection == MISchedPostRASched::TopDown) { - RegionPolicy.OnlyTopDown = true; - RegionPolicy.OnlyBottomUp = false; - } else if (PostRADirection == MISchedPostRASched::BottomUp) { - RegionPolicy.OnlyTopDown = false; - RegionPolicy.OnlyBottomUp = true; - } else if (PostRADirection == MISchedPostRASched::Bidirectional) { - RegionPolicy.OnlyBottomUp = false; - RegionPolicy.OnlyTopDown = false; - } + if (PostRADirection == MISched::TopDown) { + RegionPolicy.OnlyTopDown = true; + RegionPolicy.OnlyBottomUp = false; + } else if (PostRADirection == MISched::BottomUp) { + RegionPolicy.OnlyTopDown = false; + RegionPolicy.OnlyBottomUp = true; + } else if (PostRADirection == MISched::Bidirectional) { + RegionPolicy.OnlyBottomUp = false; + RegionPolicy.OnlyTopDown = false; } } @@ -3955,9 +3952,12 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand, return TryCand.Reason != NoCand; // Keep clustered nodes together. - if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), - Cand.SU == DAG->getNextClusterSucc(), - TryCand, Cand, Cluster)) + const SUnit *CandNextClusterSU = + Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + const SUnit *TryCandNextClusterSU = + TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + if (tryGreater(TryCand.SU == TryCandNextClusterSU, + Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; // Avoid critical resource consumption and balance the schedule. 
@@ -4362,10 +4362,9 @@ public: } // end anonymous namespace static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { - bool Alternate = !ForceTopDown && !ForceBottomUp; - bool TopDown = !ForceBottomUp; - assert((TopDown || !ForceTopDown) && - "-misched-topdown incompatible with -misched-bottomup"); + bool Alternate = + PreRADirection != MISched::TopDown && PreRADirection != MISched::BottomUp; + bool TopDown = PreRADirection != MISched::BottomUp; return new ScheduleDAGMILive( C, std::make_unique<InstructionShuffler>(Alternate, TopDown)); } diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 7d0bedab7cda..3c816f976509 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineCycleAnalysis.h" +#include "llvm/CodeGen/MachineDomTreeUpdater.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -747,8 +748,11 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { MadeChange |= ProcessBlock(MBB); // If we have anything we marked as toSplit, split it now. 
+ MachineDomTreeUpdater MDTU(DT, PDT, + MachineDomTreeUpdater::UpdateStrategy::Lazy); for (const auto &Pair : ToSplit) { - auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this); + auto NewSucc = + Pair.first->SplitCriticalEdge(Pair.second, *this, nullptr, &MDTU); if (NewSucc != nullptr) { LLVM_DEBUG(dbgs() << " *** Splitting critical edge: " << printMBBReference(*Pair.first) << " -- " @@ -1094,7 +1098,7 @@ bool MachineSinking::registerPressureSetExceedsLimit( std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB); for (; *PS != -1; PS++) if (Weight + BBRegisterPressure[*PS] >= - TRI->getRegPressureSetLimit(*MBB.getParent(), *PS)) + RegClassInfo.getRegPressureSetLimit(*PS)) return true; return false; } diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 6576f97bea25..021c1a058c02 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -683,11 +683,10 @@ struct DataDep { DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) : UseOp(UseOp) { assert(Register::isVirtualRegister(VirtReg)); - MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); - assert(!DefI.atEnd() && "Register has no defs"); - DefMI = DefI->getParent(); - DefOp = DefI.getOperandNo(); - assert((++DefI).atEnd() && "Register has multiple defs"); + MachineOperand *DefMO = MRI->getOneDef(VirtReg); + assert(DefMO && "Register does not have unique def"); + DefMI = DefMO->getParent(); + DefOp = DefMO->getOperandNo(); } }; diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp index 7548fc8141ec..a4b78c1c75ce 100644 --- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp @@ -199,8 +199,7 @@ void MachineUniformityAnalysisPass::getAnalysisUsage(AnalysisUsage &AU) const { } bool MachineUniformityAnalysisPass::runOnMachineFunction(MachineFunction &MF) { - 
auto &DomTree = - getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree().getBase(); + auto &DomTree = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); auto &CI = getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo(); // FIXME: Query TTI::hasBranchDivergence. -run-pass seems to end up with a // default NoTTI diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 8e64e4055665..bec36b728ae3 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -369,7 +369,7 @@ struct MachineVerifierLegacyPass : public MachineFunctionPass { } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addUsedIfAvailable<LiveStacks>(); + AU.addUsedIfAvailable<LiveStacksWrapperLegacy>(); AU.addUsedIfAvailable<LiveVariablesWrapperPass>(); AU.addUsedIfAvailable<SlotIndexesWrapperPass>(); AU.addUsedIfAvailable<LiveIntervalsWrapperPass>(); @@ -491,7 +491,8 @@ bool MachineVerifier::verify(const MachineFunction &MF) { auto *LVWrapper = PASS->getAnalysisIfAvailable<LiveVariablesWrapperPass>(); if (!LiveInts) LiveVars = LVWrapper ? &LVWrapper->getLV() : nullptr; - LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>(); + auto *LSWrapper = PASS->getAnalysisIfAvailable<LiveStacksWrapperLegacy>(); + LiveStks = LSWrapper ? &LSWrapper->getLS() : nullptr; auto *SIWrapper = PASS->getAnalysisIfAvailable<SlotIndexesWrapperPass>(); Indexes = SIWrapper ? 
&SIWrapper->getSI() : nullptr; } @@ -1585,38 +1586,12 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } - case TargetOpcode::G_ABDS: - case TargetOpcode::G_ABDU: { - LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); - LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); - LLT SrcTy2 = MRI->getType(MI->getOperand(2).getReg()); - - if ((DstTy.isVector() != SrcTy.isVector()) || - (DstTy.isVector() && - DstTy.getElementCount() != SrcTy.getElementCount())) { - report("Generic vector abds/abdu must preserve number of lanes", MI); - break; - } - - if (SrcTy != SrcTy2) { - report("Generic abds/abdu must have same input types", MI); - break; - } - - if (DstTy != SrcTy) { - report("Generic abds/abdu must have same input and output types", MI); - break; - } - - break; - } case TargetOpcode::G_SCMP: case TargetOpcode::G_UCMP: { LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); - LLT SrcTy2 = MRI->getType(MI->getOperand(2).getReg()); - if (SrcTy.isPointerOrPointerVector() || SrcTy2.isPointerOrPointerVector()) { + if (SrcTy.isPointerOrPointerVector()) { report("Generic scmp/ucmp does not support pointers as operands", MI); break; } @@ -1626,6 +1601,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } + if (DstTy.getScalarSizeInBits() < 2) { + report("Result type must be at least 2 bits wide", MI); + break; + } + if ((DstTy.isVector() != SrcTy.isVector()) || (DstTy.isVector() && DstTy.getElementCount() != SrcTy.getElementCount())) { @@ -1633,11 +1613,6 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } - if (SrcTy != SrcTy2) { - report("Generic scmp/ucmp must have same input types", MI); - break; - } - break; } case TargetOpcode::G_EXTRACT: { diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 99c82bc3a266..414c8cd71809 100644 --- 
a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -2693,8 +2693,7 @@ void ModuloScheduleExpanderMVE::expand() { /// Check if ModuloScheduleExpanderMVE can be applied to L bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) { if (!L.getExitBlock()) { - LLVM_DEBUG( - dbgs() << "Can not apply MVE expander: No single exit block.\n";); + LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n"); return false; } @@ -2711,9 +2710,8 @@ bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) { if (MO.isReg()) for (MachineInstr &Ref : MRI.use_instructions(MO.getReg())) if (Ref.getParent() != BB || Ref.isPHI()) { - LLVM_DEBUG(dbgs() - << "Can not apply MVE expander: A phi result is " - "referenced outside of the loop or by phi.\n";); + LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is " + "referenced outside of the loop or by phi.\n"); return false; } @@ -2726,12 +2724,12 @@ bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) { MRI.getVRegDef(LoopVal)->getParent() != BB) { LLVM_DEBUG( dbgs() << "Can not apply MVE expander: A phi source value coming " - "from the loop is not defined in the loop.\n";); + "from the loop is not defined in the loop.\n"); return false; } if (UsedByPhi.count(LoopVal)) { LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the " - "loop is referenced by two or more phis.\n";); + "loop is referenced by two or more phis.\n"); return false; } UsedByPhi.insert(LoopVal); diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp index e5f40771eda8..b71e5b853868 100644 --- a/llvm/lib/CodeGen/PHIElimination.cpp +++ b/llvm/lib/CodeGen/PHIElimination.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDomTreeUpdater.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include 
"llvm/CodeGen/MachineFunctionPass.h" @@ -90,7 +91,8 @@ class PHIEliminationImpl { /// Split critical edges where necessary for good coalescer performance. bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, MachineLoopInfo *MLI, - std::vector<SparseBitVector<>> *LiveInSets); + std::vector<SparseBitVector<>> *LiveInSets, + MachineDomTreeUpdater &MDTU); // These functions are temporary abstractions around LiveVariables and // LiveIntervals, so they can go away when LiveVariables does. @@ -203,6 +205,16 @@ void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { bool PHIEliminationImpl::run(MachineFunction &MF) { MRI = &MF.getRegInfo(); + MachineDominatorTree *MDT = nullptr; + if (P) { + auto *MDTWrapper = + P->getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>(); + MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr; + } else { + MDT = MFAM->getCachedResult<MachineDominatorTreeAnalysis>(MF); + } + MachineDomTreeUpdater MDTU(MDT, MachineDomTreeUpdater::UpdateStrategy::Lazy); + bool Changed = false; // Split critical edges to help the coalescer. @@ -237,7 +249,8 @@ bool PHIEliminationImpl::run(MachineFunction &MF) { } for (auto &MBB : MF) - Changed |= SplitPHIEdges(MF, MBB, MLI, (LV ? &LiveInSets : nullptr)); + Changed |= + SplitPHIEdges(MF, MBB, MLI, (LV ? &LiveInSets : nullptr), MDTU); } // This pass takes the function out of SSA form. @@ -268,10 +281,6 @@ bool PHIEliminationImpl::run(MachineFunction &MF) { MF.deleteMachineInstr(I.first); } - // TODO: we should use the incremental DomTree updater here. 
- if (Changed && MDT) - MDT->getBase().recalculate(MF); - LoweredPHIs.clear(); ImpDefs.clear(); VRegPHIUseCount.clear(); @@ -752,7 +761,7 @@ void PHIEliminationImpl::analyzePHINodes(const MachineFunction &MF) { bool PHIEliminationImpl::SplitPHIEdges( MachineFunction &MF, MachineBasicBlock &MBB, MachineLoopInfo *MLI, - std::vector<SparseBitVector<>> *LiveInSets) { + std::vector<SparseBitVector<>> *LiveInSets, MachineDomTreeUpdater &MDTU) { if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad()) return false; // Quick exit for basic blocks without PHIs. @@ -819,8 +828,8 @@ bool PHIEliminationImpl::SplitPHIEdges( } if (!ShouldSplit && !SplitAllCriticalEdges) continue; - if (!(P ? PreMBB->SplitCriticalEdge(&MBB, *P, LiveInSets) - : PreMBB->SplitCriticalEdge(&MBB, *MFAM, LiveInSets))) { + if (!(P ? PreMBB->SplitCriticalEdge(&MBB, *P, LiveInSets, &MDTU) + : PreMBB->SplitCriticalEdge(&MBB, *MFAM, LiveInSets, &MDTU))) { LLVM_DEBUG(dbgs() << "Failed to split critical edge.\n"); continue; } diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp index 2f7cfdd275b4..badfd9a68d6a 100644 --- a/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -98,12 +98,6 @@ namespace { } bool runOnMachineFunction(MachineFunction &Fn) override; - - private: - bool enablePostRAScheduler( - const TargetSubtargetInfo &ST, CodeGenOptLevel OptLevel, - TargetSubtargetInfo::AntiDepBreakMode &Mode, - TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const; }; char PostRAScheduler::ID = 0; @@ -259,13 +253,8 @@ LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const { } #endif -bool PostRAScheduler::enablePostRAScheduler( - const TargetSubtargetInfo &ST, CodeGenOptLevel OptLevel, - TargetSubtargetInfo::AntiDepBreakMode &Mode, - TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const { - Mode = ST.getAntiDepBreakMode(); - ST.getCriticalPathRCs(CriticalPathRCs); - +static bool enablePostRAScheduler(const 
TargetSubtargetInfo &ST, + CodeGenOptLevel OptLevel) { // Check for explicit enable/disable of post-ra scheduling. if (EnablePostRAScheduler.getPosition() > 0) return EnablePostRAScheduler; @@ -278,24 +267,17 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { if (skipFunction(Fn.getFunction())) return false; - TII = Fn.getSubtarget().getInstrInfo(); - MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); - AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + const auto &Subtarget = Fn.getSubtarget(); TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); - - RegClassInfo.runOnMachineFunction(Fn); - - TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = - TargetSubtargetInfo::ANTIDEP_NONE; - SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs; - // Check that post-RA scheduling is enabled for this target. - // This may upgrade the AntiDepMode. - if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(), - AntiDepMode, CriticalPathRCs)) + if (!enablePostRAScheduler(Subtarget, PassConfig->getOptLevel())) return false; - // Check for antidep breaking override... + TII = Subtarget.getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); + AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = + Subtarget.getAntiDepBreakMode(); if (EnableAntiDepBreaking.getPosition() > 0) { AntiDepMode = (EnableAntiDepBreaking == "all") ? TargetSubtargetInfo::ANTIDEP_ALL @@ -303,6 +285,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { ? 
TargetSubtargetInfo::ANTIDEP_CRITICAL : TargetSubtargetInfo::ANTIDEP_NONE); } + SmallVector<const TargetRegisterClass *, 4> CriticalPathRCs; + Subtarget.getCriticalPathRCs(CriticalPathRCs); + RegClassInfo.runOnMachineFunction(Fn); LLVM_DEBUG(dbgs() << "PostRAScheduler\n"); diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 0e8220ec6251..79b0fa672cc6 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -16,7 +16,7 @@ using namespace llvm; -#define DEBUG_TYPE "reaching-deps-analysis" +#define DEBUG_TYPE "reaching-defs-analysis" char ReachingDefAnalysis::ID = 0; INITIALIZE_PASS(ReachingDefAnalysis, DEBUG_TYPE, "ReachingDefAnalysis", false, diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index 449033d63210..50addcbcca06 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/Spiller.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -123,21 +124,10 @@ void RegAllocBase::allocatePhysRegs() { } const TargetRegisterClass *RC = MRI->getRegClass(VirtReg->reg()); - ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(RC); - if (AllocOrder.empty()) - report_fatal_error("no registers from class available to allocate"); - else if (MI && MI->isInlineAsm()) { - MI->emitError("inline assembly requires more registers than available"); - } else if (MI) { - LLVMContext &Context = - MI->getParent()->getParent()->getFunction().getContext(); - Context.emitError("ran out of registers during register allocation"); - } else { - report_fatal_error("ran out of registers during register allocation"); - } + AvailablePhysReg = getErrorAssignment(*RC, MI); // Keep going after reporting the error. 
- VRM->assignVirt2Phys(VirtReg->reg(), AllocOrder.front()); + VRM->assignVirt2Phys(VirtReg->reg(), AvailablePhysReg); } else if (AvailablePhysReg) Matrix->assign(*VirtReg, AvailablePhysReg); @@ -187,3 +177,48 @@ void RegAllocBase::enqueue(const LiveInterval *LI) { << " in skipped register class\n"); } } + +MCPhysReg RegAllocBase::getErrorAssignment(const TargetRegisterClass &RC, + const MachineInstr *CtxMI) { + MachineFunction &MF = VRM->getMachineFunction(); + + // Avoid printing the error for every single instance of the register. It + // would be better if this were per register class. + bool EmitError = !MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedRegAlloc); + if (EmitError) + MF.getProperties().set(MachineFunctionProperties::Property::FailedRegAlloc); + + const Function &Fn = MF.getFunction(); + LLVMContext &Context = Fn.getContext(); + + ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(&RC); + if (AllocOrder.empty()) { + // If the allocation order is empty, it likely means all registers in the + // class are reserved. We still to need to pick something, so look at the + // underlying class. + ArrayRef<MCPhysReg> RawRegs = RC.getRegisters(); + + if (EmitError) { + Context.diagnose(DiagnosticInfoRegAllocFailure( + "no registers from class available to allocate", Fn, + CtxMI ? CtxMI->getDebugLoc() : DiagnosticLocation())); + } + + assert(!RawRegs.empty() && "register classes cannot have no registers"); + return RawRegs.front(); + } + + if (EmitError) { + if (CtxMI && CtxMI->isInlineAsm()) { + CtxMI->emitInlineAsmError( + "inline assembly requires more registers than available"); + } else { + Context.diagnose(DiagnosticInfoRegAllocFailure( + "ran out of registers during register allocation", Fn, + CtxMI ? 
CtxMI->getDebugLoc() : DiagnosticLocation())); + } + } + + return AllocOrder.front(); +} diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h index a1ede08a1535..5bd52da61f2d 100644 --- a/llvm/lib/CodeGen/RegAllocBase.h +++ b/llvm/lib/CodeGen/RegAllocBase.h @@ -123,6 +123,12 @@ protected: virtual MCRegister selectOrSplit(const LiveInterval &VirtReg, SmallVectorImpl<Register> &splitLVRs) = 0; + /// Query a physical register to use as a filler in contexts where the + /// allocation has failed. This will raise an error, but not abort the + /// compilation. + MCPhysReg getErrorAssignment(const TargetRegisterClass &RC, + const MachineInstr *CtxMI = nullptr); + // Use this group name for NamedRegionTimer. static const char TimerGroupName[]; static const char TimerGroupDescription[]; diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 7ee24c960dbe..c05aa1e40e47 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -135,7 +135,7 @@ INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) INITIALIZE_PASS_DEPENDENCY(MachineScheduler) -INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) @@ -182,8 +182,8 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<SlotIndexesWrapperPass>(); AU.addRequired<LiveDebugVariablesWrapperLegacy>(); AU.addPreserved<LiveDebugVariablesWrapperLegacy>(); - AU.addRequired<LiveStacks>(); - AU.addPreserved<LiveStacks>(); + AU.addRequired<LiveStacksWrapperLegacy>(); + AU.addPreserved<LiveStacksWrapperLegacy>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); 
AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>(); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 6babd5a3f1f9..3863ca80bb44 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -368,6 +368,9 @@ private: bool LookAtPhysRegUses = false); bool useVirtReg(MachineInstr &MI, MachineOperand &MO, Register VirtReg); + MCPhysReg getErrorAssignment(const LiveReg &LR, MachineInstr &MI, + const TargetRegisterClass &RC); + MachineBasicBlock::iterator getMBBBeginInsertionPoint(MachineBasicBlock &MBB, SmallSet<Register, 2> &PrologLiveIns) const; @@ -682,7 +685,7 @@ void RegAllocFastImpl::reloadAtBegin(MachineBasicBlock &MBB) { getMBBBeginInsertionPoint(MBB, PrologLiveIns); for (const LiveReg &LR : LiveVirtRegs) { MCPhysReg PhysReg = LR.PhysReg; - if (PhysReg == 0) + if (PhysReg == 0 || LR.Error) continue; MCRegister FirstUnit = *TRI->regunits(PhysReg).begin(); @@ -963,13 +966,8 @@ void RegAllocFastImpl::allocVirtReg(MachineInstr &MI, LiveReg &LR, if (!BestReg) { // Nothing we can do: Report an error and keep going with an invalid // allocation. 
- if (MI.isInlineAsm()) - MI.emitError("inline assembly requires more registers than available"); - else - MI.emitError("ran out of registers during register allocation"); - + LR.PhysReg = getErrorAssignment(LR, MI, RC); LR.Error = true; - LR.PhysReg = 0; return; } @@ -984,15 +982,23 @@ void RegAllocFastImpl::allocVirtRegUndef(MachineOperand &MO) { if (!shouldAllocateRegister(VirtReg)) return; - LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); MCPhysReg PhysReg; if (LRI != LiveVirtRegs.end() && LRI->PhysReg) { PhysReg = LRI->PhysReg; } else { const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC); - assert(!AllocationOrder.empty() && "Allocation order must not be empty"); - PhysReg = AllocationOrder[0]; + if (AllocationOrder.empty()) { + // All registers in the class were reserved. + // + // It might be OK to take any entry from the class as this is an undef + // use, but accepting this would give different behavior than greedy and + // basic. + PhysReg = getErrorAssignment(*LRI, *MO.getParent(), RC); + LRI->Error = true; + } else + PhysReg = AllocationOrder.front(); } unsigned SubRegIdx = MO.getSubReg(); @@ -1065,17 +1071,8 @@ bool RegAllocFastImpl::defineVirtReg(MachineInstr &MI, unsigned OpNum, } if (LRI->PhysReg == 0) { allocVirtReg(MI, *LRI, 0, LookAtPhysRegUses); - // If no physical register is available for LRI, we assign one at random - // and bail out of this function immediately. 
- if (LRI->Error) { - const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC); - if (AllocationOrder.empty()) - return setPhysReg(MI, MO, MCRegister::NoRegister); - return setPhysReg(MI, MO, *AllocationOrder.begin()); - } } else { - assert(!isRegUsedInInstr(LRI->PhysReg, LookAtPhysRegUses) && + assert((!isRegUsedInInstr(LRI->PhysReg, LookAtPhysRegUses) || LRI->Error) && "TODO: preassign mismatch"); LLVM_DEBUG(dbgs() << "In def of " << printReg(VirtReg, TRI) << " use existing assignment to " @@ -1158,13 +1155,6 @@ bool RegAllocFastImpl::useVirtReg(MachineInstr &MI, MachineOperand &MO, } } allocVirtReg(MI, *LRI, Hint, false); - if (LRI->Error) { - const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); - ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC); - if (AllocationOrder.empty()) - return setPhysReg(MI, MO, MCRegister::NoRegister); - return setPhysReg(MI, MO, *AllocationOrder.begin()); - } } LRI->LastUse = &MI; @@ -1176,6 +1166,54 @@ bool RegAllocFastImpl::useVirtReg(MachineInstr &MI, MachineOperand &MO, return setPhysReg(MI, MO, LRI->PhysReg); } +/// Query a physical register to use as a filler in contexts where the +/// allocation has failed. This will raise an error, but not abort the +/// compilation. +MCPhysReg RegAllocFastImpl::getErrorAssignment(const LiveReg &LR, + MachineInstr &MI, + const TargetRegisterClass &RC) { + MachineFunction &MF = *MI.getMF(); + + // Avoid repeating the error every time a register is used. + bool EmitError = !MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedRegAlloc); + if (EmitError) + MF.getProperties().set(MachineFunctionProperties::Property::FailedRegAlloc); + + // If the allocation order was empty, all registers in the class were + // probably reserved. Fall back to taking the first register in the class, + // even if it's reserved. 
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC); + if (AllocationOrder.empty()) { + const Function &Fn = MF.getFunction(); + if (EmitError) { + Fn.getContext().diagnose(DiagnosticInfoRegAllocFailure( + "no registers from class available to allocate", Fn, + MI.getDebugLoc())); + } + + ArrayRef<MCPhysReg> RawRegs = RC.getRegisters(); + assert(!RawRegs.empty() && "register classes cannot have no registers"); + return RawRegs.front(); + } + + if (!LR.Error && EmitError) { + // Nothing we can do: Report an error and keep going with an invalid + // allocation. + if (MI.isInlineAsm()) { + MI.emitInlineAsmError( + "inline assembly requires more registers than available"); + } else { + const Function &Fn = MBB->getParent()->getFunction(); + Fn.getContext().diagnose(DiagnosticInfoRegAllocFailure( + "ran out of registers during register allocation", Fn, + MI.getDebugLoc())); + } + } + + return AllocationOrder.front(); +} + /// Changes operand OpNum in MI the refer the PhysReg, considering subregs. /// \return true if MI's MachineOperands were re-arranged/invalidated. 
bool RegAllocFastImpl::setPhysReg(MachineInstr &MI, MachineOperand &MO, diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 8564fd8ca96d..95a7801c372f 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -156,7 +156,7 @@ INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) INITIALIZE_PASS_DEPENDENCY(MachineScheduler) -INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) @@ -206,8 +206,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<SlotIndexesWrapperPass>(); AU.addRequired<LiveDebugVariablesWrapperLegacy>(); AU.addPreserved<LiveDebugVariablesWrapperLegacy>(); - AU.addRequired<LiveStacks>(); - AU.addPreserved<LiveStacks>(); + AU.addRequired<LiveStacksWrapperLegacy>(); + AU.addPreserved<LiveStacksWrapperLegacy>(); AU.addRequired<MachineDominatorTreeWrapperPass>(); AU.addPreserved<MachineDominatorTreeWrapperPass>(); AU.addRequired<MachineLoopInfoWrapperPass>(); @@ -376,6 +376,12 @@ unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const { return Prio; } +unsigned DummyPriorityAdvisor::getPriority(const LiveInterval &LI) const { + // Prioritize by virtual register number, lowest first. + Register Reg = LI.reg(); + return ~Reg.virtRegIndex(); +} + const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { @@ -2427,7 +2433,7 @@ MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg, } else return PhysReg; } - // Non emtpy NewVRegs means VirtReg has been split. + // Non empty NewVRegs means VirtReg has been split. 
if (!NewVRegs.empty()) return 0; @@ -2465,7 +2471,7 @@ MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg, return 0; } - if (Stage < RS_Spill) { + if (Stage < RS_Spill && !VirtReg.empty()) { // Try splitting VirtReg or interferences. unsigned NewVRegSizeBefore = NewVRegs.size(); Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters); diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp index 261e93add7d8..696c312e4ba0 100644 --- a/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -121,7 +121,7 @@ public: : MachineFunctionPass(ID), customPassID(cPassID) { initializeSlotIndexesWrapperPassPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsWrapperPassPass(*PassRegistry::getPassRegistry()); - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksWrapperLegacyPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapWrapperLegacyPass(*PassRegistry::getPassRegistry()); } @@ -550,8 +550,8 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { //au.addRequiredID(SplitCriticalEdgesID); if (customPassID) au.addRequiredID(*customPassID); - au.addRequired<LiveStacks>(); - au.addPreserved<LiveStacks>(); + au.addRequired<LiveStacksWrapperLegacy>(); + au.addPreserved<LiveStacksWrapperLegacy>(); au.addRequired<MachineBlockFrequencyInfoWrapperPass>(); au.addPreserved<MachineBlockFrequencyInfoWrapperPass>(); au.addRequired<MachineLoopInfoWrapperPass>(); diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp index 0650aaff56ea..4525b8fc5a38 100644 --- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp +++ b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp @@ -30,7 +30,10 @@ static cl::opt<RegAllocPriorityAdvisorAnalysis::AdvisorMode> Mode( clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development, - 
"development", "for training"))); + "development", "for training"), + clEnumValN( + RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy, "dummy", + "prioritize low virtual register numbers for test and debug"))); char RegAllocPriorityAdvisorAnalysis::ID = 0; INITIALIZE_PASS(RegAllocPriorityAdvisorAnalysis, "regalloc-priority", @@ -67,6 +70,31 @@ private: } const bool NotAsRequested; }; + +class DummyPriorityAdvisorAnalysis final + : public RegAllocPriorityAdvisorAnalysis { +public: + DummyPriorityAdvisorAnalysis() + : RegAllocPriorityAdvisorAnalysis(AdvisorMode::Dummy) {} + + // support for isa<> and dyn_cast. + static bool classof(const RegAllocPriorityAdvisorAnalysis *R) { + return R->getAdvisorMode() == AdvisorMode::Dummy; + } + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<SlotIndexesWrapperPass>(); + RegAllocPriorityAdvisorAnalysis::getAnalysisUsage(AU); + } + + std::unique_ptr<RegAllocPriorityAdvisor> + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { + return std::make_unique<DummyPriorityAdvisor>( + MF, RA, &getAnalysis<SlotIndexesWrapperPass>().getSI()); + } +}; + } // namespace template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() { @@ -75,6 +103,9 @@ template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() { case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Default: Ret = new DefaultPriorityAdvisorAnalysis(/*NotAsRequested*/ false); break; + case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Dummy: + Ret = new DummyPriorityAdvisorAnalysis(); + break; case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Development: #if defined(LLVM_HAVE_TFLITE) Ret = createDevelopmentModePriorityAdvisor(); @@ -97,6 +128,8 @@ StringRef RegAllocPriorityAdvisorAnalysis::getPassName() const { return "Release mode Regalloc Priority Advisor"; case AdvisorMode::Development: return "Development mode Regalloc Priority Advisor"; + case AdvisorMode::Dummy: + return "Dummy Regalloc 
Priority Advisor"; } llvm_unreachable("Unknown advisor kind"); } diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h index 1e9fa967214c..32e4598b7153 100644 --- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h +++ b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.h @@ -56,9 +56,21 @@ private: unsigned getPriority(const LiveInterval &LI) const override; }; +/// Stupid priority advisor which just enqueues in virtual register number +/// order, for debug purposes only. +class DummyPriorityAdvisor : public RegAllocPriorityAdvisor { +public: + DummyPriorityAdvisor(const MachineFunction &MF, const RAGreedy &RA, + SlotIndexes *const Indexes) + : RegAllocPriorityAdvisor(MF, RA, Indexes) {} + +private: + unsigned getPriority(const LiveInterval &LI) const override; +}; + class RegAllocPriorityAdvisorAnalysis : public ImmutablePass { public: - enum class AdvisorMode : int { Default, Release, Development }; + enum class AdvisorMode : int { Default, Release, Development, Dummy }; RegAllocPriorityAdvisorAnalysis(AdvisorMode Mode) : ImmutablePass(ID), Mode(Mode){}; diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp index 8d457f58e6ee..a87c2063b1e3 100644 --- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp +++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp @@ -121,7 +121,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, auto *ArgTy = Arg.value()->getType(); bool IsOloadTy = isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index(), /*TTI=*/nullptr); - if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) { + if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index(), /*TTI=*/nullptr)) { ScalarArgTypes.push_back(ArgTy); if (IsOloadTy) OloadTys.push_back(ArgTy); diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index b1d744378481..6d3b3f34e8ca 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -908,7 +908,7 
@@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, BarrierChain = SU; LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU(" - << BarrierChain->NodeNum << ").\n";); + << BarrierChain->NodeNum << ").\n"); // Add dependencies against everything below it and clear maps. addBarrierChain(Stores); @@ -929,7 +929,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, FPExceptions.insert(SU, UnknownValue); if (FPExceptions.size() >= HugeRegion) { - LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";); + LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n"); Value2SUsMap empty; reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize()); } @@ -1012,12 +1012,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, // Reduce maps if they grow huge. if (Stores.size() + Loads.size() >= HugeRegion) { - LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";); + LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n"); reduceHugeMemNodeMaps(Stores, Loads, getReductionSize()); } if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) { - LLVM_DEBUG( - dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";); + LLVM_DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n"); reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize()); } } @@ -1090,11 +1089,11 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, BarrierChain->addPredBarrier(newBarrierChain); BarrierChain = newBarrierChain; LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU(" - << BarrierChain->NodeNum << ").\n";); + << BarrierChain->NodeNum << ").\n"); } else LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU(" - << BarrierChain->NodeNum << ").\n";); + << BarrierChain->NodeNum << ").\n"); } else BarrierChain = newBarrierChain; diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index 484705eabbc4..bfc49dd354aa 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ 
b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectOptimize.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -218,7 +219,7 @@ public: private: // Select groups consist of consecutive select-like instructions with the same // condition. Between select-likes could be any number of auxiliary - // instructions related to the condition like not, zext + // instructions related to the condition like not, zext, ashr/lshr struct SelectGroup { Value *Condition; SmallVector<SelectLike, 2> Selects; @@ -496,7 +497,14 @@ static Value *getTrueOrFalseValue( auto *CBO = BO->clone(); auto CondIdx = SI.getConditionOpIndex(); - CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1)); + auto *AuxI = cast<Instruction>(CBO->getOperand(CondIdx)); + if (isa<ZExtInst>(AuxI) || isa<LShrOperator>(AuxI)) { + CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1)); + } else { + assert((isa<AShrOperator>(AuxI) || isa<SExtInst>(AuxI)) && + "Unexpected opcode"); + CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), -1)); + } unsigned OtherIdx = 1 - CondIdx; if (auto *IV = dyn_cast<Instruction>(CBO->getOperand(OtherIdx))) { @@ -754,7 +762,11 @@ void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB, // Auxiliary instruction are instructions that depends on a condition and have // zero or some constant value on True/False branch, such as: // * ZExt(1bit) + // * SExt(1bit) // * Not(1bit) + // * A(L)Shr(Val), ValBitSize - 1, where there is a condition like `Val <= 0` + // earlier in the BB. For conditions that check the sign of the Val compiler + // may generate shifts instead of ZExt/SExt. 
struct SelectLikeInfo { Value *Cond; bool IsAuxiliary; @@ -763,13 +775,21 @@ void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB, }; DenseMap<Value *, SelectLikeInfo> SelectInfo; + // Keeps visited comparisons to help identify AShr/LShr variants of auxiliary + // instructions. + SmallSetVector<CmpInst *, 4> SeenCmp; // Check if the instruction is SelectLike or might be part of SelectLike // expression, put information into SelectInfo and return the iterator to the // inserted position. - auto ProcessSelectInfo = [&SelectInfo](Instruction *I) { + auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) { + if (auto *Cmp = dyn_cast<CmpInst>(I)) { + SeenCmp.insert(Cmp); + return SelectInfo.end(); + } + Value *Cond; - if (match(I, m_OneUse(m_ZExt(m_Value(Cond)))) && + if (match(I, m_OneUse(m_ZExtOrSExt(m_Value(Cond)))) && Cond->getType()->isIntegerTy(1)) { bool Inverted = match(Cond, m_Not(m_Value(Cond))); return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first; @@ -784,30 +804,60 @@ void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB, bool Inverted = match(Cond, m_Not(m_Value(Cond))); return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first; } - - // An Or(zext(i1 X), Y) can also be treated like a select, with condition X - // and values Y|1 and Y. 
- if (auto *BO = dyn_cast<BinaryOperator>(I)) { - switch (I->getOpcode()) { - case Instruction::Add: - case Instruction::Sub: { - Value *X; - if (!((PatternMatch::match(I->getOperand(0), - m_OneUse(m_ZExt(m_Value(X)))) || - PatternMatch::match(I->getOperand(1), - m_OneUse(m_ZExt(m_Value(X))))) && - X->getType()->isIntegerTy(1))) - return SelectInfo.end(); - break; - } - case Instruction::Or: - if (BO->getType()->isIntegerTy(1) || BO->getOpcode() != Instruction::Or) - return SelectInfo.end(); - break; + Value *Val; + ConstantInt *Shift; + if (match(I, m_Shr(m_Value(Val), m_ConstantInt(Shift))) && + I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) { + for (auto *CmpI : SeenCmp) { + auto Pred = CmpI->getPredicate(); + if (Val != CmpI->getOperand(0)) + continue; + if ((Pred == CmpInst::ICMP_SGT && + match(CmpI->getOperand(1), m_ConstantInt<-1>())) || + (Pred == CmpInst::ICMP_SGE && + match(CmpI->getOperand(1), m_Zero())) || + (Pred == CmpInst::ICMP_SLT && + match(CmpI->getOperand(1), m_Zero())) || + (Pred == CmpInst::ICMP_SLE && + match(CmpI->getOperand(1), m_ConstantInt<-1>()))) { + bool Inverted = + Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE; + return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first; + } } + return SelectInfo.end(); + } - for (unsigned Idx = 0; Idx < 2; Idx++) { - auto *Op = BO->getOperand(Idx); + // An BinOp(Aux(X), Y) can also be treated like a select, with condition X + // and values Y|1 and Y. + // `Aux` can be either `ZExt(1bit)`, `SExt(1bit)` or `XShr(Val), ValBitSize + // - 1` `BinOp` can be Add, Sub, Or + Value *X; + auto MatchZExtOrSExtPattern = + m_c_BinOp(m_Value(), m_OneUse(m_ZExtOrSExt(m_Value(X)))); + auto MatchShiftPattern = + m_c_BinOp(m_Value(), m_OneUse(m_Shr(m_Value(X), m_ConstantInt(Shift)))); + + // This check is unnecessary, but it prevents costly access to the + // SelectInfo map. 
+ if ((match(I, MatchZExtOrSExtPattern) && X->getType()->isIntegerTy(1)) || + (match(I, MatchShiftPattern) && + X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) { + if (I->getOpcode() != Instruction::Add && + I->getOpcode() != Instruction::Sub && + I->getOpcode() != Instruction::Or) + return SelectInfo.end(); + + if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1)) + return SelectInfo.end(); + + // Iterate through operands and find dependant on recognised sign + // extending auxiliary select-like instructions. The operand index does + // not matter for Add and Or. However, for Sub, we can only safely + // transform when the operand is second. + unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0; + for (; Idx < 2; Idx++) { + auto *Op = I->getOperand(Idx); auto It = SelectInfo.find(Op); if (It != SelectInfo.end() && It->second.IsAuxiliary) { Cond = It->second.Cond; @@ -994,6 +1044,18 @@ bool SelectOptimizeImpl::isConvertToBranchProfitableBase( return true; } + // If latch has a select group with several elements, it is usually profitable + // to convert it to branches. We let `optimizeSelectsInnerLoops` decide if + // conversion is profitable for innermost loops. 
+ auto *BB = SI.getI()->getParent(); + auto *L = LI->getLoopFor(BB); + if (L && !L->isInnermost() && L->getLoopLatch() == BB && + ASI.Selects.size() >= 3) { + OR << "Converted to branch because select group in the latch block is big."; + EmitAndPrintRemark(ORE, OR); + return true; + } + ORmiss << "Not profitable to convert to branch (base heuristic)."; EmitAndPrintRemark(ORE, ORmiss); return false; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 48018ac29bd0..9ec3310b5219 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -138,6 +138,11 @@ static cl::opt<bool> EnableReduceLoadOpStoreWidth( "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence")); +static cl::opt<bool> ReduceLoadOpStoreWidthForceNarrowingProfitable( + "combiner-reduce-load-op-store-width-force-narrowing-profitable", + cl::Hidden, cl::init(false), + cl::desc("DAG combiner force override the narrowing profitable check when" + "reducing the width of load/op/store sequences")); static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore( "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), @@ -148,12 +153,6 @@ static cl::opt<bool> EnableVectorFCopySignExtendRound( "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc( "Enable merging extends and rounds into FCOPYSIGN on vector types")); - -static cl::opt<unsigned int> - MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), - cl::desc("DAG combiner limit number of steps when searching DAG " - "for predecessor nodes")); - namespace { class DAGCombiner { @@ -203,7 +202,7 @@ namespace { /// When an instruction is simplified, add all users of the instruction to /// the work lists because they might get more simplified now. 
void AddUsersToWorklist(SDNode *N) { - for (SDNode *Node : N->uses()) + for (SDNode *Node : N->users()) AddToWorklist(Node); } @@ -1114,7 +1113,7 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, : N1.getConstantOperandVal(1))); if (Opc == ISD::SUB) ScalableOffset = -ScalableOffset; - if (all_of(N->uses(), [&](SDNode *Node) { + if (all_of(N->users(), [&](SDNode *Node) { if (auto *LoadStore = dyn_cast<MemSDNode>(Node); LoadStore && LoadStore->getBasePtr().getNode() == N) { TargetLoweringBase::AddrMode AM; @@ -1152,7 +1151,7 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, return false; const int64_t CombinedValue = CombinedValueIntVal.getSExtValue(); - for (SDNode *Node : N->uses()) { + for (SDNode *Node : N->users()) { if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) { // Is x[offset2] already not a legal addressing mode? If so then // reassociating the constants breaks nothing (we test offset2 because @@ -1177,7 +1176,7 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA)) return false; - for (SDNode *Node : N->uses()) { + for (SDNode *Node : N->users()) { auto *LoadStore = dyn_cast<MemSDNode>(Node); if (!LoadStore) return false; @@ -2137,8 +2136,8 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // If the sole user is a token factor, we should make sure we have a // chance to merge them together. This prevents TF chains from inhibiting // optimizations. - if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor) - AddToWorklist(*(N->use_begin())); + if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::TokenFactor) + AddToWorklist(*(N->user_begin())); SmallVector<SDNode *, 8> TFs; // List of token factors to visit. SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. 
@@ -3950,6 +3949,23 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true)) return Result; + // Similar to the previous rule, but this time targeting an expanded abs. + // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X)) + // as well as + // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X)) + // Note that these two are applicable to both signed and unsigned min/max. + SDValue X; + SDValue S0; + auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0)); + if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat), + m_UMax(m_Value(X), NegPat), + m_SMin(m_Value(X), NegPat), + m_UMin(m_Value(X), NegPat))))) { + unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode()); + if (hasOperation(NewOpc, VT)) + return DAG.getNode(NewOpc, DL, VT, X, S0); + } + // Fold neg(splat(neg(x)) -> splat(x) if (VT.isVector()) { SDValue N1S = DAG.getSplatValue(N1, true); @@ -4721,7 +4737,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); SDValue combined; - for (SDNode *User : Op0->uses()) { + for (SDNode *User : Op0->users()) { if (User == Node || User->getOpcode() == ISD::DELETED_NODE || User->use_empty()) continue; @@ -10233,7 +10249,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // Variant of version done on multiply, except mul by a power of 2 is turned // into a shift. if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) && - N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) { + TLI.isDesirableToCommuteWithShift(N, Level)) { SDValue N01 = N0.getOperand(1); if (SDValue Shl1 = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) { @@ -10252,8 +10268,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // TODO: Should we limit this with isLegalAddImmediate? 
if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.getOperand(0).getOpcode() == ISD::ADD && - N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() && - N0.getOperand(0)->hasOneUse() && + N0.getOperand(0)->getFlags().hasNoSignedWrap() && TLI.isDesirableToCommuteWithShift(N, Level)) { SDValue Add = N0.getOperand(0); SDLoc DL(N0); @@ -10371,7 +10386,7 @@ static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI; if (!ShiftOperand.hasOneUse() && TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) && - llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) { + llvm::any_of(ShiftOperand->users(), UserOfLowerBits)) { return SDValue(); } @@ -10908,15 +10923,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // which we plan to do. This workaround can be removed once the DAG is // processed in topological order. if (N->hasOneUse()) { - SDNode *Use = *N->use_begin(); + SDNode *User = *N->user_begin(); // Look pass the truncate. - if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) - Use = *Use->use_begin(); + if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) + User = *User->user_begin(); - if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND || - Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR) - AddToWorklist(Use); + if (User->getOpcode() == ISD::BRCOND || User->getOpcode() == ISD::AND || + User->getOpcode() == ISD::OR || User->getOpcode() == ISD::XOR) + AddToWorklist(User); } // Try to transform this shift into a multiply-high if @@ -12919,7 +12934,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { // also lend itself to numerous combines and, as a result, it is desired // we keep the argument to a brcond as a setcc as much as possible. 
bool PreferSetCC = - N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND; + N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BRCOND; ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); EVT VT = N->getValueType(0); @@ -13231,12 +13246,11 @@ static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, const TargetLowering &TLI) { bool HasCopyToRegUses = false; bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType()); - for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE; - ++UI) { - SDNode *User = *UI; + for (SDUse &Use : N0->uses()) { + SDNode *User = Use.getUser(); if (User == N) continue; - if (UI.getUse().getResNo() != N0.getResNo()) + if (Use.getResNo() != N0.getResNo()) continue; // FIXME: Only extend SETCC N, N and SETCC N, c for now. if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { @@ -13268,9 +13282,7 @@ static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, if (HasCopyToRegUses) { bool BothLiveOut = false; - for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); - UI != UE; ++UI) { - SDUse &Use = UI.getUse(); + for (SDUse &Use : N->uses()) { if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { BothLiveOut = true; break; @@ -13572,7 +13584,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, if (NonNegZExt) { assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND && "Unexpected load type or opcode"); - for (SDNode *User : N0->uses()) { + for (SDNode *User : N0->users()) { if (User->getOpcode() == ISD::SETCC) { ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); if (ISD::isSignedIntSetCC(CC)) { @@ -13782,11 +13794,10 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) { // Non-chain users of this value must either be the setcc in this // sequence or extends that can be folded into the new {z/s}ext-load. 
- for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { + for (SDUse &Use : V->uses()) { // Skip uses of the chain and the setcc. - SDNode *User = *UI; - if (UI.getUse().getResNo() != 0 || User == N0.getNode()) + SDNode *User = Use.getUser(); + if (Use.getResNo() != 0 || User == N0.getNode()) continue; // Extra users must have exactly the same cast we are about to create. // TODO: This restriction could be eased if ExtendUsesToFormExtLoad() @@ -14827,7 +14838,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { // If the SRL is only used by a masking AND, we may be able to adjust // the ExtVT to make the AND redundant. - SDNode *Mask = *(SRL->use_begin()); + SDNode *Mask = *(SRL->user_begin()); if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND && isa<ConstantSDNode>(Mask->getOperand(1))) { unsigned Offset, ActiveBits; @@ -15366,7 +15377,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } // If this is anyext(trunc), don't fold it, allow ourselves to be folded. - if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND)) + if (N->hasOneUse() && (N->user_begin()->getOpcode() == ISD::ANY_EXTEND)) return SDValue(); // Fold extract-and-trunc into a narrow extract. For example: @@ -16092,7 +16103,7 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { if (N0.getOpcode() == ISD::BUILD_VECTOR) { SDLoc DL(N0); EVT VT = N0.getValueType(); - if (llvm::ISD::isBuildVectorAllOnes(N0.getNode())) + if (llvm::ISD::isBuildVectorAllOnes(N0.getNode()) && VT.isInteger()) return DAG.getAllOnesConstant(DL, VT); if (llvm::ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { SmallVector<SDValue, 8> NewVecC; @@ -17675,7 +17686,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { // Find all FDIV users of the same divisor. // Use a set because duplicates may be present in the user list. 
SetVector<SDNode *> Users; - for (auto *U : N1->uses()) { + for (auto *U : N1->users()) { if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet. if (U->getOperand(1).getOpcode() == ISD::FSQRT && @@ -18372,7 +18383,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return FoldedVOp; // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. - if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND) + if (N->hasOneUse() && N->user_begin()->getOpcode() == ISD::FP_ROUND) return SDValue(); // fold (fp_extend c1fp) -> c1fp @@ -18763,9 +18774,13 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) { EVT SetCCVT = N.getValueType(); if (LegalTypes) SetCCVT = getSetCCResultType(SetCCVT); - // Replace the uses of XOR with SETCC - return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, - Equal ? ISD::SETEQ : ISD::SETNE); + // Replace the uses of XOR with SETCC. Note, avoid this transformation if + // it would introduce illegal operations post-legalization as this can + // result in infinite looping between converting xor->setcc here, and + // expanding setcc->xor in LegalizeSetCCCondCode if requested. + const ISD::CondCode CC = Equal ? ISD::SETEQ : ISD::SETNE; + if (!LegalOperations || TLI.isCondCodeLegal(CC, Op0.getSimpleValueType())) + return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, CC); } } @@ -18921,11 +18936,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // can be folded with this one. We should do this to avoid having to keep // a copy of the original base pointer. 
SmallVector<SDNode *, 16> OtherUses; + unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); if (isa<ConstantSDNode>(Offset)) - for (SDNode::use_iterator UI = BasePtr->use_begin(), - UE = BasePtr->use_end(); - UI != UE; ++UI) { - SDUse &Use = UI.getUse(); + for (SDUse &Use : BasePtr->uses()) { // Skip the use that is Ptr and uses of other results from BasePtr's // node (important for nodes that return multiple results). if (Use.getUser() == Ptr.getNode() || Use != BasePtr) @@ -18941,7 +18954,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { break; } - SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1); + SDValue Op1 = Use.getUser()->getOperand((Use.getOperandNo() + 1) & 1); if (!isa<ConstantSDNode>(Op1)) { OtherUses.clear(); break; @@ -18962,15 +18975,15 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Now check for #3 and #4. bool RealUse = false; - for (SDNode *Use : Ptr->uses()) { - if (Use == N) + for (SDNode *User : Ptr->users()) { + if (User == N) continue; - if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps)) + if (SDNode::hasPredecessorHelper(User, Visited, Worklist, MaxSteps)) return false; // If Ptr may be folded in addressing mode of other use, then it's // not profitable to do this transformation. - if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI)) + if (!canFoldInAddressingMode(Ptr.getNode(), User, DAG, TLI)) RealUse = true; } @@ -19085,19 +19098,20 @@ static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, return false; SmallPtrSet<const SDNode *, 32> Visited; - for (SDNode *Use : BasePtr->uses()) { - if (Use == Ptr.getNode()) + unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); + for (SDNode *User : BasePtr->users()) { + if (User == Ptr.getNode()) continue; // No if there's a later user which could perform the index instead. 
- if (isa<MemSDNode>(Use)) { + if (isa<MemSDNode>(User)) { bool IsLoad = true; bool IsMasked = false; SDValue OtherPtr; - if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad, + if (getCombineLoadStoreParts(User, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked, OtherPtr, TLI)) { SmallVector<const SDNode *, 2> Worklist; - Worklist.push_back(Use); + Worklist.push_back(User); if (SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps)) return false; } @@ -19105,9 +19119,9 @@ static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, // If all the uses are load / store addresses, then don't do the // transformation. - if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) { - for (SDNode *UseUse : Use->uses()) - if (canFoldInAddressingMode(Use, UseUse, DAG, TLI)) + if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB) { + for (SDNode *UserUser : User->users()) + if (canFoldInAddressingMode(User, UserUser, DAG, TLI)) return false; } } @@ -19131,7 +19145,8 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, // 2) Op must be independent of N, i.e. Op is neither a predecessor // nor a successor of N. Otherwise, if Op is folded that would // create a cycle. - for (SDNode *Op : Ptr->uses()) { + unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); + for (SDNode *Op : Ptr->users()) { // Check for #1. 
if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI)) continue; @@ -19842,17 +19857,17 @@ struct LoadedSlice { bool canMergeExpensiveCrossRegisterBankCopy() const { if (!Inst || !Inst->hasOneUse()) return false; - SDNode *Use = *Inst->use_begin(); - if (Use->getOpcode() != ISD::BITCAST) + SDNode *User = *Inst->user_begin(); + if (User->getOpcode() != ISD::BITCAST) return false; assert(DAG && "Missing context"); const TargetLowering &TLI = DAG->getTargetLoweringInfo(); - EVT ResVT = Use->getValueType(0); + EVT ResVT = User->getValueType(0); const TargetRegisterClass *ResRC = - TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent()); + TLI.getRegClassFor(ResVT.getSimpleVT(), User->isDivergent()); const TargetRegisterClass *ArgRC = - TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(), - Use->getOperand(0)->isDivergent()); + TLI.getRegClassFor(User->getOperand(0).getValueType().getSimpleVT(), + User->getOperand(0)->isDivergent()); if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) return false; @@ -20051,20 +20066,19 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { // Check if this load is used as several smaller chunks of bits. // Basically, look for uses in trunc or trunc(lshr) and record a new chain // of computation for each trunc. - for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); - UI != UIEnd; ++UI) { + for (SDUse &U : LD->uses()) { // Skip the uses of the chain. - if (UI.getUse().getResNo() != 0) + if (U.getResNo() != 0) continue; - SDNode *User = *UI; + SDNode *User = U.getUser(); unsigned Shift = 0; // Check if this is a trunc(lshr). 
if (User->getOpcode() == ISD::SRL && User->hasOneUse() && isa<ConstantSDNode>(User->getOperand(1))) { Shift = User->getConstantOperandVal(1); - User = *User->use_begin(); + User = *User->user_begin(); } // At this point, User is a Truncate, iff we encountered, trunc or @@ -20334,74 +20348,98 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { ST->getPointerInfo().getAddrSpace()) return SDValue(); - // Find the type to narrow it the load / op / store to. + // Find the type NewVT to narrow the load / op / store to. SDValue N1 = Value.getOperand(1); unsigned BitWidth = N1.getValueSizeInBits(); APInt Imm = N1->getAsAPIntVal(); if (Opc == ISD::AND) - Imm ^= APInt::getAllOnes(BitWidth); + Imm.flipAllBits(); if (Imm == 0 || Imm.isAllOnes()) return SDValue(); - unsigned ShAmt = Imm.countr_zero(); - unsigned MSB = BitWidth - Imm.countl_zero() - 1; - unsigned NewBW = NextPowerOf2(MSB - ShAmt); + // Find least/most significant bit that need to be part of the narrowed + // operation. We assume target will need to address/access full bytes, so + // we make sure to align LSB and MSB at byte boundaries. + unsigned BitsPerByteMask = 7u; + unsigned LSB = Imm.countr_zero() & ~BitsPerByteMask; + unsigned MSB = (Imm.getActiveBits() - 1) | BitsPerByteMask; + unsigned NewBW = NextPowerOf2(MSB - LSB); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); // The narrowing should be profitable, the load/store operation should be // legal (or custom) and the store size should be equal to the NewVT width. 
- while (NewBW < BitWidth && (NewVT.getStoreSizeInBits() != NewBW || - !TLI.isOperationLegalOrCustom(Opc, NewVT) || - !TLI.isNarrowingProfitable(N, VT, NewVT))) { + while (NewBW < BitWidth && + (NewVT.getStoreSizeInBits() != NewBW || + !TLI.isOperationLegalOrCustom(Opc, NewVT) || + (!ReduceLoadOpStoreWidthForceNarrowingProfitable && + !TLI.isNarrowingProfitable(N, VT, NewVT)))) { NewBW = NextPowerOf2(NewBW); NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } if (NewBW >= BitWidth) return SDValue(); - // If the lsb changed does not start at the type bitwidth boundary, - // start at the previous one. - if (ShAmt % NewBW) - ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; - APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, - std::min(BitWidth, ShAmt + NewBW)); - if ((Imm & Mask) == Imm) { - APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); - if (Opc == ISD::AND) - NewImm ^= APInt::getAllOnes(NewBW); - uint64_t PtrOff = ShAmt / 8; - // For big endian targets, we need to adjust the offset to the pointer to - // load the correct bytes. - if (DAG.getDataLayout().isBigEndian()) - PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; + // If we come this far NewVT/NewBW reflect a power-of-2 sized type that is + // large enough to cover all bits that should be modified. This type might + // however be larger than really needed (such as i32 while we actually only + // need to modify one byte). Now we need to find out how to align the memory + // accesses to satisfy preferred alignments as well as to avoid accessing + // memory outside the store size of the original access. + + unsigned VTStoreSize = VT.getStoreSizeInBits().getFixedValue(); + + // Let ShAmt denote amount of bits to skip, counted from the least + // significant bits of Imm. And let PtrOff denote how much the pointer needs + // to be offset (in bytes) for the new access. 
+ unsigned ShAmt = 0; + uint64_t PtrOff = 0; + for (; ShAmt + NewBW <= VTStoreSize; ShAmt += 8) { + // Make sure the range [ShAmt, ShAmt+NewBW) cover both LSB and MSB. + if (ShAmt > LSB) + return SDValue(); + if (ShAmt + NewBW < MSB) + continue; + + // Calculate PtrOff. + unsigned PtrAdjustmentInBits = DAG.getDataLayout().isBigEndian() + ? VTStoreSize - NewBW - ShAmt + : ShAmt; + PtrOff = PtrAdjustmentInBits / 8; + // Now check if narrow access is allowed and fast, considering alignments. unsigned IsFast = 0; Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT, - LD->getAddressSpace(), NewAlign, - LD->getMemOperand()->getFlags(), &IsFast) || - !IsFast) - return SDValue(); - - SDValue NewPtr = - DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD)); - SDValue NewLD = - DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, - LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, - LD->getMemOperand()->getFlags(), LD->getAAInfo()); - SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, - DAG.getConstant(NewImm, SDLoc(Value), - NewVT)); - SDValue NewST = - DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, - ST->getPointerInfo().getWithOffset(PtrOff), NewAlign); - - AddToWorklist(NewPtr.getNode()); - AddToWorklist(NewLD.getNode()); - AddToWorklist(NewVal.getNode()); - WorklistRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); - ++OpsNarrowed; - return NewST; + if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT, + LD->getAddressSpace(), NewAlign, + LD->getMemOperand()->getFlags(), &IsFast) && + IsFast) + break; } + // If loop above did not find any accepted ShAmt we need to exit here. 
+ if (ShAmt + NewBW > VTStoreSize) + return SDValue(); + + APInt NewImm = Imm.lshr(ShAmt).trunc(NewBW); + if (Opc == ISD::AND) + NewImm.flipAllBits(); + Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); + SDValue NewPtr = + DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD)); + SDValue NewLD = + DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, + LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, + DAG.getConstant(NewImm, SDLoc(Value), NewVT)); + SDValue NewST = + DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, + ST->getPointerInfo().getWithOffset(PtrOff), NewAlign); + + AddToWorklist(NewPtr.getNode()); + AddToWorklist(NewLD.getNode()); + AddToWorklist(NewVal.getNode()); + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); + ++OpsNarrowed; + return NewST; } return SDValue(); @@ -20486,24 +20524,24 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode, return true; // Walk all the users of the constant with which we're multiplying. - for (SDNode *Use : ConstNode->uses()) { - if (Use == MulNode) // This use is the one we're on right now. Skip it. + for (SDNode *User : ConstNode->users()) { + if (User == MulNode) // This use is the one we're on right now. Skip it. continue; - if (Use->getOpcode() == ISD::MUL) { // We have another multiply use. + if (User->getOpcode() == ISD::MUL) { // We have another multiply use. SDNode *OtherOp; SDNode *MulVar = AddNode.getOperand(0).getNode(); // OtherOp is what we're multiplying against the constant. 
- if (Use->getOperand(0) == ConstNode) - OtherOp = Use->getOperand(1).getNode(); + if (User->getOperand(0) == ConstNode) + OtherOp = User->getOperand(1).getNode(); else - OtherOp = Use->getOperand(0).getNode(); + OtherOp = User->getOperand(0).getNode(); // Check to see if multiply is with the same operand of our "add". // // ConstNode = CONST - // Use = ConstNode * A <-- visiting Use. OtherOp is A. + // User = ConstNode * A <-- visiting User. OtherOp is A. // ... // AddNode = (A + c1) <-- MulVar is A. // = AddNode * ConstNode <-- current visiting instruction. @@ -20521,7 +20559,7 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode, // ... = AddNode * ConstNode <-- current visiting instruction. // ... // OtherOp = (A + c2) - // Use = OtherOp * ConstNode <-- visiting Use. + // User = OtherOp * ConstNode <-- visiting User. // // If we make this transformation, we will have a common // multiply (CONST * A) after we also do the same transformation @@ -20907,11 +20945,11 @@ DAGCombiner::getStoreMergeCandidates(StoreSDNode *St, RootCount->second.second > StoreMergeDependenceLimit; }; - auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) { + auto TryToAddCandidate = [&](SDUse &Use) { // This must be a chain use. 
- if (UseIter.getOperandNo() != 0) + if (Use.getOperandNo() != 0) return; - if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) { + if (auto *OtherStore = dyn_cast<StoreSDNode>(Use.getUser())) { BaseIndexOffset Ptr; int64_t PtrDiff; if (CandidateMatch(OtherStore, Ptr, PtrDiff) && @@ -20929,19 +20967,20 @@ DAGCombiner::getStoreMergeCandidates(StoreSDNode *St, return nullptr; for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) { - if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain - for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2) - TryToAddCandidate(I2); + SDNode *User = I->getUser(); + if (I->getOperandNo() == 0 && isa<LoadSDNode>(User)) { // walk down chain + for (SDUse &U2 : User->uses()) + TryToAddCandidate(U2); } // Check stores that depend on the root (e.g. Store 3 in the chart above). - if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) { - TryToAddCandidate(I); + if (I->getOperandNo() == 0 && isa<StoreSDNode>(User)) { + TryToAddCandidate(*I); } } } else { for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) - TryToAddCandidate(I); + TryToAddCandidate(*I); } return RootNode; @@ -22751,16 +22790,22 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, /// Transform a vector binary operation into a scalar binary operation by moving /// the math/logic after an extract element of a vector. 
-static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, - const SDLoc &DL, bool LegalOperations) { +static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG, + const SDLoc &DL, bool LegalTypes) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Vec = ExtElt->getOperand(0); SDValue Index = ExtElt->getOperand(1); auto *IndexC = dyn_cast<ConstantSDNode>(Index); - if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() || + unsigned Opc = Vec.getOpcode(); + if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) || Vec->getNumValues() != 1) return SDValue(); + EVT ResVT = ExtElt->getValueType(0); + if (Opc == ISD::SETCC && + (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes)) + return SDValue(); + // Targets may want to avoid this to prevent an expensive register transfer. if (!TLI.shouldScalarizeBinop(Vec)) return SDValue(); @@ -22771,19 +22816,24 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, SDValue Op0 = Vec.getOperand(0); SDValue Op1 = Vec.getOperand(1); APInt SplatVal; - if (isAnyConstantBuildVector(Op0, true) || - ISD::isConstantSplatVector(Op0.getNode(), SplatVal) || - isAnyConstantBuildVector(Op1, true) || - ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) { - // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C' - // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC) - EVT VT = ExtElt->getValueType(0); - SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index); - SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index); - return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1); - } + if (!isAnyConstantBuildVector(Op0, true) && + !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) && + !isAnyConstantBuildVector(Op1, true) && + !ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) + return SDValue(); - return SDValue(); + // extractelt (op X, C), IndexC --> op 
(extractelt X, IndexC), C' + // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC) + if (Opc == ISD::SETCC) { + EVT OpVT = Op0.getValueType().getVectorElementType(); + Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index); + Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index); + return DAG.getSetCC(DL, ResVT, Op0, Op1, + cast<CondCodeSDNode>(Vec->getOperand(2))->get()); + } + Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index); + Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index); + return DAG.getNode(Opc, DL, ResVT, Op0, Op1); } // Given a ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract, @@ -22862,7 +22912,7 @@ bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts( // Did we fail to model any of the users of the Producer? bool ProducerIsLeaf = false; // Look at each user of this Producer. - for (SDNode *User : E.Producer->uses()) { + for (SDNode *User : E.Producer->users()) { switch (User->getOpcode()) { // TODO: support ISD::BITCAST // TODO: support ISD::ANY_EXTEND @@ -23016,7 +23066,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } - if (SDValue BO = scalarizeExtractedBinop(N, DAG, DL, LegalOperations)) + if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes)) return BO; if (VecVT.isScalableVector()) @@ -23055,8 +23105,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger()) return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT); + // TODO: Add support for SCALAR_TO_VECTOR implicit truncation. 
if (LegalTypes && BCSrc.getValueType().isInteger() && - BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) { + BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR && + BCSrc.getScalarValueSizeInBits() == + BCSrc.getOperand(0).getScalarValueSizeInBits()) { // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt --> // trunc i64 X to i32 SDValue X = BCSrc.getOperand(0); @@ -23136,14 +23189,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // If only EXTRACT_VECTOR_ELT nodes use the source vector we can // simplify it based on the (valid) extraction indices. - if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) { + if (llvm::all_of(VecOp->users(), [&](SDNode *Use) { return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT && Use->getOperand(0) == VecOp && isa<ConstantSDNode>(Use->getOperand(1)); })) { APInt DemandedElts = APInt::getZero(NumElts); - for (SDNode *Use : VecOp->uses()) { - auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1)); + for (SDNode *User : VecOp->users()) { + auto *CstElt = cast<ConstantSDNode>(User->getOperand(1)); if (CstElt->getAPIntValue().ult(NumElts)) DemandedElts.setBit(CstElt->getZExtValue()); } @@ -27262,7 +27315,7 @@ SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) { // Check if the memory, where FP state is written to, is used only in a single // load operation. LoadSDNode *LdNode = nullptr; - for (auto *U : Ptr->uses()) { + for (auto *U : Ptr->users()) { if (U == N) continue; if (auto *Ld = dyn_cast<LoadSDNode>(U)) { @@ -27280,8 +27333,7 @@ SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) { // Check if the loaded value is used only in a store operation. 
StoreSDNode *StNode = nullptr; - for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) { - SDUse &U = I.getUse(); + for (SDUse &U : LdNode->uses()) { if (U.getResNo() == 0) { if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) { if (StNode) @@ -27312,7 +27364,7 @@ SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) { // Check if the address of FP state is used also in a store operation only. StoreSDNode *StNode = nullptr; - for (auto *U : Ptr->uses()) { + for (auto *U : Ptr->users()) { if (U == N) continue; if (auto *St = dyn_cast<StoreSDNode>(U)) { diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index d5551758c073..ec5b058da297 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1078,7 +1078,7 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { // For ByVal, alignment should come from FE. BE will guess if this info // is not there, but there are cases it cannot get right. 
if (!MemAlign) - MemAlign = Align(TLI.getByValTypeAlignment(Arg.IndirectType, DL)); + MemAlign = TLI.getByValTypeAlignment(Arg.IndirectType, DL); Flags.setByValSize(FrameSize); } else if (!MemAlign) { MemAlign = DL.getABITypeAlign(Arg.Ty); @@ -1229,7 +1229,7 @@ void FastISel::handleDbgInfo(const Instruction *II) { } if (!Res) - LLVM_DEBUG(dbgs() << "Dropping debug-info for " << DVR << "\n";); + LLVM_DEBUG(dbgs() << "Dropping debug-info for " << DVR << "\n"); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 9c7085cc7e7a..8e313fb21eed 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -105,7 +105,7 @@ void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, if (TLI->isTypeLegal(VT)) UseRC = TLI->getRegClassFor(VT, Node->isDivergent()); - for (SDNode *User : Node->uses()) { + for (SDNode *User : Node->users()) { bool Match = true; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && @@ -225,7 +225,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, } if (!VRBase && !IsClone && !IsCloned) - for (SDNode *User : Node->uses()) { + for (SDNode *User : Node->users()) { if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { @@ -502,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBaseMapType &VRBaseMap, // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. 
- for (SDNode *User : Node->uses()) { + for (SDNode *User : Node->users()) { if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ca87168929f9..595a410101ec 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1394,7 +1394,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { Visited.insert(Op.getNode()); Worklist.push_back(Idx.getNode()); SDValue StackPtr, Ch; - for (SDNode *User : Vec.getNode()->uses()) { + for (SDNode *User : Vec.getNode()->users()) { if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) { if (ST->isIndexed() || ST->isTruncatingStore() || ST->getValue() != Vec) @@ -2293,7 +2293,7 @@ static bool useSinCos(SDNode *Node) { ? ISD::FCOS : ISD::FSIN; SDValue Op0 = Node->getOperand(0); - for (const SDNode *User : Op0.getNode()->uses()) { + for (const SDNode *User : Op0.getNode()->users()) { if (User == Node) continue; // The other user might have been turned into sincos already. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b52c2c07a7fb..71f100bfa034 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -3415,6 +3415,23 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) { SDValue Op = N->getOperand(IsStrict ? 1 : 0); EVT SVT = Op.getValueType(); + // If the input type needs to be softened, do that now so that call lowering + // will see the f16 type. + if (getTypeAction(SVT) == TargetLowering::TypeSoftenFloat) { + RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); + + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); + Op = GetSoftenedFloat(Op); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setTypeListBeforeSoften(SVT, RVT, true); + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N), Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return DAG.getNode(ISD::BITCAST, SDLoc(N), MVT::i16, Tmp.first); + } + if (IsStrict) { SDValue Res = DAG.getNode(GetPromotionOpcodeStrict(SVT, RVT), SDLoc(N), {MVT::i16, MVT::Other}, {N->getOperand(0), Op}); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 986d69e6c7a9..be7521f34168 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2585,6 +2585,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { : RTLIB::getLDEXP(N->getValueType(0)); if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { + // Scalarize vector FPOWI instead of promoting the type. This allows the + // scalar FPOWIs to be visited and converted to libcalls before promoting + // the type. + // FIXME: This should be done in LegalizeVectorOps/LegalizeDAG, but call + // lowering needs the unpromoted EVT. 
+ if (IsPowI && N->getValueType(0).isVector()) + return DAG.UnrollVectorOp(N); SmallVector<SDValue, 3> NewOps(N->ops()); NewOps[1 + OpOffset] = SExtPromotedInteger(N->getOperand(1 + OpOffset)); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); @@ -2835,6 +2842,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; case ISD::FREEZE: SplitRes_FREEZE(N, Lo, Hi); break; + case ISD::SETCC: ExpandIntRes_SETCC(N, Lo, Hi); break; case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; @@ -3316,6 +3324,20 @@ static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) { } } +void DAGTypeLegalizer::ExpandIntRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDLoc DL(N); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + EVT NewVT = getSetCCResultType(LHS.getValueType()); + + // Taking the same approach as ScalarizeVecRes_SETCC + SDValue Res = DAG.getNode(ISD::SETCC, DL, NewVT, LHS, RHS, N->getOperand(2)); + + Res = DAG.getBoolExtOrTrunc(Res, DL, N->getValueType(0), NewVT); + SplitInteger(Res, Lo, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc DL(N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index cb6d3fe4db8a..b6abad830c37 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -88,10 +88,9 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { if (I != ReplacedValues.end()) { Mapped |= 1; // Check that remapped values are only used by nodes marked NewNode. 
- for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); - UI != UE; ++UI) - if (UI.getUse().getResNo() == i) - assert(UI->getNodeId() == NewNode && + for (SDUse &U : Node.uses()) + if (U.getResNo() == i) + assert(U.getUser()->getNodeId() == NewNode && "Remapped value has non-trivial use!"); // Check that the final result of applying ReplacedValues is not @@ -189,7 +188,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { #ifndef NDEBUG // Checked that NewNodes are only used by other NewNodes. for (SDNode *N : NewNodes) { - for (SDNode *U : N->uses()) + for (SDNode *U : N->users()) assert(U->getNodeId() == NewNode && "NewNode used by non-NewNode!"); } #endif @@ -399,7 +398,7 @@ NodeDone: assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); N->setNodeId(Processed); - for (SDNode *User : N->uses()) { + for (SDNode *User : N->users()) { int NodeId = User->getNodeId(); // This node has two options: it can either be a new node or its Node ID diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1703149aca74..571a710cc92a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -487,6 +487,7 @@ private: void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CMP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SETCC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 2655e8428309..113a3bc0bbea 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -265,7 +265,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, // Increment the pointer to the other 
half. unsigned IncrementSize = NVT.getSizeInBits() / 8; - Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize)); Hi = DAG.getLoad( NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 465128099f44..107454a92e35 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -6421,16 +6421,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { SDValue InOp2 = GetWidenedVector(N->getOperand(1)); // Adjust mask based on new input vector length. - SmallVector<int, 16> NewMask; + SmallVector<int, 16> NewMask(WidenNumElts, -1); for (unsigned i = 0; i != NumElts; ++i) { int Idx = N->getMaskElt(i); if (Idx < (int)NumElts) - NewMask.push_back(Idx); + NewMask[i] = Idx; else - NewMask.push_back(Idx - NumElts + WidenNumElts); + NewMask[i] = Idx - NumElts + WidenNumElts; } - for (unsigned i = NumElts; i != WidenNumElts; ++i) - NewMask.push_back(-1); return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask); } @@ -6478,12 +6476,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) { // Use VECTOR_SHUFFLE to combine new vector from 'ReverseVal' for // fixed-vectors. 
- SmallVector<int, 16> Mask; - for (unsigned i = 0; i != VTNumElts; ++i) { - Mask.push_back(IdxVal + i); - } - for (unsigned i = VTNumElts; i != WidenNumElts; ++i) - Mask.push_back(-1); + SmallVector<int, 16> Mask(WidenNumElts, -1); + std::iota(Mask.begin(), Mask.begin() + VTNumElts, IdxVal); return DAG.getVectorShuffle(WidenVT, dl, ReverseVal, DAG.getUNDEF(WidenVT), Mask); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 70a743844019..26eba4b257fb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -756,7 +756,7 @@ void ScheduleDAGLinearize::Schedule() { // Glue user must be scheduled together with the glue operand. So other // users of the glue operand must be treated as its users. SDNode *ImmGUser = Glue->getGluedUser(); - for (const SDNode *U : Glue->uses()) + for (const SDNode *U : Glue->users()) if (U == ImmGUser) --Degree; GUser->setNodeId(UDegree + Degree); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 31939ae5922e..26fc75c0578e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -236,7 +236,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { // This algorithm requires a reasonably low use count before finding a match // to avoid uselessly blowing up compile time in large blocks. unsigned UseCount = 0; - for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); + for (SDNode::user_iterator I = Chain->user_begin(), E = Chain->user_end(); I != E && UseCount < 100; ++I, ++UseCount) { if (I.getUse().getResNo() != Chain.getResNo()) continue; @@ -388,7 +388,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // There are either zero or one users of the Glue result. 
bool HasGlueUse = false; - for (SDNode *U : N->uses()) + for (SDNode *U : N->users()) if (GlueVal.isOperandOf(U)) { HasGlueUse = true; assert(N->getNodeId() == -1 && "Node already inserted!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 182529123ec6..0dfd0302ae54 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -111,10 +111,17 @@ static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max", cl::desc("Number limit for gluing ld/st of memcpy."), cl::Hidden, cl::init(0)); +static cl::opt<unsigned> + MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), + cl::desc("DAG combiner limit number of steps when searching DAG " + "for predecessor nodes")); + static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) { LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G);); } +unsigned SelectionDAG::getHasPredecessorMaxSteps() { return MaxSteps; } + //===----------------------------------------------------------------------===// // ConstantFPSDNode Class //===----------------------------------------------------------------------===// @@ -423,6 +430,21 @@ bool ISD::matchBinaryPredicate( return true; } +ISD::NodeType ISD::getInverseMinMaxOpcode(unsigned MinMaxOpc) { + switch (MinMaxOpc) { + default: + llvm_unreachable("unrecognized opcode"); + case ISD::UMIN: + return ISD::UMAX; + case ISD::UMAX: + return ISD::UMIN; + case ISD::SMIN: + return ISD::SMAX; + case ISD::SMAX: + return ISD::SMIN; + } +} + ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) { switch (VecReduceOpcode) { default: @@ -2474,6 +2496,51 @@ SDValue SelectionDAG::getPartialReduceAdd(SDLoc DL, EVT ReducedTy, SDValue Op1, return Subvectors[0]; } +/// Given a store node \p StoreNode, return true if it is safe to fold that node +/// into \p FPNode, which expands to a library call with output pointers. 
+static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, + SDNode *FPNode) { + SmallVector<const SDNode *, 8> Worklist; + SmallVector<const SDNode *, 8> DeferredNodes; + SmallPtrSet<const SDNode *, 16> Visited; + + // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode). + for (SDValue Op : StoreNode->ops()) + if (Op.getNode() != FPNode) + Worklist.push_back(Op.getNode()); + + unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); + while (!Worklist.empty()) { + const SDNode *Node = Worklist.pop_back_val(); + auto [_, Inserted] = Visited.insert(Node); + if (!Inserted) + continue; + + if (MaxSteps > 0 && Visited.size() >= MaxSteps) + return false; + + // Reached the FPNode (would result in a cycle). + // OR Reached CALLSEQ_START (would result in nested call sequences). + if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START) + return false; + + if (Node->getOpcode() == ISD::CALLSEQ_END) { + // Defer looking into call sequences (so we can check we're outside one). + // We still need to look through these for the predecessor check. + DeferredNodes.push_back(Node); + continue; + } + + for (SDValue Op : Node->ops()) + Worklist.push_back(Op.getNode()); + } + + // True if we're outside a call sequence and don't have the FPNode as a + // predecessor. No cycles or nested call sequences possible. + return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes, + MaxSteps); +} + bool SelectionDAG::expandMultipleResultFPLibCall( RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl<SDValue> &Results, std::optional<unsigned> CallRetResNo) { @@ -2502,26 +2569,35 @@ bool SelectionDAG::expandMultipleResultFPLibCall( // Find users of the node that store the results (and share input chains). The // destination pointers can be used instead of creating stack allocations. 
- // FIXME: This should allow stores with the same chains (not just the entry - // chain), but there's a risk the store is within a (CALLSEQ_START, - // CALLSEQ_END) pair, which after this expansion will lead to nested call - // sequences. - SDValue InChain = getEntryNode(); + SDValue StoresInChain; SmallVector<StoreSDNode *, 2> ResultStores(NumResults); - for (SDNode *User : Node->uses()) { + for (SDNode *User : Node->users()) { if (!ISD::isNormalStore(User)) continue; auto *ST = cast<StoreSDNode>(User); SDValue StoreValue = ST->getValue(); unsigned ResNo = StoreValue.getResNo(); + // Ensure the store corresponds to an output pointer. + if (CallRetResNo == ResNo) + continue; + // Ensure the store to the default address space and not atomic or volatile. + if (!ST->isSimple() || ST->getAddressSpace() != 0) + continue; + // Ensure all store chains are the same (so they don't alias). + if (StoresInChain && ST->getChain() != StoresInChain) + continue; + // Ensure the store is properly aligned. Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx); - if (CallRetResNo == ResNo || !ST->isSimple() || - ST->getAddressSpace() != 0 || - ST->getAlign() < - getDataLayout().getABITypeAlign(StoreType->getScalarType()) || - ST->getChain() != InChain) + if (ST->getAlign() < + getDataLayout().getABITypeAlign(StoreType->getScalarType())) + continue; + // Avoid: + // 1. Creating cyclic dependencies. + // 2. Expanding the node to a call within a call sequence. + if (!canFoldStoreIntoLibCallOutputPointers(ST, Node)) continue; ResultStores[ResNo] = ST; + StoresInChain = ST->getChain(); } TargetLowering::ArgListTy Args; @@ -2563,6 +2639,7 @@ bool SelectionDAG::expandMultipleResultFPLibCall( Type *RetType = CallRetResNo.has_value() ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) : Type::getVoidTy(Ctx); + SDValue InChain = StoresInChain ? StoresInChain : getEntryNode(); SDValue Callee = getExternalSymbol(VD ? 
VD->getVectorFnName().data() : LCName, TLI->getPointerTy(getDataLayout())); TargetLowering::CallLoweringInfo CLI(*this); @@ -6823,7 +6900,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, ScalarOps.push_back(getUNDEF(OpVT)); continue; } - APInt Val = cast<ConstantSDNode>(Op)->getAPIntValue(); + const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue(); ScalarOps.push_back(SignExtendInReg(Val, OpVT)); } return getBuildVector(VT, DL, ScalarOps); @@ -7871,7 +7948,7 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { ArgChains.push_back(Chain); // Add a chain value for each stack argument. - for (SDNode *U : getEntryNode().getNode()->uses()) + for (SDNode *U : getEntryNode().getNode()->users()) if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) if (FI->getIndex() < 0) @@ -8978,12 +9055,12 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, MachineMemOperand *MMO) { - assert((Opcode == ISD::INTRINSIC_VOID || - Opcode == ISD::INTRINSIC_W_CHAIN || - Opcode == ISD::PREFETCH || - (Opcode <= (unsigned)std::numeric_limits<int>::max() && - (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && - "Opcode is not a memory-accessing opcode!"); + assert( + (Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::PREFETCH || + (Opcode <= (unsigned)std::numeric_limits<int>::max() && + Opcode >= ISD::BUILTIN_OP_END && TSI->isTargetMemoryOpcode(Opcode))) && + "Opcode is not a memory-accessing opcode!"); // Memoize the node unless it returns a glue result. 
MemIntrinsicSDNode *N; @@ -10623,7 +10700,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, } SDVTList SelectionDAG::getVTList(EVT VT) { - return makeVTList(SDNode::getValueTypeList(VT), 1); + if (!VT.isExtended()) + return makeVTList(SDNode::getValueTypeList(VT.getSimpleVT()), 1); + + return makeVTList(&(*EVTs.insert(VT).first), 1); } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { @@ -11546,7 +11626,7 @@ class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { void NodeDeleted(SDNode *N, SDNode *E) override { // Increment the iterator as needed. - while (UI != UE && N == *UI) + while (UI != UE && N == UI->getUser()) ++UI; } @@ -11585,7 +11665,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); RAUWUpdateListener Listener(*this, UI, UE); while (UI != UE) { - SDNode *User = *UI; + SDNode *User = UI->getUser(); // This node is about to morph, remove its old self from the CSE maps. RemoveNodeFromCSEMaps(User); @@ -11595,12 +11675,12 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { // To help reduce the number of CSE recomputations, process all // the uses of this user that we can find this way. do { - SDUse &Use = UI.getUse(); + SDUse &Use = *UI; ++UI; Use.set(To); if (To->isDivergent() != From->isDivergent()) updateDivergence(User); - } while (UI != UE && *UI == User); + } while (UI != UE && UI->getUser() == User); // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. 
AddModifiedNodeToCSEMaps(User); @@ -11643,7 +11723,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); RAUWUpdateListener Listener(*this, UI, UE); while (UI != UE) { - SDNode *User = *UI; + SDNode *User = UI->getUser(); // This node is about to morph, remove its old self from the CSE maps. RemoveNodeFromCSEMaps(User); @@ -11653,12 +11733,12 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { // To help reduce the number of CSE recomputations, process all // the uses of this user that we can find this way. do { - SDUse &Use = UI.getUse(); + SDUse &Use = *UI; ++UI; Use.setNode(To); if (To->isDivergent() != From->isDivergent()) updateDivergence(User); - } while (UI != UE && *UI == User); + } while (UI != UE && UI->getUser() == User); // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. @@ -11691,7 +11771,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); RAUWUpdateListener Listener(*this, UI, UE); while (UI != UE) { - SDNode *User = *UI; + SDNode *User = UI->getUser(); // This node is about to morph, remove its old self from the CSE maps. RemoveNodeFromCSEMaps(User); @@ -11702,12 +11782,12 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { // user that we can find this way. 
bool To_IsDivergent = false; do { - SDUse &Use = UI.getUse(); + SDUse &Use = *UI; const SDValue &ToOp = To[Use.getResNo()]; ++UI; Use.set(ToOp); To_IsDivergent |= ToOp->isDivergent(); - } while (UI != UE && *UI == User); + } while (UI != UE && UI->getUser() == User); if (To_IsDivergent != From->isDivergent()) updateDivergence(User); @@ -11745,7 +11825,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ UE = From.getNode()->use_end(); RAUWUpdateListener Listener(*this, UI, UE); while (UI != UE) { - SDNode *User = *UI; + SDNode *User = UI->getUser(); bool UserRemovedFromCSEMaps = false; // A user can appear in a use list multiple times, and when this @@ -11753,7 +11833,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ // To help reduce the number of CSE recomputations, process all // the uses of this user that we can find this way. do { - SDUse &Use = UI.getUse(); + SDUse &Use = *UI; // Skip uses of different values from the same node. if (Use.getResNo() != From.getResNo()) { @@ -11772,7 +11852,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ Use.set(To); if (To->isDivergent() != From->isDivergent()) updateDivergence(User); - } while (UI != UE && *UI == User); + } while (UI != UE && UI->getUser() == User); // We are iterating over all uses of the From node, so if a use // doesn't use the specific value, no changes are made. if (!UserRemovedFromCSEMaps) @@ -11807,7 +11887,7 @@ bool operator<(const UseMemo &L, const UseMemo &R) { /// pointed to by a UseMemo is deleted, set the User to nullptr to indicate that /// the node already has been taken care of recursively. 
class RAUOVWUpdateListener : public SelectionDAG::DAGUpdateListener { - SmallVector<UseMemo, 4> &Uses; + SmallVectorImpl<UseMemo> &Uses; void NodeDeleted(SDNode *N, SDNode *E) override { for (UseMemo &Memo : Uses) @@ -11816,7 +11896,7 @@ class RAUOVWUpdateListener : public SelectionDAG::DAGUpdateListener { } public: - RAUOVWUpdateListener(SelectionDAG &d, SmallVector<UseMemo, 4> &uses) + RAUOVWUpdateListener(SelectionDAG &d, SmallVectorImpl<UseMemo> &uses) : SelectionDAG::DAGUpdateListener(d), Uses(uses) {} }; @@ -11861,7 +11941,7 @@ void SelectionDAG::updateDivergence(SDNode *N) { bool IsDivergent = calculateDivergence(N); if (N->SDNodeBits.IsDivergent != IsDivergent) { N->SDNodeBits.IsDivergent = IsDivergent; - llvm::append_range(Worklist, N->uses()); + llvm::append_range(Worklist, N->users()); } } while (!Worklist.empty()); } @@ -11877,7 +11957,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { } for (size_t I = 0; I != Order.size(); ++I) { SDNode *N = Order[I]; - for (auto *U : N->uses()) { + for (auto *U : N->users()) { unsigned &UnsortedOps = Degree[U]; if (0 == --UnsortedOps) Order.push_back(U); @@ -11917,11 +11997,9 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, for (unsigned i = 0; i != Num; ++i) { unsigned FromResNo = From[i].getResNo(); SDNode *FromNode = From[i].getNode(); - for (SDNode::use_iterator UI = FromNode->use_begin(), - E = FromNode->use_end(); UI != E; ++UI) { - SDUse &Use = UI.getUse(); + for (SDUse &Use : FromNode->uses()) { if (Use.getResNo() == FromResNo) { - UseMemo Memo = { *UI, i, &Use }; + UseMemo Memo = {Use.getUser(), i, &Use}; Uses.push_back(Memo); } } @@ -12006,7 +12084,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { checkForCycles(N, this); // N is in sorted position, so all its uses have one less operand // that needs to be sorted. 
- for (SDNode *P : N->uses()) { + for (SDNode *P : N->users()) { unsigned Degree = P->getNodeId(); assert(Degree != 0 && "Invalid node degree"); --Degree; @@ -12383,17 +12461,11 @@ namespace { /// getValueTypeList - Return a pointer to the specified value type. /// -const EVT *SDNode::getValueTypeList(EVT VT) { - static std::set<EVT, EVT::compareRawBits> EVTs; +const EVT *SDNode::getValueTypeList(MVT VT) { static EVTArray SimpleVTArray; - static sys::SmartMutex<true> VTMutex; - if (VT.isExtended()) { - sys::SmartScopedLock<true> Lock(VTMutex); - return &(*EVTs.insert(VT).first); - } - assert(VT.getSimpleVT() < MVT::VALUETYPE_SIZE && "Value type out of range!"); - return &SimpleVTArray.VTs[VT.getSimpleVT().SimpleTy]; + assert(VT < MVT::VALUETYPE_SIZE && "Value type out of range!"); + return &SimpleVTArray.VTs[VT.SimpleTy]; } /// hasNUsesOfValue - Return true if there are exactly NUSES uses of the @@ -12403,8 +12475,8 @@ bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { assert(Value < getNumValues() && "Bad value!"); // TODO: Only iterate over uses of a given value of the node - for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) { - if (UI.getUse().getResNo() == Value) { + for (SDUse &U : uses()) { + if (U.getResNo() == Value) { if (NUses == 0) return false; --NUses; @@ -12420,8 +12492,8 @@ bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { bool SDNode::hasAnyUseOfValue(unsigned Value) const { assert(Value < getNumValues() && "Bad value!"); - for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) - if (UI.getUse().getResNo() == Value) + for (SDUse &U : uses()) + if (U.getResNo() == Value) return true; return false; @@ -12430,7 +12502,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const { /// isOnlyUserOf - Return true if this node is the only use of N. 
bool SDNode::isOnlyUserOf(const SDNode *N) const { bool Seen = false; - for (const SDNode *User : N->uses()) { + for (const SDNode *User : N->users()) { if (User == this) Seen = true; else @@ -12443,7 +12515,7 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const { /// Return true if the only users of N are contained in Nodes. bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { bool Seen = false; - for (const SDNode *User : N->uses()) { + for (const SDNode *User : N->users()) { if (llvm::is_contained(Nodes, User)) Seen = true; else diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index b72c5eff22f1..f8d7c3ef7bbe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -319,13 +319,14 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, const Twine &ErrMsg) { const Instruction *I = dyn_cast_or_null<Instruction>(V); - if (!V) + if (!I) return Ctx.emitError(ErrMsg); - const char *AsmError = ", possible invalid constraint for vector type"; if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (CI->isInlineAsm()) - return Ctx.emitError(I, ErrMsg + AsmError); + if (CI->isInlineAsm()) { + return Ctx.diagnose(DiagnosticInfoInlineAsm( + *CI, ErrMsg + ", possible invalid constraint for vector type")); + } return Ctx.emitError(I, ErrMsg); } @@ -2194,19 +2195,13 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { if (!FuncInfo.CanLowerReturn) { Register DemoteReg = FuncInfo.DemoteRegister; - const Function *F = I.getParent()->getParent(); // Emit a store of the return value through the virtual register. // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. 
- SmallVector<EVT, 1> PtrValueVTs; - ComputeValueVTs(TLI, DL, - PointerType::get(F->getContext(), - DAG.getDataLayout().getAllocaAddrSpace()), - PtrValueVTs); - + MVT PtrValueVT = TLI.getPointerTy(DL, DL.getAllocaAddrSpace()); SDValue RetPtr = - DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]); + DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVT); SDValue RetOp = getValue(I.getOperand(0)); SmallVector<EVT, 4> ValueVTs, MemVTs; @@ -10509,7 +10504,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call, const Twine &Message) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(&Call, Message); + Ctx.diagnose(DiagnosticInfoInlineAsm(Call, Message)); // Make sure we leave the DAG in a valid state const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -10984,7 +10979,6 @@ std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle the incoming return values from the call. CLI.Ins.clear(); - Type *OrigRetTy = CLI.RetTy; SmallVector<EVT, 4> RetTys; SmallVector<TypeSize, 4> Offsets; auto &DL = CLI.DAG.getDataLayout(); @@ -11197,7 +11191,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (auto MA = Args[i].Alignment) MemAlign = *MA; else - MemAlign = Align(getByValTypeAlignment(Args[i].IndirectType, DL)); + MemAlign = getByValTypeAlignment(Args[i].IndirectType, DL); } else if (auto MA = Args[i].Alignment) { MemAlign = *MA; } else { @@ -11309,13 +11303,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (!CanLowerReturn) { // The instruction result is the result of loading from the // hidden sret parameter. 
- SmallVector<EVT, 1> PVTs; - Type *PtrRetTy = - PointerType::get(OrigRetTy->getContext(), DL.getAllocaAddrSpace()); - - ComputeValueVTs(*this, DL, PtrRetTy, PVTs); - assert(PVTs.size() == 1 && "Pointers should fit in one register"); - EVT PtrVT = PVTs[0]; + MVT PtrVT = getPointerTy(DL, DL.getAllocaAddrSpace()); unsigned NumValues = RetTys.size(); ReturnValues.resize(NumValues); @@ -11635,18 +11623,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. - SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*TLI, DAG.getDataLayout(), - PointerType::get(F.getContext(), - DAG.getDataLayout().getAllocaAddrSpace()), - ValueVTs); - - // NOTE: Assuming that a pointer will never break down to more than one VT - // or one register. + MVT ValueVT = TLI->getPointerTy(DL, DL.getAllocaAddrSpace()); + ISD::ArgFlagsTy Flags; Flags.setSRet(); - MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); - ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, + MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVT); + ISD::InputArg RetArg(Flags, RegisterVT, ValueVT, true, ISD::InputArg::NoArgIndex, 0); Ins.push_back(RetArg); } @@ -11754,7 +11736,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { else if ((ParamAlign = Arg.getParamAlign())) MemAlign = *ParamAlign; else - MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL)); + MemAlign = TLI->getByValTypeAlignment(ArgMemTy, DL); if (Flags.isByRef()) Flags.setByRefSize(MemSize); else @@ -11829,12 +11811,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. 
- SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*TLI, DAG.getDataLayout(), - PointerType::get(F.getContext(), - DAG.getDataLayout().getAllocaAddrSpace()), - ValueVTs); - MVT VT = ValueVTs[0].getSimpleVT(); + MVT VT = TLI->getPointerTy(DL, DL.getAllocaAddrSpace()); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); std::optional<ISD::NodeType> AssertOp; SDValue ArgValue = diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3000dfda1bea..d64a90bcaae7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -51,6 +51,7 @@ #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/SwiftErrorValueTracking.h" @@ -1225,7 +1226,7 @@ void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) { while (!Nodes.empty()) { SDNode *N = Nodes.pop_back_val(); - for (auto *U : N->uses()) { + for (auto *U : N->users()) { auto UId = U->getNodeId(); if (UId > 0) { InvalidateNodeId(U); @@ -2329,19 +2330,6 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, Ops.push_back(handle.getValue()); } -/// findGlueUse - Return use of MVT::Glue value produced by the specified -/// SDNode. -/// -static SDNode *findGlueUse(SDNode *N) { - unsigned FlagResNo = N->getNumValues()-1; - for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { - SDUse &Use = I.getUse(); - if (Use.getResNo() == FlagResNo) - return Use.getUser(); - } - return nullptr; -} - /// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path /// beyond "ImmedUse". We may ignore chains as they are checked separately. 
static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse, @@ -2445,7 +2433,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // glueged set. EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Glue) { - SDNode *GU = findGlueUse(Root); + SDNode *GU = Root->getGluedUser(); if (!GU) break; Root = GU; @@ -3805,8 +3793,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) { unsigned NNonChainUses = 0; SDNode *NS = NodeStack[i].getNode(); - for (auto UI = NS->use_begin(), UE = NS->use_end(); UI != UE; ++UI) - if (UI.getUse().getValueType() != MVT::Other) + for (const SDUse &U : NS->uses()) + if (U.getValueType() != MVT::Other) if (++NNonChainUses > 1) { HasMultipleUses = true; break; @@ -4269,11 +4257,12 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, CurDAG->setNodeMemRefs(Res, FilteredMemRefs); } - LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs() - << " Dropping mem operands\n"; - dbgs() << " " << (IsMorphNodeTo ? "Morphed" : "Created") - << " node: "; - Res->dump(CurDAG);); + LLVM_DEBUG({ + if (!MatchedMemRefs.empty() && Res->memoperands_empty()) + dbgs() << " Dropping mem operands\n"; + dbgs() << " " << (IsMorphNodeTo ? "Morphed" : "Created") << " node: "; + Res->dump(CurDAG); + }); // If this was a MorphNodeTo then we're completely done! if (IsMorphNodeTo) { @@ -4394,8 +4383,10 @@ bool SelectionDAGISel::mayRaiseFPException(SDNode *N) const { // For ISD opcodes, only StrictFP opcodes may raise an FP // exception. 
- if (N->isTargetOpcode()) - return N->isTargetStrictFPOpcode(); + if (N->isTargetOpcode()) { + const SelectionDAGTargetInfo &TSI = CurDAG->getSelectionDAGInfo(); + return TSI.mayRaiseFPException(N->getOpcode()); + } return N->isStrictFPOpcode(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp index 3a2df6f60593..0f3b36658f10 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp @@ -15,3 +15,9 @@ using namespace llvm; SelectionDAGTargetInfo::~SelectionDAGTargetInfo() = default; + +bool SelectionDAGTargetInfo::mayRaiseFPException(unsigned Opcode) const { + // FIXME: All target memory opcodes are currently automatically considered + // to possibly raise FP exceptions. See rev. 63336795. + return isTargetStrictFPOpcode(Opcode) || isTargetMemoryOpcode(Opcode); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index eeba4b7d20f9..e87d809f88eb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -11889,6 +11889,47 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, return true; } + // Special case: expand i1 comparisons using logical operations. 
+ if (OpVT == MVT::i1) { + SDValue Ret; + switch (CCCode) { + default: + llvm_unreachable("Unknown integer setcc!"); + case ISD::SETEQ: // X == Y --> ~(X ^ Y) + Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS), + MVT::i1); + break; + case ISD::SETNE: // X != Y --> (X ^ Y) + Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS); + break; + case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y + case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y + Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS, + DAG.getNOT(dl, LHS, MVT::i1)); + break; + case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X + case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X + Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS, + DAG.getNOT(dl, RHS, MVT::i1)); + break; + case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y + case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y + Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS, + DAG.getNOT(dl, LHS, MVT::i1)); + break; + case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X + case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X + Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS, + DAG.getNOT(dl, RHS, MVT::i1)); + break; + } + + LHS = DAG.getZExtOrTrunc(Ret, dl, VT); + RHS = SDValue(); + CC = SDValue(); + return true; + } + ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index 2742437ceb58..5029f45def22 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -986,6 +986,7 @@ bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) { !(MF.getFunction().hasFnAttribute(Attribute::SanitizeAddress) || MF.getFunction().hasFnAttribute(Attribute::SanitizeThread) || MF.getFunction().hasFnAttribute(Attribute::SanitizeMemory) || + MF.getFunction().hasFnAttribute(Attribute::SanitizeType) || 
MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress)); // If EnableShrinkWrap is set, it takes precedence on whatever the // target sets. The rational is that we assume we want to test diff --git a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index 687acd90b405..843742284314 100644 --- a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -106,8 +106,6 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { if (!EnablePatchPointLiveness) return false; - LLVM_DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " - << MF.getName() << " **********\n"); TRI = MF.getSubtarget().getRegisterInfo(); ++NumStackMapFuncVisited; @@ -121,6 +119,8 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { /// Performs the actual liveness calculation for the function. bool StackMapLiveness::calculateLiveness(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " + << MF.getName() << " **********\n"); bool HasChanged = false; // For all basic blocks in the function. for (auto &MBB : MF) { diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index 81b288df3b07..7480963c1f52 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -268,12 +268,6 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, if (MOI->isImplicit()) return ++MOI; - if (MOI->isUndef()) { - // Record `undef` register as constant. Use same value as ISel uses. 
- Locs.emplace_back(Location::Constant, sizeof(int64_t), 0, 0xFEFEFEFE); - return ++MOI; - } - assert(MOI->getReg().isPhysical() && "Virtreg operands should have been rewritten before now."); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg()); diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index cdc530621de6..4dc5dc87ba3f 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -149,7 +149,7 @@ namespace { AU.setPreservesCFG(); AU.addRequired<SlotIndexesWrapperPass>(); AU.addPreserved<SlotIndexesWrapperPass>(); - AU.addRequired<LiveStacks>(); + AU.addRequired<LiveStacksWrapperLegacy>(); AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>(); AU.addPreservedID(MachineDominatorsID); @@ -185,7 +185,7 @@ char &llvm::StackSlotColoringID = StackSlotColoring::ID; INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE, "Stack Slot Coloring", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE, "Stack Slot Coloring", false, false) @@ -522,7 +522,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { MFI = &MF.getFrameInfo(); TII = MF.getSubtarget().getInstrInfo(); - LS = &getAnalysis<LiveStacks>(); + LS = &getAnalysis<LiveStacksWrapperLegacy>().getLS(); MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI(); diff --git a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp index 74a94d6110f4..decffdc7dfe4 100644 --- a/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp +++ b/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp @@ -259,7 +259,7 @@ void 
SwiftErrorValueTracking::propagateVRegs() { for (const auto &Use : VRegUpwardsUse) { const MachineBasicBlock *UseBB = Use.first.first; Register VReg = Use.second; - if (!MRI.def_begin(VReg).atEnd()) + if (!MRI.def_empty(VReg)) continue; #ifdef EXPENSIVE_CHECKS diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 392cfbdd2127..3b0e9c7526fd 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1697,12 +1697,9 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, } } -/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate -/// function arguments in the caller parameter area. This is the actual -/// alignment, not its logarithm. -uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty, - const DataLayout &DL) const { - return DL.getABITypeAlign(Ty).value(); +Align TargetLoweringBase::getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const { + return DL.getABITypeAlign(Ty); } bool TargetLoweringBase::allowsMemoryAccessForAlignment( @@ -1753,7 +1750,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty, const MachineMemOperand &MMO, unsigned *Fast) const { - EVT VT = getApproximateEVTForLLT(Ty, DL, Context); + EVT VT = getApproximateEVTForLLT(Ty, Context); return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(), MMO.getFlags(), Fast); } diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index ce50a3c19ffe..be243c0e74e9 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -413,7 +413,8 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( } void TargetLoweringObjectFileELF::emitPersonalityValue( - MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym) const { + MCStreamer &Streamer, const DataLayout &DL, const 
MCSymbol *Sym, + const MachineModuleInfo *MMI) const { SmallString<64> NameData("DW.ref."); NameData += Sym->getName(); MCSymbolELF *Label = @@ -431,7 +432,13 @@ void TargetLoweringObjectFileELF::emitPersonalityValue( Streamer.emitELFSize(Label, E); Streamer.emitLabel(Label); - Streamer.emitSymbolValue(Sym, Size); + emitPersonalityValueImpl(Streamer, DL, Sym, MMI); +} + +void TargetLoweringObjectFileELF::emitPersonalityValueImpl( + MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym, + const MachineModuleInfo *MMI) const { + Streamer.emitSymbolValue(Sym, DL.getPointerSize()); } const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index d407e9f0871d..5c055896130a 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -113,8 +113,6 @@ static cl::opt<bool> EnableImplicitNullChecks( static cl::opt<bool> DisableMergeICmps("disable-mergeicmps", cl::desc("Disable MergeICmps Pass"), cl::init(false), cl::Hidden); -static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, - cl::desc("Print LLVM IR produced by the loop-reduce pass")); static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, cl::desc("Print LLVM IR input to isel pass")); @@ -503,7 +501,6 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() { SET_BOOLEAN_OPTION(DisableCGP) SET_BOOLEAN_OPTION(DisablePartialLibcallInlining) SET_BOOLEAN_OPTION(DisableSelectOptimize) - SET_BOOLEAN_OPTION(PrintLSR) SET_BOOLEAN_OPTION(PrintISelInput) SET_BOOLEAN_OPTION(DebugifyAndStripAll) SET_BOOLEAN_OPTION(DebugifyCheckAndStripAll) @@ -836,9 +833,6 @@ void TargetPassConfig::addIRPasses() { addPass(createLoopStrengthReducePass()); if (EnableLoopTermFold) addPass(createLoopTermFoldPass()); - if (PrintLSR) - addPass(createPrintFunctionPass(dbgs(), - "\n\n*** Code after LSR ***\n")); } // The MergeICmpsPass tries to create memcmp calls by grouping sequences of 
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 032f1a33e75c..af62623ece6a 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -201,44 +201,85 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { return nullptr; } -/// getMinimalPhysRegClass - Returns the Register Class of a physical -/// register of the given type, picking the most sub register class of -/// the right type that contains this physreg. -const TargetRegisterClass * -TargetRegisterInfo::getMinimalPhysRegClass(MCRegister reg, MVT VT) const { - assert(Register::isPhysicalRegister(reg) && +template <typename TypeT> +static const TargetRegisterClass * +getMinimalPhysRegClass(const TargetRegisterInfo *TRI, MCRegister Reg, + TypeT Ty) { + static_assert(std::is_same_v<TypeT, MVT> || std::is_same_v<TypeT, LLT>); + assert(Register::isPhysicalRegister(Reg) && "reg must be a physical register"); + bool IsDefault = [&]() { + if constexpr (std::is_same_v<TypeT, MVT>) + return Ty == MVT::Other; + else + return !Ty.isValid(); + }(); + // Pick the most sub register class of the right type that contains // this physreg. 
- const TargetRegisterClass* BestRC = nullptr; - for (const TargetRegisterClass* RC : regclasses()) { - if ((VT == MVT::Other || isTypeLegalForClass(*RC, VT)) && - RC->contains(reg) && (!BestRC || BestRC->hasSubClass(RC))) + const TargetRegisterClass *BestRC = nullptr; + for (const TargetRegisterClass *RC : TRI->regclasses()) { + if ((IsDefault || TRI->isTypeLegalForClass(*RC, Ty)) && RC->contains(Reg) && + (!BestRC || BestRC->hasSubClass(RC))) BestRC = RC; } - assert(BestRC && "Couldn't find the register class"); + if constexpr (std::is_same_v<TypeT, MVT>) + assert(BestRC && "Couldn't find the register class"); return BestRC; } -const TargetRegisterClass * -TargetRegisterInfo::getMinimalPhysRegClassLLT(MCRegister reg, LLT Ty) const { - assert(Register::isPhysicalRegister(reg) && - "reg must be a physical register"); +template <typename TypeT> +static const TargetRegisterClass * +getCommonMinimalPhysRegClass(const TargetRegisterInfo *TRI, MCRegister Reg1, + MCRegister Reg2, TypeT Ty) { + static_assert(std::is_same_v<TypeT, MVT> || std::is_same_v<TypeT, LLT>); + assert(Register::isPhysicalRegister(Reg1) && + Register::isPhysicalRegister(Reg2) && + "Reg1/Reg2 must be a physical register"); + + bool IsDefault = [&]() { + if constexpr (std::is_same_v<TypeT, MVT>) + return Ty == MVT::Other; + else + return !Ty.isValid(); + }(); // Pick the most sub register class of the right type that contains // this physreg. 
const TargetRegisterClass *BestRC = nullptr; - for (const TargetRegisterClass *RC : regclasses()) { - if ((!Ty.isValid() || isTypeLegalForClass(*RC, Ty)) && RC->contains(reg) && - (!BestRC || BestRC->hasSubClass(RC))) + for (const TargetRegisterClass *RC : TRI->regclasses()) { + if ((IsDefault || TRI->isTypeLegalForClass(*RC, Ty)) && + RC->contains(Reg1, Reg2) && (!BestRC || BestRC->hasSubClass(RC))) BestRC = RC; } + if constexpr (std::is_same_v<TypeT, MVT>) + assert(BestRC && "Couldn't find the register class"); return BestRC; } +const TargetRegisterClass * +TargetRegisterInfo::getMinimalPhysRegClass(MCRegister Reg, MVT VT) const { + return ::getMinimalPhysRegClass(this, Reg, VT); +} + +const TargetRegisterClass *TargetRegisterInfo::getCommonMinimalPhysRegClass( + MCRegister Reg1, MCRegister Reg2, MVT VT) const { + return ::getCommonMinimalPhysRegClass(this, Reg1, Reg2, VT); +} + +const TargetRegisterClass * +TargetRegisterInfo::getMinimalPhysRegClassLLT(MCRegister Reg, LLT Ty) const { + return ::getMinimalPhysRegClass(this, Reg, Ty); +} + +const TargetRegisterClass *TargetRegisterInfo::getCommonMinimalPhysRegClassLLT( + MCRegister Reg1, MCRegister Reg2, LLT Ty) const { + return ::getCommonMinimalPhysRegClass(this, Reg1, Reg2, Ty); +} + /// getAllocatableSetForRC - Toggle the bits that represent allocatable /// registers for the specific register class. 
static void getAllocatableSetForRC(const MachineFunction &MF, diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp index 6c97bc0568bd..cd396e6a619a 100644 --- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -16,11 +16,12 @@ using namespace llvm; TargetSubtargetInfo::TargetSubtargetInfo( const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, - ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD, - const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, - const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, - const unsigned *FP) - : MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {} + ArrayRef<StringRef> PN, ArrayRef<SubtargetFeatureKV> PF, + ArrayRef<SubtargetSubTypeKV> PD, const MCWriteProcResEntry *WPR, + const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, + const InstrStage *IS, const unsigned *OC, const unsigned *FP) + : MCSubtargetInfo(TT, CPU, TuneCPU, FS, PN, PF, PD, WPR, WL, RA, IS, OC, + FP) {} TargetSubtargetInfo::~TargetSubtargetInfo() = default; diff --git a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp index 0cddf59d0ca2..2fd1dd5f84a9 100644 --- a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp +++ b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp @@ -297,9 +297,6 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { HighPressureSets[i] = ((float)MaxPressure[i] > ((float)Limit * RPThreshold)); } - - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); } VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel( @@ -954,7 +951,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { return nullptr; } SUnit *SU; - if (ForceTopDown) { + if (PreRADirection == MISched::TopDown) { SU = Top.pickOnlyChoice(); if (!SU) { SchedCandidate TopCand; @@ -965,7 +962,7 @@ SUnit 
*ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { SU = TopCand.SU; } IsTopNode = true; - } else if (ForceBottomUp) { + } else if (PreRADirection == MISched::BottomUp) { SU = Bot.pickOnlyChoice(); if (!SU) { SchedCandidate BotCand; diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp index 2084e68c16e2..d3f87f062622 100644 --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -60,7 +60,7 @@ STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting"); char VirtRegMapWrapperLegacy::ID = 0; INITIALIZE_PASS(VirtRegMapWrapperLegacy, "virtregmap", "Virtual Register Map", - false, false) + false, true) void VirtRegMap::init(MachineFunction &mf) { MRI = &mf.getRegInfo(); @@ -88,7 +88,9 @@ void VirtRegMap::assignVirt2Phys(Register virtReg, MCPhysReg physReg) { assert(!Virt2PhysMap[virtReg] && "attempt to assign physical register to already mapped " "virtual register"); - assert(!getRegInfo().isReserved(physReg) && + assert((!getRegInfo().isReserved(physReg) || + MF->getProperties().hasProperty( + MachineFunctionProperties::Property::FailedRegAlloc)) && "Attempt to map virtReg to a reserved physReg"); Virt2PhysMap[virtReg] = physReg; } @@ -253,7 +255,7 @@ INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveDebugVariablesWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy) -INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter", "Virtual Register Rewriter", false, false) @@ -265,8 +267,8 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<SlotIndexesWrapperPass>(); AU.addPreserved<SlotIndexesWrapperPass>(); AU.addRequired<LiveDebugVariablesWrapperLegacy>(); - AU.addRequired<LiveStacks>(); - AU.addPreserved<LiveStacks>(); + 
AU.addRequired<LiveStacksWrapperLegacy>(); + AU.addPreserved<LiveStacksWrapperLegacy>(); AU.addRequired<VirtRegMapWrapperLegacy>(); AU.addRequired<LiveRegMatrixWrapperLegacy>(); @@ -615,7 +617,10 @@ void VirtRegRewriter::rewrite() { assert(Register(PhysReg).isPhysical()); RewriteRegs.insert(PhysReg); - assert(!MRI->isReserved(PhysReg) && "Reserved register assignment"); + assert((!MRI->isReserved(PhysReg) || + MF->getProperties().hasProperty( + MachineFunctionProperties::Property::FailedRegAlloc)) && + "Reserved register assignment"); // Preserve semantics of sub-register operands. unsigned SubReg = MO.getSubReg(); diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp index 8f718d884cd0..9035e10716c3 100644 --- a/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -175,7 +176,7 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { auto *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr; MachineDominatorTree ComputedMDT; if (!MDT) { - ComputedMDT.getBase().recalculate(MF); + ComputedMDT.recalculate(MF); MDT = &ComputedMDT; } @@ -184,7 +185,7 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { auto *MLI = MLIWrapper ? 
&MLIWrapper->getLI() : nullptr; MachineLoopInfo ComputedMLI; if (!MLI) { - ComputedMLI.analyze(MDT->getBase()); + ComputedMLI.analyze(*MDT); MLI = &ComputedMLI; } @@ -211,8 +212,12 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { auto &FirstMI = *FirstMBB.begin(); if (!MF.getSubtarget().isXRaySupported()) { - FirstMI.emitError("An attempt to perform XRay instrumentation for an" - " unsupported target."); + + const Function &Fn = FirstMBB.getParent()->getFunction(); + Fn.getContext().diagnose(DiagnosticInfoUnsupported( + Fn, "An attempt to perform XRay instrumentation for an" + " unsupported target.")); + return false; } @@ -233,10 +238,13 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { case Triple::ArchType::mips: case Triple::ArchType::mipsel: case Triple::ArchType::mips64: - case Triple::ArchType::mips64el: { + case Triple::ArchType::mips64el: + case Triple::ArchType::riscv32: + case Triple::ArchType::riscv64: { // For the architectures which don't have a single return instruction InstrumentationOptions op; - op.HandleTailcall = false; + // RISC-V supports patching tail calls. + op.HandleTailcall = MF.getTarget().getTargetTriple().isRISCV(); op.HandleAllReturns = true; prependRetWithPatchableExit(MF, TII, op); break; |
