diff options
| author | Oliver Hunt <oliver@apple.com> | 2025-10-20 01:38:07 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-10-20 01:38:07 -0700 |
| commit | 7de01aa5d0418bd4e8db2917f831e7383c6863bb (patch) | |
| tree | 1db866f57c2236573cd4b4c2d141d6d420f87a92 /llvm/lib/CodeGen/GlobalISel | |
| parent | 6bc540043d4c3fed8f44c8f6de86be0d1740582e (diff) | |
| parent | 46a866ab7735aaa0f89fde209d516271c4825c49 (diff) | |
Merge branch 'main' into users/ojhunt/ptrauth-additionsusers/ojhunt/ptrauth-additions
Diffstat (limited to 'llvm/lib/CodeGen/GlobalISel')
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 70 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 93 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 69 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/Utils.cpp | 3 |
8 files changed, 239 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 0ebee2cfd868..b425b952bfc1 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1215,7 +1215,7 @@ bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const { LLT MemTy = LdSt.getMMO().getMemoryType(); SmallVector<LegalityQuery::MemDesc, 2> MemDescrs( {{MemTy, MemTy.getSizeInBits().getKnownMinValue(), - AtomicOrdering::NotAtomic}}); + AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic}}); unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode()); SmallVector<LLT> OpTys; if (IndexedOpc == TargetOpcode::G_INDEXED_STORE) @@ -1728,6 +1728,7 @@ static APFloat constantFoldFpUnary(const MachineInstr &MI, Result.clearSign(); return Result; } + case TargetOpcode::G_FPEXT: case TargetOpcode::G_FPTRUNC: { bool Unused; LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); @@ -6745,6 +6746,73 @@ bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI, return MatchNaN(1) || MatchNaN(2); } +// Combine multiple FDIVs with the same divisor into multiple FMULs by the +// reciprocal. +// E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip) +bool CombinerHelper::matchRepeatedFPDivisor( + MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const { + assert(MI.getOpcode() == TargetOpcode::G_FDIV); + + Register X = MI.getOperand(1).getReg(); + Register Y = MI.getOperand(2).getReg(); + + if (!MI.getFlag(MachineInstr::MIFlag::FmArcp)) + return false; + + // Skip if current node is a reciprocal/fneg-reciprocal. + auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI); + if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0))) + return false; + + // Exit early if the target does not want this transform or if there can't + // possibly be enough uses of the divisor to make the transform worthwhile. + unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors(); + if (!MinUses) + return false; + + // Find all FDIV users of the same divisor. For the moment we limit all + // instructions to a single BB and use the first Instr in MatchInfo as the + // dominating position. + MatchInfo.push_back(&MI); + for (auto &U : MRI.use_nodbg_instructions(Y)) { + if (&U == &MI || U.getParent() != MI.getParent()) + continue; + if (U.getOpcode() == TargetOpcode::G_FDIV && + U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y) { + // This division is eligible for optimization only if global unsafe math + // is enabled or if this division allows reciprocal formation. + if (U.getFlag(MachineInstr::MIFlag::FmArcp)) { + MatchInfo.push_back(&U); + if (dominates(U, *MatchInfo[0])) + std::swap(MatchInfo[0], MatchInfo.back()); + } + } + } + + // Now that we have the actual number of divisor uses, make sure it meets + // the minimum threshold specified by the target. + return MatchInfo.size() >= MinUses; +} + +void CombinerHelper::applyRepeatedFPDivisor( + SmallVector<MachineInstr *> &MatchInfo) const { + // Generate the new div at the position of the first instruction, that we have + // ensured will dominate all other instructions. + Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]); + LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg()); + auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), + MatchInfo[0]->getOperand(2).getReg(), + MatchInfo[0]->getFlags()); + + // Replace all found div's with fmul instructions. + for (MachineInstr *MI : MatchInfo) { + Builder.setInsertPt(*MI->getParent(), MI); + Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(), + Div->getOperand(0).getReg(), MI->getFlags()); + MI->eraseFromParent(); + } +} + bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) const { assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD"); Register LHS = MI.getOperand(1).getReg(); diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 3f6813e52a1c..04d93098a526 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -112,7 +112,7 @@ APInt GISelValueTracking::getKnownOnes(Register R) { return getKnownBits(R).One; } -LLVM_ATTRIBUTE_UNUSED static void +[[maybe_unused]] static void dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth << "] Computed for: " << MI << "[" << Depth << "] Known: 0x" @@ -344,6 +344,22 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, Known = KnownBits::mul(Known, Known2); break; } + case TargetOpcode::G_UMULH: { + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + Known = KnownBits::mulhu(Known, Known2); + break; + } + case TargetOpcode::G_SMULH: { + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + Known = KnownBits::mulhs(Known, Known2); + break; + } case TargetOpcode::G_SELECT: { computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(), Known, DemandedElts, Depth + 1); @@ -1959,6 +1975,81 @@ unsigned GISelValueTracking::computeNumSignBits(Register R, break; } + case TargetOpcode::G_SUB: { + Register Src2 = MI.getOperand(2).getReg(); + unsigned Src2NumSignBits = + computeNumSignBits(Src2, DemandedElts, Depth + 1); + if (Src2NumSignBits == 1) + return 1; // Early out. + + // Handle NEG. + Register Src1 = MI.getOperand(1).getReg(); + KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth); + if (Known1.isZero()) { + KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((Known2.Zero | 1).isAllOnes()) + return TyBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has, at worst, the same number of sign bits as + // the input. + if (Known2.isNonNegative()) { + FirstAnswer = Src2NumSignBits; + break; + } + + // Otherwise, we treat this like a SUB. + } + + unsigned Src1NumSignBits = + computeNumSignBits(Src1, DemandedElts, Depth + 1); + if (Src1NumSignBits == 1) + return 1; // Early Out. + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1; + break; + } + case TargetOpcode::G_ADD: { + Register Src2 = MI.getOperand(2).getReg(); + unsigned Src2NumSignBits = + computeNumSignBits(Src2, DemandedElts, Depth + 1); + if (Src2NumSignBits <= 2) + return 1; // Early out. + + Register Src1 = MI.getOperand(1).getReg(); + unsigned Src1NumSignBits = + computeNumSignBits(Src1, DemandedElts, Depth + 1); + if (Src1NumSignBits == 1) + return 1; // Early Out. + + // Special case decrementing a value (ADD X, -1): + KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth); + if (Known2.isAllOnes()) { + KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((Known1.Zero | 1).isAllOnes()) + return TyBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (Known1.isNonNegative()) { + FirstAnswer = Src1NumSignBits; + break; + } + + // Otherwise, we treat this like an ADD. + } + + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1; + break; + } case TargetOpcode::G_FCMP: case TargetOpcode::G_ICMP: { bool IsFP = Opcode == TargetOpcode::G_FCMP; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 56e13f075aaa..884c3f1692e9 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2362,6 +2362,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MachineInstr::copyFlagsFromInstruction(CI)); return true; } + case Intrinsic::modf: { + ArrayRef<Register> VRegs = getOrCreateVRegs(CI); + MIRBuilder.buildModf(VRegs[0], VRegs[1], + getOrCreateVReg(*CI.getArgOperand(0)), + MachineInstr::copyFlagsFromInstruction(CI)); + return true; + } case Intrinsic::sincos: { ArrayRef<Register> VRegs = getOrCreateVRegs(CI); MIRBuilder.buildFSincos(VRegs[0], VRegs[1], diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp index 25c1db91b05d..ded4df4edc14 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -55,12 +55,10 @@ LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx, } LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx, - LLT NewEltTy) { + ElementCount EC) { return [=](const LegalityQuery &Query) { const LLT OldTy = Query.Types[TypeIdx]; - ElementCount NewEltCount = NewEltTy.isVector() ? NewEltTy.getElementCount() - : ElementCount::getFixed(1); - return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount)); + return std::make_pair(TypeIdx, OldTy.changeElementCount(EC)); }; } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 03dfa6f3f243..38ec83f81f47 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -471,6 +471,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(TANH_F); case TargetOpcode::G_FSINCOS: RTLIBCASE(SINCOS_F); + case TargetOpcode::G_FMODF: + RTLIBCASE(MODF_F); case TargetOpcode::G_FLOG10: RTLIBCASE(LOG10_F); case TargetOpcode::G_FLOG: @@ -703,6 +705,46 @@ LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall( } LegalizerHelper::LegalizeResult +LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, + unsigned Size, Type *OpType, + LostDebugLocObserver &LocObserver) { + MachineFunction &MF = MIRBuilder.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + Register DstFrac = MI.getOperand(0).getReg(); + Register DstInt = MI.getOperand(1).getReg(); + Register Src = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(DstFrac); + + int MemSize = DstTy.getSizeInBytes(); + Align Alignment = getStackTemporaryAlignment(DstTy); + const DataLayout &DL = MIRBuilder.getDataLayout(); + unsigned AddrSpace = DL.getAllocaAddrSpace(); + MachinePointerInfo PtrInfo; + + Register StackPtrInt = + createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo) + .getReg(0); + + auto &Ctx = MF.getFunction().getContext(); + auto LibcallResult = createLibcall( + MIRBuilder, getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0}, + {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}}, + LocObserver, &MI); + + if (LibcallResult != LegalizeResult::Legalized) + return LegalizerHelper::UnableToLegalize; + + MachineMemOperand *LoadMMOInt = MF.getMachineMemOperand( + PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment); + + MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt); + MI.eraseFromParent(); + + return LegalizerHelper::Legalized; +} + +LegalizerHelper::LegalizeResult llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) { auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); @@ -1341,6 +1383,16 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { } return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver); } + case TargetOpcode::G_FMODF: { + LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = LLTy.getSizeInBits(); + Type *HLTy = getFloatTypeForLLT(Ctx, LLTy); + if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) { + LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); + return UnableToLegalize; + } + return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver); + } case TargetOpcode::G_LROUND: case TargetOpcode::G_LLROUND: case TargetOpcode::G_INTRINSIC_LRINT: @@ -3240,8 +3292,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { if (TypeIdx != 2) return UnableToLegalize; Observer.changingInstr(MI); - // TODO: Probably should be zext - widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); Observer.changedInstr(MI); return Legalized; } @@ -3273,8 +3324,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { if (TypeIdx == 2) { Observer.changingInstr(MI); - // TODO: Probably should be zext - widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT); + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT); Observer.changedInstr(MI); return Legalized; } @@ -3333,6 +3383,16 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_FMODF: { + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT); + + widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt()); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_FPOWI: case TargetOpcode::G_FLDEXP: case TargetOpcode::G_STRICT_FLDEXP: { @@ -5472,6 +5532,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_LROUND: case G_LLROUND: case G_INTRINSIC_TRUNC: + case G_FMODF: case G_FCOS: case G_FSIN: case G_FTAN: diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 477e5c1559b2..c2d474fdde69 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -34,7 +34,7 @@ cl::opt<bool> llvm::DisableGISelLegalityCheck( cl::desc("Don't verify that MIR is fully legal between GlobalISel passes"), cl::Hidden); -cl::opt<bool> VerboseVerifyLegalizerInfo( +static cl::opt<bool> VerboseVerifyLegalizerInfo( "verbose-gisel-verify-legalizer-info", cl::desc("Print more information to dbgs about GlobalISel legalizer rules " "being verified"), diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index b2f84351c9a9..cdc1f64dc439 100644 --- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -958,7 +958,8 @@ void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) { for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) { LLT Ty = LLT::scalar(Size); SmallVector<LegalityQuery::MemDesc, 2> MemDescrs( - {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}}); + {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic, + AtomicOrdering::NotAtomic}}); SmallVector<LLT> StoreTys({Ty, PtrTy}); LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs); LegalizeActionStep ActionStep = LI.getAction(Q); diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 055fdc6ad721..ca82857319ab 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -818,8 +818,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, if (!DefMI) return false; - const TargetMachine& TM = DefMI->getMF()->getTarget(); - if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) + if (DefMI->getFlag(MachineInstr::FmNoNans)) return true; // If the value is a constant, we can obviously see if it is a NaN or not. |
