diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64FrameLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 926 |
1 files changed, 87 insertions, 839 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 7725fa4f1ccb..175b5e04d82f 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -211,6 +211,7 @@ #include "AArch64FrameLowering.h" #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" +#include "AArch64PrologueEpilogue.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" @@ -218,7 +219,6 @@ #include "Utils/AArch64SMEAttributes.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/CFIInstBuilder.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -293,8 +293,6 @@ static cl::opt<bool> DisableMultiVectorSpillFill( cl::desc("Disable use of LD/ST pairs for SME2 or SVE2p1"), cl::init(false), cl::Hidden); -STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); - /// Returns how much of the incoming argument stack area (in bytes) we should /// clean up in an epilogue. For the C calling convention this will be 0, for /// guaranteed tail call conventions it can be positive (a normal return or a @@ -328,23 +326,20 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF, return ArgumentPopSize; } -static bool produceCompactUnwindFrame(MachineFunction &MF); -static bool needsWinCFI(const MachineFunction &MF); -static StackOffset getSVEStackSize(const MachineFunction &MF); -static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB, - bool HasCall = false); -static bool requiresSaveVG(const MachineFunction &MF); +static bool produceCompactUnwindFrame(const AArch64FrameLowering &, + MachineFunction &MF); // Conservatively, returns true if the function is likely to have an SVE vectors // on the stack. This function is safe to be called before callee-saves or // object offsets have been determined. -static bool isLikelyToHaveSVEStack(const MachineFunction &MF) { +static bool isLikelyToHaveSVEStack(const AArch64FrameLowering &AFL, + const MachineFunction &MF) { auto *AFI = MF.getInfo<AArch64FunctionInfo>(); if (AFI->isSVECC()) return true; if (AFI->hasCalculatedStackSizeSVE()) - return bool(getSVEStackSize(MF)); + return bool(AFL.getSVEStackSize(MF)); const MachineFrameInfo &MFI = MF.getFrameInfo(); for (int FI = MFI.getObjectIndexBegin(); FI < MFI.getObjectIndexEnd(); FI++) { @@ -372,7 +367,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog( return false; // TODO: SVE is not supported yet. - if (isLikelyToHaveSVEStack(MF)) + if (isLikelyToHaveSVEStack(*this, MF)) return false; // Bail on stack adjustment needed on return for simplicity. @@ -409,7 +404,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog( /// Returns true if CSRs should be paired. bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const { - return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF); + return produceCompactUnwindFrame(*this, MF) || homogeneousPrologEpilog(MF); } /// This is the biggest offset to the stack pointer we can encode in aarch64 @@ -451,11 +446,10 @@ AArch64FrameLowering::getStackIDForScalableVectors() const { return TargetStackID::ScalableVector; } -/// Returns the size of the fixed object area (allocated next to sp on entry) -/// On Win64 this may include a var args area and an UnwindHelp object for EH. -static unsigned getFixedObjectSize(const MachineFunction &MF, - const AArch64FunctionInfo *AFI, bool IsWin64, - bool IsFunclet) { +unsigned +AArch64FrameLowering::getFixedObjectSize(const MachineFunction &MF, + const AArch64FunctionInfo *AFI, + bool IsWin64, bool IsFunclet) const { assert(AFI->getTailCallReservedStack() % 16 == 0 && "Tail call reserved stack must be aligned to 16 bytes"); if (!IsWin64 || IsFunclet) { @@ -494,7 +488,8 @@ static unsigned getFixedObjectSize(const MachineFunction &MF, } /// Returns the size of the entire SVE stackframe (calleesaves + spills). -static StackOffset getSVEStackSize(const MachineFunction &MF) { +StackOffset +AArch64FrameLowering::getSVEStackSize(const MachineFunction &MF) const { const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE()); } @@ -683,70 +678,6 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr( return MBB.erase(I); } -void AArch64FrameLowering::emitCalleeSavedGPRLocations( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = MF.getFrameInfo(); - - const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); - if (CSI.empty()) - return; - - CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); - for (const auto &Info : CSI) { - unsigned FrameIdx = Info.getFrameIdx(); - if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) - continue; - - assert(!Info.isSpilledToReg() && "Spilling to registers not implemented"); - int64_t Offset = MFI.getObjectOffset(FrameIdx) - getOffsetOfLocalArea(); - CFIBuilder.buildOffset(Info.getReg(), Offset); - } -} - -void AArch64FrameLowering::emitCalleeSavedSVELocations( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = MF.getFrameInfo(); - - // Add callee saved registers to move list. - const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); - if (CSI.empty()) - return; - - const TargetSubtargetInfo &STI = MF.getSubtarget(); - const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>(); - CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); - - std::optional<int64_t> IncomingVGOffsetFromDefCFA; - if (requiresSaveVG(MF)) { - auto IncomingVG = *find_if( - reverse(CSI), [](auto &Info) { return Info.getReg() == AArch64::VG; }); - IncomingVGOffsetFromDefCFA = - MFI.getObjectOffset(IncomingVG.getFrameIdx()) - getOffsetOfLocalArea(); - } - - for (const auto &Info : CSI) { - if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector) - continue; - - // Not all unwinders may know about SVE registers, so assume the lowest - // common denominator. - assert(!Info.isSpilledToReg() && "Spilling to registers not implemented"); - MCRegister Reg = Info.getReg(); - if (!static_cast<const AArch64RegisterInfo &>(TRI).regNeedsCFI(Reg, Reg)) - continue; - - StackOffset Offset = - StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) - - StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI)); - - CFIBuilder.insertCFIInst( - createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA)); - } -} - void AArch64FrameLowering::resetCFIToInitialState( MachineBasicBlock &MBB) const { @@ -1088,8 +1019,8 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, } } -static bool windowsRequiresStackProbe(const MachineFunction &MF, - uint64_t StackSizeInBytes) { +bool AArch64FrameLowering::windowsRequiresStackProbe( + const MachineFunction &MF, uint64_t StackSizeInBytes) const { const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>(); // TODO: When implementing stack protectors, take that into account @@ -1108,19 +1039,9 @@ static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs, LiveRegs.addReg(CSRegs[i]); } -// Find a scratch register that we can use at the start of the prologue to -// re-align the stack pointer. We avoid using callee-save registers since they -// may appear to be free when this is called from canUseAsPrologue (during -// shrink wrapping), but then no longer be free when this is called from -// emitPrologue. -// -// FIXME: This is a bit conservative, since in the above case we could use one -// of the callee-save registers as a scratch temp to re-align the stack pointer, -// but we would then have to make sure that we were in fact saving at least one -// callee-save register in the prologue, which is additional complexity that -// doesn't seem worth the benefit. -static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB, - bool HasCall) { +Register +AArch64FrameLowering::findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB, + bool HasCall) const { MachineFunction *MF = MBB->getParent(); // If MBB is an entry block, use X9 as the scratch register @@ -1193,13 +1114,14 @@ bool AArch64FrameLowering::canUseAsPrologue( return true; } -static bool needsWinCFI(const MachineFunction &MF) { +bool AArch64FrameLowering::needsWinCFI(const MachineFunction &MF) const { const Function &F = MF.getFunction(); return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && F.needsUnwindTableEntry(); } -static bool shouldSignReturnAddressEverywhere(const MachineFunction &MF) { +bool AArch64FrameLowering::shouldSignReturnAddressEverywhere( + const MachineFunction &MF) const { // FIXME: With WinCFI, extra care should be taken to place SEH_PACSignLR // and SEH_EpilogEnd instructions in the correct order. if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) @@ -1475,13 +1397,13 @@ static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI, ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize); } -bool requiresGetVGCall(MachineFunction &MF) { - AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); +bool AArch64FrameLowering::requiresGetVGCall(const MachineFunction &MF) const { + auto *AFI = MF.getInfo<AArch64FunctionInfo>(); return AFI->hasStreamingModeChanges() && !MF.getSubtarget<AArch64Subtarget>().hasSVE(); } -static bool requiresSaveVG(const MachineFunction &MF) { +bool AArch64FrameLowering::requiresSaveVG(const MachineFunction &MF) const { const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); if (!AFI->needsDwarfUnwindInfo(MF) || !AFI->hasStreamingModeChanges()) return false; @@ -1499,8 +1421,8 @@ static bool matchLibcall(const TargetLowering &TLI, const MachineOperand &MO, StringRef(TLI.getLibcallName(LC)) == MO.getSymbolName(); } -bool isVGInstruction(MachineBasicBlock::iterator MBBI, - const TargetLowering &TLI) { +bool AArch64FrameLowering::isVGInstruction(MachineBasicBlock::iterator MBBI, + const TargetLowering &TLI) const { unsigned Opc = MBBI->getOpcode(); if (Opc == AArch64::CNTD_XPiI) return true; @@ -1514,15 +1436,12 @@ bool isVGInstruction(MachineBasicBlock::iterator MBBI, return Opc == TargetOpcode::COPY; } -// Convert callee-save register save/restore instruction to do stack pointer -// decrement/increment to allocate/deallocate the callee-save stack area by -// converting store/load to use pre/post increment version. -static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( +MachineBasicBlock::iterator +AArch64FrameLowering::convertCalleeSaveRestoreToSPPrePostIncDec( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFI, - MachineInstr::MIFlag FrameFlag = MachineInstr::FrameSetup, - int CFAOffset = 0) { + MachineInstr::MIFlag FrameFlag, int CFAOffset) const { unsigned NewOpc; // If the function contains streaming mode changes, we expect instructions @@ -1643,12 +1562,9 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( return std::prev(MBB.erase(MBBI)); } -// Fixup callee-save register save/restore instructions to take into account -// combined SP bump by adding the local stack size to the stack offsets. -static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, - uint64_t LocalStackSize, - bool NeedsWinCFI, - bool *HasWinCFI) { +void AArch64FrameLowering::fixupCalleeSaveRestoreStackOffset( + MachineInstr &MI, uint64_t LocalStackSize, bool NeedsWinCFI, + bool *HasWinCFI) const { if (AArch64InstrInfo::isSEHInstruction(MI)) return; @@ -1703,7 +1619,8 @@ static unsigned getStackHazardSize(const MachineFunction &MF) { } // Convenience function to determine whether I is an SVE callee save. -static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { +bool AArch64FrameLowering::isSVECalleeSave( + MachineBasicBlock::iterator I) const { switch (I->getOpcode()) { default: return false; @@ -1725,42 +1642,6 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { } } -static void emitShadowCallStackPrologue(const TargetInstrInfo &TII, - MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, bool NeedsWinCFI, - bool NeedsUnwindInfo) { - // Shadow call stack prolog: str x30, [x18], #8 - BuildMI(MBB, MBBI, DL, TII.get(AArch64::STRXpost)) - .addReg(AArch64::X18, RegState::Define) - .addReg(AArch64::LR) - .addReg(AArch64::X18) - .addImm(8) - .setMIFlag(MachineInstr::FrameSetup); - - // This instruction also makes x18 live-in to the entry block. - MBB.addLiveIn(AArch64::X18); - - if (NeedsWinCFI) - BuildMI(MBB, MBBI, DL, TII.get(AArch64::SEH_Nop)) - .setMIFlag(MachineInstr::FrameSetup); - - if (NeedsUnwindInfo) { - // Emit a CFI instruction that causes 8 to be subtracted from the value of - // x18 when unwinding past this frame. - static const char CFIInst[] = { - dwarf::DW_CFA_val_expression, - 18, // register - 2, // length - static_cast<char>(unsigned(dwarf::DW_OP_breg18)), - static_cast<char>(-8) & 0x7f, // addend (sleb128) - }; - CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup) - .buildEscape(StringRef(CFIInst, sizeof(CFIInst))); - } -} - static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII, MachineFunction &MF, MachineBasicBlock &MBB, @@ -1783,36 +1664,6 @@ static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII, .buildRestore(AArch64::X18); } -// Define the current CFA rule to use the provided FP. -static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned FixedObject) { - const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); - const AArch64RegisterInfo *TRI = STI.getRegisterInfo(); - AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - - const int OffsetToFirstCalleeSaveFromFP = - AFI->getCalleeSaveBaseToFrameRecordOffset() - - AFI->getCalleeSavedStackSize(); - Register FramePtr = TRI->getFrameRegister(MF); - CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup) - .buildDefCFA(FramePtr, FixedObject - OffsetToFirstCalleeSaveFromFP); -} - -#ifndef NDEBUG -/// Collect live registers from the end of \p MI's parent up to (including) \p -/// MI in \p LiveRegs. -static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI, - LivePhysRegs &LiveRegs) { - - MachineBasicBlock &MBB = *MI.getParent(); - LiveRegs.addLiveOuts(MBB); - for (const MachineInstr &MI : - reverse(make_range(MI.getIterator(), MBB.instr_end()))) - LiveRegs.stepBackward(MI); -} -#endif - void AArch64FrameLowering::emitPacRetPlusLeafHardening( MachineFunction &MF) const { const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); @@ -1848,616 +1699,8 @@ void AArch64FrameLowering::emitPacRetPlusLeafHardening( void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.begin(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - const Function &F = MF.getFunction(); - const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); - const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - - AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - bool EmitCFI = AFI->needsDwarfUnwindInfo(MF); - bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF); - bool HasFP = hasFP(MF); - bool NeedsWinCFI = needsWinCFI(MF); - bool HasWinCFI = false; - auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); }); - - MachineBasicBlock::iterator End = MBB.end(); -#ifndef NDEBUG - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - // Collect live register from the end of MBB up to the start of the existing - // frame setup instructions. - MachineBasicBlock::iterator NonFrameStart = MBB.begin(); - while (NonFrameStart != End && - NonFrameStart->getFlag(MachineInstr::FrameSetup)) - ++NonFrameStart; - - LivePhysRegs LiveRegs(*TRI); - if (NonFrameStart != MBB.end()) { - getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs); - // Ignore registers used for stack management for now. - LiveRegs.removeReg(AArch64::SP); - LiveRegs.removeReg(AArch64::X19); - LiveRegs.removeReg(AArch64::FP); - LiveRegs.removeReg(AArch64::LR); - - // X0 will be clobbered by a call to __arm_get_current_vg in the prologue. - // This is necessary to spill VG if required where SVE is unavailable, but - // X0 is preserved around this call. - if (requiresGetVGCall(MF)) - LiveRegs.removeReg(AArch64::X0); - } - - auto VerifyClobberOnExit = make_scope_exit([&]() { - if (NonFrameStart == MBB.end()) - return; - // Check if any of the newly instructions clobber any of the live registers. - for (MachineInstr &MI : - make_range(MBB.instr_begin(), NonFrameStart->getIterator())) { - for (auto &Op : MI.operands()) - if (Op.isReg() && Op.isDef()) - assert(!LiveRegs.contains(Op.getReg()) && - "live register clobbered by inserted prologue instructions"); - } - }); -#endif - - bool IsFunclet = MBB.isEHFuncletEntry(); - - // At this point, we're going to decide whether or not the function uses a - // redzone. In most cases, the function doesn't have a redzone so let's - // assume that's false and set it to true in the case that there's a redzone. - AFI->setHasRedZone(false); - - // Debug location must be unknown since the first debug location is used - // to determine the end of the prologue. - DebugLoc DL; - - const auto &MFnI = *MF.getInfo<AArch64FunctionInfo>(); - if (MFnI.shouldSignReturnAddress(MF)) { - // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions - // are inserted by emitPacRetPlusLeafHardening(). - if (!shouldSignReturnAddressEverywhere(MF)) { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::PAUTH_PROLOGUE)) - .setMIFlag(MachineInstr::FrameSetup); - } - // AArch64PointerAuth pass will insert SEH_PACSignLR - HasWinCFI |= NeedsWinCFI; - } - - if (MFnI.needsShadowCallStackPrologueEpilogue(MF)) { - emitShadowCallStackPrologue(*TII, MF, MBB, MBBI, DL, NeedsWinCFI, - MFnI.needsDwarfUnwindInfo(MF)); - HasWinCFI |= NeedsWinCFI; - } - - if (EmitCFI && MFnI.isMTETagged()) { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITMTETAGGED)) - .setMIFlag(MachineInstr::FrameSetup); - } - - // We signal the presence of a Swift extended frame to external tools by - // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple - // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI - // bits so that is still true. - if (HasFP && AFI->hasSwiftAsyncContext()) { - switch (MF.getTarget().Options.SwiftAsyncFramePointer) { - case SwiftAsyncFramePointerMode::DeploymentBased: - if (Subtarget.swiftAsyncContextIsDynamicallySet()) { - // The special symbol below is absolute and has a *value* that can be - // combined with the frame pointer to signal an extended frame. - BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16) - .addExternalSymbol("swift_async_extendedFramePointerFlags", - AArch64II::MO_GOT); - if (NeedsWinCFI) { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) - .setMIFlags(MachineInstr::FrameSetup); - HasWinCFI = true; - } - BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP) - .addUse(AArch64::FP) - .addUse(AArch64::X16) - .addImm(Subtarget.isTargetILP32() ? 32 : 0); - if (NeedsWinCFI) { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) - .setMIFlags(MachineInstr::FrameSetup); - HasWinCFI = true; - } - break; - } - [[fallthrough]]; - - case SwiftAsyncFramePointerMode::Always: - // ORR x29, x29, #0x1000_0000_0000_0000 - BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP) - .addUse(AArch64::FP) - .addImm(0x1100) - .setMIFlag(MachineInstr::FrameSetup); - if (NeedsWinCFI) { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) - .setMIFlags(MachineInstr::FrameSetup); - HasWinCFI = true; - } - break; - - case SwiftAsyncFramePointerMode::Never: - break; - } - } - - // All calls are tail calls in GHC calling conv, and functions have no - // prologue/epilogue. - if (MF.getFunction().getCallingConv() == CallingConv::GHC) - return; - - // Set tagged base pointer to the requested stack slot. - // Ideally it should match SP value after prologue. - std::optional<int> TBPI = AFI->getTaggedBasePointerIndex(); - if (TBPI) - AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI)); - else - AFI->setTaggedBasePointerOffset(MFI.getStackSize()); - - const StackOffset &SVEStackSize = getSVEStackSize(MF); - - // getStackSize() includes all the locals in its size calculation. We don't - // include these locals when computing the stack size of a funclet, as they - // are allocated in the parent's stack frame and accessed via the frame - // pointer from the funclet. We only save the callee saved registers in the - // funclet, which are really the callee saved registers of the parent - // function, including the funclet. - int64_t NumBytes = - IsFunclet ? getWinEHFuncletFrameSize(MF) : MFI.getStackSize(); - if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { - assert(!HasFP && "unexpected function without stack frame but with FP"); - assert(!SVEStackSize && - "unexpected function without stack frame but with SVE objects"); - // All of the stack allocation is for locals. - AFI->setLocalStackSize(NumBytes); - if (!NumBytes) { - if (NeedsWinCFI && HasWinCFI) { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) - .setMIFlag(MachineInstr::FrameSetup); - } - return; - } - // REDZONE: If the stack size is less than 128 bytes, we don't need - // to actually allocate. - if (canUseRedZone(MF)) { - AFI->setHasRedZone(true); - ++NumRedZoneFunctions; - } else { - emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(-NumBytes), TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); - if (EmitCFI) { - // Label used to tie together the PROLOG_LABEL and the MachineMoves. - MCSymbol *FrameLabel = MF.getContext().createTempSymbol(); - // Encode the stack size of the leaf function. - CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup) - .buildDefCFAOffset(NumBytes, FrameLabel); - } - } - - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) - .setMIFlag(MachineInstr::FrameSetup); - } - - return; - } - - bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()); - unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); - - // Windows unwind can't represent the required stack adjustments if we have - // both SVE callee-saves and dynamic stack allocations, and the frame - // pointer is before the SVE spills. The allocation of the frame pointer - // must be the last instruction in the prologue so the unwinder can restore - // the stack pointer correctly. (And there isn't any unwind opcode for - // `addvl sp, x29, -17`.) - // - // Because of this, we do spills in the opposite order on Windows: first SVE, - // then GPRs. The main side-effect of this is that it makes accessing - // parameters passed on the stack more expensive. - // - // We could consider rearranging the spills for simpler cases. - bool FPAfterSVECalleeSaves = - Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize(); - - if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex()) - reportFatalUsageError("SME hazard padding is not supported on Windows"); - - auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; - // All of the remaining stack allocations are for locals. - AFI->setLocalStackSize(NumBytes - PrologueSaveSize); - bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); - bool HomPrologEpilog = homogeneousPrologEpilog(MF); - if (FPAfterSVECalleeSaves) { - // If we're doing SVE saves first, we need to immediately allocate space - // for fixed objects, then space for the SVE callee saves. - // - // Windows unwind requires that the scalable size is a multiple of 16; - // that's handled when the callee-saved size is computed. - auto SaveSize = - StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) + - StackOffset::getFixed(FixedObject); - allocateStackSpace(MBB, MBBI, 0, SaveSize, NeedsWinCFI, &HasWinCFI, - /*EmitCFI=*/false, StackOffset{}, - /*FollowupAllocs=*/true); - NumBytes -= FixedObject; - - // Now allocate space for the GPR callee saves. - while (MBBI != End && IsSVECalleeSave(MBBI)) - ++MBBI; - MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( - MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI, - &HasWinCFI, EmitAsyncCFI); - NumBytes -= AFI->getCalleeSavedStackSize(); - } else if (CombineSPBump) { - assert(!SVEStackSize && "Cannot combine SP bump with SVE"); - emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, - StackOffset::getFixed(-NumBytes), TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI, - EmitAsyncCFI); - NumBytes = 0; - } else if (HomPrologEpilog) { - // Stack has been already adjusted. - NumBytes -= PrologueSaveSize; - } else if (PrologueSaveSize != 0) { - MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( - MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI, - EmitAsyncCFI); - NumBytes -= PrologueSaveSize; - } - assert(NumBytes >= 0 && "Negative stack allocation size!?"); - - // Move past the saves of the callee-saved registers, fixing up the offsets - // and pre-inc if we decided to combine the callee-save and local stack - // pointer bump above. - auto &TLI = *MF.getSubtarget().getTargetLowering(); - while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) && - !IsSVECalleeSave(MBBI)) { - if (CombineSPBump && - // Only fix-up frame-setup load/store instructions. - (!requiresSaveVG(MF) || !isVGInstruction(MBBI, TLI))) - fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(), - NeedsWinCFI, &HasWinCFI); - ++MBBI; - } - - // For funclets the FP belongs to the containing function. - if (!IsFunclet && HasFP) { - // Only set up FP if we actually need to. - int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset(); - - if (CombineSPBump) - FPOffset += AFI->getLocalStackSize(); - - if (AFI->hasSwiftAsyncContext()) { - // Before we update the live FP we have to ensure there's a valid (or - // null) asynchronous context in its slot just before FP in the frame - // record, so store it now. - const auto &Attrs = MF.getFunction().getAttributes(); - bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync); - if (HaveInitialContext) - MBB.addLiveIn(AArch64::X22); - Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext)) - .addUse(Reg) - .addUse(AArch64::SP) - .addImm(FPOffset - 8) - .setMIFlags(MachineInstr::FrameSetup); - if (NeedsWinCFI) { - // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded - // to multiple instructions, should be mutually-exclusive. - assert(Subtarget.getTargetTriple().getArchName() != "arm64e"); - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) - .setMIFlags(MachineInstr::FrameSetup); - HasWinCFI = true; - } - } - - if (HomPrologEpilog) { - auto Prolog = MBBI; - --Prolog; - assert(Prolog->getOpcode() == AArch64::HOM_Prolog); - Prolog->addOperand(MachineOperand::CreateImm(FPOffset)); - } else { - // Issue sub fp, sp, FPOffset or - // mov fp,sp when FPOffset is zero. - // Note: All stores of callee-saved registers are marked as "FrameSetup". - // This code marks the instruction(s) that set the FP also. - emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, - StackOffset::getFixed(FPOffset), TII, - MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI); - if (NeedsWinCFI && HasWinCFI) { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) - .setMIFlag(MachineInstr::FrameSetup); - // After setting up the FP, the rest of the prolog doesn't need to be - // included in the SEH unwind info. - NeedsWinCFI = false; - } - } - if (EmitAsyncCFI) - emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject); - } - - // Now emit the moves for whatever callee saved regs we have (including FP, - // LR if those are saved). Frame instructions for SVE register are emitted - // later, after the instruction which actually save SVE regs. - if (EmitAsyncCFI) - emitCalleeSavedGPRLocations(MBB, MBBI); - - // Alignment is required for the parent frame, not the funclet - const bool NeedsRealignment = - NumBytes && !IsFunclet && RegInfo->hasStackRealignment(MF); - const int64_t RealignmentPadding = - (NeedsRealignment && MFI.getMaxAlign() > Align(16)) - ? MFI.getMaxAlign().value() - 16 - : 0; - - if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) { - if (AFI->getSVECalleeSavedStackSize()) - report_fatal_error( - "SVE callee saves not yet supported with stack probing"); - - // Find an available register to spill the value of X15 to, if X15 is being - // used already for nest. - unsigned X15Scratch = AArch64::NoRegister; - const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>(); - if (llvm::any_of(MBB.liveins(), - [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) { - return STI.getRegisterInfo()->isSuperOrSubRegisterEq( - AArch64::X15, LiveIn.PhysReg); - })) { - X15Scratch = findScratchNonCalleeSaveRegister(&MBB, true); - assert(X15Scratch != AArch64::NoRegister && - (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17)); -#ifndef NDEBUG - LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it -#endif - BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch) - .addReg(AArch64::XZR) - .addReg(AArch64::X15, RegState::Undef) - .addReg(AArch64::X15, RegState::Implicit) - .setMIFlag(MachineInstr::FrameSetup); - } - - uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4; - if (NeedsWinCFI) { - HasWinCFI = true; - // alloc_l can hold at most 256MB, so assume that NumBytes doesn't - // exceed this amount. We need to move at most 2^24 - 1 into x15. - // This is at most two instructions, MOVZ followed by MOVK. - // TODO: Fix to use multiple stack alloc unwind codes for stacks - // exceeding 256MB in size. - if (NumBytes >= (1 << 28)) - report_fatal_error("Stack size cannot exceed 256MB for stack " - "unwinding purposes"); - - uint32_t LowNumWords = NumWords & 0xFFFF; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15) - .addImm(LowNumWords) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) - .setMIFlag(MachineInstr::FrameSetup); - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) - .setMIFlag(MachineInstr::FrameSetup); - if ((NumWords & 0xFFFF0000) != 0) { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15) - .addReg(AArch64::X15) - .addImm((NumWords & 0xFFFF0000) >> 16) // High half - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16)) - .setMIFlag(MachineInstr::FrameSetup); - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) - .setMIFlag(MachineInstr::FrameSetup); - } - } else { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15) - .addImm(NumWords) - .setMIFlags(MachineInstr::FrameSetup); - } - - const char *ChkStk = Subtarget.getChkStkName(); - switch (MF.getTarget().getCodeModel()) { - case CodeModel::Tiny: - case CodeModel::Small: - case CodeModel::Medium: - case CodeModel::Kernel: - BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) - .addExternalSymbol(ChkStk) - .addReg(AArch64::X15, RegState::Implicit) - .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead) - .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead) - .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead) - .setMIFlags(MachineInstr::FrameSetup); - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) - .setMIFlag(MachineInstr::FrameSetup); - } - break; - case CodeModel::Large: - BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT)) - .addReg(AArch64::X16, RegState::Define) - .addExternalSymbol(ChkStk) - .addExternalSymbol(ChkStk) - .setMIFlags(MachineInstr::FrameSetup); - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) - .setMIFlag(MachineInstr::FrameSetup); - } - - BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF))) - .addReg(AArch64::X16, RegState::Kill) - .addReg(AArch64::X15, RegState::Implicit | RegState::Define) - .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead) - .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead) - .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead) - .setMIFlags(MachineInstr::FrameSetup); - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) - .setMIFlag(MachineInstr::FrameSetup); - } - break; - } - - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP) - .addReg(AArch64::SP, RegState::Kill) - .addReg(AArch64::X15, RegState::Kill) - .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4)) - .setMIFlags(MachineInstr::FrameSetup); - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) - .addImm(NumBytes) - .setMIFlag(MachineInstr::FrameSetup); - } - NumBytes = 0; - - if (RealignmentPadding > 0) { - if (RealignmentPadding >= 4096) { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm)) - .addReg(AArch64::X16, RegState::Define) - .addImm(RealignmentPadding) - .setMIFlags(MachineInstr::FrameSetup); - BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15) - .addReg(AArch64::SP) - .addReg(AArch64::X16, RegState::Kill) - .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0)) - .setMIFlag(MachineInstr::FrameSetup); - } else { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15) - .addReg(AArch64::SP) - .addImm(RealignmentPadding) - .addImm(0) - .setMIFlag(MachineInstr::FrameSetup); - } - - uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1); - BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) - .addReg(AArch64::X15, RegState::Kill) - .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64)); - AFI->setStackRealigned(true); - - // No need for SEH instructions here; if we're realigning the stack, - // we've set a frame pointer and already finished the SEH prologue. - assert(!NeedsWinCFI); - } - if (X15Scratch != AArch64::NoRegister) { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15) - .addReg(AArch64::XZR) - .addReg(X15Scratch, RegState::Undef) - .addReg(X15Scratch, RegState::Implicit) - .setMIFlag(MachineInstr::FrameSetup); - } - } - - StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize; - MachineBasicBlock::iterator CalleeSavesEnd = MBBI; - - StackOffset CFAOffset = - StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes); - - // Process the SVE callee-saves to determine what space needs to be - // allocated. - if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { - LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize - << "\n"); - SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize); - SVELocalsSize = SVEStackSize - SVECalleeSavesSize; - // Find callee save instructions in frame. - // Note: With FPAfterSVECalleeSaves the callee saves have already been - // allocated. - if (!FPAfterSVECalleeSaves) { - MachineBasicBlock::iterator CalleeSavesBegin = MBBI; - assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction"); - while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator()) - ++MBBI; - CalleeSavesEnd = MBBI; - - StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes); - // Allocate space for the callee saves (if any). - allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false, - nullptr, EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects() || LocalsSize); - } - } - CFAOffset += SVECalleeSavesSize; - - if (EmitAsyncCFI) - emitCalleeSavedSVELocations(MBB, CalleeSavesEnd); - - // Allocate space for the rest of the frame including SVE locals. Align the - // stack as necessary. - assert(!(canUseRedZone(MF) && NeedsRealignment) && - "Cannot use redzone with stack realignment"); - if (!canUseRedZone(MF)) { - // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have - // the correct value here, as NumBytes also includes padding bytes, - // which shouldn't be counted here. - allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding, - SVELocalsSize + StackOffset::getFixed(NumBytes), - NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP, - CFAOffset, MFI.hasVarSizedObjects()); - } - - // If we need a base pointer, set it up here. It's whatever the value of the - // stack pointer is at this point. Any variable size objects will be allocated - // after this, so we can still use the base pointer to reference locals. - // - // FIXME: Clarify FrameSetup flags here. - // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is - // needed. - // For funclets the BP belongs to the containing function. - if (!IsFunclet && RegInfo->hasBasePointer(MF)) { - TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, - false); - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) - .setMIFlag(MachineInstr::FrameSetup); - } - } - - // The very last FrameSetup instruction indicates the end of prologue. Emit a - // SEH opcode indicating the prologue end. - if (NeedsWinCFI && HasWinCFI) { - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) - .setMIFlag(MachineInstr::FrameSetup); - } - - // SEH funclets are passed the frame pointer in X1. If the parent - // function uses the base register, then the base register is used - // directly, and is not retrieved from X1. - if (IsFunclet && F.hasPersonalityFn()) { - EHPersonality Per = classifyEHPersonality(F.getPersonalityFn()); - if (isAsynchronousEHPersonality(Per)) { - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP) - .addReg(AArch64::X1) - .setMIFlag(MachineInstr::FrameSetup); - MBB.addLiveIn(AArch64::X1); - } - } - - if (EmitCFI && !EmitAsyncCFI) { - if (HasFP) { - emitDefineCFAWithFP(MF, MBB, MBBI, FixedObject); - } else { - StackOffset TotalSize = - SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize()); - CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup); - CFIBuilder.insertCFIInst( - createDefCFA(*RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP, - TotalSize, /*LastAdjustmentWasScalable=*/false)); - } - emitCalleeSavedGPRLocations(MBB, MBBI); - emitCalleeSavedSVELocations(MBB, MBBI); - } + AArch64PrologueEmitter PrologueEmitter(MF, MBB, *this); + PrologueEmitter.emitPrologue(); } static bool isFuncletReturnInstr(const MachineInstr &MI) { @@ -2548,15 +1791,15 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, AFI->setLocalStackSize(NumBytes - PrologueSaveSize); if (homogeneousPrologEpilog(MF, &MBB)) { assert(!NeedsWinCFI); - auto LastPopI = MBB.getFirstTerminator(); - if (LastPopI != MBB.begin()) { - auto HomogeneousEpilog = std::prev(LastPopI); + auto FirstHomogenousEpilogI = MBB.getFirstTerminator(); + if (FirstHomogenousEpilogI != MBB.begin()) { + auto HomogeneousEpilog = std::prev(FirstHomogenousEpilogI); if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog) - LastPopI = HomogeneousEpilog; + FirstHomogenousEpilogI = HomogeneousEpilog; } // Adjust local stack - emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, + emitFrameOffset(MBB, FirstHomogenousEpilogI, DL, AArch64::SP, AArch64::SP, StackOffset::getFixed(AFI->getLocalStackSize()), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); @@ -2602,17 +1845,17 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // Move past the restores of the callee-saved registers. // If we plan on combining the sp bump of the local stack size and the callee // save stack size, we might need to adjust the CSR save and restore offsets. - MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); + MachineBasicBlock::iterator FirstGPRRestoreI = MBB.getFirstTerminator(); MachineBasicBlock::iterator Begin = MBB.begin(); - while (LastPopI != Begin) { - --LastPopI; - if (!LastPopI->getFlag(MachineInstr::FrameDestroy) || - (!FPAfterSVECalleeSaves && IsSVECalleeSave(LastPopI))) { - ++LastPopI; + while (FirstGPRRestoreI != Begin) { + --FirstGPRRestoreI; + if (!FirstGPRRestoreI->getFlag(MachineInstr::FrameDestroy) || + (!FPAfterSVECalleeSaves && isSVECalleeSave(FirstGPRRestoreI))) { + ++FirstGPRRestoreI; break; } else if (CombineSPBump) - fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), - NeedsWinCFI, &HasWinCFI); + fixupCalleeSaveRestoreStackOffset( + *FirstGPRRestoreI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI); } if (NeedsWinCFI) { @@ -2622,9 +1865,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // arguments. Insert the SEH_EpilogStart and remove it later if it // we didn't emit any SEH opcodes to avoid generating WinCFI for // functions that don't need it. - BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart)) + BuildMI(MBB, FirstGPRRestoreI, DL, TII->get(AArch64::SEH_EpilogStart)) .setMIFlag(MachineInstr::FrameDestroy); - EpilogStartI = LastPopI; + EpilogStartI = FirstGPRRestoreI; --EpilogStartI; } @@ -2665,7 +1908,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // When we are about to restore the CSRs, the CFA register is SP again. if (EmitCFI && hasFP(MF)) - CFIInstBuilder(MBB, LastPopI, MachineInstr::FrameDestroy) + CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy) .buildDefCFA(AArch64::SP, NumBytes); emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, @@ -2681,18 +1924,19 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // Process the SVE callee-saves to determine what space needs to be // deallocated. StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize; - MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI; + MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI, + RestoreEnd = FirstGPRRestoreI; if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) { if (FPAfterSVECalleeSaves) RestoreEnd = MBB.getFirstTerminator(); RestoreBegin = std::prev(RestoreEnd); while (RestoreBegin != MBB.begin() && - IsSVECalleeSave(std::prev(RestoreBegin))) + isSVECalleeSave(std::prev(RestoreBegin))) --RestoreBegin; - assert(IsSVECalleeSave(RestoreBegin) && - IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction"); + assert(isSVECalleeSave(RestoreBegin) && + isSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction"); StackOffset CalleeSavedSizeAsOffset = StackOffset::getScalable(CalleeSavedSize); @@ -2706,7 +1950,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // deallocates non-callee-save SVE allocations. Otherwise, deallocate // them explicitly. if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) { - emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, + emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP, DeallocateBefore, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); } @@ -2796,7 +2040,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, StackRestoreBytes += AfterCSRPopSize; emitFrameOffset( - MBB, LastPopI, DL, AArch64::SP, AArch64::SP, + MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP, StackOffset::getFixed(StackRestoreBytes), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI, EmitCFI, StackOffset::getFixed((RedZone ? 0 : NumBytes) + PrologueSaveSize)); @@ -2816,17 +2060,17 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // be able to save any instructions. if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) { emitFrameOffset( - MBB, LastPopI, DL, AArch64::SP, AArch64::FP, + MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::FP, StackOffset::getFixed(-AFI->getCalleeSaveBaseToFrameRecordOffset()), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); } else if (NumBytes) - emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, + emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP, StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); // When we are about to restore the CSRs, the CFA register is SP again. if (EmitCFI && hasFP(MF)) - CFIInstBuilder(MBB, LastPopI, MachineInstr::FrameDestroy) + CFIInstBuilder(MBB, FirstGPRRestoreI, MachineInstr::FrameDestroy) .buildDefCFA(AArch64::SP, PrologueSaveSize); // This must be placed after the callee-save restore code because that code @@ -2926,8 +2170,8 @@ AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF, return StackOffset::getFixed(getSEHFrameIndexOffset(MF, FI)); } -static StackOffset getFPOffset(const MachineFunction &MF, - int64_t ObjectOffset) { +StackOffset AArch64FrameLowering::getFPOffset(const MachineFunction &MF, + int64_t ObjectOffset) const { const auto *AFI = MF.getInfo<AArch64FunctionInfo>(); const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); const Function &F = MF.getFunction(); @@ -2940,8 +2184,8 @@ static StackOffset getFPOffset(const MachineFunction &MF, return StackOffset::getFixed(ObjectOffset + FixedObject + FPAdjust); } -static StackOffset getStackOffset(const MachineFunction &MF, - int64_t ObjectOffset) { +StackOffset AArch64FrameLowering::getStackOffset(const MachineFunction &MF, + int64_t ObjectOffset) const { const auto &MFI = MF.getFrameInfo(); return StackOffset::getFixed(ObjectOffset + (int64_t)MFI.getStackSize()); } @@ -3139,7 +2383,8 @@ static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { return getKillRegState(!IsLiveIn); } -static bool produceCompactUnwindFrame(MachineFunction &MF) { +static bool produceCompactUnwindFrame(const AArch64FrameLowering &AFL, + MachineFunction &MF) { const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); AttributeList Attrs = MF.getFunction().getAttributes(); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); @@ -3147,7 +2392,7 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) { !(Subtarget.getTargetLowering()->supportSwiftError() && Attrs.hasAttrSomewhere(Attribute::SwiftError)) && MF.getFunction().getCallingConv() != CallingConv::SwiftTail && - !requiresSaveVG(MF) && !AFI->isSVECC(); + !AFL.requiresSaveVG(MF) && !AFI->isSVECC(); } static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, @@ -3244,16 +2489,18 @@ bool enableMultiVectorSpillFill(const AArch64Subtarget &Subtarget, (!IsLocallyStreaming && Subtarget.isStreaming())); } -static void computeCalleeSaveRegisterPairs( - MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI, - const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs, - bool NeedsFrameRecord) { +void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL, + MachineFunction &MF, + ArrayRef<CalleeSavedInfo> CSI, + const TargetRegisterInfo *TRI, + SmallVectorImpl<RegPairInfo> &RegPairs, + bool NeedsFrameRecord) { if (CSI.empty()) return; bool IsWindows = isTargetWindows(MF); - bool NeedsWinCFI = needsWinCFI(MF); + bool NeedsWinCFI = AFL.needsWinCFI(MF); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); unsigned StackHazardSize = getStackHazardSize(MF); MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -3262,9 +2509,10 @@ static void computeCalleeSaveRegisterPairs( (void)CC; // MachO's compact unwind format relies on all registers being stored in // pairs. - assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost || - CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS || - CC == CallingConv::Win64 || (Count & 1) == 0) && + assert((!produceCompactUnwindFrame(AFL, MF) || + CC == CallingConv::PreserveMost || CC == CallingConv::PreserveAll || + CC == CallingConv::CXX_FAST_TLS || CC == CallingConv::Win64 || + (Count & 1) == 0) && "Odd number of callee-saved regs to spill!"); int ByteOffset = AFI->getCalleeSavedStackSize(); int StackFillDir = -1; @@ -3380,9 +2628,9 @@ static void computeCalleeSaveRegisterPairs( // MachO's compact unwind format relies on all registers being stored in // adjacent register pairs. - assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost || - CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS || - CC == CallingConv::Win64 || + assert((!produceCompactUnwindFrame(AFL, MF) || + CC == CallingConv::PreserveMost || CC == CallingConv::PreserveAll || + CC == CallingConv::CXX_FAST_TLS || CC == CallingConv::Win64 || (RPI.isPaired() && ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) || RPI.Reg1 + 1 == RPI.Reg2))) && @@ -3495,7 +2743,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( DebugLoc DL; SmallVector<RegPairInfo, 8> RegPairs; - computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF)); + computeCalleeSaveRegisterPairs(*this, MF, CSI, TRI, RegPairs, hasFP(MF)); MachineRegisterInfo &MRI = MF.getRegInfo(); // Refresh the reserved regs in case there are any potential changes since the @@ -3707,7 +2955,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); - computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF)); + computeCalleeSaveRegisterPairs(*this, MF, CSI, TRI, RegPairs, hasFP(MF)); if (homogeneousPrologEpilog(MF, &MBB)) { auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog)) .setMIFlag(MachineInstr::FrameDestroy); @@ -4141,7 +3389,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, if (producePairRegisters(MF)) { if (UnspilledCSGPRPaired == AArch64::NoRegister) { // Failed to make a pair for compact unwind format, revert spilling. - if (produceCompactUnwindFrame(MF)) { + if (produceCompactUnwindFrame(*this, MF)) { SavedRegs.reset(UnspilledCSGPR); ExtraCSSpill = AArch64::NoRegister; } |
