summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp')
-rw-r--r-- llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp 794
1 files changed, 794 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
new file mode 100644
index 000000000000..af424987b8dd
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -0,0 +1,794 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64PrologueEpilogue.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/CFIInstBuilder.h"
+#include "llvm/MC/MCContext.h"
+
+#define DEBUG_TYPE "frame-info"
+
+STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
+
+namespace llvm {
+/// Set up an emitter for \p MBB of \p MF and cache the function-level facts (unwind-info kind, FP use, WinCFI, funclet entry, homogeneous prolog/epilog) that the emit* methods consult.
+AArch64PrologueEmitter::AArch64PrologueEmitter(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ const AArch64FrameLowering &AFL)
+ : MF(MF), MBB(MBB), F(MF.getFunction()), MFI(MF.getFrameInfo()),
+ Subtarget(MF.getSubtarget<AArch64Subtarget>()), AFL(AFL),
+ RegInfo(*Subtarget.getRegisterInfo()) {
+ TII = Subtarget.getInstrInfo();
+ AFI = MF.getInfo<AArch64FunctionInfo>();
+ // Evaluate these queries once up front and keep the answers in members.
+ EmitCFI = AFI->needsDwarfUnwindInfo(MF);
+ EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
+ HasFP = AFL.hasFP(MF);
+ NeedsWinCFI = AFL.needsWinCFI(MF);
+ IsFunclet = MBB.isEHFuncletEntry();
+ HomPrologEpilog = AFL.homogeneousPrologEpilog(MF);
+ // Assertion-enabled builds additionally record which registers are live past the existing frame-setup code.
+#ifndef NDEBUG
+ collectBlockLiveins();
+#endif
+}
+
+#ifndef NDEBUG
+/// Accumulate into \p LiveRegs the physical registers that are live just
+/// before \p MI: seed the set with the live-outs of \p MI's parent block, then
+/// step backward over every instruction from the end of the block down to, and
+/// including, \p MI. \p TRI is currently unused.
+static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
+                                LivePhysRegs &LiveRegs) {
+  MachineBasicBlock &ParentMBB = *MI.getParent();
+  LiveRegs.addLiveOuts(ParentMBB);
+
+  // Visit [MI, end) last-to-first so the final state describes the point
+  // immediately ahead of MI.
+  auto TailInstrs = make_range(MI.getIterator(), ParentMBB.instr_end());
+  for (const MachineInstr &Cur : reverse(TailInstrs))
+    LiveRegs.stepBackward(Cur);
+}
+
+/// Locate the first instruction of MBB that does not carry the FrameSetup flag
+/// (stored in PrologueEndI) and record in LiveRegs the physical registers that
+/// are live at that point.
+///
+/// When the block is empty or consists solely of FrameSetup instructions,
+/// PrologueEndI is left at MBB.end() and LiveRegs is not modified.
+void AArch64PrologueEmitter::collectBlockLiveins() {
+  // Skip the leading run of already-present frame setup instructions.
+  const MachineBasicBlock::iterator BlockEndI = MBB.end();
+  for (PrologueEndI = MBB.begin();
+       PrologueEndI != BlockEndI &&
+       PrologueEndI->getFlag(MachineInstr::FrameSetup);
+       ++PrologueEndI)
+    ;
+
+  if (PrologueEndI == BlockEndI)
+    return;
+
+  getLivePhysRegsUpTo(*PrologueEndI, RegInfo, LiveRegs);
+
+  // Registers used for stack management are ignored for now.
+  for (auto StackReg : {AArch64::SP, AArch64::X19, AArch64::FP, AArch64::LR})
+    LiveRegs.removeReg(StackReg);
+
+  // The prologue may call __arm_get_current_vg, which is needed to spill VG
+  // where SVE is unavailable. That call clobbers X0, but X0 is preserved
+  // around it, so X0 is not tracked either.
+  if (AFL.requiresGetVGCall(MF))
+    LiveRegs.removeReg(AArch64::X0);
+}
+
+/// Assert that no instruction located ahead of PrologueEndI defines a register
+/// that is contained in LiveRegs. Does nothing when PrologueEndI is MBB.end().
+void AArch64PrologueEmitter::verifyPrologueClobbers() const {
+  if (PrologueEndI == MBB.end())
+    return;
+
+  // None of the newly inserted instructions may clobber a live register.
+  auto InsertedInstrs =
+      make_range(MBB.instr_begin(), PrologueEndI->getIterator());
+  for (MachineInstr &Inserted : InsertedInstrs) {
+    for (auto &Operand : Inserted.operands()) {
+      if (!Operand.isReg() || !Operand.isDef())
+        continue;
+      assert(!LiveRegs.contains(Operand.getReg()) &&
+             "live register clobbered by inserted prologue instructions");
+    }
+  }
+}
+#endif
+
+/// Record how much of \p StackSize is left for locals once the
+/// \p PrologueSaveSize bytes of the save area are taken out, and decide
+/// whether the callee-save and local allocations are folded into one SP bump.
+void AArch64PrologueEmitter::determineLocalsStackSize(
+    uint64_t StackSize, uint64_t PrologueSaveSize) {
+  const uint64_t LocalsSize = StackSize - PrologueSaveSize;
+  AFI->setLocalStackSize(LocalsSize);
+
+  CombineSPBump = AFL.shouldCombineCSRLocalStackBump(MF, StackSize);
+}
+
+/// Emit the prologue for the function (or EH funclet) that starts at MBB.
+///
+/// In order, this:
+///  - emits the return-address signing, shadow-call-stack, MTE and Swift
+///    async markers the function asks for;
+///  - returns early for the GHC calling convention and for functions that
+///    have no stack frame (see emitEmptyStackFramePrologue());
+///  - allocates the callee-save area, optionally folded into a single SP bump
+///    together with the locals, and fixes up the existing callee-save stores;
+///  - sets up the frame pointer, probes the stack on Windows when required,
+///    allocates the SVE callee-saves and the locals, and sets up the base
+///    pointer;
+///  - emits the DWARF CFI and Windows SEH directives that were requested.
+void AArch64PrologueEmitter::emitPrologue() {
+  const MachineBasicBlock::iterator PrologueBeginI = MBB.begin();
+  const MachineBasicBlock::iterator EndI = MBB.end();
+
+  // At this point, we're going to decide whether or not the function uses a
+  // redzone. In most cases, the function doesn't have a redzone so let's
+  // assume that's false and set it to true in the case that there's a redzone.
+  AFI->setHasRedZone(false);
+
+  // Debug location must be unknown since the first debug location is used
+  // to determine the end of the prologue.
+  DebugLoc DL;
+
+  if (AFI->shouldSignReturnAddress(MF)) {
+    // If pac-ret+leaf is in effect, PAUTH_PROLOGUE pseudo instructions
+    // are inserted by emitPacRetPlusLeafHardening().
+    if (!AFL.shouldSignReturnAddressEverywhere(MF)) {
+      BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::PAUTH_PROLOGUE))
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+    // AArch64PointerAuth pass will insert SEH_PACSignLR
+    HasWinCFI |= NeedsWinCFI;
+  }
+
+  if (AFI->needsShadowCallStackPrologueEpilogue(MF)) {
+    emitShadowCallStackPrologue(PrologueBeginI, DL);
+    HasWinCFI |= NeedsWinCFI;
+  }
+
+  if (EmitCFI && AFI->isMTETagged())
+    BuildMI(MBB, PrologueBeginI, DL, TII->get(AArch64::EMITMTETAGGED))
+        .setMIFlag(MachineInstr::FrameSetup);
+
+  // We signal the presence of a Swift extended frame to external tools by
+  // storing FP with 0b0001 in bits 63:60. In normal userland operation a simple
+  // ORR is sufficient, it is assumed a Swift kernel would initialize the TBI
+  // bits so that is still true.
+  if (HasFP && AFI->hasSwiftAsyncContext())
+    emitSwiftAsyncContextFramePointer(PrologueBeginI, DL);
+
+  // All calls are tail calls in GHC calling conv, and functions have no
+  // prologue/epilogue.
+  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
+    return;
+
+  // Set tagged base pointer to the requested stack slot. Ideally it should
+  // match SP value after prologue.
+  if (std::optional<int> TBPI = AFI->getTaggedBasePointerIndex())
+    AFI->setTaggedBasePointerOffset(-MFI.getObjectOffset(*TBPI));
+  else
+    AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+
+  // getStackSize() includes all the locals in its size calculation. We don't
+  // include these locals when computing the stack size of a funclet, as they
+  // are allocated in the parent's stack frame and accessed via the frame
+  // pointer from the funclet. We only save the callee saved registers in the
+  // funclet, which are really the callee saved registers of the parent
+  // function, including the funclet.
+  int64_t NumBytes =
+      IsFunclet ? AFL.getWinEHFuncletFrameSize(MF) : MFI.getStackSize();
+  if (!AFI->hasStackFrame() && !AFL.windowsRequiresStackProbe(MF, NumBytes))
+    return emitEmptyStackFramePrologue(NumBytes, PrologueBeginI, DL);
+
+  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
+  unsigned FixedObject = AFL.getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
+
+  // Windows unwind can't represent the required stack adjustments if we have
+  // both SVE callee-saves and dynamic stack allocations, and the frame
+  // pointer is before the SVE spills. The allocation of the frame pointer
+  // must be the last instruction in the prologue so the unwinder can restore
+  // the stack pointer correctly. (And there isn't any unwind opcode for
+  // `addvl sp, x29, -17`.)
+  //
+  // Because of this, we do spills in the opposite order on Windows: first SVE,
+  // then GPRs. The main side-effect of this is that it makes accessing
+  // parameters passed on the stack more expensive.
+  //
+  // We could consider rearranging the spills for simpler cases.
+  bool FPAfterSVECalleeSaves =
+      Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
+
+  if (FPAfterSVECalleeSaves && AFI->hasStackHazardSlotIndex())
+    reportFatalUsageError("SME hazard padding is not supported on Windows");
+
+  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
+  // All of the remaining stack allocations are for locals.
+  determineLocalsStackSize(NumBytes, PrologueSaveSize);
+
+  MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI;
+  if (FPAfterSVECalleeSaves) {
+    // If we're doing SVE saves first, we need to immediately allocate space
+    // for fixed objects, then space for the SVE callee saves.
+    //
+    // Windows unwind requires that the scalable size is a multiple of 16;
+    // that's handled when the callee-saved size is computed.
+    auto SaveSize =
+        StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
+        StackOffset::getFixed(FixedObject);
+    AFL.allocateStackSpace(MBB, PrologueBeginI, 0, SaveSize, NeedsWinCFI,
+                           &HasWinCFI,
+                           /*EmitCFI=*/false, StackOffset{},
+                           /*FollowupAllocs=*/true);
+    NumBytes -= FixedObject;
+
+    // Now allocate space for the GPR callee saves.
+    MachineBasicBlock::iterator MBBI = PrologueBeginI;
+    while (MBBI != EndI && AFL.isSVECalleeSave(MBBI))
+      ++MBBI;
+    FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
+        MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI,
+        &HasWinCFI, EmitAsyncCFI);
+    NumBytes -= AFI->getCalleeSavedStackSize();
+  } else if (CombineSPBump) {
+    assert(!AFL.getSVEStackSize(MF) && "Cannot combine SP bump with SVE");
+    emitFrameOffset(MBB, PrologueBeginI, DL, AArch64::SP, AArch64::SP,
+                    StackOffset::getFixed(-NumBytes), TII,
+                    MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
+                    EmitAsyncCFI);
+    NumBytes = 0;
+  } else if (HomPrologEpilog) {
+    // Stack has been already adjusted.
+    NumBytes -= PrologueSaveSize;
+  } else if (PrologueSaveSize != 0) {
+    FirstGPRSaveI = AFL.convertCalleeSaveRestoreToSPPrePostIncDec(
+        MBB, PrologueBeginI, DL, TII, -PrologueSaveSize, NeedsWinCFI,
+        &HasWinCFI, EmitAsyncCFI);
+    NumBytes -= PrologueSaveSize;
+  }
+  assert(NumBytes >= 0 && "Negative stack allocation size!?");
+
+  // Move past the saves of the callee-saved registers, fixing up the offsets
+  // and pre-inc if we decided to combine the callee-save and local stack
+  // pointer bump above.
+  auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+  MachineBasicBlock::iterator AfterGPRSavesI = FirstGPRSaveI;
+  while (AfterGPRSavesI != EndI &&
+         AfterGPRSavesI->getFlag(MachineInstr::FrameSetup) &&
+         !AFL.isSVECalleeSave(AfterGPRSavesI)) {
+    if (CombineSPBump &&
+        // Only fix-up frame-setup load/store instructions.
+        (!AFL.requiresSaveVG(MF) || !AFL.isVGInstruction(AfterGPRSavesI, TLI)))
+      AFL.fixupCalleeSaveRestoreStackOffset(
+          *AfterGPRSavesI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI);
+    ++AfterGPRSavesI;
+  }
+
+  // For funclets the FP belongs to the containing function. Only set up FP if
+  // we actually need to.
+  if (!IsFunclet && HasFP)
+    emitFramePointerSetup(AfterGPRSavesI, DL, FixedObject);
+
+  // Now emit the moves for whatever callee saved regs we have (including FP,
+  // LR if those are saved). Frame instructions for SVE register are emitted
+  // later, after the instruction which actually save SVE regs.
+  if (EmitAsyncCFI)
+    emitCalleeSavedGPRLocations(AfterGPRSavesI);
+
+  // Alignment is required for the parent frame, not the funclet
+  const bool NeedsRealignment =
+      NumBytes && !IsFunclet && RegInfo.hasStackRealignment(MF);
+  const int64_t RealignmentPadding =
+      (NeedsRealignment && MFI.getMaxAlign() > Align(16))
+          ? MFI.getMaxAlign().value() - 16
+          : 0;
+
+  // Note: emitWindowsStackProbe() takes NumBytes by reference and resets it to
+  // zero once the probed allocation has been emitted.
+  if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
+    emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
+
+  StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
+  StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
+  MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
+
+  StackOffset CFAOffset =
+      StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
+
+  // Process the SVE callee-saves to determine what space needs to be
+  // allocated.
+  MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
+  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
+    LLVM_DEBUG(dbgs() << "SVECalleeSavedStackSize = " << CalleeSavedSize
+                      << "\n");
+    SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
+    SVELocalsSize = SVEStackSize - SVECalleeSavesSize;
+    // Find callee save instructions in frame.
+    // Note: With FPAfterSVECalleeSaves the callee saves have already been
+    // allocated.
+    if (!FPAfterSVECalleeSaves) {
+      MachineBasicBlock::iterator CalleeSavesBegin = AfterGPRSavesI;
+      assert(CalleeSavesBegin != EndI &&
+             AFL.isSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
+      // Test the iterator against the bound before inspecting the
+      // instruction, as the other scans in this function do, so the loop
+      // never looks at the block's end iterator.
+      const MachineBasicBlock::iterator FirstTerminatorI =
+          MBB.getFirstTerminator();
+      while (AfterSVESavesI != FirstTerminatorI &&
+             AFL.isSVECalleeSave(AfterSVESavesI))
+        ++AfterSVESavesI;
+      CalleeSavesEnd = AfterSVESavesI;
+
+      StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
+      // Allocate space for the callee saves (if any).
+      AFL.allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize,
+                             false, nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
+                             MFI.hasVarSizedObjects() || LocalsSize);
+    }
+  }
+  CFAOffset += SVECalleeSavesSize;
+
+  if (EmitAsyncCFI)
+    emitCalleeSavedSVELocations(CalleeSavesEnd);
+
+  // Allocate space for the rest of the frame including SVE locals. Align the
+  // stack as necessary.
+  assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
+         "Cannot use redzone with stack realignment");
+  if (!AFL.canUseRedZone(MF)) {
+    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+    // the correct value here, as NumBytes also includes padding bytes,
+    // which shouldn't be counted here.
+    AFL.allocateStackSpace(MBB, CalleeSavesEnd, RealignmentPadding,
+                           SVELocalsSize + StackOffset::getFixed(NumBytes),
+                           NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
+                           CFAOffset, MFI.hasVarSizedObjects());
+  }
+
+  // If we need a base pointer, set it up here. It's whatever the value of the
+  // stack pointer is at this point. Any variable size objects will be allocated
+  // after this, so we can still use the base pointer to reference locals.
+  //
+  // FIXME: Clarify FrameSetup flags here.
+  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
+  // needed.
+  // For funclets the BP belongs to the containing function.
+  if (!IsFunclet && RegInfo.hasBasePointer(MF)) {
+    TII->copyPhysReg(MBB, AfterSVESavesI, DL, RegInfo.getBaseRegister(),
+                     AArch64::SP, false);
+    if (NeedsWinCFI) {
+      HasWinCFI = true;
+      BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_Nop))
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+  }
+
+  // The very last FrameSetup instruction indicates the end of prologue. Emit a
+  // SEH opcode indicating the prologue end.
+  if (NeedsWinCFI && HasWinCFI) {
+    BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  // SEH funclets are passed the frame pointer in X1. If the parent
+  // function uses the base register, then the base register is used
+  // directly, and is not retrieved from X1.
+  if (IsFunclet && F.hasPersonalityFn()) {
+    EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
+    if (isAsynchronousEHPersonality(Per)) {
+      BuildMI(MBB, AfterSVESavesI, DL, TII->get(TargetOpcode::COPY),
+              AArch64::FP)
+          .addReg(AArch64::X1)
+          .setMIFlag(MachineInstr::FrameSetup);
+      MBB.addLiveIn(AArch64::X1);
+    }
+  }
+
+  // Synchronous-only CFI: describe the final frame once, after the prologue.
+  if (EmitCFI && !EmitAsyncCFI) {
+    if (HasFP) {
+      emitDefineCFAWithFP(AfterSVESavesI, FixedObject);
+    } else {
+      StackOffset TotalSize =
+          SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+      CFIInstBuilder CFIBuilder(MBB, AfterSVESavesI, MachineInstr::FrameSetup);
+      CFIBuilder.insertCFIInst(
+          createDefCFA(RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
+                       TotalSize, /*LastAdjustmentWasScalable=*/false));
+    }
+    emitCalleeSavedGPRLocations(AfterSVESavesI);
+    emitCalleeSavedSVELocations(AfterSVESavesI);
+  }
+}
+/// Insert, before \p MBBI, the shadow-call-stack push of LR through X18, followed by an SEH_Nop when Windows CFI is needed and a CFI escape describing X18 when CFI is emitted.
+void AArch64PrologueEmitter::emitShadowCallStackPrologue(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
+ // Shadow call stack prolog: str x30, [x18], #8
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXpost))
+ .addReg(AArch64::X18, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::X18)
+ .addImm(8)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // The store reads x18, so x18 must be registered as a live-in of this block.
+ MBB.addLiveIn(AArch64::X18);
+ // When Windows CFI is required, follow the store with an SEH_Nop.
+ if (NeedsWinCFI)
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ if (EmitCFI) {
+ // Emit a CFI instruction that causes 8 to be subtracted from the value of
+ // x18 when unwinding past this frame.
+ static const char CFIInst[] = {
+ dwarf::DW_CFA_val_expression,
+ 18, // register
+ 2, // length
+ static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
+ static_cast<char>(-8) & 0x7f, // addend (sleb128)
+ };
+ CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+ .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
+ }
+}
+/// Insert, before \p MBBI, the instructions that OR the Swift extended-frame flag into FP, as selected by the target's SwiftAsyncFramePointer option; nothing is emitted in the Never mode.
+void AArch64PrologueEmitter::emitSwiftAsyncContextFramePointer(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const {
+ switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
+ case SwiftAsyncFramePointerMode::DeploymentBased:
+ if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
+ // The special symbol below is absolute and has a *value* that can be
+ // combined with the frame pointer to signal an extended frame.
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
+ .addExternalSymbol("swift_async_extendedFramePointerFlags",
+ AArch64II::MO_GOT);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
+ .addUse(AArch64::FP)
+ .addUse(AArch64::X16)
+ .addImm(Subtarget.isTargetILP32() ? 32 : 0); // NOTE(review): presumably the shift applied to X16 (32 on ILP32) — confirm
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
+ break;
+ }
+ [[fallthrough]]; // Flags are not set dynamically: handle exactly like Always.
+
+ case SwiftAsyncFramePointerMode::Always:
+ // ORR x29, x29, #0x1000_0000_0000_0000
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
+ .addUse(AArch64::FP)
+ .addImm(0x1100) // NOTE(review): presumably the logical-immediate encoding of the mask shown above — confirm
+ .setMIFlag(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
+ break;
+
+ case SwiftAsyncFramePointerMode::Never:
+ break; // FP is left unmodified.
+ }
+}
+/// Emit the prologue for a function without a stack frame: record \p NumBytes as the local stack size, then either use the red zone or drop SP by \p NumBytes before \p MBBI, adding CFI / SEH markers as requested.
+void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
+ int64_t NumBytes, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL) const {
+ assert(!HasFP && "unexpected function without stack frame but with FP");
+ assert(!AFL.getSVEStackSize(MF) &&
+ "unexpected function without stack frame but with SVE objects");
+ // All of the stack allocation is for locals.
+ AFI->setLocalStackSize(NumBytes);
+ if (!NumBytes) { // Nothing to allocate; only close an SEH prologue that was already started.
+ if (NeedsWinCFI && HasWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ return;
+ }
+ // REDZONE: If the stack size is less than 128 bytes, we don't need
+ // to actually allocate; canUseRedZone() makes that decision.
+ if (AFL.canUseRedZone(MF)) {
+ AFI->setHasRedZone(true);
+ ++NumRedZoneFunctions;
+ } else {
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(-NumBytes), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ if (EmitCFI) {
+ // Label used to tie together the PROLOG_LABEL and the MachineMoves.
+ MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
+ // Encode the stack size of the leaf function.
+ CFIInstBuilder(MBB, MBBI, MachineInstr::FrameSetup)
+ .buildDefCFAOffset(NumBytes, FrameLabel);
+ }
+ }
+ // With a non-zero size, Windows CFI always gets an explicit prologue-end marker.
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+}
+/// Insert, before \p MBBI, the code that makes FP point at the frame record (storing the Swift async context first when present); \p FixedObject is forwarded to emitDefineCFAWithFP() when async CFI is emitted.
+void AArch64PrologueEmitter::emitFramePointerSetup(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ unsigned FixedObject) {
+ int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();
+ if (CombineSPBump) // A combined SP bump also covers the locals, so include their size.
+ FPOffset += AFI->getLocalStackSize();
+
+ if (AFI->hasSwiftAsyncContext()) {
+ // Before we update the live FP we have to ensure there's a valid (or
+ // null) asynchronous context in its slot just before FP in the frame
+ // record, so store it now.
+ const auto &Attrs = MF.getFunction().getAttributes();
+ bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
+ if (HaveInitialContext)
+ MBB.addLiveIn(AArch64::X22);
+ Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR; // XZR stores a null context.
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
+ .addUse(Reg)
+ .addUse(AArch64::SP)
+ .addImm(FPOffset - 8)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
+ // to multiple instructions, should be mutually-exclusive.
+ assert(Subtarget.getTargetTriple().getArchName() != "arm64e");
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlags(MachineInstr::FrameSetup);
+ HasWinCFI = true;
+ }
+ }
+
+ if (HomPrologEpilog) { // The HOM_Prolog pseudo just before MBBI receives the FP offset as an extra operand.
+ auto Prolog = MBBI;
+ --Prolog;
+ assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
+ Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
+ } else {
+ // Compute fp = sp + FPOffset, which is a plain
+ // mov fp,sp when FPOffset is zero.
+ // Note: All stores of callee-saved registers are marked as "FrameSetup".
+ // This code marks the instruction(s) that set the FP also.
+ emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
+ StackOffset::getFixed(FPOffset), TII,
+ MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
+ if (NeedsWinCFI && HasWinCFI) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
+ .setMIFlag(MachineInstr::FrameSetup);
+ // After setting up the FP, the rest of the prolog doesn't need to be
+ // included in the SEH unwind info.
+ NeedsWinCFI = false;
+ }
+ }
+ if (EmitAsyncCFI)
+ emitDefineCFAWithFP(MBBI, FixedObject);
+}
+
+/// Emit, before \p MBBI, a def_cfa directive that expresses the CFA in terms
+/// of the function's frame register. The offset is \p FixedObject minus the
+/// distance from the frame record to the first callee save.
+void AArch64PrologueEmitter::emitDefineCFAWithFP(
+    MachineBasicBlock::iterator MBBI, unsigned FixedObject) const {
+  const AArch64RegisterInfo *AArch64RI = Subtarget.getRegisterInfo();
+
+  // Same operand types as before so the mixed signed/unsigned arithmetic is
+  // unchanged.
+  const int FrameRecordToFirstCalleeSave =
+      AFI->getCalleeSaveBaseToFrameRecordOffset() -
+      AFI->getCalleeSavedStackSize();
+
+  Register FrameReg = AArch64RI->getFrameRegister(MF);
+  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+  CFIBuilder.buildDefCFA(FrameReg, FixedObject - FrameRecordToFirstCalleeSave);
+}
+/// Insert, before \p MBBI, a probed stack allocation of \p NumBytes + \p RealignmentPadding bytes: load the size in 16-byte units into X15, call the stack-probe helper, subtract from SP, then realign SP if padding was requested; \p NumBytes is reset to 0 on return.
+void AArch64PrologueEmitter::emitWindowsStackProbe(
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, int64_t &NumBytes,
+ int64_t RealignmentPadding) const {
+ if (AFI->getSVECalleeSavedStackSize())
+ report_fatal_error("SVE callee saves not yet supported with stack probing");
+
+ // Find an available register to spill the value of X15 to, if X15 is being
+ // used already for nest.
+ unsigned X15Scratch = AArch64::NoRegister;
+ const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+ if (llvm::any_of(MBB.liveins(),
+ [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
+ return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
+ AArch64::X15, LiveIn.PhysReg);
+ })) {
+ X15Scratch = AFL.findScratchNonCalleeSaveRegister(&MBB, /*HasCall=*/true);
+ assert(X15Scratch != AArch64::NoRegister &&
+ (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
+#ifndef NDEBUG
+ LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
+#endif
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::X15, RegState::Undef)
+ .addReg(AArch64::X15, RegState::Implicit)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4; // Allocation size in 16-byte units, passed in X15.
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
+ // exceed this amount. We need to move at most 2^24 - 1 into x15.
+ // This is at most two instructions, MOVZ followed by MOVK.
+ // TODO: Fix to use multiple stack alloc unwind codes for stacks
+ // exceeding 256MB in size.
+ if (NumBytes >= (1 << 28))
+ report_fatal_error("Stack size cannot exceed 256MB for stack "
+ "unwinding purposes");
+
+ uint32_t LowNumWords = NumWords & 0xFFFF;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
+ .addImm(LowNumWords)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ if ((NumWords & 0xFFFF0000) != 0) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
+ .addReg(AArch64::X15)
+ .addImm((NumWords & 0xFFFF0000) >> 16) // High half
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ // Call the stack-probe helper; the call sequence depends on the code model.
+ const char *ChkStk = Subtarget.getChkStkName();
+ switch (MF.getTarget().getCodeModel()) {
+ case CodeModel::Tiny:
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Kernel:
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
+ .addExternalSymbol(ChkStk)
+ .addReg(AArch64::X15, RegState::Implicit)
+ .addReg(AArch64::X16,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ break;
+ case CodeModel::Large: // Materialize the helper's address in X16 and call through it.
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
+ .addReg(AArch64::X16, RegState::Define)
+ .addExternalSymbol(ChkStk)
+ .addExternalSymbol(ChkStk)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(getBLRCallOpcode(MF)))
+ .addReg(AArch64::X16, RegState::Kill)
+ .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
+ .addReg(AArch64::X16,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::X17,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .addReg(AArch64::NZCV,
+ RegState::Implicit | RegState::Define | RegState::Dead)
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ break;
+ }
+ // sub sp, sp, x15, uxtx #4: allocate the probed area (X15 counts 16-byte units).
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
+ .addReg(AArch64::SP, RegState::Kill)
+ .addReg(AArch64::X15, RegState::Kill)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
+ .setMIFlags(MachineInstr::FrameSetup);
+ if (NeedsWinCFI) {
+ HasWinCFI = true;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ NumBytes = 0; // Written back through the reference parameter: nothing is left for the caller to allocate.
+
+ if (RealignmentPadding > 0) {
+ if (RealignmentPadding >= 4096) { // Padding goes through X16 in this case rather than an ADD immediate.
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
+ .addReg(AArch64::X16, RegState::Define)
+ .addImm(RealignmentPadding)
+ .setMIFlags(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::X16, RegState::Kill)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
+ .addReg(AArch64::SP)
+ .addImm(RealignmentPadding)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ // Round SP + padding (held in X15) down to the maximum alignment and install it as SP.
+ uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(AArch64::X15, RegState::Kill)
+ .addImm(AArch64_AM::encodeLogicalImmediate(AndMask, 64));
+ AFI->setStackRealigned(true);
+
+ // No need for SEH instructions here; if we're realigning the stack,
+ // we've set a frame pointer and already finished the SEH prologue.
+ assert(!NeedsWinCFI);
+ }
+ if (X15Scratch != AArch64::NoRegister) { // Restore the X15 value that was parked in the scratch register above.
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
+ .addReg(AArch64::XZR)
+ .addReg(X15Scratch, RegState::Undef)
+ .addReg(X15Scratch, RegState::Implicit)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+}
+
+/// Emit, before \p MBBI, one CFI offset directive for every callee-saved
+/// register whose spill slot is not a scalable-vector slot. The offset is the
+/// slot's object offset minus the offset of the local area.
+void AArch64PrologueEmitter::emitCalleeSavedGPRLocations(
+    MachineBasicBlock::iterator MBBI) const {
+  MachineFrameInfo &FrameInfo = MBB.getParent()->getFrameInfo();
+
+  const std::vector<CalleeSavedInfo> &CalleeSaves =
+      FrameInfo.getCalleeSavedInfo();
+  if (CalleeSaves.empty())
+    return;
+
+  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+  for (const CalleeSavedInfo &CS : CalleeSaves) {
+    const unsigned Slot = CS.getFrameIdx();
+    const bool IsScalableSlot =
+        FrameInfo.getStackID(Slot) == TargetStackID::ScalableVector;
+    if (!IsScalableSlot) {
+      assert(!CS.isSpilledToReg() && "Spilling to registers not implemented");
+      const int64_t SlotOffset =
+          FrameInfo.getObjectOffset(Slot) - AFL.getOffsetOfLocalArea();
+      CFIBuilder.buildOffset(CS.getReg(), SlotOffset);
+    }
+  }
+}
+
+/// Emit, before \p MBBI, CFI offset directives for the callee-saved registers
+/// that were spilled to scalable-vector stack slots.
+///
+/// When the function saves VG, the offset of the last VG entry in the
+/// callee-saved list (its object offset minus the offset of the local area)
+/// is handed to every directive that is created.
+void AArch64PrologueEmitter::emitCalleeSavedSVELocations(
+    MachineBasicBlock::iterator MBBI) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  // Add callee saved registers to move list.
+  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+  if (CSI.empty())
+    return;
+
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+  // Cast once, outside the loop, for the AArch64-specific regNeedsCFI() query.
+  const auto &AArch64TRI = static_cast<const AArch64RegisterInfo &>(TRI);
+  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+  CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
+
+  std::optional<int64_t> IncomingVGOffsetFromDefCFA;
+  if (AFL.requiresSaveVG(MF)) {
+    auto ReversedCSI = reverse(CSI);
+    auto IncomingVGI = find_if(ReversedCSI, [](const CalleeSavedInfo &Info) {
+      return Info.getReg() == AArch64::VG;
+    });
+    // Make the invariant explicit instead of dereferencing a possibly-end
+    // iterator.
+    assert(IncomingVGI != ReversedCSI.end() &&
+           "VG save requested but VG is not in the callee-saved list");
+    IncomingVGOffsetFromDefCFA =
+        MFI.getObjectOffset(IncomingVGI->getFrameIdx()) -
+        AFL.getOffsetOfLocalArea();
+  }
+
+  for (const auto &Info : CSI) {
+    if (MFI.getStackID(Info.getFrameIdx()) != TargetStackID::ScalableVector)
+      continue;
+
+    assert(!Info.isSpilledToReg() && "Spilling to registers not implemented");
+
+    // Not all unwinders may know about SVE registers, so assume the lowest
+    // common denominator. regNeedsCFI() may update Reg to the register that
+    // should be named in the directive.
+    MCRegister Reg = Info.getReg();
+    if (!AArch64TRI.regNeedsCFI(Reg, Reg))
+      continue;
+
+    StackOffset Offset =
+        StackOffset::getScalable(MFI.getObjectOffset(Info.getFrameIdx())) -
+        StackOffset::getFixed(AFI.getCalleeSavedStackSize(MFI));
+
+    CFIBuilder.insertCFIInst(
+        createCFAOffset(TRI, Reg, Offset, IncomingVGOffsetFromDefCFA));
+  }
+}
+
+} // namespace llvm