diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/MachineSMEABIPass.cpp')
| -rw-r--r-- | llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 176 |
1 files changed, 150 insertions, 26 deletions
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index 5b65d734101c..28e76b7be11b 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -72,16 +72,30 @@ using namespace llvm; namespace { -enum ZAState { +// Note: For agnostic ZA, we assume the function is always entered/exited in the +// "ACTIVE" state -- this _may_ not be the case (since OFF is also a +// possibility, but for the purpose of placing ZA saves/restores, that does not +// matter). +enum ZAState : uint8_t { // Any/unknown state (not valid) ANY = 0, // ZA is in use and active (i.e. within the accumulator) ACTIVE, + // ZA is active, but ZT0 has been saved. + // This handles the edge case of sharedZA && !sharesZT0. + ACTIVE_ZT0_SAVED, + // A ZA save has been set up or committed (i.e. ZA is dormant or off) + // If the function uses ZT0 it must also be saved. LOCAL_SAVED, + // ZA has been committed to the lazy save buffer of the current function. + // If the function uses ZT0 it must also be saved. + // ZA is off when a save has been committed. + LOCAL_COMMITTED, + // The ZA/ZT0 state on entry to the function. ENTRY, @@ -164,6 +178,14 @@ public: return AgnosticZABufferPtr; } + int getZT0SaveSlot(MachineFunction &MF) { + if (ZT0SaveFI) + return *ZT0SaveFI; + MachineFrameInfo &MFI = MF.getFrameInfo(); + ZT0SaveFI = MFI.CreateSpillStackObject(64, Align(16)); + return *ZT0SaveFI; + } + /// Returns true if the function must allocate a ZA save buffer on entry. This /// will be the case if, at any point in the function, a ZA save was emitted. bool needsSaveBuffer() const { @@ -173,6 +195,7 @@ public: } private: + std::optional<int> ZT0SaveFI; std::optional<int> TPIDR2BlockFI; Register AgnosticZABufferPtr = AArch64::NoRegister; }; @@ -184,8 +207,10 @@ private: /// state would not be legal, as transitioning to it drops the content of ZA. 
static bool isLegalEdgeBundleZAState(ZAState State) { switch (State) { - case ZAState::ACTIVE: // ZA state within the accumulator/ZT0. - case ZAState::LOCAL_SAVED: // ZA state is saved on the stack. + case ZAState::ACTIVE: // ZA state within the accumulator/ZT0. + case ZAState::ACTIVE_ZT0_SAVED: // ZT0 is saved (ZA is active). + case ZAState::LOCAL_SAVED: // ZA state may be saved on the stack. + case ZAState::LOCAL_COMMITTED: // ZA state is saved on the stack. return true; default: return false; @@ -199,7 +224,9 @@ StringRef getZAStateString(ZAState State) { switch (State) { MAKE_CASE(ZAState::ANY) MAKE_CASE(ZAState::ACTIVE) + MAKE_CASE(ZAState::ACTIVE_ZT0_SAVED) MAKE_CASE(ZAState::LOCAL_SAVED) + MAKE_CASE(ZAState::LOCAL_COMMITTED) MAKE_CASE(ZAState::ENTRY) MAKE_CASE(ZAState::OFF) default: @@ -221,18 +248,34 @@ static bool isZAorZTRegOp(const TargetRegisterInfo &TRI, /// Returns the required ZA state needed before \p MI and an iterator pointing /// to where any code required to change the ZA state should be inserted. static std::pair<ZAState, MachineBasicBlock::iterator> -getZAStateBeforeInst(const TargetRegisterInfo &TRI, MachineInstr &MI, - bool ZAOffAtReturn) { +getInstNeededZAState(const TargetRegisterInfo &TRI, MachineInstr &MI, + SMEAttrs SMEFnAttrs) { MachineBasicBlock::iterator InsertPt(MI); if (MI.getOpcode() == AArch64::InOutZAUsePseudo) return {ZAState::ACTIVE, std::prev(InsertPt)}; + // Note: If we need to save both ZA and ZT0 we use RequiresZASavePseudo. if (MI.getOpcode() == AArch64::RequiresZASavePseudo) return {ZAState::LOCAL_SAVED, std::prev(InsertPt)}; - if (MI.isReturn()) + // If we only need to save ZT0 there are two cases to consider: + // 1. The function has ZA state (that we don't need to save). + // - In this case we switch to the "ACTIVE_ZT0_SAVED" state. + // This only saves ZT0. + // 2. The function does not have ZA state + // - In this case we switch to "LOCAL_COMMITTED" state. + // This saves ZT0 and turns ZA off. 
+ if (MI.getOpcode() == AArch64::RequiresZT0SavePseudo) { + return {SMEFnAttrs.hasZAState() ? ZAState::ACTIVE_ZT0_SAVED + : ZAState::LOCAL_COMMITTED, + std::prev(InsertPt)}; + } + + if (MI.isReturn()) { + bool ZAOffAtReturn = SMEFnAttrs.hasPrivateZAInterface(); return {ZAOffAtReturn ? ZAState::OFF : ZAState::ACTIVE, InsertPt}; + } for (auto &MO : MI.operands()) { if (isZAorZTRegOp(TRI, MO)) @@ -280,6 +323,9 @@ struct MachineSMEABI : public MachineFunctionPass { /// predecessors). void propagateDesiredStates(FunctionInfo &FnInfo, bool Forwards = true); + void emitZT0SaveRestore(EmitContext &, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, bool IsSave); + // Emission routines for private and shared ZA functions (using lazy saves). void emitSMEPrologue(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); @@ -290,8 +336,8 @@ struct MachineSMEABI : public MachineFunctionPass { MachineBasicBlock::iterator MBBI); void emitAllocateLazySaveBuffer(EmitContext &, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); - void emitZAOff(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - bool ClearTPIDR2); + void emitZAMode(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + bool ClearTPIDR2, bool On); // Emission routines for agnostic ZA functions. void emitSetupFullZASave(MachineBasicBlock &MBB, @@ -409,7 +455,7 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { Block.FixedEntryState = ZAState::ENTRY; } else if (MBB.isEHPad()) { // EH entry block: - Block.FixedEntryState = ZAState::LOCAL_SAVED; + Block.FixedEntryState = ZAState::LOCAL_COMMITTED; } LiveRegUnits LiveUnits(*TRI); @@ -431,8 +477,7 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { PhysLiveRegsAfterSMEPrologue = PhysLiveRegs; } // Note: We treat Agnostic ZA as inout_za with an alternate save/restore. 
- auto [NeededState, InsertPt] = getZAStateBeforeInst( - *TRI, MI, /*ZAOffAtReturn=*/SMEFnAttrs.hasPrivateZAInterface()); + auto [NeededState, InsertPt] = getInstNeededZAState(*TRI, MI, SMEFnAttrs); assert((InsertPt == MBBI || isCallStartOpcode(InsertPt->getOpcode())) && "Unexpected state change insertion point!"); // TODO: Do something to avoid state changes where NZCV is live. @@ -752,9 +797,9 @@ void MachineSMEABI::emitRestoreLazySave(EmitContext &Context, restorePhyRegSave(RegSave, MBB, MBBI, DL); } -void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - bool ClearTPIDR2) { +void MachineSMEABI::emitZAMode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + bool ClearTPIDR2, bool On) { DebugLoc DL = getDebugLoc(MBB, MBBI); if (ClearTPIDR2) @@ -765,7 +810,7 @@ void MachineSMEABI::emitZAOff(MachineBasicBlock &MBB, // Disable ZA. BuildMI(MBB, MBBI, DL, TII->get(AArch64::MSRpstatesvcrImm1)) .addImm(AArch64SVCR::SVCRZA) - .addImm(0); + .addImm(On ? 
1 : 0); } void MachineSMEABI::emitAllocateLazySaveBuffer( @@ -894,6 +939,28 @@ void MachineSMEABI::emitFullZASaveRestore(EmitContext &Context, restorePhyRegSave(RegSave, MBB, MBBI, DL); } +void MachineSMEABI::emitZT0SaveRestore(EmitContext &Context, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + bool IsSave) { + DebugLoc DL = getDebugLoc(MBB, MBBI); + Register ZT0Save = MRI->createVirtualRegister(&AArch64::GPR64spRegClass); + + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), ZT0Save) + .addFrameIndex(Context.getZT0SaveSlot(*MF)) + .addImm(0) + .addImm(0); + + if (IsSave) { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::STR_TX)) + .addReg(AArch64::ZT0) + .addReg(ZT0Save); + } else { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDR_TX), AArch64::ZT0) + .addReg(ZT0Save); + } +} + void MachineSMEABI::emitAllocateFullZASaveBuffer( EmitContext &Context, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs) { @@ -938,6 +1005,17 @@ void MachineSMEABI::emitAllocateFullZASaveBuffer( restorePhyRegSave(RegSave, MBB, MBBI, DL); } +struct FromState { + ZAState From; + + constexpr uint8_t to(ZAState To) const { + static_assert(NUM_ZA_STATE < 16, "expected ZAState to fit in 4-bits"); + return uint8_t(From) << 4 | uint8_t(To); + } +}; + +constexpr FromState transitionFrom(ZAState From) { return FromState{From}; } + void MachineSMEABI::emitStateChange(EmitContext &Context, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, @@ -969,17 +1047,63 @@ void MachineSMEABI::emitStateChange(EmitContext &Context, From = ZAState::ACTIVE; } - if (From == ZAState::ACTIVE && To == ZAState::LOCAL_SAVED) - emitZASave(Context, MBB, InsertPt, PhysLiveRegs); - else if (From == ZAState::LOCAL_SAVED && To == ZAState::ACTIVE) - emitZARestore(Context, MBB, InsertPt, PhysLiveRegs); - else if (To == ZAState::OFF) { - assert(From != ZAState::ENTRY && - "ENTRY to OFF should have already been handled"); - assert(!SMEFnAttrs.hasAgnosticZAInterface() && - "Should 
not turn ZA off in agnostic ZA function"); - emitZAOff(MBB, InsertPt, /*ClearTPIDR2=*/From == ZAState::LOCAL_SAVED); - } else { + bool IsAgnosticZA = SMEFnAttrs.hasAgnosticZAInterface(); + bool HasZT0State = SMEFnAttrs.hasZT0State(); + bool HasZAState = IsAgnosticZA || SMEFnAttrs.hasZAState(); + + switch (transitionFrom(From).to(To)) { + // This section handles: ACTIVE <-> ACTIVE_ZT0_SAVED + case transitionFrom(ZAState::ACTIVE).to(ZAState::ACTIVE_ZT0_SAVED): + emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/true); + break; + case transitionFrom(ZAState::ACTIVE_ZT0_SAVED).to(ZAState::ACTIVE): + emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/false); + break; + + // This section handles: ACTIVE -> LOCAL_SAVED + case transitionFrom(ZAState::ACTIVE).to(ZAState::LOCAL_SAVED): + if (HasZT0State) + emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/true); + if (HasZAState) + emitZASave(Context, MBB, InsertPt, PhysLiveRegs); + break; + + // This section handles: ACTIVE -> LOCAL_COMMITTED + case transitionFrom(ZAState::ACTIVE).to(ZAState::LOCAL_COMMITTED): + // Note: We could support ZA state here, but this transition is currently + // only possible when we _don't_ have ZA state. + assert(HasZT0State && !HasZAState && "Expect to only have ZT0 state."); + emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/true); + emitZAMode(MBB, InsertPt, /*ClearTPIDR2=*/false, /*On=*/false); + break; + + // This section handles: LOCAL_COMMITTED -> (OFF|LOCAL_SAVED) + case transitionFrom(ZAState::LOCAL_COMMITTED).to(ZAState::OFF): + case transitionFrom(ZAState::LOCAL_COMMITTED).to(ZAState::LOCAL_SAVED): + // These transitions are a no-op. 
+ break; + + // This section handles: LOCAL_(SAVED|COMMITTED) -> ACTIVE[_ZT0_SAVED] + case transitionFrom(ZAState::LOCAL_COMMITTED).to(ZAState::ACTIVE): + case transitionFrom(ZAState::LOCAL_COMMITTED).to(ZAState::ACTIVE_ZT0_SAVED): + case transitionFrom(ZAState::LOCAL_SAVED).to(ZAState::ACTIVE): + if (HasZAState) + emitZARestore(Context, MBB, InsertPt, PhysLiveRegs); + else + emitZAMode(MBB, InsertPt, /*ClearTPIDR2=*/false, /*On=*/true); + if (HasZT0State && To == ZAState::ACTIVE) + emitZT0SaveRestore(Context, MBB, InsertPt, /*IsSave=*/false); + break; + default: + if (To == ZAState::OFF) { + assert(From != ZAState::ENTRY && + "ENTRY to OFF should have already been handled"); + assert(SMEFnAttrs.hasPrivateZAInterface() && + "Did not expect to turn ZA off in shared/agnostic ZA function"); + emitZAMode(MBB, InsertPt, /*ClearTPIDR2=*/From == ZAState::LOCAL_SAVED, + /*On=*/false); + break; + } dbgs() << "Error: Transition from " << getZAStateString(From) << " to " << getZAStateString(To) << '\n'; llvm_unreachable("Unimplemented state transition"); |
