diff options
Diffstat (limited to 'bolt')
20 files changed, 1979 insertions, 140 deletions
diff --git a/bolt/include/bolt/Passes/PAuthGadgetScanner.h b/bolt/include/bolt/Passes/PAuthGadgetScanner.h index c6b9cc2eb4b9..721fd664a325 100644 --- a/bolt/include/bolt/Passes/PAuthGadgetScanner.h +++ b/bolt/include/bolt/Passes/PAuthGadgetScanner.h @@ -199,8 +199,7 @@ namespace PAuthGadgetScanner { // to distinguish intermediate and final results at the type level. // // Here is an overview of issue life-cycle: -// * an analysis (SrcSafetyAnalysis at now, DstSafetyAnalysis will be added -// later to support the detection of authentication oracles) computes register +// * an analysis (SrcSafetyAnalysis or DstSafetyAnalysis) computes register // state for each instruction in the function. // * for each instruction, it is checked whether it is a gadget of some kind, // taking the computed state into account. If a gadget is found, its kind @@ -273,6 +272,11 @@ public: virtual ~ExtraInfo() {} }; +/// The set of instructions writing to the affected register in an unsafe +/// manner. +/// +/// This is a hint to be printed alongside the report. It should be further +/// analyzed by the user. class ClobberingInfo : public ExtraInfo { SmallVector<MCInstReference> ClobberingInstrs; @@ -282,6 +286,20 @@ public: void print(raw_ostream &OS, const MCInstReference Location) const override; }; +/// The set of instructions leaking the authenticated pointer before the +/// result of authentication was checked. +/// +/// This is a hint to be printed alongside the report. It should be further +/// analyzed by the user. +class LeakageInfo : public ExtraInfo { + SmallVector<MCInstReference> LeakingInstrs; + +public: + LeakageInfo(ArrayRef<MCInstReference> Instrs) : LeakingInstrs(Instrs) {} + + void print(raw_ostream &OS, const MCInstReference Location) const override; +}; + /// A brief version of a report that can be further augmented with the details. /// /// A half-baked report produced on the first run of the analysis. 
An extra, @@ -322,6 +340,9 @@ class FunctionAnalysisContext { void findUnsafeUses(SmallVector<PartialReport<MCPhysReg>> &Reports); void augmentUnsafeUseReports(ArrayRef<PartialReport<MCPhysReg>> Reports); + void findUnsafeDefs(SmallVector<PartialReport<MCPhysReg>> &Reports); + void augmentUnsafeDefReports(ArrayRef<PartialReport<MCPhysReg>> Reports); + /// Process the reports which do not have to be augmented, and remove them /// from Reports. void handleSimpleReports(SmallVector<PartialReport<MCPhysReg>> &Reports); diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h index 96969cf53bac..cc28a06c151e 100644 --- a/bolt/include/bolt/Profile/DataAggregator.h +++ b/bolt/include/bolt/Profile/DataAggregator.h @@ -85,6 +85,8 @@ private: }; friend raw_ostream &operator<<(raw_ostream &OS, const LBREntry &); + friend struct PerfSpeEventsTestHelper; + struct PerfBranchSample { SmallVector<LBREntry, 32> LBR; }; @@ -99,16 +101,17 @@ private: uint64_t Addr; }; - /// Container for the unit of branch data. - /// Backwards compatible with legacy use for branches and fall-throughs: - /// - if \p Branch is FT_ONLY or FT_EXTERNAL_ORIGIN, the trace only - /// contains fall-through data, - /// - if \p To is BR_ONLY, the trace only contains branch data. + /// Container for the unit of branch data, matching pre-aggregated trace type. + /// Backwards compatible with branch and fall-through types: + /// - if \p To is < 0, the trace only contains branch data (BR_ONLY), + /// - if \p Branch is < 0, the trace only contains fall-through data + /// (FT_ONLY, FT_EXTERNAL_ORIGIN, or FT_EXTERNAL_RETURN). 
struct Trace { static constexpr const uint64_t EXTERNAL = 0ULL; static constexpr const uint64_t BR_ONLY = -1ULL; static constexpr const uint64_t FT_ONLY = -1ULL; static constexpr const uint64_t FT_EXTERNAL_ORIGIN = -2ULL; + static constexpr const uint64_t FT_EXTERNAL_RETURN = -3ULL; uint64_t Branch; uint64_t From; @@ -388,9 +391,9 @@ private: /// File format syntax: /// E <event> /// S <start> <count> - /// T <start> <end> <ft_end> <count> + /// [TR] <start> <end> <ft_end> <count> /// B <start> <end> <count> <mispred_count> - /// [Ff] <start> <end> <count> + /// [Ffr] <start> <end> <count> /// /// where <start>, <end>, <ft_end> have the format [<id>:]<offset> /// @@ -401,8 +404,11 @@ private: /// f - an aggregated fall-through with external origin - used to disambiguate /// between a return hitting a basic block head and a regular internal /// jump to the block + /// r - an aggregated fall-through originating at an external return, no + /// checks are performed for a fallthrough start /// T - an aggregated trace: branch from <start> to <end> with a fall-through /// to <ft_end> + /// R - an aggregated trace originating at a return /// /// <id> - build id of the object containing the address. We can skip it for /// the main binary and use "X" for an unknown object. 
This will save some @@ -530,7 +536,12 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DataAggregator::Trace &T) { switch (T.Branch) { case DataAggregator::Trace::FT_ONLY: + break; case DataAggregator::Trace::FT_EXTERNAL_ORIGIN: + OS << "X:0 -> "; + break; + case DataAggregator::Trace::FT_EXTERNAL_RETURN: + OS << "X:R -> "; break; default: OS << Twine::utohexstr(T.Branch) << " -> "; diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index 4acce5a3e832..a75b6bf720ec 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -48,6 +48,7 @@ extern llvm::cl::OptionCategory BinaryAnalysisCategory; extern llvm::cl::opt<unsigned> AlignText; extern llvm::cl::opt<unsigned> AlignFunctions; extern llvm::cl::opt<bool> AggregateOnly; +extern llvm::cl::opt<bool> ArmSPE; extern llvm::cl::opt<unsigned> BucketsPerLine; extern llvm::cl::opt<bool> CompactCodeModel; extern llvm::cl::opt<bool> DiffOnly; diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index 971ea5fdef42..95e831fe9c8c 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -152,6 +152,8 @@ public: // in the gadgets to be reported. This information is used in the second run // to also track which instructions last wrote to those registers. +typedef SmallPtrSet<const MCInst *, 4> SetOfRelatedInsts; + /// A state representing which registers are safe to use by an instruction /// at a given program point. /// @@ -195,7 +197,7 @@ struct SrcState { /// pac-ret analysis, the expectation is that almost all return instructions /// only use register `X30`, and therefore, this vector will probably have /// length 1 in the second run. - std::vector<SmallPtrSet<const MCInst *, 4>> LastInstWritingReg; + std::vector<SetOfRelatedInsts> LastInstWritingReg; /// Construct an empty state. 
SrcState() {} @@ -230,12 +232,11 @@ struct SrcState { bool operator!=(const SrcState &RHS) const { return !((*this) == RHS); } }; -static void -printLastInsts(raw_ostream &OS, - ArrayRef<SmallPtrSet<const MCInst *, 4>> LastInstWritingReg) { +static void printInstsShort(raw_ostream &OS, + ArrayRef<SetOfRelatedInsts> Insts) { OS << "Insts: "; - for (unsigned I = 0; I < LastInstWritingReg.size(); ++I) { - auto &Set = LastInstWritingReg[I]; + for (unsigned I = 0; I < Insts.size(); ++I) { + auto &Set = Insts[I]; OS << "[" << I << "]("; for (const MCInst *MCInstP : Set) OS << MCInstP << " "; @@ -243,14 +244,14 @@ printLastInsts(raw_ostream &OS, } } -raw_ostream &operator<<(raw_ostream &OS, const SrcState &S) { +static raw_ostream &operator<<(raw_ostream &OS, const SrcState &S) { OS << "src-state<"; if (S.empty()) { OS << "empty"; } else { OS << "SafeToDerefRegs: " << S.SafeToDerefRegs << ", "; OS << "TrustedRegs: " << S.TrustedRegs << ", "; - printLastInsts(OS, S.LastInstWritingReg); + printInstsShort(OS, S.LastInstWritingReg); } OS << ">"; return OS; @@ -279,7 +280,7 @@ void SrcStatePrinter::print(raw_ostream &OS, const SrcState &S) const { OS << ", TrustedRegs: "; RegStatePrinter.print(OS, S.TrustedRegs); OS << ", "; - printLastInsts(OS, S.LastInstWritingReg); + printInstsShort(OS, S.LastInstWritingReg); } OS << ">"; } @@ -323,13 +324,12 @@ protected: DenseMap<const MCInst *, std::pair<MCPhysReg, const MCInst *>> CheckerSequenceInfo; - SmallPtrSet<const MCInst *, 4> &lastWritingInsts(SrcState &S, - MCPhysReg Reg) const { + SetOfRelatedInsts &lastWritingInsts(SrcState &S, MCPhysReg Reg) const { unsigned Index = RegsToTrackInstsFor.getIndex(Reg); return S.LastInstWritingReg[Index]; } - const SmallPtrSet<const MCInst *, 4> &lastWritingInsts(const SrcState &S, - MCPhysReg Reg) const { + const SetOfRelatedInsts &lastWritingInsts(const SrcState &S, + MCPhysReg Reg) const { unsigned Index = RegsToTrackInstsFor.getIndex(Reg); return S.LastInstWritingReg[Index]; } @@ -430,11 
+430,13 @@ protected: } SrcState computeNext(const MCInst &Point, const SrcState &Cur) { + if (BC.MIB->isCFI(Point)) + return Cur; + SrcStatePrinter P(BC); LLVM_DEBUG({ dbgs() << " SrcSafetyAnalysis::ComputeNext("; - BC.InstPrinter->printInst(&const_cast<MCInst &>(Point), 0, "", *BC.STI, - dbgs()); + BC.InstPrinter->printInst(&Point, 0, "", *BC.STI, dbgs()); dbgs() << ", "; P.print(dbgs(), Cur); dbgs() << ")\n"; @@ -612,6 +614,42 @@ protected: StringRef getAnnotationName() const { return "DataflowSrcSafetyAnalysis"; } }; +/// A helper base class for implementing a simplified counterpart of a dataflow +/// analysis for functions without CFG information. +template <typename StateTy> class CFGUnawareAnalysis { + BinaryContext &BC; + BinaryFunction &BF; + MCPlusBuilder::AllocatorIdTy AllocId; + unsigned StateAnnotationIndex; + + void cleanStateAnnotations() { + for (auto &I : BF.instrs()) + BC.MIB->removeAnnotation(I.second, StateAnnotationIndex); + } + +protected: + CFGUnawareAnalysis(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId, + StringRef AnnotationName) + : BC(BF.getBinaryContext()), BF(BF), AllocId(AllocId) { + StateAnnotationIndex = BC.MIB->getOrCreateAnnotationIndex(AnnotationName); + } + + void setState(MCInst &Inst, const StateTy &S) { + // Check if we need to remove an old annotation (this is the case if + // this is the second, detailed run of the analysis). + if (BC.MIB->hasAnnotation(Inst, StateAnnotationIndex)) + BC.MIB->removeAnnotation(Inst, StateAnnotationIndex); + // Attach the state. + BC.MIB->addAnnotation(Inst, StateAnnotationIndex, S, AllocId); + } + + const StateTy &getState(const MCInst &Inst) const { + return BC.MIB->getAnnotationAs<StateTy>(Inst, StateAnnotationIndex); + } + + virtual ~CFGUnawareAnalysis() { cleanStateAnnotations(); } +}; + // A simplified implementation of DataflowSrcSafetyAnalysis for functions // lacking CFG information. // @@ -646,15 +684,10 @@ protected: // of instructions without labels in between. 
These sequences can be processed // the same way basic blocks are processed by data-flow analysis, assuming // pessimistically that all registers are unsafe at the start of each sequence. -class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis { +class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis, + public CFGUnawareAnalysis<SrcState> { + using SrcSafetyAnalysis::BC; BinaryFunction &BF; - MCPlusBuilder::AllocatorIdTy AllocId; - unsigned StateAnnotationIndex; - - void cleanStateAnnotations() { - for (auto &I : BF.instrs()) - BC.MIB->removeAnnotation(I.second, StateAnnotationIndex); - } /// Creates a state with all registers marked unsafe (not to be confused /// with empty state). @@ -666,15 +699,16 @@ public: CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId, ArrayRef<MCPhysReg> RegsToTrackInstsFor) - : SrcSafetyAnalysis(BF, RegsToTrackInstsFor), BF(BF), AllocId(AllocId) { - StateAnnotationIndex = - BC.MIB->getOrCreateAnnotationIndex("CFGUnawareSrcSafetyAnalysis"); + : SrcSafetyAnalysis(BF, RegsToTrackInstsFor), + CFGUnawareAnalysis(BF, AllocId, "CFGUnawareSrcSafetyAnalysis"), BF(BF) { } void run() override { SrcState S = createEntryState(); for (auto &I : BF.instrs()) { MCInst &Inst = I.second; + if (BC.MIB->isCFI(Inst)) + continue; // If there is a label before this instruction, it is possible that it // can be jumped-to, thus conservatively resetting S. As an exception, @@ -687,12 +721,8 @@ public: S = createUnsafeState(); } - // Check if we need to remove an old annotation (this is the case if - // this is the second, detailed, run of the analysis). - if (BC.MIB->hasAnnotation(Inst, StateAnnotationIndex)) - BC.MIB->removeAnnotation(Inst, StateAnnotationIndex); // Attach the state *before* this instruction executes. - BC.MIB->addAnnotation(Inst, StateAnnotationIndex, S, AllocId); + setState(Inst, S); // Compute the state after this instruction executes. 
S = computeNext(Inst, S); @@ -700,10 +730,8 @@ public: } const SrcState &getStateBefore(const MCInst &Inst) const override { - return BC.MIB->getAnnotationAs<SrcState>(Inst, StateAnnotationIndex); + return getState(Inst); } - - ~CFGUnawareSrcSafetyAnalysis() { cleanStateAnnotations(); } }; std::shared_ptr<SrcSafetyAnalysis> @@ -717,6 +745,483 @@ SrcSafetyAnalysis::create(BinaryFunction &BF, RegsToTrackInstsFor); } +/// A state representing which registers are safe to be used as the destination +/// operand of an authentication instruction. +/// +/// Similar to SrcState, it is the responsibility of the analysis to take +/// register aliasing into account. +/// +/// Depending on the implementation (such as whether FEAT_FPAC is implemented +/// by an AArch64 CPU or not), it may be possible that an authentication +/// instruction returns an invalid pointer on failure instead of terminating +/// the program immediately (assuming the program will crash as soon as that +/// pointer is dereferenced). Since few bits are usually allocated for the PAC +/// field (such as less than 16 bits on a typical AArch64 system), an attacker +/// can try every possible signature and guess the correct one if there is a +/// gadget that tells whether the particular pointer has a correct signature +/// (a so called "authentication oracle"). For that reason, it should be +/// impossible for an attacker to test if a pointer is correctly signed - +/// either the program should be terminated on authentication failure or +/// the result of authentication should not be accessible to an attacker. +/// +/// Considering the instructions in forward order as they are executed, a +/// restricted set of operations can be allowed on any register containing a +/// value derived from the result of an authentication instruction until that +/// value is checked not to contain the result of a failed authentication. 
+/// In DstSafetyAnalysis, these rules are adapted, so that the safety property +/// for a register is computed by iterating the instructions in backward order. +/// Then the resulting properties are used at authentication instruction sites +/// to check output registers and report the particular instruction if it writes +/// to an unsafe register. +/// +/// Another approach would be to simulate the above rules as-is, iterating over +/// the instructions in forward direction. To make it possible to report the +/// particular instructions as oracles, this would probably require tracking +/// references to these instructions for each register currently containing +/// sensitive data. +/// +/// In DstSafetyAnalysis, the source register Xn of an instruction Inst is safe +/// if at least one of the following is true: +/// * Inst checks if Xn contains the result of a successful authentication and +/// terminates the program on failure. Note that Inst can either naturally +/// dereference Xn (load, branch, return, etc. instructions) or be the first +/// instruction of an explicit checking sequence. +/// * Inst performs safe address arithmetic AND both source and result +/// registers, as well as any temporary registers, must be safe after +/// execution of Inst (temporaries are not used on AArch64 and thus not +/// currently supported/allowed). +/// See MCPlusBuilder::analyzeAddressArithmeticsForPtrAuth for the details. +/// * Inst fully overwrites Xn with a constant. +struct DstState { + /// The set of registers whose values cannot be inspected by an attacker in + /// a way usable as an authentication oracle. The results of authentication + /// instructions should only be written to such registers. + BitVector CannotEscapeUnchecked; + + /// A vector of sets, only used on the second analysis run. + /// Each element in this vector represents one of the tracked registers. 
+ /// For each such register we track the set of first instructions that leak + /// the authenticated pointer before it was checked. This is intended to + /// provide clues on which instruction made the particular register unsafe. + /// + /// Please note that the mapping from MCPhysReg values to indexes in this + /// vector is provided by RegsToTrackInstsFor field of DstSafetyAnalysis. + std::vector<SetOfRelatedInsts> FirstInstLeakingReg; + + /// Constructs an empty state. + DstState() {} + + DstState(unsigned NumRegs, unsigned NumRegsToTrack) + : CannotEscapeUnchecked(NumRegs), FirstInstLeakingReg(NumRegsToTrack) {} + + DstState &merge(const DstState &StateIn) { + if (StateIn.empty()) + return *this; + if (empty()) + return (*this = StateIn); + + CannotEscapeUnchecked &= StateIn.CannotEscapeUnchecked; + for (unsigned I = 0; I < FirstInstLeakingReg.size(); ++I) + for (const MCInst *J : StateIn.FirstInstLeakingReg[I]) + FirstInstLeakingReg[I].insert(J); + return *this; + } + + /// Returns true if this object does not store state of any registers - + /// neither safe, nor unsafe ones. 
+ bool empty() const { return CannotEscapeUnchecked.empty(); } + + bool operator==(const DstState &RHS) const { + return CannotEscapeUnchecked == RHS.CannotEscapeUnchecked && + FirstInstLeakingReg == RHS.FirstInstLeakingReg; + } + bool operator!=(const DstState &RHS) const { return !((*this) == RHS); } +}; + +static raw_ostream &operator<<(raw_ostream &OS, const DstState &S) { + OS << "dst-state<"; + if (S.empty()) { + OS << "empty"; + } else { + OS << "CannotEscapeUnchecked: " << S.CannotEscapeUnchecked << ", "; + printInstsShort(OS, S.FirstInstLeakingReg); + } + OS << ">"; + return OS; +} + +class DstStatePrinter { +public: + void print(raw_ostream &OS, const DstState &S) const; + explicit DstStatePrinter(const BinaryContext &BC) : BC(BC) {} + +private: + const BinaryContext &BC; +}; + +void DstStatePrinter::print(raw_ostream &OS, const DstState &S) const { + RegStatePrinter RegStatePrinter(BC); + OS << "dst-state<"; + if (S.empty()) { + assert(S.CannotEscapeUnchecked.empty()); + assert(S.FirstInstLeakingReg.empty()); + OS << "empty"; + } else { + OS << "CannotEscapeUnchecked: "; + RegStatePrinter.print(OS, S.CannotEscapeUnchecked); + OS << ", "; + printInstsShort(OS, S.FirstInstLeakingReg); + } + OS << ">"; +} + +/// Computes which registers are safe to be written to by auth instructions. +/// +/// This is the base class for two implementations: a dataflow-based analysis +/// which is intended to be used for most functions and a simplified CFG-unaware +/// version for functions without reconstructed CFG. 
+class DstSafetyAnalysis { +public: + DstSafetyAnalysis(BinaryFunction &BF, ArrayRef<MCPhysReg> RegsToTrackInstsFor) + : BC(BF.getBinaryContext()), NumRegs(BC.MRI->getNumRegs()), + RegsToTrackInstsFor(RegsToTrackInstsFor) {} + + virtual ~DstSafetyAnalysis() {} + + static std::shared_ptr<DstSafetyAnalysis> + create(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId, + ArrayRef<MCPhysReg> RegsToTrackInstsFor); + + virtual void run() = 0; + virtual const DstState &getStateAfter(const MCInst &Inst) const = 0; + +protected: + BinaryContext &BC; + const unsigned NumRegs; + + const TrackedRegisters RegsToTrackInstsFor; + + /// Stores information about the detected instruction sequences emitted to + /// check an authenticated pointer. Specifically, if such sequence is detected + /// in a basic block, it maps the first instruction of that sequence to the + /// register being checked. + /// + /// As the detection of such sequences requires iterating over the adjacent + /// instructions, it should be done before calling computeNext(), which + /// operates on separate instructions. + DenseMap<const MCInst *, MCPhysReg> RegCheckedAt; + + SetOfRelatedInsts &firstLeakingInsts(DstState &S, MCPhysReg Reg) const { + unsigned Index = RegsToTrackInstsFor.getIndex(Reg); + return S.FirstInstLeakingReg[Index]; + } + const SetOfRelatedInsts &firstLeakingInsts(const DstState &S, + MCPhysReg Reg) const { + unsigned Index = RegsToTrackInstsFor.getIndex(Reg); + return S.FirstInstLeakingReg[Index]; + } + + /// Creates a state with all registers marked unsafe (not to be confused + /// with empty state). + DstState createUnsafeState() { + return DstState(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters()); + } + + /// Returns the set of registers that can be leaked by this instruction. + /// A register is considered leaked if it has any intersection with any + /// register read by Inst. 
This is similar to how the set of clobbered + /// registers is computed, but taking input operands instead of outputs. + BitVector getLeakedRegs(const MCInst &Inst) const { + BitVector Leaked(NumRegs); + + // Assume a call can read all registers. + if (BC.MIB->isCall(Inst)) { + Leaked.set(); + return Leaked; + } + + // Compute the set of registers overlapping with any register used by + // this instruction. + + const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode()); + + for (MCPhysReg Reg : Desc.implicit_uses()) + Leaked |= BC.MIB->getAliases(Reg, /*OnlySmaller=*/false); + + for (const MCOperand &Op : BC.MIB->useOperands(Inst)) { + if (Op.isReg()) + Leaked |= BC.MIB->getAliases(Op.getReg(), /*OnlySmaller=*/false); + } + + return Leaked; + } + + SmallVector<MCPhysReg> getRegsMadeProtected(const MCInst &Inst, + const BitVector &LeakedRegs, + const DstState &Cur) const { + SmallVector<MCPhysReg> Regs; + + // A pointer can be checked, or + if (auto CheckedReg = + BC.MIB->getAuthCheckedReg(Inst, /*MayOverwrite=*/true)) + Regs.push_back(*CheckedReg); + if (RegCheckedAt.contains(&Inst)) + Regs.push_back(RegCheckedAt.at(&Inst)); + + // ... it can be used as a branch target, or + if (BC.MIB->isIndirectBranch(Inst) || BC.MIB->isIndirectCall(Inst)) { + bool IsAuthenticated; + MCPhysReg BranchDestReg = + BC.MIB->getRegUsedAsIndirectBranchDest(Inst, IsAuthenticated); + assert(BranchDestReg != BC.MIB->getNoRegister()); + if (!IsAuthenticated) + Regs.push_back(BranchDestReg); + } + + // ... it can be used as a return target, or + if (BC.MIB->isReturn(Inst)) { + bool IsAuthenticated = false; + std::optional<MCPhysReg> RetReg = + BC.MIB->getRegUsedAsRetDest(Inst, IsAuthenticated); + if (RetReg && !IsAuthenticated) + Regs.push_back(*RetReg); + } + + // ... 
an address can be updated in a safe manner, or + if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst)) { + MCPhysReg DstReg, SrcReg; + std::tie(DstReg, SrcReg) = *DstAndSrc; + // Note that *all* registers containing the derived values must be safe, + // both source and destination ones. No temporaries are supported at now. + if (Cur.CannotEscapeUnchecked[SrcReg] && + Cur.CannotEscapeUnchecked[DstReg]) + Regs.push_back(SrcReg); + } + + // ... the register can be overwritten in whole with a constant: for that + // purpose, look for the instructions with no register inputs (neither + // explicit nor implicit ones) and no side effects (to rule out reading + // not modelled locations). + const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode()); + bool HasExplicitSrcRegs = llvm::any_of(BC.MIB->useOperands(Inst), + [](auto Op) { return Op.isReg(); }); + if (!Desc.hasUnmodeledSideEffects() && !HasExplicitSrcRegs && + Desc.implicit_uses().empty()) { + for (const MCOperand &Def : BC.MIB->defOperands(Inst)) + Regs.push_back(Def.getReg()); + } + + return Regs; + } + + DstState computeNext(const MCInst &Point, const DstState &Cur) { + if (BC.MIB->isCFI(Point)) + return Cur; + + DstStatePrinter P(BC); + LLVM_DEBUG({ + dbgs() << " DstSafetyAnalysis::ComputeNext("; + BC.InstPrinter->printInst(&Point, 0, "", *BC.STI, dbgs()); + dbgs() << ", "; + P.print(dbgs(), Cur); + dbgs() << ")\n"; + }); + + // If this instruction is reachable by the analysis, a non-empty state will + // be propagated to it sooner or later. Until then, skip computeNext(). + if (Cur.empty()) { + LLVM_DEBUG( + { dbgs() << "Skipping computeNext(Point, Cur) as Cur is empty.\n"; }); + return DstState(); + } + + // First, compute various properties of the instruction, taking the state + // after its execution into account, if necessary. 
+ + BitVector LeakedRegs = getLeakedRegs(Point); + SmallVector<MCPhysReg> NewProtectedRegs = + getRegsMadeProtected(Point, LeakedRegs, Cur); + + // Then, compute the state before this instruction is executed. + DstState Next = Cur; + + Next.CannotEscapeUnchecked.reset(LeakedRegs); + for (MCPhysReg Reg : RegsToTrackInstsFor.getRegisters()) { + if (LeakedRegs[Reg]) + firstLeakingInsts(Next, Reg) = {&Point}; + } + + BitVector NewProtectedSubregs(NumRegs); + for (MCPhysReg Reg : NewProtectedRegs) + NewProtectedSubregs |= BC.MIB->getAliases(Reg, /*OnlySmaller=*/true); + Next.CannotEscapeUnchecked |= NewProtectedSubregs; + for (MCPhysReg Reg : RegsToTrackInstsFor.getRegisters()) { + if (NewProtectedSubregs[Reg]) + firstLeakingInsts(Next, Reg).clear(); + } + + LLVM_DEBUG({ + dbgs() << " .. result: ("; + P.print(dbgs(), Next); + dbgs() << ")\n"; + }); + + return Next; + } + +public: + std::vector<MCInstReference> getLeakingInsts(const MCInst &Inst, + BinaryFunction &BF, + MCPhysReg LeakedReg) const { + const DstState &S = getStateAfter(Inst); + + std::vector<MCInstReference> Result; + for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg)) { + MCInstReference Ref = MCInstReference::get(Inst, BF); + assert(Ref && "Expected Inst to be found"); + Result.push_back(Ref); + } + return Result; + } +}; + +class DataflowDstSafetyAnalysis + : public DstSafetyAnalysis, + public DataflowAnalysis<DataflowDstSafetyAnalysis, DstState, + /*Backward=*/true, DstStatePrinter> { + using DFParent = DataflowAnalysis<DataflowDstSafetyAnalysis, DstState, true, + DstStatePrinter>; + friend DFParent; + + using DstSafetyAnalysis::BC; + using DstSafetyAnalysis::computeNext; + +public: + DataflowDstSafetyAnalysis(BinaryFunction &BF, + MCPlusBuilder::AllocatorIdTy AllocId, + ArrayRef<MCPhysReg> RegsToTrackInstsFor) + : DstSafetyAnalysis(BF, RegsToTrackInstsFor), DFParent(BF, AllocId) {} + + const DstState &getStateAfter(const MCInst &Inst) const override { + // The dataflow analysis base class 
iterates backwards over the + // instructions, thus "after" vs. "before" difference. + return DFParent::getStateBefore(Inst).get(); + } + + void run() override { + for (BinaryBasicBlock &BB : Func) { + if (auto CheckerInfo = BC.MIB->getAuthCheckedReg(BB)) { + LLVM_DEBUG({ + dbgs() << "Found pointer checking sequence in " << BB.getName() + << ":\n"; + traceReg(BC, "Checked register", CheckerInfo->first); + traceInst(BC, "First instruction", *CheckerInfo->second); + }); + RegCheckedAt[CheckerInfo->second] = CheckerInfo->first; + } + } + DFParent::run(); + } + +protected: + void preflight() {} + + DstState getStartingStateAtBB(const BinaryBasicBlock &BB) { + // In general, the initial state should be empty, not everything-is-unsafe, + // to give a chance for some meaningful state to be propagated to BB from + // an indirectly reachable "exit basic block" ending with a return or tail + // call instruction. + // + // A basic block without any successors, on the other hand, can be + // pessimistically initialized to everything-is-unsafe: this will naturally + // handle both return and tail call instructions and is harmless for + // internal indirect branch instructions (such as computed gotos). 
+ if (BB.succ_empty()) + return createUnsafeState(); + + return DstState(); + } + + DstState getStartingStateAtPoint(const MCInst &Point) { return DstState(); } + + void doConfluence(DstState &StateOut, const DstState &StateIn) { + DstStatePrinter P(BC); + LLVM_DEBUG({ + dbgs() << " DataflowDstSafetyAnalysis::Confluence(\n"; + dbgs() << " State 1: "; + P.print(dbgs(), StateOut); + dbgs() << "\n"; + dbgs() << " State 2: "; + P.print(dbgs(), StateIn); + dbgs() << ")\n"; + }); + + StateOut.merge(StateIn); + + LLVM_DEBUG({ + dbgs() << " merged state: "; + P.print(dbgs(), StateOut); + dbgs() << "\n"; + }); + } + + StringRef getAnnotationName() const { return "DataflowDstSafetyAnalysis"; } +}; + +class CFGUnawareDstSafetyAnalysis : public DstSafetyAnalysis, + public CFGUnawareAnalysis<DstState> { + using DstSafetyAnalysis::BC; + BinaryFunction &BF; + +public: + CFGUnawareDstSafetyAnalysis(BinaryFunction &BF, + MCPlusBuilder::AllocatorIdTy AllocId, + ArrayRef<MCPhysReg> RegsToTrackInstsFor) + : DstSafetyAnalysis(BF, RegsToTrackInstsFor), + CFGUnawareAnalysis(BF, AllocId, "CFGUnawareDstSafetyAnalysis"), BF(BF) { + } + + void run() override { + DstState S = createUnsafeState(); + for (auto &I : llvm::reverse(BF.instrs())) { + MCInst &Inst = I.second; + if (BC.MIB->isCFI(Inst)) + continue; + + // If Inst can change the control flow, we cannot be sure that the next + // instruction (to be executed in analyzed program) is the one processed + // on the previous iteration, thus pessimistically reset S before + // starting to analyze Inst. + if (BC.MIB->isCall(Inst) || BC.MIB->isBranch(Inst) || + BC.MIB->isReturn(Inst)) { + LLVM_DEBUG({ traceInst(BC, "Control flow instruction", Inst); }); + S = createUnsafeState(); + } + + // Attach the state *after* this instruction executes. + setState(Inst, S); + + // Compute the next state. 
+ S = computeNext(Inst, S); + } + } + + const DstState &getStateAfter(const MCInst &Inst) const override { + return getState(Inst); + } +}; + +std::shared_ptr<DstSafetyAnalysis> +DstSafetyAnalysis::create(BinaryFunction &BF, + MCPlusBuilder::AllocatorIdTy AllocId, + ArrayRef<MCPhysReg> RegsToTrackInstsFor) { + if (BF.hasCFG()) + return std::make_shared<DataflowDstSafetyAnalysis>(BF, AllocId, + RegsToTrackInstsFor); + return std::make_shared<CFGUnawareDstSafetyAnalysis>(BF, AllocId, + RegsToTrackInstsFor); +} + // This function could return PartialReport<T>, but currently T is always // MCPhysReg, even though it is an implementation detail. static PartialReport<MCPhysReg> make_generic_report(MCInstReference Location, @@ -808,6 +1313,37 @@ shouldReportSigningOracle(const BinaryContext &BC, const MCInstReference &Inst, return make_gadget_report(SigningOracleKind, Inst, *SignedReg); } +static std::optional<PartialReport<MCPhysReg>> +shouldReportAuthOracle(const BinaryContext &BC, const MCInstReference &Inst, + const DstState &S) { + static const GadgetKind AuthOracleKind("authentication oracle found"); + + bool IsChecked = false; + std::optional<MCPhysReg> AuthReg = + BC.MIB->getWrittenAuthenticatedReg(Inst, IsChecked); + if (!AuthReg || IsChecked) + return std::nullopt; + + LLVM_DEBUG({ + traceInst(BC, "Found auth inst", Inst); + traceReg(BC, "Authenticated reg", *AuthReg); + }); + + if (S.empty()) { + LLVM_DEBUG({ dbgs() << " DstState is empty!\n"; }); + return make_generic_report( + Inst, "Warning: no state computed for an authentication instruction " + "(possibly unreachable)"); + } + + LLVM_DEBUG( + { traceRegMask(BC, "safe output registers", S.CannotEscapeUnchecked); }); + if (S.CannotEscapeUnchecked[*AuthReg]) + return std::nullopt; + + return make_gadget_report(AuthOracleKind, Inst, *AuthReg); +} + template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) { if (BF.hasCFG()) { for (BinaryBasicBlock &BB : BF) @@ -840,6 +1376,9 @@ void 
FunctionAnalysisContext::findUnsafeUses( }); iterateOverInstrs(BF, [&](MCInstReference Inst) { + if (BC.MIB->isCFI(Inst)) + return; + const SrcState &S = Analysis->getStateBefore(Inst); // If non-empty state was never propagated from the entry basic block @@ -889,6 +1428,55 @@ void FunctionAnalysisContext::augmentUnsafeUseReports( } } +void FunctionAnalysisContext::findUnsafeDefs( + SmallVector<PartialReport<MCPhysReg>> &Reports) { + if (PacRetGadgetsOnly) + return; + + auto Analysis = DstSafetyAnalysis::create(BF, AllocatorId, {}); + LLVM_DEBUG({ dbgs() << "Running dst register safety analysis...\n"; }); + Analysis->run(); + LLVM_DEBUG({ + dbgs() << "After dst register safety analysis:\n"; + BF.dump(); + }); + + iterateOverInstrs(BF, [&](MCInstReference Inst) { + if (BC.MIB->isCFI(Inst)) + return; + + const DstState &S = Analysis->getStateAfter(Inst); + + if (auto Report = shouldReportAuthOracle(BC, Inst, S)) + Reports.push_back(*Report); + }); +} + +void FunctionAnalysisContext::augmentUnsafeDefReports( + ArrayRef<PartialReport<MCPhysReg>> Reports) { + SmallVector<MCPhysReg> RegsToTrack = collectRegsToTrack(Reports); + // Re-compute the analysis with register tracking. + auto Analysis = DstSafetyAnalysis::create(BF, AllocatorId, RegsToTrack); + LLVM_DEBUG( + { dbgs() << "\nRunning detailed dst register safety analysis...\n"; }); + Analysis->run(); + LLVM_DEBUG({ + dbgs() << "After detailed dst register safety analysis:\n"; + BF.dump(); + }); + + // Augment gadget reports. 
+ for (auto &Report : Reports) { + MCInstReference Location = Report.Issue->Location; + LLVM_DEBUG({ traceInst(BC, "Attaching leakage info to", Location); }); + assert(Report.RequestedDetails && + "Should be removed by handleSimpleReports"); + auto DetailedInfo = std::make_shared<LeakageInfo>( + Analysis->getLeakingInsts(Location, BF, *Report.RequestedDetails)); + Result.Diagnostics.emplace_back(Report.Issue, DetailedInfo); + } +} + void FunctionAnalysisContext::handleSimpleReports( SmallVector<PartialReport<MCPhysReg>> &Reports) { // Before re-running the detailed analysis, process the reports which do not @@ -912,6 +1500,12 @@ void FunctionAnalysisContext::run() { handleSimpleReports(UnsafeUses); if (!UnsafeUses.empty()) augmentUnsafeUseReports(UnsafeUses); + + SmallVector<PartialReport<MCPhysReg>> UnsafeDefs; + findUnsafeDefs(UnsafeDefs); + handleSimpleReports(UnsafeDefs); + if (!UnsafeDefs.empty()) + augmentUnsafeDefReports(UnsafeDefs); } void Analysis::runOnFunction(BinaryFunction &BF, @@ -1015,6 +1609,12 @@ void ClobberingInfo::print(raw_ostream &OS, printRelatedInstrs(OS, Location, ClobberingInstrs); } +void LeakageInfo::print(raw_ostream &OS, const MCInstReference Location) const { + OS << " The " << LeakingInstrs.size() + << " instructions that leak the affected registers are:\n"; + printRelatedInstrs(OS, Location, LeakingInstrs); +} + void GenericDiagnostic::generateReport(raw_ostream &OS, const BinaryContext &BC) const { printBasicInfo(OS, BC, Text); diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp index a253522e4fb1..7ad4e6a2e141 100644 --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -546,7 +546,7 @@ BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress, return Res; for (auto Iter = FromIter; Iter != ToIter;) { - const uint32_t Src = Iter->first; + const uint32_t Src = Iter->second >> 1; if (Iter->second & BRANCHENTRY) { ++Iter; 
continue; @@ -557,7 +557,7 @@ BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress, ++Iter; if (Iter->second & BRANCHENTRY) break; - Res.emplace_back(Src, Iter->first); + Res.emplace_back(Src, Iter->second >> 1); } return Res; diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 178c9d3a6373..5c8af3710720 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -49,6 +49,9 @@ static cl::opt<bool> cl::desc("aggregate basic samples (without LBR info)"), cl::cat(AggregatorCategory)); +cl::opt<bool> ArmSPE("spe", cl::desc("Enable Arm SPE mode."), + cl::cat(AggregatorCategory)); + static cl::opt<std::string> ITraceAggregation("itrace", cl::desc("Generate LBR info with perf itrace argument"), @@ -181,11 +184,21 @@ void DataAggregator::start() { findPerfExecutable(); + if (opts::ArmSPE) { + // pid from_ip to_ip flags + // where flags could be: + // P/M: whether branch was Predicted or Mispredicted. + // N: optionally appears when the branch was Not-Taken (ie fall-through) + // 12345 0x123/0x456/PN/-/-/8/RET/- + opts::ITraceAggregation = "bl"; + opts::ParseMemProfile = true; + opts::BasicAggregation = false; + } + if (opts::BasicAggregation) { - launchPerfProcess("events without LBR", - MainEventsPPI, + launchPerfProcess("events without LBR", MainEventsPPI, "script -F pid,event,ip", - /*Wait = */false); + /*Wait = */ false); } else if (!opts::ITraceAggregation.empty()) { // Disable parsing memory profile from trace data, unless requested by user. if (!opts::ParseMemProfile.getNumOccurrences()) @@ -524,8 +537,7 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { heatmap: // Sort parsed traces for faster processing. 
- if (!opts::BasicAggregation) - llvm::sort(Traces, llvm::less_first()); + llvm::sort(Traces, llvm::less_first()); if (!opts::HeatmapMode) return Error::success(); @@ -823,13 +835,8 @@ bool DataAggregator::doTrace(const Trace &Trace, uint64_t Count, LLVM_DEBUG(dbgs() << "Processing " << FTs->size() << " fallthroughs for " << FromFunc->getPrintName() << ":" << Trace << '\n'); - for (auto [From, To] : *FTs) { - if (BAT) { - From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true); - To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false); - } + for (const auto &[From, To] : *FTs) doIntraBranch(*ParentFunc, From, To, Count, false); - } return true; } @@ -870,13 +877,9 @@ DataAggregator::getFallthroughsInTrace(BinaryFunction &BF, const Trace &Trace, // Adjust FromBB if the first LBR is a return from the last instruction in // the previous block (that instruction should be a call). - if (IsReturn) { - if (From) - FromBB = BF.getBasicBlockContainingOffset(From - 1); - else - LLVM_DEBUG(dbgs() << "return to the function start: " << Trace << '\n'); - } else if (Trace.Branch == Trace::EXTERNAL && From == FromBB->getOffset() && - !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { + if (Trace.Branch != Trace::FT_ONLY && !BF.containsAddress(Trace.Branch) && + From == FromBB->getOffset() && + (IsReturn ? From : !(FromBB->isEntryPoint() || FromBB->isLandingPad()))) { const BinaryBasicBlock *PrevBB = BF.getLayout().getBlock(FromBB->getIndex() - 1); if (PrevBB->getSuccessor(FromBB->getLabel())) { @@ -994,9 +997,22 @@ ErrorOr<DataAggregator::LBREntry> DataAggregator::parseLBREntry() { if (std::error_code EC = MispredStrRes.getError()) return EC; StringRef MispredStr = MispredStrRes.get(); - if (MispredStr.size() != 1 || - (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) { - reportError("expected single char for mispred bit"); + // SPE brstack mispredicted flags might be up to two characters long: + // 'PN' or 'MN'. 
Where 'N' optionally appears. + bool ValidStrSize = opts::ArmSPE + ? MispredStr.size() >= 1 && MispredStr.size() <= 2 + : MispredStr.size() == 1; + bool SpeTakenBitErr = + (opts::ArmSPE && MispredStr.size() == 2 && MispredStr[1] != 'N'); + bool PredictionBitErr = + !ValidStrSize || + (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-'); + if (SpeTakenBitErr) + reportError("expected 'N' as SPE prediction bit for a not-taken branch"); + if (PredictionBitErr) + reportError("expected 'P', 'M' or '-' char as a prediction bit"); + + if (SpeTakenBitErr || PredictionBitErr) { Diag << "Found: " << MispredStr << "\n"; return make_error_code(llvm::errc::io_error); } @@ -1202,12 +1218,14 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() { std::error_code DataAggregator::parseAggregatedLBREntry() { enum AggregatedLBREntry : char { INVALID = 0, - EVENT_NAME, // E - TRACE, // T - SAMPLE, // S - BRANCH, // B - FT, // F - FT_EXTERNAL_ORIGIN // f + EVENT_NAME, // E + TRACE, // T + RETURN, // R + SAMPLE, // S + BRANCH, // B + FT, // F + FT_EXTERNAL_ORIGIN, // f + FT_EXTERNAL_RETURN // r } Type = INVALID; /// The number of fields to parse, set based on \p Type. 
@@ -1235,20 +1253,22 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { Type = StringSwitch<AggregatedLBREntry>(Str) .Case("T", TRACE) + .Case("R", RETURN) .Case("S", SAMPLE) .Case("E", EVENT_NAME) .Case("B", BRANCH) .Case("F", FT) .Case("f", FT_EXTERNAL_ORIGIN) + .Case("r", FT_EXTERNAL_RETURN) .Default(INVALID); if (Type == INVALID) { - reportError("expected T, S, E, B, F or f"); + reportError("expected T, R, S, E, B, F, f or r"); return make_error_code(llvm::errc::io_error); } using SSI = StringSwitch<int>; - AddrNum = SSI(Str).Case("T", 3).Case("S", 1).Case("E", 0).Default(2); + AddrNum = SSI(Str).Cases("T", "R", 3).Case("S", 1).Case("E", 0).Default(2); CounterNum = SSI(Str).Case("B", 2).Case("E", 0).Default(1); } @@ -1305,17 +1325,30 @@ std::error_code DataAggregator::parseAggregatedLBREntry() { if (ToFunc) ToFunc->setHasProfileAvailable(); - /// For legacy fall-through types, adjust locations to match Trace container. - if (Type == FT || Type == FT_EXTERNAL_ORIGIN) { + /// For fall-through types, adjust locations to match Trace container. + if (Type == FT || Type == FT_EXTERNAL_ORIGIN || Type == FT_EXTERNAL_RETURN) { Addr[2] = Location(Addr[1]->Offset); // Trace To Addr[1] = Location(Addr[0]->Offset); // Trace From - // Put a magic value into Trace Branch to differentiate from a full trace. - Addr[0] = Location(Type == FT ? Trace::FT_ONLY : Trace::FT_EXTERNAL_ORIGIN); + // Put a magic value into Trace Branch to differentiate from a full trace: + if (Type == FT) + Addr[0] = Location(Trace::FT_ONLY); + else if (Type == FT_EXTERNAL_ORIGIN) + Addr[0] = Location(Trace::FT_EXTERNAL_ORIGIN); + else if (Type == FT_EXTERNAL_RETURN) + Addr[0] = Location(Trace::FT_EXTERNAL_RETURN); + else + llvm_unreachable("Unexpected fall-through type"); } - /// For legacy branch type, mark Trace To to differentite from a full trace. - if (Type == BRANCH) { + /// For branch type, mark Trace To to differentiate from a full trace. 
+ if (Type == BRANCH) Addr[2] = Location(Trace::BR_ONLY); + + if (Type == RETURN) { + if (!Addr[0]->Offset) + Addr[0]->Offset = Trace::FT_EXTERNAL_RETURN; + else + Returns.emplace(Addr[0]->Offset); } /// Record a trace. @@ -1497,7 +1530,9 @@ void DataAggregator::printBranchStacksDiagnostics( } std::error_code DataAggregator::parseBranchEvents() { - outs() << "PERF2BOLT: parse branch events...\n"; + std::string BranchEventTypeStr = + opts::ArmSPE ? "SPE branch events in LBR-format" : "branch events"; + outs() << "PERF2BOLT: parse " << BranchEventTypeStr << "...\n"; NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); @@ -1525,7 +1560,8 @@ std::error_code DataAggregator::parseBranchEvents() { } NumEntries += Sample.LBR.size(); - if (BAT && Sample.LBR.size() == 32 && !NeedsSkylakeFix) { + if (this->BC->isX86() && BAT && Sample.LBR.size() == 32 && + !NeedsSkylakeFix) { errs() << "PERF2BOLT-WARNING: using Intel Skylake bug workaround\n"; NeedsSkylakeFix = true; } @@ -1548,10 +1584,18 @@ std::error_code DataAggregator::parseBranchEvents() { if (NumSamples && NumSamplesNoLBR == NumSamples) { // Note: we don't know if perf2bolt is being used to parse memory samples // at this point. In this case, it is OK to parse zero LBRs. - errs() << "PERF2BOLT-WARNING: all recorded samples for this binary lack " - "LBR. Record profile with perf record -j any or run perf2bolt " - "in no-LBR mode with -nl (the performance improvement in -nl " - "mode may be limited)\n"; + if (!opts::ArmSPE) + errs() + << "PERF2BOLT-WARNING: all recorded samples for this binary lack " + "LBR. Record profile with perf record -j any or run perf2bolt " + "in no-LBR mode with -nl (the performance improvement in -nl " + "mode may be limited)\n"; + else + errs() + << "PERF2BOLT-WARNING: All recorded samples for this binary lack " + "SPE brstack entries. Make sure you are running Linux perf 6.14 " + "or later, otherwise you get zero samples. 
Record the profile " + "with: perf record -e 'arm_spe_0/branch_filter=1/'."; } else { printBranchStacksDiagnostics(NumTotalSamples - NumSamples); } @@ -1565,6 +1609,7 @@ void DataAggregator::processBranchEvents() { NamedRegionTimer T("processBranch", "Processing branch events", TimerGroupName, TimerGroupDesc, opts::TimeAggregator); + Returns.emplace(Trace::FT_EXTERNAL_RETURN); for (const auto &[Trace, Info] : Traces) { bool IsReturn = checkReturn(Trace.Branch); // Ignore returns. diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp index 5a5e044184d0..174721a3a053 100644 --- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp +++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp @@ -432,25 +432,33 @@ public: }; Error LinuxKernelRewriter::detectLinuxKernelVersion() { - if (BinaryData *BD = BC.getBinaryDataByName("linux_banner")) { - const BinarySection &Section = BD->getSection(); - const std::string S = - Section.getContents().substr(BD->getOffset(), BD->getSize()).str(); - - const std::regex Re(R"---(Linux version ((\d+)\.(\d+)(\.(\d+))?))---"); - std::smatch Match; - if (std::regex_search(S, Match, Re)) { - const unsigned Major = std::stoi(Match[2].str()); - const unsigned Minor = std::stoi(Match[3].str()); - const unsigned Rev = Match[5].matched ? std::stoi(Match[5].str()) : 0; - LinuxKernelVersion = LKVersion(Major, Minor, Rev); - BC.outs() << "BOLT-INFO: Linux kernel version is " << Match[1].str() - << "\n"; - return Error::success(); - } + // Check for global and local linux_banner symbol. 
+ BinaryData *BD = BC.getBinaryDataByName("linux_banner"); + if (!BD) + BD = BC.getBinaryDataByName("linux_banner/1"); + + if (!BD) + return createStringError(errc::executable_format_error, + "unable to locate linux_banner"); + + const BinarySection &Section = BD->getSection(); + const std::string S = + Section.getContents().substr(BD->getOffset(), BD->getSize()).str(); + + const std::regex Re(R"---(Linux version ((\d+)\.(\d+)(\.(\d+))?))---"); + std::smatch Match; + if (std::regex_search(S, Match, Re)) { + const unsigned Major = std::stoi(Match[2].str()); + const unsigned Minor = std::stoi(Match[3].str()); + const unsigned Rev = Match[5].matched ? std::stoi(Match[5].str()) : 0; + LinuxKernelVersion = LKVersion(Major, Minor, Rev); + BC.outs() << "BOLT-INFO: Linux kernel version is " << Match[1].str() + << "\n"; + return Error::success(); } + return createStringError(errc::executable_format_error, - "Linux kernel version is unknown"); + "Linux kernel version is unknown: " + S); } void LinuxKernelRewriter::processLKSections() { diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index e1aa00a3d749..93bd93b6cb98 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -780,14 +780,6 @@ void RewriteInstance::discoverFileObjects() { // For local symbols we want to keep track of associated FILE symbol name for // disambiguation by combined name. 
- StringRef FileSymbolName; - bool SeenFileName = false; - struct SymbolRefHash { - size_t operator()(SymbolRef const &S) const { - return std::hash<decltype(DataRefImpl::p)>{}(S.getRawDataRefImpl().p); - } - }; - std::unordered_map<SymbolRef, StringRef, SymbolRefHash> SymbolToFileName; for (const ELFSymbolRef &Symbol : InputFile->symbols()) { Expected<StringRef> NameOrError = Symbol.getName(); if (NameOrError && NameOrError->starts_with("__asan_init")) { @@ -806,21 +798,8 @@ void RewriteInstance::discoverFileObjects() { if (cantFail(Symbol.getFlags()) & SymbolRef::SF_Undefined) continue; - if (cantFail(Symbol.getType()) == SymbolRef::ST_File) { + if (cantFail(Symbol.getType()) == SymbolRef::ST_File) FileSymbols.emplace_back(Symbol); - StringRef Name = - cantFail(std::move(NameOrError), "cannot get symbol name for file"); - // Ignore Clang LTO artificial FILE symbol as it is not always generated, - // and this uncertainty is causing havoc in function name matching. - if (Name == "ld-temp.o") - continue; - FileSymbolName = Name; - SeenFileName = true; - continue; - } - if (!FileSymbolName.empty() && - !(cantFail(Symbol.getFlags()) & SymbolRef::SF_Global)) - SymbolToFileName[Symbol] = FileSymbolName; } // Sort symbols in the file by value. Ignore symbols from non-allocatable @@ -1028,14 +1007,14 @@ void RewriteInstance::discoverFileObjects() { // The <id> field is used for disambiguation of local symbols since there // could be identical function names coming from identical file names // (e.g. from different directories). 
- std::string AltPrefix; - auto SFI = SymbolToFileName.find(Symbol); - if (SymbolType == SymbolRef::ST_Function && SFI != SymbolToFileName.end()) - AltPrefix = Name + "/" + std::string(SFI->second); + auto SFI = llvm::upper_bound(FileSymbols, ELFSymbolRef(Symbol)); + if (SymbolType == SymbolRef::ST_Function && SFI != FileSymbols.begin()) { + StringRef FileSymbolName = cantFail(SFI[-1].getName()); + if (!FileSymbolName.empty()) + AlternativeName = NR.uniquify(Name + "/" + FileSymbolName.str()); + } UniqueName = NR.uniquify(Name); - if (!AltPrefix.empty()) - AlternativeName = NR.uniquify(AltPrefix); } uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize(); @@ -1294,7 +1273,7 @@ void RewriteInstance::discoverFileObjects() { FDE->getAddressRange()); } - BC->setHasSymbolsWithFileName(SeenFileName); + BC->setHasSymbolsWithFileName(FileSymbols.size()); // Now that all the functions were created - adjust their boundaries. adjustFunctionBoundaries(); @@ -1567,6 +1546,11 @@ void RewriteInstance::registerFragments() { uint64_t ParentAddress{0}; + // Check if containing FILE symbol is BOLT emitted synthetic symbol marking + // local fragments of global parents. + if (cantFail(FSI[-1].getName()) == getBOLTFileSymbolName()) + goto registerParent; + // BOLT split fragment symbols are emitted just before the main function // symbol. 
for (ELFSymbolRef NextSymbol = Symbol; NextSymbol < StopSymbol; diff --git a/bolt/test/AArch64/r_aarch64_prelxx.s b/bolt/test/AArch64/r_aarch64_prelxx.s index 5cbe2c50b294..39f74301cedf 100644 --- a/bolt/test/AArch64/r_aarch64_prelxx.s +++ b/bolt/test/AArch64/r_aarch64_prelxx.s @@ -5,7 +5,7 @@ // REQUIRES: system-linux // RUN: %clang %cflags -nostartfiles -nostdlib %s -o %t.exe -mlittle-endian \ -// RUN: -Wl,-q -Wl,-z,max-page-size=4 +// RUN: -Wl,-q -Wl,-z,max-page-size=4 -Wl,--no-relax // RUN: llvm-readelf -Wa %t.exe | FileCheck %s -check-prefix=CHECKPREL // CHECKPREL: R_AARCH64_PREL16 {{.*}} .dummy + 0 @@ -36,9 +36,9 @@ .type _start, %function _start: adrp x0, datatable - add x0, x0, :lo12:datable + add x0, x0, :lo12:datatable mov x0, #0 - ret + ret .section .dummy, "a", @progbits dummy: diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s index c2ef024db947..8c05491e7bca 100644 --- a/bolt/test/X86/callcont-fallthru.s +++ b/bolt/test/X86/callcont-fallthru.s @@ -10,6 +10,10 @@ # RUN: link_fdata %s %t %t.pa-ret PREAGG-RET # Trace from an external location to a landing pad/entry point call continuation # RUN: link_fdata %s %t %t.pa-ext PREAGG-EXT +# Return trace to a landing pad/entry point call continuation +# RUN: link_fdata %s %t %t.pa-pret PREAGG-PRET +# External return to a landing pad/entry point call continuation +# RUN: link_fdata %s %t %t.pa-eret PREAGG-ERET # RUN-DISABLED: link_fdata %s %t %t.pa-plt PREAGG-PLT # RUN: llvm-strip --strip-unneeded %t -o %t.strip @@ -38,6 +42,21 @@ # RUN: llvm-bolt %t.strip --pa -p %t.pa-ext -o %t.out \ # RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-SKIP +## Check pre-aggregated return traces from external location attach call +## continuation fallthrough count to secondary entry point (unstripped) +# RUN: llvm-bolt %t --pa -p %t.pa-pret -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH +## Check pre-aggregated return traces from 
external location attach call +## continuation fallthrough count to landing pad (stripped, landing pad) +# RUN: llvm-bolt %t.strip --pa -p %t.pa-pret -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH + +## Same for external return type +# RUN: llvm-bolt %t --pa -p %t.pa-eret -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH +# RUN: llvm-bolt %t.strip --pa -p %t.pa-eret -o %t.out \ +# RUN: --print-cfg --print-only=main | FileCheck %s --check-prefix=CHECK-ATTACH + ## Check pre-aggregated traces don't report zero-sized PLT fall-through as ## invalid trace # RUN-DISABLED: llvm-bolt %t.strip --pa -p %t.pa-plt -o %t.out | FileCheck %s \ @@ -92,6 +111,10 @@ Ltmp4_br: # PREAGG-RET: T #Lfoo_ret# #Ltmp3# #Ltmp3_br# 1 ## Target is a secondary entry point (unstripped) or a landing pad (stripped) # PREAGG-EXT: T X:0 #Ltmp3# #Ltmp3_br# 1 +## Pre-aggregated return trace +# PREAGG-PRET: R X:0 #Ltmp3# #Ltmp3_br# 1 +## External return +# PREAGG-ERET: r #Ltmp3# #Ltmp3_br# 1 # CHECK-ATTACH: callq foo # CHECK-ATTACH-NEXT: count: 1 diff --git a/bolt/test/X86/linux-version.S b/bolt/test/X86/linux-version.S index e680d0d64a21..a3d7f365304a 100644 --- a/bolt/test/X86/linux-version.S +++ b/bolt/test/X86/linux-version.S @@ -17,6 +17,11 @@ # RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr # RUN: llvm-bolt %t.exe -o %t.out 2>&1 | FileCheck --check-prefix=CHECK-C %s +# RUN: %clang -DD -target x86_64-unknown-unknown \ +# RUN: %cflags -nostdlib %s -o %t.exe \ +# RUN: -Wl,--image-base=0xffffffff80000000,--no-dynamic-linker,--no-eh-frame-hdr +# RUN: llvm-bolt %t.exe -o %t.out 2>&1 | FileCheck --check-prefix=CHECK-D %s + .text .globl foo .type foo, %function @@ -46,6 +51,12 @@ linux_banner: #endif # CHECK-C: BOLT-INFO: Linux kernel version is 6.6 +#ifdef D + .hidden linux_banner + .string "Linux version 6.6.15.2-2-xxx\n" +#endif +# CHECK-D: BOLT-INFO: Linux kernel version is 6.6 + 
.size linux_banner, . - linux_banner ## Fake Linux Kernel sections. diff --git a/bolt/test/X86/register-fragments-bolt-symbols.s b/bolt/test/X86/register-fragments-bolt-symbols.s index c9f1859c4e8a..20e7345541d9 100644 --- a/bolt/test/X86/register-fragments-bolt-symbols.s +++ b/bolt/test/X86/register-fragments-bolt-symbols.s @@ -29,6 +29,7 @@ # RUN: link_fdata %s %t.bolt %t.preagg PREAGG # PREAGG: B X:0 #chain.cold.0# 1 0 +# PREAGG: B X:0 #dummy# 1 0 # RUN: perf2bolt %t.bolt -p %t.preagg --pa -o %t.bat.fdata -w %t.bat.yaml -v=1 \ # RUN: | FileCheck %s --check-prefix=CHECK-REGISTER # RUN: FileCheck --input-file %t.bat.fdata --check-prefix=CHECK-FDATA %s @@ -44,7 +45,13 @@ # CHECK-SYMS: l F .text.cold [[#]] chain.cold.0 # CHECK-SYMS: l F .text [[#]] chain # CHECK-SYMS: l df *ABS* [[#]] bolt-pseudo.o +# CHECK-SYMS: l F .text.cold [[#]] dummy.cold.0 +# CHECK-SYMS: l F .text.cold.1 [[#]] dummy.cold.1 +# CHECK-SYMS: l F .text.cold.2 [[#]] dummy.cold.2 +# CHECK-REGISTER: BOLT-INFO: marking dummy.cold.0/1(*2) as a fragment of dummy +# CHECK-REGISTER: BOLT-INFO: marking dummy.cold.1/1(*2) as a fragment of dummy +# CHECK-REGISTER: BOLT-INFO: marking dummy.cold.2/1(*2) as a fragment of dummy # CHECK-REGISTER: BOLT-INFO: marking chain.cold.0/1(*2) as a fragment of chain/2(*2) # CHECK-FDATA: 0 [unknown] 0 1 chain/chain.s/2 10 0 1 diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s new file mode 100644 index 000000000000..717bf40df3d0 --- /dev/null +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-authentication-oracles.s @@ -0,0 +1,812 @@ +// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe +// RUN: llvm-bolt-binary-analysis --scanners=pacret %t.exe 2>&1 | FileCheck -check-prefix=PACRET %s +// RUN: llvm-bolt-binary-analysis --scanners=pauth %t.exe 2>&1 | FileCheck %s + +// The detection of compiler-generated explicit pointer checks is tested in +// gs-pauth-address-checks.s, for 
that reason only test here "dummy-load" and +// "high-bits-notbi" checkers, as the shortest examples of checkers that are +// detected per-instruction and per-BB. + +// PACRET-NOT: authentication oracle found in function + + .text + + .type sym,@function +sym: + ret + .size sym, .-sym + + .globl callee + .type callee,@function +callee: + ret + .size callee, .-callee + + .globl good_ret + .type good_ret,@function +good_ret: +// CHECK-NOT: good_ret + autia x0, x1 + ret x0 + .size good_ret, .-good_ret + + .globl good_call + .type good_call,@function +good_call: +// CHECK-NOT: good_call + paciasp + stp x29, x30, [sp, #-16]! + mov x29, sp + + autia x0, x1 + blr x0 + + ldp x29, x30, [sp], #16 + autiasp + ret + .size good_call, .-good_call + + .globl good_branch + .type good_branch,@function +good_branch: +// CHECK-NOT: good_branch + autia x0, x1 + br x0 + .size good_branch, .-good_branch + + .globl good_load_other_reg + .type good_load_other_reg,@function +good_load_other_reg: +// CHECK-NOT: good_load_other_reg + autia x0, x1 + ldr x2, [x0] + ret + .size good_load_other_reg, .-good_load_other_reg + + .globl good_load_same_reg + .type good_load_same_reg,@function +good_load_same_reg: +// CHECK-NOT: good_load_same_reg + autia x0, x1 + ldr x0, [x0] + ret + .size good_load_same_reg, .-good_load_same_reg + + .globl good_explicit_check + .type good_explicit_check,@function +good_explicit_check: +// CHECK-NOT: good_explicit_check + autia x0, x1 + eor x16, x0, x0, lsl #1 + tbz x16, #62, 1f + brk 0x1234 +1: + ret + .size good_explicit_check, .-good_explicit_check + + .globl bad_unchecked + .type bad_unchecked,@function +bad_unchecked: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unchecked, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 0 instructions that leak the affected registers are: + autia x0, x1 + ret + .size bad_unchecked, .-bad_unchecked + + .globl bad_leaked_to_subroutine + 
.type bad_leaked_to_subroutine,@function +bad_leaked_to_subroutine: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_leaked_to_subroutine, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: bl callee +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: paciasp +// CHECK-NEXT: {{[0-9a-f]+}}: stp x29, x30, [sp, #-0x10]! +// CHECK-NEXT: {{[0-9a-f]+}}: mov x29, sp +// CHECK-NEXT: {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: {{[0-9a-f]+}}: bl callee +// CHECK-NEXT: {{[0-9a-f]+}}: ldr x2, [x0] +// CHECK-NEXT: {{[0-9a-f]+}}: ldp x29, x30, [sp], #0x10 +// CHECK-NEXT: {{[0-9a-f]+}}: autiasp +// CHECK-NEXT: {{[0-9a-f]+}}: ret + paciasp + stp x29, x30, [sp, #-16]! + mov x29, sp + + autia x0, x1 + bl callee + ldr x2, [x0] + + ldp x29, x30, [sp], #16 + autiasp + ret + .size bad_leaked_to_subroutine, .-bad_leaked_to_subroutine + + .globl bad_unknown_usage_read + .type bad_unknown_usage_read,@function +bad_unknown_usage_read: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unknown_usage_read, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: mul x3, x0, x1 +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: {{[0-9a-f]+}}: mul x3, x0, x1 +// CHECK-NEXT: {{[0-9a-f]+}}: ldr x2, [x0] +// CHECK-NEXT: {{[0-9a-f]+}}: ret + autia x0, x1 + // Registers are not accessible to an attacker under Pointer + // Authentication threat model, until spilled to memory. + // Thus, reporting the below MUL instruction is a false positive, since + // the next LDR instruction prevents any possible spilling of x3 unless + // the authentication succeeded. 
Though, rejecting anything except for + // a closed list of instruction types is the intended behavior of the + // analysis, so this false positive is by design. + mul x3, x0, x1 + ldr x2, [x0] + ret + .size bad_unknown_usage_read, .-bad_unknown_usage_read + + .globl bad_store_to_memory_and_wait + .type bad_store_to_memory_and_wait,@function +bad_store_to_memory_and_wait: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_store_to_memory_and_wait, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: str x0, [x3] + autia x0, x1 + cbz x3, 2f + str x0, [x3] +1: + // The thread performs a time-consuming computation while the result of + // authentication is accessible in memory. + nop +2: + ldr x2, [x0] + ret + .size bad_store_to_memory_and_wait, .-bad_store_to_memory_and_wait + +// FIXME: Known false negative: if no return instruction is reachable from a +// program point (this probably implies an infinite loop), such +// instruction cannot be detected as an authentication oracle. + .globl bad_store_to_memory_and_hang + .type bad_store_to_memory_and_hang,@function +bad_store_to_memory_and_hang: +// CHECK-NOT: bad_store_to_memory_and_hang + autia x0, x1 + cbz x3, 2f + str x0, [x3] +1: + // The thread loops indefinitely while the result of authentication + // is accessible in memory. + b 1b +2: + ldr x2, [x0] + ret + .size bad_store_to_memory_and_hang, .-bad_store_to_memory_and_hang + + .globl bad_unknown_usage_subreg_read + .type bad_unknown_usage_subreg_read,@function +bad_unknown_usage_subreg_read: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unknown_usage_subreg_read, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. 
{{[0-9a-f]+}}: mul w3, w0, w1 +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: {{[0-9a-f]+}}: mul w3, w0, w1 +// CHECK-NEXT: {{[0-9a-f]+}}: ldr x2, [x0] +// CHECK-NEXT: {{[0-9a-f]+}}: ret + autia x0, x1 + mul w3, w0, w1 + ldr x2, [x0] + ret + .size bad_unknown_usage_subreg_read, .-bad_unknown_usage_subreg_read + + .globl bad_unknown_usage_update + .type bad_unknown_usage_update,@function +bad_unknown_usage_update: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unknown_usage_update, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: movk x0, #0x2a, lsl #16 +// CHECK-NEXT: This happens in the following basic block: +// CHECK-NEXT: {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: {{[0-9a-f]+}}: movk x0, #0x2a, lsl #16 +// CHECK-NEXT: {{[0-9a-f]+}}: ldr x2, [x0] +// CHECK-NEXT: {{[0-9a-f]+}}: ret + autia x0, x1 + movk x0, #42, lsl #16 // does not overwrite x0 completely + ldr x2, [x0] + ret + .size bad_unknown_usage_update, .-bad_unknown_usage_update + + .globl good_overwrite_with_constant + .type good_overwrite_with_constant,@function +good_overwrite_with_constant: +// CHECK-NOT: good_overwrite_with_constant + autia x0, x1 + mov x0, #42 + ret + .size good_overwrite_with_constant, .-good_overwrite_with_constant + +// Overwriting sensitive data by instructions with unmodelled side-effects is +// explicitly rejected, even though this particular MRS is safe. 
+ .globl bad_overwrite_with_side_effects + .type bad_overwrite_with_side_effects,@function +bad_overwrite_with_side_effects: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_overwrite_with_side_effects, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 0 instructions that leak the affected registers are: + autia x0, x1 + mrs x0, CTR_EL0 + ret + .size bad_overwrite_with_side_effects, .-bad_overwrite_with_side_effects + +// Here the new value written by MUL to x0 is completely unrelated to the result +// of authentication, so this is a false positive. +// FIXME: Can/should we generalize overwriting by constant to handle such cases? + .globl good_unknown_overwrite + .type good_unknown_overwrite,@function +good_unknown_overwrite: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function good_unknown_overwrite, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 0 instructions that leak the affected registers are: + autia x0, x1 + mul x0, x1, x2 + ret + .size good_unknown_overwrite, .-good_unknown_overwrite + +// This is a false positive: when a general-purpose register is written to as +// a 32-bit register, its top 32 bits are zeroed, but according to LLVM +// representation, the instruction only overwrites the Wn register. 
+ .globl good_wreg_overwrite + .type good_wreg_overwrite,@function +good_wreg_overwrite: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function good_wreg_overwrite, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 + autia x0, x1 + mov w0, #42 + ret + .size good_wreg_overwrite, .-good_wreg_overwrite + + .globl good_address_arith + .type good_address_arith,@function +good_address_arith: +// CHECK-NOT: good_address_arith +// The authenticated x0 only feeds address arithmetic; the derived pointer in +// x3 is then dereferenced by LDR, and no report is expected. + autia x0, x1 + + add x1, x0, #8 + sub x2, x1, #16 + mov x3, x2 + + ldr x4, [x3] + mov x0, #0 + mov x1, #0 + mov x2, #0 + + ret + .size good_address_arith, .-good_address_arith + + .globl good_ret_multi_bb + .type good_ret_multi_bb,@function +good_ret_multi_bb: +// CHECK-NOT: good_ret_multi_bb +// In the *_multi_bb cases, a CBZ to a nearby local label follows AUTIA, so the +// instructions after it are reached along more than one path. + autia x0, x1 + cbz x1, 1f + nop +1: + ret x0 + .size good_ret_multi_bb, .-good_ret_multi_bb + + .globl good_call_multi_bb + .type good_call_multi_bb,@function +good_call_multi_bb: +// CHECK-NOT: good_call_multi_bb + paciasp + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + + autia x0, x1 + cbz x1, 1f + nop +1: + blr x0 + cbz x1, 2f + nop +2: + ldp x29, x30, [sp], #16 + autiasp + ret + .size good_call_multi_bb, .-good_call_multi_bb + + .globl good_branch_multi_bb + .type good_branch_multi_bb,@function +good_branch_multi_bb: +// CHECK-NOT: good_branch_multi_bb + autia x0, x1 + cbz x1, 1f + nop +1: + br x0 + .size good_branch_multi_bb, .-good_branch_multi_bb + + .globl good_load_other_reg_multi_bb + .type good_load_other_reg_multi_bb,@function +good_load_other_reg_multi_bb: +// CHECK-NOT: good_load_other_reg_multi_bb + autia x0, x1 + cbz x1, 1f + nop +1: + ldr x2, [x0] + cbz x1, 2f + nop +2: + ret + .size good_load_other_reg_multi_bb, .-good_load_other_reg_multi_bb + + .globl good_load_same_reg_multi_bb + .type good_load_same_reg_multi_bb,@function +good_load_same_reg_multi_bb: +// CHECK-NOT: good_load_same_reg_multi_bb + autia x0, x1 + cbz x1, 1f + nop +1: + ldr x0, [x0] + cbz x1, 2f + nop +2: + ret + .size good_load_same_reg_multi_bb, .-good_load_same_reg_multi_bb + + .globl good_explicit_check_multi_bb + .type good_explicit_check_multi_bb,@function +good_explicit_check_multi_bb: +// CHECK-NOT: good_explicit_check_multi_bb + autia x0, x1 + cbz x1, 1f + nop +1: + eor x16, x0, x0, lsl #1 + tbz x16, #62, 2f + brk 0x1234 +2: + cbz x1, 3f + nop +3: + ret + .size good_explicit_check_multi_bb, .-good_explicit_check_multi_bb + + .globl bad_unchecked_multi_bb + .type bad_unchecked_multi_bb,@function +bad_unchecked_multi_bb: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unchecked_multi_bb, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 0 instructions that leak the affected registers are: + autia x0, x1 + cbz x1, 1f + ldr x2, [x0] +1: + ret + .size bad_unchecked_multi_bb, .-bad_unchecked_multi_bb + + .globl bad_leaked_to_subroutine_multi_bb + .type bad_leaked_to_subroutine_multi_bb,@function +bad_leaked_to_subroutine_multi_bb: +// 
CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_leaked_to_subroutine_multi_bb, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: bl callee + paciasp + stp x29, x30, [sp, #-16]! + mov x29, sp + + autia x0, x1 + cbz x1, 1f + ldr x2, [x0] +1: + bl callee + ldr x2, [x0] + + ldp x29, x30, [sp], #16 + autiasp + ret + .size bad_leaked_to_subroutine_multi_bb, .-bad_leaked_to_subroutine_multi_bb + + .globl bad_unknown_usage_read_multi_bb + .type bad_unknown_usage_read_multi_bb,@function +bad_unknown_usage_read_multi_bb: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unknown_usage_read_multi_bb, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: mul x3, x0, x1 + autia x0, x1 + cbz x3, 1f + mul x3, x0, x1 +1: + ldr x2, [x0] + ret + .size bad_unknown_usage_read_multi_bb, .-bad_unknown_usage_read_multi_bb + + .globl bad_unknown_usage_subreg_read_multi_bb + .type bad_unknown_usage_subreg_read_multi_bb,@function +bad_unknown_usage_subreg_read_multi_bb: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unknown_usage_subreg_read_multi_bb, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. 
{{[0-9a-f]+}}: mul w3, w0, w1 + autia x0, x1 + cbz x3, 1f + mul w3, w0, w1 +1: + ldr x2, [x0] + ret + .size bad_unknown_usage_subreg_read_multi_bb, .-bad_unknown_usage_subreg_read_multi_bb + + .globl bad_unknown_usage_update_multi_bb + .type bad_unknown_usage_update_multi_bb,@function +bad_unknown_usage_update_multi_bb: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unknown_usage_update_multi_bb, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: movk x0, #0x2a, lsl #16 + autia x0, x1 + cbz x3, 1f + movk x0, #42, lsl #16 // does not overwrite x0 completely +1: + ldr x2, [x0] + ret + .size bad_unknown_usage_update_multi_bb, .-bad_unknown_usage_update_multi_bb + + .globl good_overwrite_with_constant_multi_bb + .type good_overwrite_with_constant_multi_bb,@function +good_overwrite_with_constant_multi_bb: +// CHECK-NOT: good_overwrite_with_constant_multi_bb + autia x0, x1 + cbz x3, 1f +1: + mov x0, #42 + ret + .size good_overwrite_with_constant_multi_bb, .-good_overwrite_with_constant_multi_bb + + .globl good_address_arith_multi_bb + .type good_address_arith_multi_bb,@function +good_address_arith_multi_bb: +// CHECK-NOT: good_address_arith_multi_bb + autia x0, x1 + cbz x3, 1f + + add x1, x0, #8 + sub x2, x1, #16 + mov x0, x2 + + mov x1, #0 + mov x2, #0 +1: + ldr x3, [x0] + ret + .size good_address_arith_multi_bb, .-good_address_arith_multi_bb + +// FIXME: Most *_nocfg test cases contain paciasp+autiasp instructions even if +// LR is not spilled - this is a workaround for RET instructions being +// reported as non-protected, because LR state is reset at every label. 
+ + .globl good_ret_nocfg + .type good_ret_nocfg,@function +good_ret_nocfg: +// CHECK-NOT: good_ret_nocfg +// NOTE(review): the `adr x2, 1f; br x2` pair used throughout the *_nocfg +// cases presumably hides the branch target from CFG reconstruction, so these +// functions are analyzed without a CFG - confirm. + adr x2, 1f + br x2 +1: + autia x0, x1 + + ret x0 + .size good_ret_nocfg, .-good_ret_nocfg + + .globl good_call_nocfg + .type good_call_nocfg,@function +good_call_nocfg: +// CHECK-NOT: good_call_nocfg + paciasp + stp x29, x30, [sp, #-16]! + mov x29, sp + + adr x2, 1f + br x2 +1: + autia x0, x1 + blr x0 + + ldp x29, x30, [sp], #16 + autiasp + ret + .size good_call_nocfg, .-good_call_nocfg + + .globl good_branch_nocfg + .type good_branch_nocfg,@function +good_branch_nocfg: +// CHECK-NOT: good_branch_nocfg + adr x2, 1f + br x2 +1: + autia x0, x1 + br x0 + .size good_branch_nocfg, .-good_branch_nocfg + + .globl good_load_other_reg_nocfg + .type good_load_other_reg_nocfg,@function +good_load_other_reg_nocfg: +// CHECK-NOT: good_load_other_reg_nocfg + paciasp + adr x2, 1f + br x2 +1: + autia x0, x1 + ldr x2, [x0] + + autiasp + ret + .size good_load_other_reg_nocfg, .-good_load_other_reg_nocfg + + .globl good_load_same_reg_nocfg + .type good_load_same_reg_nocfg,@function +good_load_same_reg_nocfg: +// CHECK-NOT: good_load_same_reg_nocfg + paciasp + adr x2, 1f + br x2 +1: + autia x0, x1 + ldr x0, [x0] + + autiasp + ret + .size good_load_same_reg_nocfg, .-good_load_same_reg_nocfg + +// FIXME: Multi-instruction checker sequences are not supported without CFG. 
+ + .globl bad_unchecked_nocfg + .type bad_unchecked_nocfg,@function +bad_unchecked_nocfg: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unchecked_nocfg, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 0 instructions that leak the affected registers are: + paciasp + adr x2, 1f + br x2 +1: + autia x0, x1 + + autiasp + ret + .size bad_unchecked_nocfg, .-bad_unchecked_nocfg + + .globl bad_leaked_to_subroutine_nocfg + .type bad_leaked_to_subroutine_nocfg,@function +bad_leaked_to_subroutine_nocfg: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_leaked_to_subroutine_nocfg, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: bl callee # Offset: 24 + paciasp + stp x29, x30, [sp, #-16]! + mov x29, sp + + adr x2, 1f + br x2 +1: + autia x0, x1 + bl callee + ldr x2, [x0] + + ldp x29, x30, [sp], #16 + autiasp + ret + .size bad_leaked_to_subroutine_nocfg, .-bad_leaked_to_subroutine_nocfg + + .globl bad_unknown_usage_read_nocfg + .type bad_unknown_usage_read_nocfg,@function +bad_unknown_usage_read_nocfg: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unknown_usage_read_nocfg, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. 
{{[0-9a-f]+}}: mul x3, x0, x1 + paciasp + adr x2, 1f + br x2 +1: + autia x0, x1 + mul x3, x0, x1 + ldr x2, [x0] + + autiasp + ret + .size bad_unknown_usage_read_nocfg, .-bad_unknown_usage_read_nocfg + + .globl bad_unknown_usage_subreg_read_nocfg + .type bad_unknown_usage_subreg_read_nocfg,@function +bad_unknown_usage_subreg_read_nocfg: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unknown_usage_subreg_read_nocfg, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. {{[0-9a-f]+}}: mul w3, w0, w1 + paciasp + adr x2, 1f + br x2 +1: + autia x0, x1 + mul w3, w0, w1 + ldr x2, [x0] + + autiasp + ret + .size bad_unknown_usage_subreg_read_nocfg, .-bad_unknown_usage_subreg_read_nocfg + + .globl bad_unknown_usage_update_nocfg + .type bad_unknown_usage_update_nocfg,@function +bad_unknown_usage_update_nocfg: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_unknown_usage_update_nocfg, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autia x0, x1 +// CHECK-NEXT: The 1 instructions that leak the affected registers are: +// CHECK-NEXT: 1. 
{{[0-9a-f]+}}: movk x0, #0x2a, lsl #16 + paciasp + adr x2, 1f + br x2 +1: + autia x0, x1 + movk x0, #42, lsl #16 // does not overwrite x0 completely + ldr x2, [x0] + + autiasp + ret + .size bad_unknown_usage_update_nocfg, .-bad_unknown_usage_update_nocfg + + .globl good_overwrite_with_constant_nocfg + .type good_overwrite_with_constant_nocfg,@function +good_overwrite_with_constant_nocfg: +// CHECK-NOT: good_overwrite_with_constant_nocfg + paciasp + adr x2, 1f + br x2 +1: + autia x0, x1 + mov x0, #42 + + autiasp + ret + .size good_overwrite_with_constant_nocfg, .-good_overwrite_with_constant_nocfg + + .globl good_address_arith_nocfg + .type good_address_arith_nocfg,@function +good_address_arith_nocfg: +// CHECK-NOT: good_address_arith_nocfg + paciasp + adr x2, 1f + br x2 +1: + autia x0, x1 + add x1, x0, #8 + sub x2, x1, #16 + mov x3, x2 + + ldr x4, [x3] + mov x0, #0 + mov x1, #0 + mov x2, #0 + + autiasp + ret + .size good_address_arith_nocfg, .-good_address_arith_nocfg + + .globl good_explicit_check_unrelated_reg + .type good_explicit_check_unrelated_reg,@function +good_explicit_check_unrelated_reg: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function good_explicit_check_unrelated_reg, basic block {{[^,]+}}, at address + // FIXME: The below instruction is not an authentication oracle + autia x2, x3 // One of the possible execution paths after this instruction + // ends at BRK below, thus BRK used as a trap instruction + // should formally "check everything" so as not to introduce + // a false positive here. + autia x0, x1 + eor x16, x0, x0, lsl #1 + tbz x16, #62, 1f + brk 0x1234 +1: + ldr x4, [x2] // Right before this instruction X2 is checked - this + // should be propagated to the basic block ending with + // TBZ instruction above. + ret + .size good_explicit_check_unrelated_reg, .-good_explicit_check_unrelated_reg + +// The last BB (in layout order) is processed first by the data-flow analysis. 
+// Its initial state is usually filled in a special way (because it ends with +// `ret` instruction), and then affects the state propagated to the other BBs +// Thus, the case of the last instruction in a function being a jump somewhere +// in the middle is special. + + .globl good_no_ret_from_last_bb + .type good_no_ret_from_last_bb,@function +good_no_ret_from_last_bb: +// CHECK-NOT: good_no_ret_from_last_bb + paciasp + autiasp // authenticates LR + b 2f +1: + ret +2: + b 1b // LR is dereferenced by `ret`, which is executed next + .size good_no_ret_from_last_bb, .-good_no_ret_from_last_bb + + .globl bad_no_ret_from_last_bb + .type bad_no_ret_from_last_bb,@function +bad_no_ret_from_last_bb: +// CHECK-LABEL: GS-PAUTH: authentication oracle found in function bad_no_ret_from_last_bb, basic block {{[^,]+}}, at address +// CHECK-NEXT: The instruction is {{[0-9a-f]+}}: autiasp +// CHECK-NEXT: The 0 instructions that leak the affected registers are: + paciasp + autiasp // authenticates LR + b 2f +1: + ret x0 +2: + b 1b // X0 (but not LR) is dereferenced by `ret x0` + .size bad_no_ret_from_last_bb, .-bad_no_ret_from_last_bb + +// Test that combined auth+something instructions are not reported as +// authentication oracles. + + .globl inst_retaa + .type inst_retaa,@function +inst_retaa: +// CHECK-NOT: inst_retaa + paciasp + retaa + .size inst_retaa, .-inst_retaa + + .globl inst_blraa + .type inst_blraa,@function +inst_blraa: +// CHECK-NOT: inst_blraa + paciasp + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + + blraa x0, x1 + + ldp x29, x30, [sp], #16 + retaa + .size inst_blraa, .-inst_blraa + + .globl inst_braa + .type inst_braa,@function +inst_braa: +// CHECK-NOT: inst_braa + braa x0, x1 + .size inst_braa, .-inst_braa + + .globl inst_ldraa_no_wb + .type inst_ldraa_no_wb,@function +inst_ldraa_no_wb: +// CHECK-NOT: inst_ldraa_no_wb + ldraa x1, [x0] + ret + .size inst_ldraa_no_wb, .-inst_ldraa_no_wb + + .globl inst_ldraa_wb + .type inst_ldraa_wb,@function +inst_ldraa_wb: +// CHECK-NOT: inst_ldraa_wb + ldraa x1, [x0]! + ret + .size inst_ldraa_wb, .-inst_ldraa_wb + + .globl main + .type main,@function +main: + mov x0, 0 + ret + .size main, .-main diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s index 82494d834a15..fbb96a63d41e 100644 --- a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s @@ -113,7 +113,7 @@ simple: // CHECK-EMPTY: // PAUTH-NEXT: Found sign inst: 00000000: paciasp # DataflowSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: > // PAUTH-NEXT: Signed reg: LR -// PAUTH-NEXT: TrustedRegs: LR W30 W30_HI +// PAUTH-NEXT: TrustedRegs: LR W30 W30_HI{{[ \t]*$}} // PAUTH-NEXT: Found call inst: 00000000: blr x0 # DataflowSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: > // PAUTH-NEXT: Call destination reg: X0 // PAUTH-NEXT: SafeToDerefRegs: W0 X0 W0_HI{{[ \t]*$}} @@ -220,10 +220,10 @@ nocfg: // CHECK-EMPTY: // PAUTH-NEXT: Found call inst: 00000000: br x0 # UNKNOWN CONTROL FLOW # Offset: 4 # CFGUnawareSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: > // PAUTH-NEXT: Call destination reg: X0 -// PAUTH-NEXT: SafeToDerefRegs: LR W0 W30 X0 W0_HI W30_HI +// PAUTH-NEXT: SafeToDerefRegs: LR W0 W30 X0 W0_HI W30_HI{{[ \t]*$}} // CHECK-NEXT: Found RET inst: 00000000: ret # Offset: 8 # 
CFGUnawareSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: > // CHECK-NEXT: RetReg: LR -// CHECK-NEXT: SafeToDerefRegs: +// CHECK-NEXT: SafeToDerefRegs:{{[ \t]*$}} // CHECK-EMPTY: // CHECK-NEXT: Running detailed src register safety analysis... // CHECK-NEXT: SrcSafetyAnalysis::ComputeNext( adr x0, __ENTRY_nocfg@0x[[ENTRY_ADDR]], src-state<SafeToDerefRegs: LR W30 W30_HI , TrustedRegs: LR W30 W30_HI , Insts: [0]()>) @@ -251,6 +251,116 @@ nocfg: // CHECK-EMPTY: // CHECK-NEXT: Attaching clobbering info to: 00000000: ret # Offset: 8 # CFGUnawareSrcSafetyAnalysis: src-state<SafeToDerefRegs: BitVector, TrustedRegs: BitVector, Insts: [0]()> + .globl auth_oracle + .type auth_oracle,@function +auth_oracle: + autia x0, x1 + ret + .size auth_oracle, .-auth_oracle + +// CHECK-LABEL:Analyzing function auth_oracle, AllocatorId = 1 +// CHECK-NEXT: Binary Function "auth_oracle" { +// CHECK-NEXT: Number : 4 +// CHECK-NEXT: State : CFG constructed +// ... +// CHECK: BB Layout : [[BB0:[0-9a-zA-Z.]+]] +// CHECK-NEXT: } +// CHECK-NEXT: [[BB0]] (2 instructions, align : 1) +// CHECK-NEXT: Entry Point +// CHECK-NEXT: 00000000: autia x0, x1 +// CHECK-NEXT: 00000004: ret +// CHECK-EMPTY: +// CHECK-NEXT: DWARF CFI Instructions: +// CHECK-NEXT: <empty> +// CHECK-NEXT: End of Function "auth_oracle" +// CHECK-EMPTY: +// CHECK-NEXT: Running src register safety analysis... +// ... +// CHECK: After src register safety analysis: +// CHECK-NEXT: Binary Function "auth_oracle" { +// ... +// CHECK: End of Function "auth_oracle" +// ... +// PAUTH: Running dst register safety analysis... +// PAUTH-NEXT: DstSafetyAnalysis::ComputeNext( ret x30, dst-state<CannotEscapeUnchecked: , Insts: >) +// PAUTH-NEXT: .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: >) +// PAUTH-NEXT: DstSafetyAnalysis::ComputeNext( autia x0, x1, dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: >) +// PAUTH-NEXT: .. 
result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: >) +// PAUTH-NEXT: After dst register safety analysis: +// PAUTH-NEXT: Binary Function "auth_oracle" { +// PAUTH-NEXT: Number : 4 +// PAUTH-NEXT: State : CFG constructed +// ... +// PAUTH: BB Layout : [[BB0]] +// PAUTH-NEXT: } +// PAUTH-NEXT: [[BB0]] (2 instructions, align : 1) +// PAUTH-NEXT: Entry Point +// PAUTH-NEXT: 00000000: autia x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: > +// PAUTH-NEXT: 00000004: ret # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: > +// PAUTH-EMPTY: +// PAUTH-NEXT: DWARF CFI Instructions: +// PAUTH-NEXT: <empty> +// PAUTH-NEXT: End of Function "auth_oracle" +// PAUTH-EMPTY: +// PAUTH-NEXT: Found auth inst: 00000000: autia x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: > +// PAUTH-NEXT: Authenticated reg: X0 +// PAUTH-NEXT: safe output registers: LR W30 W30_HI{{[ \t]*$}} +// PAUTH-EMPTY: +// PAUTH-NEXT: Running detailed dst register safety analysis... +// PAUTH-NEXT: DstSafetyAnalysis::ComputeNext( ret x30, dst-state<CannotEscapeUnchecked: , Insts: [0]()>) +// PAUTH-NEXT: .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0]()>) +// PAUTH-NEXT: DstSafetyAnalysis::ComputeNext( autia x0, x1, dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0]()>) +// PAUTH-NEXT: .. result: (dst-state<CannotEscapeUnchecked: LR W30 W30_HI , Insts: [0](0x{{[0-9a-f]+}} )>) +// PAUTH-NEXT: After detailed dst register safety analysis: +// PAUTH-NEXT: Binary Function "auth_oracle" { +// PAUTH-NEXT: Number : 4 +// PAUTH-NEXT: State : CFG constructed +// ... 
+// PAUTH: BB Layout : [[BB0]] +// PAUTH-NEXT: } +// PAUTH-NEXT: [[BB0]] (2 instructions, align : 1) +// PAUTH-NEXT: Entry Point +// PAUTH-NEXT: 00000000: autia x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0](0x{{[0-9a-f]+}} )> +// PAUTH-NEXT: 00000004: ret # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0]()> +// PAUTH-EMPTY: +// PAUTH-NEXT: DWARF CFI Instructions: +// PAUTH-NEXT: <empty> +// PAUTH-NEXT: End of Function "auth_oracle" +// PAUTH-EMPTY: +// PAUTH-NEXT: Attaching leakage info to: 00000000: autia x0, x1 # DataflowDstSafetyAnalysis: dst-state<CannotEscapeUnchecked: BitVector, Insts: [0](0x{{[0-9a-f]+}} )> + +// Gadget scanner should not crash on CFI instructions, including when debug-printing them. +// Note that the particular debug output is not checked, but BOLT should be +// compiled with assertions enabled to support -debug-only argument. + + .globl cfi_inst_df + .type cfi_inst_df,@function +cfi_inst_df: + .cfi_startproc + sub sp, sp, #16 + .cfi_def_cfa_offset 16 + add sp, sp, #16 + .cfi_def_cfa_offset 0 + ret + .size cfi_inst_df, .-cfi_inst_df + .cfi_endproc + + .globl cfi_inst_nocfg + .type cfi_inst_nocfg,@function +cfi_inst_nocfg: + .cfi_startproc + sub sp, sp, #16 + .cfi_def_cfa_offset 16 + + adr x0, 1f + br x0 +1: + add sp, sp, #16 + .cfi_def_cfa_offset 0 + ret + .size cfi_inst_nocfg, .-cfi_inst_nocfg + .cfi_endproc + // CHECK-LABEL:Analyzing function main, AllocatorId = 1 .globl main .type main,@function diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py index 5a9752068bb9..898dce8e3fb5 100755 --- a/bolt/test/link_fdata.py +++ b/bolt/test/link_fdata.py @@ -36,9 +36,9 @@ prefix_pat = re.compile(f"^# {args.prefix}: (.*)") fdata_pat = re.compile(r"([01].*) (?P<mispred>\d+) (?P<exec>\d+)") # Pre-aggregated profile: -# {T|S|E|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>] +# {T|R|S|E|B|F|f|r} <start> [<end>] [<ft_end>] <count> [<mispred_count>] # 
<loc>: [<id>:]<offset> -preagg_pat = re.compile(r"(?P<type>[TSBFf]) (?P<offsets_count>.*)") +preagg_pat = re.compile(r"(?P<type>[TRSBFfr]) (?P<offsets_count>.*)") # No-LBR profile: # <is symbol?> <closest elf symbol or DSO name> <relative address> <count> diff --git a/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test new file mode 100644 index 000000000000..91f5c857fbab --- /dev/null +++ b/bolt/test/perf2bolt/AArch64/perf2bolt-spe.test @@ -0,0 +1,12 @@ +## Check that Arm SPE mode is available on AArch64. + +REQUIRES: system-linux,perf,target=aarch64{{.*}} + +RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe + +RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe 2> /dev/null + +RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2> /dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-LBR + +CHECK-SPE-LBR: PERF2BOLT: parse SPE branch events in LBR-format + diff --git a/bolt/test/perf2bolt/X86/perf2bolt-spe.test b/bolt/test/perf2bolt/X86/perf2bolt-spe.test new file mode 100644 index 000000000000..101bd3789a18 --- /dev/null +++ b/bolt/test/perf2bolt/X86/perf2bolt-spe.test @@ -0,0 +1,9 @@ +## Check that Arm SPE mode is unavailable on X86. + +REQUIRES: system-linux,x86_64-linux + +RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o %t.exe +RUN: touch %t.empty.perf.data +RUN: not perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --spe --pa %t.exe 2>&1 | FileCheck %s + +CHECK: perf2bolt{{.*}} -spe is available only on AArch64. 
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp index b9836c2397b6..cf1b31f8c0c6 100644 --- a/bolt/tools/driver/llvm-bolt.cpp +++ b/bolt/tools/driver/llvm-bolt.cpp @@ -237,6 +237,13 @@ int main(int argc, char **argv) { if (Error E = RIOrErr.takeError()) report_error(opts::InputFilename, std::move(E)); RewriteInstance &RI = *RIOrErr.get(); + + if (opts::AggregateOnly && !RI.getBinaryContext().isAArch64() && + opts::ArmSPE) { + errs() << ToolName << ": -spe is available only on AArch64.\n"; + exit(1); + } + if (!opts::PerfData.empty()) { if (!opts::AggregateOnly) { errs() << ToolName diff --git a/bolt/unittests/Profile/CMakeLists.txt b/bolt/unittests/Profile/CMakeLists.txt index e0aa0926b49c..ce01c6c4b949 100644 --- a/bolt/unittests/Profile/CMakeLists.txt +++ b/bolt/unittests/Profile/CMakeLists.txt @@ -1,11 +1,25 @@ +set(LLVM_LINK_COMPONENTS + DebugInfoDWARF + Object + ${LLVM_TARGETS_TO_BUILD} + ) + add_bolt_unittest(ProfileTests DataAggregator.cpp + PerfSpeEvents.cpp DISABLE_LLVM_LINK_LLVM_DYLIB ) target_link_libraries(ProfileTests PRIVATE + LLVMBOLTCore LLVMBOLTProfile + LLVMTargetParser + LLVMTestingSupport ) +foreach (tgt ${BOLT_TARGETS_TO_BUILD}) + string(TOUPPER "${tgt}" upper) + target_compile_definitions(ProfileTests PRIVATE "${upper}_AVAILABLE") +endforeach() diff --git a/bolt/unittests/Profile/PerfSpeEvents.cpp b/bolt/unittests/Profile/PerfSpeEvents.cpp new file mode 100644 index 000000000000..3e3e05395246 --- /dev/null +++ b/bolt/unittests/Profile/PerfSpeEvents.cpp @@ -0,0 +1,164 @@ +//===- bolt/unittests/Profile/PerfSpeEvents.cpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef AARCH64_AVAILABLE
+
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Profile/DataAggregator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+#include <cstring>
+#include <memory>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::bolt;
+using namespace llvm::object;
+using namespace llvm::ELF;
+
+namespace opts {
+extern cl::opt<std::string> ReadPerfEvents;
+extern cl::opt<bool> ArmSPE;
+} // namespace opts
+
+namespace llvm {
+namespace bolt {
+
+/// Test fixture for checking how DataAggregator parses perf SPE branch events.
+///
+/// SetUp() initializes the LLVM targets, fabricates a bare AArch64 ELF header
+/// in memory and creates a BinaryContext from it; the individual tests then
+/// feed brstack-formatted text to DataAggregator::parseBranchEvents().
+struct PerfSpeEventsTestHelper : public testing::Test {
+  void SetUp() override {
+    initializeLLVM();
+    prepareElf();
+    initializeBOLT();
+  }
+
+protected:
+  using Trace = DataAggregator::Trace;
+  using TakenBranchInfo = DataAggregator::TakenBranchInfo;
+  using TraceList = std::vector<std::pair<Trace, TakenBranchInfo>>;
+
+  /// Register every target component that is linked into the test binary.
+  void initializeLLVM() {
+    llvm::InitializeAllTargetInfos();
+    llvm::InitializeAllTargetMCs();
+    llvm::InitializeAllAsmParsers();
+    llvm::InitializeAllDisassemblers();
+    llvm::InitializeAllTargets();
+    llvm::InitializeAllAsmPrinters();
+  }
+
+  /// Fill ElfBuf with a 64-bit little-endian AArch64 ELF header (everything
+  /// else stays zeroed) and open it as an ObjectFile.
+  void prepareElf() {
+    memcpy(ElfBuf, "\177ELF", 4);
+    ELF64LE::Ehdr *EHdr = reinterpret_cast<typename ELF64LE::Ehdr *>(ElfBuf);
+    EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64;
+    EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB;
+    EHdr->e_machine = llvm::ELF::EM_AARCH64;
+    MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF");
+    ObjFile = cantFail(ObjectFile::createObjectFile(Source));
+  }
+
+  /// Create the BinaryContext for the fabricated object file.
+  void initializeBOLT() {
+    Relocation::Arch = ObjFile->makeTriple().getArch();
+    BC = cantFail(BinaryContext::createBinaryContext(
+        ObjFile->makeTriple(), std::make_shared<orc::SymbolStringPool>(),
+        ObjFile->getFileName(), nullptr, /*IsPIC*/ false,
+        DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()}));
+    ASSERT_FALSE(!BC);
+  }
+
+  char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {};
+  std::unique_ptr<ObjectFile> ObjFile;
+  std::unique_ptr<BinaryContext> BC;
+
+  /// Dump both the parsed and the expected lists to stderr to make a
+  /// mismatch easier to diagnose. Both lists use the same
+  /// {Branch, From, To, TakenCount, MispredCount} layout.
+  void reportBrStackEventMismatch(const TraceList &Traces,
+                                  const TraceList &ExpectedSamples) {
+    const auto PrintList = [](StringRef Title, const TraceList &List) {
+      llvm::errs() << Title << " items: \n";
+      for (const auto &[T, BI] : List)
+        llvm::errs() << "{" << T.Branch << ", " << T.From << ", " << T.To
+                     << ", " << BI.TakenCount << ", " << BI.MispredCount
+                     << "}" << "\n";
+    };
+
+    PrintList("Traces", Traces);
+    PrintList("Expected", ExpectedSamples);
+  }
+
+  /// Parse opts::ReadPerfEvents as SPE brstack (LBR-format) input for process
+  /// \p PID and check that every entry of \p ExpectedSamples is present in the
+  /// aggregated traces with the same taken/mispredicted counts.
+  void parseAndCheckBrstackEvents(uint64_t PID,
+                                  const TraceList &ExpectedSamples) {
+    DataAggregator DA("<pseudo input>");
+    DA.ParsingBuf = opts::ReadPerfEvents;
+    DA.BC = BC.get();
+    DataAggregator::MMapInfo MMap;
+    DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
+
+    DA.parseBranchEvents();
+
+    EXPECT_EQ(DA.Traces.size(), ExpectedSamples.size());
+    bool Mismatch = DA.Traces.size() != ExpectedSamples.size();
+
+    for (const auto &Expected : ExpectedSamples) {
+      const auto It = llvm::find_if(DA.Traces, [&Expected](const auto &Tr) {
+        return Tr.first == Expected.first;
+      });
+
+      // A missing trace must not be dereferenced: record the failure and move
+      // on to the next expected sample.
+      if (It == DA.Traces.end()) {
+        ADD_FAILURE() << "an expected trace is missing from the parsed traces";
+        Mismatch = true;
+        continue;
+      }
+
+      EXPECT_EQ(It->second.MispredCount, Expected.second.MispredCount);
+      EXPECT_EQ(It->second.TakenCount, Expected.second.TakenCount);
+    }
+
+    if (Mismatch)
+      reportBrStackEventMismatch(DA.Traces, ExpectedSamples);
+  }
+};
+
+} // namespace bolt
+} // namespace llvm
+
+TEST_F(PerfSpeEventsTestHelper, SpeBranchesWithBrstack) {
+  // Check perf input with SPE branch events as brstack format.
+  // Example collection command:
+  // ```
+  // perf record -e 'arm_spe_0/branch_filter=1/u' -- BINARY
+  // ```
+  // How Bolt extracts the branch events:
+  // ```
+  // perf script -F pid,brstack --itrace=bl
+  // ```
+
+  opts::ArmSPE = true;
+  opts::ReadPerfEvents = " 1234 0xa001/0xa002/PN/-/-/10/COND/-\n"
+                         " 1234 0xb001/0xb002/P/-/-/4/RET/-\n"
+                         " 1234 0xc456/0xc789/P/-/-/13/-/-\n"
+                         " 1234 0xd123/0xd456/M/-/-/7/RET/-\n"
+                         " 1234 0xe001/0xe002/P/-/-/14/RET/-\n"
+                         " 1234 0xd123/0xd456/M/-/-/7/RET/-\n"
+                         " 1234 0xf001/0xf002/MN/-/-/8/COND/-\n"
+                         " 1234 0xc456/0xc789/M/-/-/13/-/-\n";
+
+  // ExpectedSamples contains the aggregated information about
+  // a branch {{Branch From, To}, {TakenCount, MispredCount}}.
+  // Consider this example trace: {{0xd123, 0xd456, Trace::BR_ONLY},
+  // {2,2}}. This entry has a TakenCount = 2, as we have two samples for
+  // (0xd123, 0xd456) in our input. It also has MispredCount = 2,
+  // as 'M' misprediction flag appears in both cases. BR_ONLY means
+  // the trace only contains branch data.
+  const TraceList ExpectedSamples = {
+      {{0xa001, 0xa002, Trace::BR_ONLY}, {1, 0}},
+      {{0xb001, 0xb002, Trace::BR_ONLY}, {1, 0}},
+      {{0xc456, 0xc789, Trace::BR_ONLY}, {2, 1}},
+      {{0xd123, 0xd456, Trace::BR_ONLY}, {2, 2}},
+      {{0xe001, 0xe002, Trace::BR_ONLY}, {1, 0}},
+      {{0xf001, 0xf002, Trace::BR_ONLY}, {1, 1}}};
+
+  parseAndCheckBrstackEvents(1234, ExpectedSamples);
+}
+
+#endif
|
