summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp622
1 files changed, 476 insertions, 146 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 7d540efe2b41..ee397db3fba6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -17,6 +17,7 @@
#include "AArch64PointerAuth.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
@@ -1354,48 +1355,52 @@ static bool areCFlagsAccessedBetweenInstrs(
return false;
}
-/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
-/// operation which could set the flags in an identical manner
-bool AArch64InstrInfo::optimizePTestInstr(
- MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
- const MachineRegisterInfo *MRI) const {
- auto *Mask = MRI->getUniqueVRegDef(MaskReg);
- auto *Pred = MRI->getUniqueVRegDef(PredReg);
- auto NewOp = Pred->getOpcode();
- bool OpChanged = false;
-
+std::optional<unsigned>
+AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
+ MachineInstr *Pred,
+ const MachineRegisterInfo *MRI) const {
unsigned MaskOpcode = Mask->getOpcode();
unsigned PredOpcode = Pred->getOpcode();
bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
bool PredIsWhileLike = isWhileOpcode(PredOpcode);
- if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike) &&
- getElementSizeForOpcode(MaskOpcode) ==
- getElementSizeForOpcode(PredOpcode) &&
- Mask->getOperand(1).getImm() == 31) {
+ if (PredIsWhileLike) {
+ // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
+ // instruction and the condition is "any" since WHILcc does an implicit
+ // PTEST(ALL, PG) check and PG is always a subset of ALL.
+ if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
+ return PredOpcode;
+
// For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
// redundant since WHILE performs an implicit PTEST with an all active
- // mask. Must be an all active predicate of matching element size.
+ // mask.
+ if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
+ getElementSizeForOpcode(MaskOpcode) ==
+ getElementSizeForOpcode(PredOpcode))
+ return PredOpcode;
+
+ return {};
+ }
+
+ if (PredIsPTestLike) {
+ // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
+ // instruction that sets the flags as PTEST would and the condition is
+ // "any" since PG is always a subset of the governing predicate of the
+ // ptest-like instruction.
+ if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
+ return PredOpcode;
// For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
- // PTEST_LIKE instruction uses the same all active mask and the element
- // size matches. If the PTEST has a condition of any then it is always
- // redundant.
- if (PredIsPTestLike) {
+ // the element size matches and either the PTEST_LIKE instruction uses
+ // the same all active mask or the condition is "any".
+ if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
+ getElementSizeForOpcode(MaskOpcode) ==
+ getElementSizeForOpcode(PredOpcode)) {
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
- if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
- return false;
+ if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
+ return PredOpcode;
}
- // Fallthough to simply remove the PTEST.
- } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
- PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
- // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
- // instruction that sets the flags as PTEST would. This is only valid when
- // the condition is any.
-
- // Fallthough to simply remove the PTEST.
- } else if (PredIsPTestLike) {
// For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
// flags are set based on the same mask 'PG', but PTEST_LIKE must operate
// on 8-bit predicates like the PTEST. Otherwise, for instructions like
@@ -1420,56 +1425,67 @@ bool AArch64InstrInfo::optimizePTestInstr(
// identical regardless of element size.
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
- if ((Mask != PTestLikeMask) ||
- (PredElementSize != AArch64::ElementSizeB &&
- PTest->getOpcode() != AArch64::PTEST_PP_ANY))
- return false;
+ if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
+ PTest->getOpcode() == AArch64::PTEST_PP_ANY))
+ return PredOpcode;
- // Fallthough to simply remove the PTEST.
- } else {
- // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
- // opcode so the PTEST becomes redundant.
- switch (PredOpcode) {
- case AArch64::AND_PPzPP:
- case AArch64::BIC_PPzPP:
- case AArch64::EOR_PPzPP:
- case AArch64::NAND_PPzPP:
- case AArch64::NOR_PPzPP:
- case AArch64::ORN_PPzPP:
- case AArch64::ORR_PPzPP:
- case AArch64::BRKA_PPzP:
- case AArch64::BRKPA_PPzPP:
- case AArch64::BRKB_PPzP:
- case AArch64::BRKPB_PPzPP:
- case AArch64::RDFFR_PPz: {
- // Check to see if our mask is the same. If not the resulting flag bits
- // may be different and we can't remove the ptest.
- auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
- if (Mask != PredMask)
- return false;
- break;
- }
- case AArch64::BRKN_PPzP: {
- // BRKN uses an all active implicit mask to set flags unlike the other
- // flag-setting instructions.
- // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
- if ((MaskOpcode != AArch64::PTRUE_B) ||
- (Mask->getOperand(1).getImm() != 31))
- return false;
- break;
- }
- case AArch64::PTRUE_B:
- // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
- break;
- default:
- // Bail out if we don't recognize the input
- return false;
- }
+ return {};
+ }
- NewOp = convertToFlagSettingOpc(PredOpcode);
- OpChanged = true;
+ // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
+ // opcode so the PTEST becomes redundant.
+ switch (PredOpcode) {
+ case AArch64::AND_PPzPP:
+ case AArch64::BIC_PPzPP:
+ case AArch64::EOR_PPzPP:
+ case AArch64::NAND_PPzPP:
+ case AArch64::NOR_PPzPP:
+ case AArch64::ORN_PPzPP:
+ case AArch64::ORR_PPzPP:
+ case AArch64::BRKA_PPzP:
+ case AArch64::BRKPA_PPzPP:
+ case AArch64::BRKB_PPzP:
+ case AArch64::BRKPB_PPzPP:
+ case AArch64::RDFFR_PPz: {
+ // Check to see if our mask is the same. If not the resulting flag bits
+ // may be different and we can't remove the ptest.
+ auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+ if (Mask != PredMask)
+ return {};
+ break;
+ }
+ case AArch64::BRKN_PPzP: {
+ // BRKN uses an all active implicit mask to set flags unlike the other
+ // flag-setting instructions.
+ // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
+ if ((MaskOpcode != AArch64::PTRUE_B) ||
+ (Mask->getOperand(1).getImm() != 31))
+ return {};
+ break;
+ }
+ case AArch64::PTRUE_B:
+ // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
+ break;
+ default:
+ // Bail out if we don't recognize the input
+ return {};
}
+ return convertToFlagSettingOpc(PredOpcode);
+}
+
+/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
+/// operation which could set the flags in an identical manner
+bool AArch64InstrInfo::optimizePTestInstr(
+ MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
+ const MachineRegisterInfo *MRI) const {
+ auto *Mask = MRI->getUniqueVRegDef(MaskReg);
+ auto *Pred = MRI->getUniqueVRegDef(PredReg);
+ unsigned PredOpcode = Pred->getOpcode();
+ auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
+ if (!NewOp)
+ return false;
+
const TargetRegisterInfo *TRI = &getRegisterInfo();
// If another instruction between Pred and PTest accesses flags, don't remove
@@ -1481,9 +1497,9 @@ bool AArch64InstrInfo::optimizePTestInstr(
// as they are prior to PTEST. Sometimes this requires the tested PTEST
// operand to be replaced with an equivalent instruction that also sets the
// flags.
- Pred->setDesc(get(NewOp));
PTest->eraseFromParent();
- if (OpChanged) {
+ if (*NewOp != PredOpcode) {
+ Pred->setDesc(get(*NewOp));
bool succeeded = UpdateOperandRegClass(*Pred);
(void)succeeded;
assert(succeeded && "Operands have incompatible register classes!");
@@ -4481,7 +4497,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Predicate register by ORRing with itself.
if (AArch64::PPRRegClass.contains(DestReg) &&
AArch64::PPRRegClass.contains(SrcReg)) {
- assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected SVE register.");
BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
.addReg(SrcReg) // Pg
.addReg(SrcReg)
@@ -4494,8 +4511,6 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
if (DestIsPNR || SrcIsPNR) {
- assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
- "Unexpected predicate-as-counter register.");
auto ToPPR = [](MCRegister R) -> MCRegister {
return (R - AArch64::PN0) + AArch64::P0;
};
@@ -4516,7 +4531,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Z register by ORRing with itself.
if (AArch64::ZPRRegClass.contains(DestReg) &&
AArch64::ZPRRegClass.contains(SrcReg)) {
- assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected SVE register.");
BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -4528,7 +4544,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
(AArch64::ZPR2RegClass.contains(SrcReg) ||
AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
- assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
Indices);
@@ -4538,7 +4555,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy a Z register triple by copying the individual sub-registers.
if (AArch64::ZPR3RegClass.contains(DestReg) &&
AArch64::ZPR3RegClass.contains(SrcReg)) {
- assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
@@ -4551,7 +4569,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
(AArch64::ZPR4RegClass.contains(SrcReg) ||
AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
- assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
+ "Unexpected SVE register.");
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2, AArch64::zsub3};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
@@ -4656,7 +4675,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR128RegClass.contains(DestReg) &&
AArch64::FPR128RegClass.contains(SrcReg)) {
- if (Subtarget.hasSVEorSME() && !Subtarget.isNeonAvailable())
+ if (Subtarget.isSVEorStreamingSVEAvailable() &&
+ !Subtarget.isNeonAvailable())
BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
.addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
.addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
@@ -4814,14 +4834,12 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
Opc = AArch64::STRBui;
break;
case 2: {
- bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::STRHui;
- else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVEorSME() &&
+ else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
+ AArch64::PPRRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
- assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
- "Unexpected register store without SVE2p1 or SME2");
Opc = AArch64::STR_PXI;
StackID = TargetStackID::ScalableVector;
}
@@ -4870,7 +4888,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
AArch64::sube64, AArch64::subo64, FI, MMO);
return;
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVEorSME() &&
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZXI;
StackID = TargetStackID::ScalableVector;
@@ -4894,7 +4912,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
Offset = false;
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVEorSME() &&
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZXI;
StackID = TargetStackID::ScalableVector;
@@ -4906,7 +4924,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
Opc = AArch64::ST1Threev2d;
Offset = false;
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVEorSME() &&
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZZXI;
StackID = TargetStackID::ScalableVector;
@@ -4919,7 +4937,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
Offset = false;
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVEorSME() &&
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register store without SVE store instructions");
Opc = AArch64::STR_ZZZZXI;
StackID = TargetStackID::ScalableVector;
@@ -4992,10 +5010,8 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRHui;
else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVEorSME() &&
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
- assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
- "Unexpected register load without SVE2p1 or SME2");
if (IsPNR)
PNRReg = DestReg;
Opc = AArch64::LDR_PXI;
@@ -5046,7 +5062,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
AArch64::subo64, FI, MMO);
return;
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVEorSME() &&
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZXI;
StackID = TargetStackID::ScalableVector;
@@ -5070,7 +5086,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Offset = false;
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVEorSME() &&
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZXI;
StackID = TargetStackID::ScalableVector;
@@ -5082,7 +5098,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Opc = AArch64::LD1Threev2d;
Offset = false;
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVEorSME() &&
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZZXI;
StackID = TargetStackID::ScalableVector;
@@ -5095,7 +5111,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Offset = false;
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
- assert(Subtarget.hasSVEorSME() &&
+ assert(Subtarget.isSVEorStreamingSVEAvailable() &&
"Unexpected register load without SVE load instructions");
Opc = AArch64::LDR_ZZZZXI;
StackID = TargetStackID::ScalableVector;
@@ -8555,6 +8571,8 @@ AArch64InstrInfo::getOutliningCandidateInfo(
NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
RepeatedSequenceLocs = CandidatesWithoutStackFixups;
FrameID = MachineOutlinerNoLRSave;
+ if (RepeatedSequenceLocs.size() < 2)
+ return std::nullopt;
} else {
SetCandidateCallInfo(MachineOutlinerDefault, 12);
@@ -8700,6 +8718,13 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
if (!AFI || AFI->hasRedZone().value_or(true))
return false;
+ // FIXME: Determine whether it is safe to outline from functions which contain
+ // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
+ // outlined together and ensure it is safe to outline with async unwind info,
+ // required for saving & restoring VG around calls.
+ if (AFI->hasStreamingModeChanges())
+ return false;
+
// FIXME: Teach the outliner to generate/handle Windows unwind info.
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
return false;
@@ -9582,18 +9607,49 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
namespace {
class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
- MachineInstr *PredBranch;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo &MRI;
+
+ /// The block of the loop
+ MachineBasicBlock *LoopBB;
+ /// The conditional branch of the loop
+ MachineInstr *CondBranch;
+ /// The compare instruction for loop control
+ MachineInstr *Comp;
+ /// The number of the operand of the loop counter value in Comp
+ unsigned CompCounterOprNum;
+ /// The instruction that updates the loop counter value
+ MachineInstr *Update;
+ /// The number of the operand of the loop counter value in Update
+ unsigned UpdateCounterOprNum;
+ /// The initial value of the loop counter
+ Register Init;
+ /// True iff Update is a predecessor of Comp
+ bool IsUpdatePriorComp;
+
+ /// The normalized condition used by createTripCountGreaterCondition()
SmallVector<MachineOperand, 4> Cond;
public:
- AArch64PipelinerLoopInfo(MachineInstr *PredBranch,
+ AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
+ MachineInstr *Comp, unsigned CompCounterOprNum,
+ MachineInstr *Update, unsigned UpdateCounterOprNum,
+ Register Init, bool IsUpdatePriorComp,
const SmallVectorImpl<MachineOperand> &Cond)
- : PredBranch(PredBranch), Cond(Cond.begin(), Cond.end()) {}
+ : MF(Comp->getParent()->getParent()),
+ TII(MF->getSubtarget().getInstrInfo()),
+ TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
+ LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
+ CompCounterOprNum(CompCounterOprNum), Update(Update),
+ UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
+ IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
// Make the instructions for loop control be placed in stage 0.
- // The predecessors of PredBranch are considered by the caller.
- return MI == PredBranch;
+ // The predecessors of Comp are considered by the caller.
+ return MI == Comp;
}
std::optional<bool> createTripCountGreaterCondition(
@@ -9606,31 +9662,277 @@ public:
return {};
}
+ void createRemainingIterationsGreaterCondition(
+ int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
+ DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
+
void setPreheader(MachineBasicBlock *NewPreheader) override {}
void adjustTripCount(int TripCountAdjust) override {}
void disposed() override {}
+ bool isMVEExpanderSupported() override { return true; }
};
} // namespace
-static bool isCompareAndBranch(unsigned Opcode) {
- switch (Opcode) {
- case AArch64::CBZW:
- case AArch64::CBZX:
- case AArch64::CBNZW:
- case AArch64::CBNZX:
- case AArch64::TBZW:
- case AArch64::TBZX:
- case AArch64::TBNZW:
- case AArch64::TBNZX:
- return true;
+/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
+/// is replaced by ReplaceReg. The output register is newly created.
+/// The other operands are unchanged from MI.
+static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
+ Register ReplaceReg, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertTo) {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
+ MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
+ Register Result = 0;
+ for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
+ if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
+ Result = MRI.createVirtualRegister(
+ MRI.getRegClass(NewMI->getOperand(0).getReg()));
+ NewMI->getOperand(I).setReg(Result);
+ } else if (I == ReplaceOprNum) {
+ MRI.constrainRegClass(
+ ReplaceReg,
+ TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent()));
+ NewMI->getOperand(I).setReg(ReplaceReg);
+ }
}
- return false;
+ MBB.insert(InsertTo, NewMI);
+ return Result;
+}
+
+void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
+ int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
+ DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
+ // Create and accumulate conditions for next TC iterations.
+ // Example:
+ // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
+ // # iteration of the kernel
+ //
+ // # insert the following instructions
+ // cond = CSINCXr 0, 0, C, implicit $nzcv
+ // counter = ADDXri counter, 1 # clone from this->Update
+ // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
+ // cond = CSINCXr cond, cond, C, implicit $nzcv
+ // ... (repeat TC times)
+ // SUBSXri cond, 0, implicit-def $nzcv
+
+ assert(CondBranch->getOpcode() == AArch64::Bcc);
+ // CondCode to exit the loop
+ AArch64CC::CondCode CC =
+ (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
+ if (CondBranch->getOperand(1).getMBB() == LoopBB)
+ CC = AArch64CC::getInvertedCondCode(CC);
+
+ // Accumulate conditions to exit the loop
+ Register AccCond = AArch64::XZR;
+
+ // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
+ auto AccumulateCond = [&](Register CurCond,
+ AArch64CC::CondCode CC) -> Register {
+ Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
+ BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
+ .addReg(NewCond, RegState::Define)
+ .addReg(CurCond)
+ .addReg(CurCond)
+ .addImm(AArch64CC::getInvertedCondCode(CC));
+ return NewCond;
+ };
+
+ if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
+ // Update and Comp for I==0 are already exists in MBB
+ // (MBB is an unrolled kernel)
+ Register Counter;
+ for (int I = 0; I <= TC; ++I) {
+ Register NextCounter;
+ if (I != 0)
+ NextCounter =
+ cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
+
+ AccCond = AccumulateCond(AccCond, CC);
+
+ if (I != TC) {
+ if (I == 0) {
+ if (Update != Comp && IsUpdatePriorComp) {
+ Counter =
+ LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
+ NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
+ MBB.end());
+ } else {
+ // can use already calculated value
+ NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
+ }
+ } else if (Update != Comp) {
+ NextCounter =
+ cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
+ }
+ }
+ Counter = NextCounter;
+ }
+ } else {
+ Register Counter;
+ if (LastStage0Insts.empty()) {
+ // use initial counter value (testing if the trip count is sufficient to
+ // be executed by pipelined code)
+ Counter = Init;
+ if (IsUpdatePriorComp)
+ Counter =
+ cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
+ } else {
+ // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
+ Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
+ }
+
+ for (int I = 0; I <= TC; ++I) {
+ Register NextCounter;
+ NextCounter =
+ cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
+ AccCond = AccumulateCond(AccCond, CC);
+ if (I != TC && Update != Comp)
+ NextCounter =
+ cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
+ Counter = NextCounter;
+ }
+ }
+
+ // If AccCond == 0, the remainder is greater than TC.
+ BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
+ .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
+ .addReg(AccCond)
+ .addImm(0)
+ .addImm(0);
+ Cond.clear();
+ Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
+}
+
+static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
+ Register &RegMBB, Register &RegOther) {
+ assert(Phi.getNumOperands() == 5);
+ if (Phi.getOperand(2).getMBB() == MBB) {
+ RegMBB = Phi.getOperand(1).getReg();
+ RegOther = Phi.getOperand(3).getReg();
+ } else {
+ assert(Phi.getOperand(4).getMBB() == MBB);
+ RegMBB = Phi.getOperand(3).getReg();
+ RegOther = Phi.getOperand(1).getReg();
+ }
+}
+
+static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
+ if (!Reg.isVirtual())
+ return false;
+ const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ return MRI.getVRegDef(Reg)->getParent() != BB;
+}
+
+/// If Reg is an induction variable, return true and set some parameters
+static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
+ MachineInstr *&UpdateInst,
+ unsigned &UpdateCounterOprNum, Register &InitReg,
+ bool &IsUpdatePriorComp) {
+ // Example:
+ //
+ // Preheader:
+ // InitReg = ...
+ // LoopBB:
+ // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
+ // Reg = COPY Reg0 ; COPY is ignored.
+ // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
+ // ; Reg is the value calculated in the previous
+ // ; iteration, so IsUpdatePriorComp == false.
+
+ if (LoopBB->pred_size() != 2)
+ return false;
+ if (!Reg.isVirtual())
+ return false;
+ const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
+ UpdateInst = nullptr;
+ UpdateCounterOprNum = 0;
+ InitReg = 0;
+ IsUpdatePriorComp = true;
+ Register CurReg = Reg;
+ while (true) {
+ MachineInstr *Def = MRI.getVRegDef(CurReg);
+ if (Def->getParent() != LoopBB)
+ return false;
+ if (Def->isCopy()) {
+ // Ignore copy instructions unless they contain subregisters
+ if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
+ return false;
+ CurReg = Def->getOperand(1).getReg();
+ } else if (Def->isPHI()) {
+ if (InitReg != 0)
+ return false;
+ if (!UpdateInst)
+ IsUpdatePriorComp = false;
+ extractPhiReg(*Def, LoopBB, CurReg, InitReg);
+ } else {
+ if (UpdateInst)
+ return false;
+ switch (Def->getOpcode()) {
+ case AArch64::ADDSXri:
+ case AArch64::ADDSWri:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSWri:
+ case AArch64::ADDXri:
+ case AArch64::ADDWri:
+ case AArch64::SUBXri:
+ case AArch64::SUBWri:
+ UpdateInst = Def;
+ UpdateCounterOprNum = 1;
+ break;
+ case AArch64::ADDSXrr:
+ case AArch64::ADDSWrr:
+ case AArch64::SUBSXrr:
+ case AArch64::SUBSWrr:
+ case AArch64::ADDXrr:
+ case AArch64::ADDWrr:
+ case AArch64::SUBXrr:
+ case AArch64::SUBWrr:
+ UpdateInst = Def;
+ if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
+ UpdateCounterOprNum = 1;
+ else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
+ UpdateCounterOprNum = 2;
+ else
+ return false;
+ break;
+ default:
+ return false;
+ }
+ CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
+ }
+
+ if (!CurReg.isVirtual())
+ return false;
+ if (Reg == CurReg)
+ break;
+ }
+
+ if (!UpdateInst)
+ return false;
+
+ return true;
}
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+ // Accept loops that meet the following conditions
+ // * The conditional branch is BCC
+ // * The compare instruction is ADDS/SUBS/WHILEXX
+ // * One operand of the compare is an induction variable and the other is a
+ // loop invariant value
+ // * The induction variable is incremented/decremented by a single instruction
+ // * Does not contain CALL or instructions which have unmodeled side effects
+
+ for (MachineInstr &MI : *LoopBB)
+ if (MI.isCall() || MI.hasUnmodeledSideEffects())
+ // This instruction may use NZCV, which interferes with the instruction to
+ // be inserted for loop control.
+ return nullptr;
+
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
@@ -9641,48 +9943,76 @@ AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
return nullptr;
// Must be conditional branch
- if (FBB == nullptr)
+ if (TBB != LoopBB && FBB == nullptr)
return nullptr;
assert((TBB == LoopBB || FBB == LoopBB) &&
"The Loop must be a single-basic-block loop");
+ MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+
+ if (CondBranch->getOpcode() != AArch64::Bcc)
+ return nullptr;
+
// Normalization for createTripCountGreaterCondition()
if (TBB == LoopBB)
reverseBranchCondition(Cond);
- MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
- const TargetRegisterInfo &TRI = getRegisterInfo();
-
- // Find the immediate predecessor of the conditional branch
- MachineInstr *PredBranch = nullptr;
- if (CondBranch->getOpcode() == AArch64::Bcc) {
- for (MachineInstr &MI : reverse(*LoopBB)) {
- if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
- PredBranch = &MI;
+ MachineInstr *Comp = nullptr;
+ unsigned CompCounterOprNum = 0;
+ for (MachineInstr &MI : reverse(*LoopBB)) {
+ if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
+ // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
+ // operands is a loop invariant value
+
+ switch (MI.getOpcode()) {
+ case AArch64::SUBSXri:
+ case AArch64::SUBSWri:
+ case AArch64::ADDSXri:
+ case AArch64::ADDSWri:
+ Comp = &MI;
+ CompCounterOprNum = 1;
break;
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXrr:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ Comp = &MI;
+ break;
+ default:
+ if (isWhileOpcode(MI.getOpcode())) {
+ Comp = &MI;
+ break;
+ }
+ return nullptr;
}
- }
- if (!PredBranch)
- return nullptr;
- } else if (isCompareAndBranch(CondBranch->getOpcode())) {
- const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
- Register Reg = CondBranch->getOperand(0).getReg();
- if (!Reg.isVirtual())
- return nullptr;
- PredBranch = MRI.getVRegDef(Reg);
- // MachinePipeliner does not expect that the immediate predecessor is a Phi
- if (PredBranch->isPHI())
- return nullptr;
+ if (CompCounterOprNum == 0) {
+ if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
+ CompCounterOprNum = 2;
+ else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
+ CompCounterOprNum = 1;
+ else
+ return nullptr;
+ }
+ break;
+ }
+ }
+ if (!Comp)
+ return nullptr;
- if (PredBranch->getParent() != LoopBB)
- return nullptr;
- } else {
+ MachineInstr *Update = nullptr;
+ Register Init;
+ bool IsUpdatePriorComp;
+ unsigned UpdateCounterOprNum;
+ if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
+ Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
return nullptr;
- }
- return std::make_unique<AArch64PipelinerLoopInfo>(PredBranch, Cond);
+ return std::make_unique<AArch64PipelinerLoopInfo>(
+ LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
+ Init, IsUpdatePriorComp, Cond);
}
#define GET_INSTRINFO_HELPERS