summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp305
1 files changed, 190 insertions, 115 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index 8b1d4ba68a44..21cf9cc6878f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -14,6 +14,10 @@
/// MFMA opcode.
///
/// TODO:
+/// - Handle rewrites of phis. This must be more careful than normal about the
+/// reassignment. We do not want to introduce an AGPR-to-AGPR copy inside of a
+/// loop, so it depends on the exact assignment of the copy.
+///
/// - Update LiveIntervals incrementally instead of recomputing from scratch
///
//===----------------------------------------------------------------------===//
@@ -22,6 +26,7 @@
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -34,6 +39,9 @@ using namespace llvm;
namespace {
+STATISTIC(NumMFMAsRewrittenToAGPR,
+ "Number of MFMA instructions rewritten to use AGPR form");
+
class AMDGPURewriteAGPRCopyMFMAImpl {
MachineFunction &MF;
const GCNSubtarget &ST;
@@ -60,6 +68,25 @@ public:
return TII.isMAI(MI) && AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != -1;
}
+ /// Find AV_* registers assigned to AGPRs (or virtual registers which were
+ /// already required to be AGPR).
+ ///
+ /// \return the assigned physical register that \p VReg is assigned to if it
+ /// is an AGPR, otherwise MCRegister().
+ MCRegister getAssignedAGPR(Register VReg) const {
+ MCRegister PhysReg = VRM.getPhys(VReg);
+ if (!PhysReg)
+ return MCRegister();
+
+ // If this is an AV register, we have to check if the actual assignment is
+ // to an AGPR
+ const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
+ return TRI.isAGPRClass(AssignedRC) ? PhysReg : MCRegister();
+ }
+
+ bool tryReassigningMFMAChain(MachineInstr &MFMA, Register MFMAHintReg,
+ MCPhysReg PhysRegHint) const;
+
/// Compute the register class constraints based on the uses of \p Reg,
/// excluding MFMA uses from which can be rewritten to change the register
/// class constraint. This should be nearly identical to
@@ -74,6 +101,8 @@ public:
Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
SmallSetVector<Register, 4> &RewriteRegs) const;
+ bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;
+ bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;
bool run(MachineFunction &MF) const;
};
@@ -154,6 +183,88 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
return true;
}
+bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
+ MachineInstr &MFMA, Register MFMAHintReg, MCPhysReg PhysRegHint) const {
+ // src2 and dst have the same physical class constraint; try to preserve
+ // the original src2 subclass if one were to exist.
+ SmallVector<MachineInstr *, 4> RewriteCandidates = {&MFMA};
+ SmallSetVector<Register, 4> RewriteRegs;
+
+ // Make sure we reassign the MFMA we found the copy from first. We want
+ // to ensure dst ends up in the physreg we were originally copying to.
+ RewriteRegs.insert(MFMAHintReg);
+
+ // We've found av = COPY (MFMA) (or MFMA (v = COPY av)) and need to verify
+ // that we can trivially rewrite src2 to use the new AGPR. If we can't
+ // trivially replace it, we're going to induce as many copies as we would have
+ // emitted in the first place, as well as need to assign another register, and
+ // need to figure out where to put them. The live range splitting is smarter
+ // than anything we're doing here, so trust it did something reasonable.
+ //
+ // Note recomputeRegClassExceptRewritable will consider the constraints of
+ // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
+ if (!recomputeRegClassExceptRewritable(MFMAHintReg, RewriteCandidates,
+ RewriteRegs)) {
+ LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
+ << printReg(MFMAHintReg, &TRI) << '\n');
+ return false;
+ }
+
+ // If src2 and dst are different registers, we need to also reassign the
+ // input to an available AGPR if it is compatible with all other uses.
+ //
+ // If we can't reassign it, we'd need to introduce a different copy
+ // which is likely worse than the copy we'd be saving.
+ //
+ // It's likely that the MFMA is used in sequence with other MFMAs; if we
+ // cannot migrate the full use/def chain of MFMAs, we would need to
+ // introduce intermediate copies somewhere. So we only make the
+ // transform if all the interfering MFMAs can also be migrated. Collect
+ // the set of rewritable MFMAs and check if we can assign an AGPR at
+ // that point.
+ //
+ // If any of the MFMAs aren't reassignable, we give up and rollback to
+ // the original register assignments.
+
+ using RecoloringStack =
+ SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
+ RecoloringStack TentativeReassignments;
+
+ for (Register RewriteReg : RewriteRegs) {
+ LiveInterval &LI = LIS.getInterval(RewriteReg);
+ TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
+ LRM.unassign(LI);
+ }
+
+ if (!attemptReassignmentsToAGPR(RewriteRegs, PhysRegHint)) {
+ // Roll back the register assignments to the original state.
+ for (auto [LI, OldAssign] : TentativeReassignments) {
+ if (VRM.hasPhys(LI->reg()))
+ LRM.unassign(*LI);
+ LRM.assign(*LI, OldAssign);
+ }
+
+ return false;
+ }
+
+ // Fixup the register classes of the virtual registers now that we've
+ // committed to the reassignments.
+ for (Register InterferingReg : RewriteRegs) {
+ const TargetRegisterClass *EquivalentAGPRRegClass =
+ TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
+ MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
+ }
+
+ for (MachineInstr *RewriteCandidate : RewriteCandidates) {
+ int NewMFMAOp =
+ AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
+ RewriteCandidate->setDesc(TII.get(NewMFMAOp));
+ ++NumMFMAsRewrittenToAGPR;
+ }
+
+ return true;
+}
+
/// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
/// preference to use \p PhysReg first. Returns false if the reassignments
/// cannot be trivially performed.
@@ -206,140 +317,104 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
return true;
}
-bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
- // This only applies on subtargets that have a configurable AGPR vs. VGPR
- // allocation.
- if (!ST.hasGFX90AInsts())
- return false;
-
- // Early exit if no AGPRs were assigned.
- if (!LRM.isPhysRegUsed(AMDGPU::AGPR0)) {
- LLVM_DEBUG(dbgs() << "skipping function that did not allocate AGPRs\n");
- return false;
- }
-
+/// Identify copies that look like:
+/// %vdst:vgpr = V_MFMA_.. %src0:av, %src1:av, %src2:vgpr
+/// %agpr = COPY %vgpr
+///
+/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
+/// versions of the MFMA. This should cover the common case.
+bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
+ Register VReg, MCRegister AssignedAGPR) const {
bool MadeChange = false;
-
- for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- Register VReg = Register::index2VirtReg(I);
- Register PhysReg = VRM.getPhys(VReg);
- if (!PhysReg)
+ for (MachineInstr &UseMI : MRI.def_instructions(VReg)) {
+ if (!UseMI.isCopy())
continue;
- // Find AV_* registers assigned to AGPRs.
- const TargetRegisterClass *VirtRegRC = MRI.getRegClass(VReg);
- if (!TRI.hasAGPRs(VirtRegRC))
+ Register CopySrcReg = UseMI.getOperand(1).getReg();
+ if (!CopySrcReg.isVirtual())
continue;
- const TargetRegisterClass *AssignedRC = VirtRegRC;
- if (TRI.hasVGPRs(VirtRegRC)) {
- // If this is an AV register, we have to check if the actual assignment is
- // to an AGPR
- AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
- if (!TRI.isAGPRClass(AssignedRC))
- continue;
+ // TODO: Handle loop phis copied to AGPR. e.g.
+ //
+ // loop:
+ // %phi:vgpr = COPY %mfma:vgpr
+ // %mfma:vgpr = V_MFMA_xxx_vgprcd_e64 %a, %b, %phi
+ // s_cbranch_vccnz loop
+ //
+ // endloop:
+ // %agpr = mfma
+ //
+ // We need to be sure that %phi is assigned to the same physical register as
+ // %mfma, or else we will just be moving copies into the loop.
+
+ for (MachineInstr &CopySrcDefMI : MRI.def_instructions(CopySrcReg)) {
+ if (isRewriteCandidate(CopySrcDefMI) &&
+ tryReassigningMFMAChain(
+ CopySrcDefMI, CopySrcDefMI.getOperand(0).getReg(), AssignedAGPR))
+ MadeChange = true;
}
+ }
- LiveInterval &LI = LIS.getInterval(VReg);
-
- for (VNInfo *VNI : LI.vnis()) {
- if (VNI->isPHIDef() || VNI->isUnused())
- continue;
-
- MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
- if (!DefMI || !DefMI->isCopy())
- continue;
+ return MadeChange;
+}
- Register MFMADstReg = DefMI->getOperand(1).getReg();
- if (!MFMADstReg.isVirtual())
- continue;
+/// Identify copies that look like:
+/// %src:vgpr = COPY %src:agpr
+/// %vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src:vgpr
+///
+/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
+/// versions of the MFMA. This should cover rarer cases, and will generally be
+/// redundant with tryFoldCopiesToAGPR.
+bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
+ Register VReg, MCRegister AssignedAGPR) const {
+ bool MadeChange = false;
+ for (MachineInstr &UseMI : MRI.use_instructions(VReg)) {
+ if (!UseMI.isCopy())
+ continue;
- LiveInterval &CopySrcLI = LIS.getInterval(MFMADstReg);
- LiveQueryResult LRQ = CopySrcLI.Query(VNI->def.getRegSlot());
- MachineInstr *MFMA = LIS.getInstructionFromIndex(LRQ.valueIn()->def);
- if (!MFMA || !isRewriteCandidate(*MFMA))
+ Register CopyDstReg = UseMI.getOperand(0).getReg();
+ if (!CopyDstReg.isVirtual())
+ continue;
+ for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) {
+ if (!CopyUseMO.readsReg())
continue;
- // src2 and dst have the same physical class constraint; try to preserve
- // the original src2 subclass if one were to exist.
- SmallVector<MachineInstr *, 4> RewriteCandidates = {MFMA};
- SmallSetVector<Register, 4> RewriteRegs;
-
- // Make sure we reassign the MFMA we found the copy from first. We want
- // to ensure dst ends up in the physreg we were originally copying to.
- RewriteRegs.insert(MFMADstReg);
-
- // We've found av = COPY (MFMA), and need to verify that we can trivially
- // rewrite src2 to use the new AGPR. If we can't trivially replace it,
- // we're going to induce as many copies as we would have emitted in the
- // first place, as well as need to assign another register, and need to
- // figure out where to put them. The live range splitting is smarter than
- // anything we're doing here, so trust it did something reasonable.
- //
- // Note recomputeRegClassExceptRewritable will consider the constraints of
- // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
- if (!recomputeRegClassExceptRewritable(MFMADstReg, RewriteCandidates,
- RewriteRegs)) {
- LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
- << printReg(MFMADstReg, &TRI) << '\n');
- continue;
+ MachineInstr &CopyUseMI = *CopyUseMO.getParent();
+ if (isRewriteCandidate(CopyUseMI)) {
+ if (tryReassigningMFMAChain(CopyUseMI, CopyDstReg,
+ VRM.getPhys(CopyDstReg)))
+ MadeChange = true;
}
+ }
+ }
- // If src2 and dst are different registers, we need to also reassign the
- // input to an available AGPR if it is compatible with all other uses.
- //
- // If we can't reassign it, we'd need to introduce a different copy
- // which is likely worse than the copy we'd be saving.
- //
- // It's likely that the MFMA is used in sequence with other MFMAs; if we
- // cannot migrate the full use/def chain of MFMAs, we would need to
- // introduce intermediate copies somewhere. So we only make the
- // transform if all the interfering MFMAs can also be migrated. Collect
- // the set of rewritable MFMAs and check if we can assign an AGPR at
- // that point.
- //
- // If any of the MFMAs aren't reassignable, we give up and rollback to
- // the original register assignments.
-
- using RecoloringStack =
- SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
- RecoloringStack TentativeReassignments;
-
- for (Register RewriteReg : RewriteRegs) {
- LiveInterval &LI = LIS.getInterval(RewriteReg);
- TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
- LRM.unassign(LI);
- }
+ return MadeChange;
+}
- if (!attemptReassignmentsToAGPR(RewriteRegs, PhysReg)) {
- // Roll back the register assignments to the original state.
- for (auto [LI, OldAssign] : TentativeReassignments) {
- if (VRM.hasPhys(LI->reg()))
- LRM.unassign(*LI);
- LRM.assign(*LI, OldAssign);
- }
+bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
+ // This only applies on subtargets that have a configurable AGPR vs. VGPR
+ // allocation.
+ if (!ST.hasGFX90AInsts())
+ return false;
- continue;
- }
+ // Early exit if no AGPRs were assigned.
+ if (!LRM.isPhysRegUsed(AMDGPU::AGPR0)) {
+ LLVM_DEBUG(dbgs() << "skipping function that did not allocate AGPRs\n");
+ return false;
+ }
- // Fixup the register classes of the virtual registers now that we've
- // committed to the reassignments.
- for (Register InterferingReg : RewriteRegs) {
- const TargetRegisterClass *EquivalentAGPRRegClass =
- TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
- MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
- }
+ bool MadeChange = false;
- for (MachineInstr *RewriteCandidate : RewriteCandidates) {
- int NewMFMAOp =
- AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
- RewriteCandidate->setDesc(TII.get(NewMFMAOp));
- }
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ Register VReg = Register::index2VirtReg(I);
+ MCRegister AssignedAGPR = getAssignedAGPR(VReg);
+ if (!AssignedAGPR)
+ continue;
- // We likely left an identity copy behind after assignment; let
- // VirtRegRewriter deal with it later.
+ if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
+ MadeChange = true;
+ if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
MadeChange = true;
- }
}
return MadeChange;