diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 71 |
1 files changed, 65 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index ae0f304ea304..22488384759b 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -3273,6 +3273,10 @@ StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const { return AMDGPUInstPrinter::getRegisterName(Reg); } +unsigned SIRegisterInfo::getHWRegIndex(MCRegister Reg) const { + return getEncodingValue(Reg) & AMDGPU::HWEncoding::REG_IDX_MASK; +} + unsigned AMDGPU::getRegBitWidth(const TargetRegisterClass &RC) { return getRegBitWidth(RC.getID()); } @@ -3353,6 +3357,40 @@ SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const { : getAnyVGPRClassForBitWidth(BitWidth); } +const TargetRegisterClass * +SIRegisterInfo::getAlignedLo256VGPRClassForBitWidth(unsigned BitWidth) const { + if (BitWidth <= 32) + return &AMDGPU::VGPR_32_Lo256RegClass; + if (BitWidth <= 64) + return &AMDGPU::VReg_64_Lo256_Align2RegClass; + if (BitWidth <= 96) + return &AMDGPU::VReg_96_Lo256_Align2RegClass; + if (BitWidth <= 128) + return &AMDGPU::VReg_128_Lo256_Align2RegClass; + if (BitWidth <= 160) + return &AMDGPU::VReg_160_Lo256_Align2RegClass; + if (BitWidth <= 192) + return &AMDGPU::VReg_192_Lo256_Align2RegClass; + if (BitWidth <= 224) + return &AMDGPU::VReg_224_Lo256_Align2RegClass; + if (BitWidth <= 256) + return &AMDGPU::VReg_256_Lo256_Align2RegClass; + if (BitWidth <= 288) + return &AMDGPU::VReg_288_Lo256_Align2RegClass; + if (BitWidth <= 320) + return &AMDGPU::VReg_320_Lo256_Align2RegClass; + if (BitWidth <= 352) + return &AMDGPU::VReg_352_Lo256_Align2RegClass; + if (BitWidth <= 384) + return &AMDGPU::VReg_384_Lo256_Align2RegClass; + if (BitWidth <= 512) + return &AMDGPU::VReg_512_Lo256_Align2RegClass; + if (BitWidth <= 1024) + return &AMDGPU::VReg_1024_Lo256_Align2RegClass; + + return nullptr; +} + static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth) { if (BitWidth == 64) @@ -3547,7 +3585,17 @@ bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI, const TargetRegisterClass * SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const { unsigned Size = getRegSizeInBits(*SRC); - const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size); + + switch (SRC->getID()) { + default: + break; + case AMDGPU::VS_32_Lo256RegClassID: + case AMDGPU::VS_64_Lo256RegClassID: + return getAllocatableClass(getAlignedLo256VGPRClassForBitWidth(Size)); + } + + const TargetRegisterClass *VRC = + getAllocatableClass(getVGPRClassForBitWidth(Size)); assert(VRC && "Invalid register class size"); return VRC; } @@ -3708,14 +3756,15 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const { - if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 || - Idx == AMDGPU::RegisterPressureSets::AGPR_32) + switch (static_cast<AMDGPU::RegisterPressureSets>(Idx)) { + case AMDGPU::RegisterPressureSets::VGPR_32: + case AMDGPU::RegisterPressureSets::AGPR_32: return getRegPressureLimit(&AMDGPU::VGPR_32RegClass, const_cast<MachineFunction &>(MF)); - - if (Idx == AMDGPU::RegisterPressureSets::SReg_32) + case AMDGPU::RegisterPressureSets::SReg_32: return getRegPressureLimit(&AMDGPU::SGPR_32RegClass, const_cast<MachineFunction &>(MF)); + } llvm_unreachable("Unexpected register pressure set!"); } @@ -3944,6 +3993,8 @@ bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const { return RC.hasSuperClassEq( getVectorSuperClassForBitWidth(getRegSizeInBits(RC))); + assert(&RC != &AMDGPU::VS_64RegClass); + return true; } @@ -3956,6 +4007,9 @@ SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const { if (Size <= 32) return RC; + if (RC == &AMDGPU::VS_64RegClass) + return &AMDGPU::VS_64_Align2RegClass; + if (isVGPRClass(RC)) return getAlignedVGPRClassForBitWidth(Size); if (isAGPRClass(RC)) @@ -4000,7 +4054,12 @@ SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI, const TargetRegisterClass &RC, bool IncludeCalls) const { - for (MCPhysReg Reg : reverse(RC.getRegisters())) + unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256; + ArrayRef<MCPhysReg> Registers = + (RC.getID() == AMDGPU::VGPR_32RegClassID) + ? RC.getRegisters().take_front(NumArchVGPRs) + : RC.getRegisters(); + for (MCPhysReg Reg : reverse(Registers)) if (MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/!IncludeCalls)) return getHWRegIndex(Reg) + 1; return 0; |
