summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/Utils
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils')
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 2
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 145
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 29
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp | 11
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h | 1
5 files changed, 178 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index c740b5e0f09d..14ebbf8e9c92 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -116,6 +116,8 @@ static constexpr CustomOperand MsgOperands[] = {
{{"MSG_RTN_GET_TBA"}, ID_RTN_GET_TBA, isGFX11Plus},
{{"MSG_RTN_GET_TBA_TO_PC"}, ID_RTN_GET_TBA_TO_PC, isGFX11Plus},
{{"MSG_RTN_GET_SE_AID_ID"}, ID_RTN_GET_SE_AID_ID, isGFX12Plus},
+ {{"MSG_RTN_GET_CLUSTER_BARRIER_STATE"}, ID_RTN_GET_CLUSTER_BARRIER_STATE,
+ isGFX1250},
};
static constexpr CustomOperand SysMsgOperands[] = {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 18ee9c16b3ff..9f4f42185d9a 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -934,6 +934,10 @@ std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
continue;
+ if (getVGPREncodingMSBs(OpXRegs[CompOprIdx], MRI) !=
+ getVGPREncodingMSBs(OpYRegs[CompOprIdx], MRI))
+ return CompOprIdx;
+
if (SkipSrc && CompOprIdx >= Component::DST_NUM)
continue;
@@ -1376,6 +1380,9 @@ unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
? *EnableWavefrontSize32
: STI->getFeatureBits().test(FeatureWavefrontSize32);
+ if (STI->getFeatureBits().test(Feature1024AddressableVGPRs))
+ return IsWave32 ? 16 : 8;
+
return IsWave32 ? 8 : 4;
}
@@ -1396,7 +1403,10 @@ unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
unsigned DynamicVGPRBlockSize) {
- if (STI->getFeatureBits().test(FeatureGFX90AInsts))
+ const auto &Features = STI->getFeatureBits();
+ if (Features.test(FeatureGFX1250Insts))
+ return Features.test(FeatureWavefrontSize32) ? 1024 : 512;
+ if (Features.test(FeatureGFX90AInsts))
return 512;
// Temporarily check the subtarget feature, until we fully switch to using
@@ -2720,13 +2730,6 @@ bool isInlineValue(unsigned Reg) {
#undef CASE_GFXPRE11_GFX11PLUS_TO
#undef MAP_REG2REG
-bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
- assert(OpNo < Desc.NumOperands);
- unsigned OpType = Desc.operands()[OpNo].OperandType;
- return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
- OpType <= AMDGPU::OPERAND_SRC_LAST;
-}
-
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.operands()[OpNo].OperandType;
@@ -2776,6 +2779,7 @@ unsigned getRegBitWidth(unsigned RCID) {
return 16;
case AMDGPU::SGPR_32RegClassID:
case AMDGPU::VGPR_32RegClassID:
+ case AMDGPU::VGPR_32_Lo256RegClassID:
case AMDGPU::VRegOrLds_32RegClassID:
case AMDGPU::AGPR_32RegClassID:
case AMDGPU::VS_32RegClassID:
@@ -2794,6 +2798,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_64_Align2RegClassID:
case AMDGPU::AV_64RegClassID:
case AMDGPU::AV_64_Align2RegClassID:
+ case AMDGPU::VReg_64_Lo256_Align2RegClassID:
+ case AMDGPU::VS_64_Lo256RegClassID:
return 64;
case AMDGPU::SGPR_96RegClassID:
case AMDGPU::SReg_96RegClassID:
@@ -2803,6 +2809,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_96_Align2RegClassID:
case AMDGPU::AV_96RegClassID:
case AMDGPU::AV_96_Align2RegClassID:
+ case AMDGPU::VReg_96_Lo256_Align2RegClassID:
return 96;
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::SReg_128RegClassID:
@@ -2813,6 +2820,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AV_128RegClassID:
case AMDGPU::AV_128_Align2RegClassID:
case AMDGPU::SReg_128_XNULLRegClassID:
+ case AMDGPU::VReg_128_Lo256_Align2RegClassID:
return 128;
case AMDGPU::SGPR_160RegClassID:
case AMDGPU::SReg_160RegClassID:
@@ -2822,6 +2830,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_160_Align2RegClassID:
case AMDGPU::AV_160RegClassID:
case AMDGPU::AV_160_Align2RegClassID:
+ case AMDGPU::VReg_160_Lo256_Align2RegClassID:
return 160;
case AMDGPU::SGPR_192RegClassID:
case AMDGPU::SReg_192RegClassID:
@@ -2831,6 +2840,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_192_Align2RegClassID:
case AMDGPU::AV_192RegClassID:
case AMDGPU::AV_192_Align2RegClassID:
+ case AMDGPU::VReg_192_Lo256_Align2RegClassID:
return 192;
case AMDGPU::SGPR_224RegClassID:
case AMDGPU::SReg_224RegClassID:
@@ -2840,6 +2850,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_224_Align2RegClassID:
case AMDGPU::AV_224RegClassID:
case AMDGPU::AV_224_Align2RegClassID:
+ case AMDGPU::VReg_224_Lo256_Align2RegClassID:
return 224;
case AMDGPU::SGPR_256RegClassID:
case AMDGPU::SReg_256RegClassID:
@@ -2850,6 +2861,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AV_256RegClassID:
case AMDGPU::AV_256_Align2RegClassID:
case AMDGPU::SReg_256_XNULLRegClassID:
+ case AMDGPU::VReg_256_Lo256_Align2RegClassID:
return 256;
case AMDGPU::SGPR_288RegClassID:
case AMDGPU::SReg_288RegClassID:
@@ -2859,6 +2871,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_288_Align2RegClassID:
case AMDGPU::AV_288RegClassID:
case AMDGPU::AV_288_Align2RegClassID:
+ case AMDGPU::VReg_288_Lo256_Align2RegClassID:
return 288;
case AMDGPU::SGPR_320RegClassID:
case AMDGPU::SReg_320RegClassID:
@@ -2868,6 +2881,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_320_Align2RegClassID:
case AMDGPU::AV_320RegClassID:
case AMDGPU::AV_320_Align2RegClassID:
+ case AMDGPU::VReg_320_Lo256_Align2RegClassID:
return 320;
case AMDGPU::SGPR_352RegClassID:
case AMDGPU::SReg_352RegClassID:
@@ -2877,6 +2891,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_352_Align2RegClassID:
case AMDGPU::AV_352RegClassID:
case AMDGPU::AV_352_Align2RegClassID:
+ case AMDGPU::VReg_352_Lo256_Align2RegClassID:
return 352;
case AMDGPU::SGPR_384RegClassID:
case AMDGPU::SReg_384RegClassID:
@@ -2886,6 +2901,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_384_Align2RegClassID:
case AMDGPU::AV_384RegClassID:
case AMDGPU::AV_384_Align2RegClassID:
+ case AMDGPU::VReg_384_Lo256_Align2RegClassID:
return 384;
case AMDGPU::SGPR_512RegClassID:
case AMDGPU::SReg_512RegClassID:
@@ -2895,6 +2911,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_512_Align2RegClassID:
case AMDGPU::AV_512RegClassID:
case AMDGPU::AV_512_Align2RegClassID:
+ case AMDGPU::VReg_512_Lo256_Align2RegClassID:
return 512;
case AMDGPU::SGPR_1024RegClassID:
case AMDGPU::SReg_1024RegClassID:
@@ -2904,6 +2921,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_1024_Align2RegClassID:
case AMDGPU::AV_1024RegClassID:
case AMDGPU::AV_1024_Align2RegClassID:
+ case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
return 1024;
default:
llvm_unreachable("Unexpected register class");
@@ -3206,8 +3224,11 @@ bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
+/// \returns true if \p EncodedOffset is accepted as a signed SMRD immediate
+/// offset for subtarget \p ST. \p IsBuffer selects the buffer-load rules.
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
int64_t EncodedOffset, bool IsBuffer) {
- if (isGFX12Plus(ST))
+ if (isGFX12Plus(ST)) {
+ // On GFX12+ a buffer access additionally rejects any negative offset
+ // before the signed 24-bit range check is applied.
+ if (IsBuffer && EncodedOffset < 0)
+ return false;
return isInt<24>(EncodedOffset);
+ }
+ // Before GFX12 only non-buffer accesses qualify, and only when
+ // hasSMRDSignedImmOffset(ST) holds and the offset passes isInt<21>.
return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
}
@@ -3321,6 +3342,112 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
: getGfx9BufferFormatInfo(Format);
}
+/// Find the VGPR register class that holds the physical register \p Reg.
+///
+/// The candidate classes are probed in table order and the first one that
+/// contains \p Reg wins. \returns nullptr when no candidate contains it.
+const MCRegisterClass *getVGPRPhysRegClass(MCPhysReg Reg,
+                                           const MCRegisterInfo &MRI) {
+  const unsigned CandidateRCIDs[] = {
+      AMDGPU::VGPR_16RegClassID,
+      AMDGPU::VGPR_32RegClassID,
+      AMDGPU::VReg_64RegClassID,
+      AMDGPU::VReg_96RegClassID,
+      AMDGPU::VReg_128RegClassID,
+      AMDGPU::VReg_160RegClassID,
+      AMDGPU::VReg_192RegClassID,
+      AMDGPU::VReg_224RegClassID,
+      AMDGPU::VReg_256RegClassID,
+      AMDGPU::VReg_288RegClassID,
+      AMDGPU::VReg_320RegClassID,
+      AMDGPU::VReg_352RegClassID,
+      AMDGPU::VReg_384RegClassID,
+      AMDGPU::VReg_512RegClassID,
+      AMDGPU::VReg_1024RegClassID,
+  };
+
+  const MCRegisterClass *Match = nullptr;
+  for (unsigned ID : CandidateRCIDs) {
+    const MCRegisterClass &Candidate = MRI.getRegClass(ID);
+    if (!Candidate.contains(Reg))
+      continue;
+    // Stop at the first class that owns the register.
+    Match = &Candidate;
+    break;
+  }
+  return Match;
+}
+
+/// \returns the part of \p Reg's hardware register index that lies above its
+/// low 8 bits.
+unsigned getVGPREncodingMSBs(MCPhysReg Reg, const MCRegisterInfo &MRI) {
+  const unsigned Encoding = MRI.getEncodingValue(Reg);
+  // Keep only the register-index field of the encoding, then discard the
+  // low 8 bits of that index.
+  return (Encoding & AMDGPU::HWEncoding::REG_IDX_MASK) >> 8;
+}
+
+/// Map \p Reg to the register of the same VGPR class whose index has \p MSBs
+/// placed above the low 8 bits.
+///
+/// \returns AMDGPU::NoRegister if the index of \p Reg is already 0x100 or
+/// larger, or if getVGPRPhysRegClass() finds no class for it.
+MCPhysReg getVGPRWithMSBs(MCPhysReg Reg, unsigned MSBs,
+                          const MCRegisterInfo &MRI) {
+  const unsigned Encoding = MRI.getEncodingValue(Reg);
+  const unsigned LowIdx = Encoding & AMDGPU::HWEncoding::REG_IDX_MASK;
+
+  // Only look up the class when the index still fits in 8 bits.
+  const MCRegisterClass *VGPRClass =
+      LowIdx < 0x100 ? getVGPRPhysRegClass(Reg, MRI) : nullptr;
+  if (VGPRClass)
+    return VGPRClass->getRegister(LowIdx | (MSBs << 8));
+
+  return AMDGPU::NoRegister;
+}
+
+/// Select the operand-name tables used for VGPR MSB lowering of the
+/// instruction described by \p Desc.
+///
+/// Every table has 4 entries. Some entries are
+/// AMDGPU::OpName::NUM_OPERAND_NAMES, presumably marking a slot that has no
+/// operand for that encoding (NOTE(review): confirm against the consumer).
+///
+/// \returns {table, nullptr} for VOP1/VOP2/VOP3/VOP3P/VOPC/DPP, DS, FLAT,
+/// MUBUF/MTBUF and VIMAGE instructions; {X table, Y table} for VOPD; and a
+/// pair of null pointers for the WMMA LD_SCALE opcodes and for any other
+/// instruction.
+std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
+getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
+  static const AMDGPU::OpName VOPOps[4] = {
+      AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
+      AMDGPU::OpName::vdst};
+  static const AMDGPU::OpName VDSOps[4] = {
+      AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
+      AMDGPU::OpName::vdst};
+  static const AMDGPU::OpName FLATOps[4] = {
+      AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
+      AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
+  static const AMDGPU::OpName BUFOps[4] = {
+      AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
+      AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
+  static const AMDGPU::OpName VIMGOps[4] = {
+      AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
+      AMDGPU::OpName::vdata};
+
+  // For VOPD instructions MSB of a corresponding Y component operand VGPR
+  // address is supposed to match X operand, otherwise VOPD shall not be
+  // combined.
+  static const AMDGPU::OpName VOPDOpsX[4] = {
+      AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
+      AMDGPU::OpName::vdstX};
+  static const AMDGPU::OpName VOPDOpsY[4] = {
+      AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
+      AMDGPU::OpName::vdstY};
+
+  // Keep the flags at full 64-bit width, as supportsScaleOffset() does;
+  // narrowing to 'unsigned' would silently drop any flag above bit 31.
+  const uint64_t TSFlags = Desc.TSFlags;
+
+  if (TSFlags &
+      (SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | SIInstrFlags::VOP3 |
+       SIInstrFlags::VOP3P | SIInstrFlags::VOPC | SIInstrFlags::DPP)) {
+    // LD_SCALE operands ignore MSB.
+    if (Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32 ||
+        Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250 ||
+        Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64 ||
+        Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250)
+      return {};
+    return {VOPOps, nullptr};
+  }
+
+  if (TSFlags & SIInstrFlags::DS)
+    return {VDSOps, nullptr};
+
+  if (TSFlags & SIInstrFlags::FLAT)
+    return {FLATOps, nullptr};
+
+  if (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))
+    return {BUFOps, nullptr};
+
+  if (TSFlags & SIInstrFlags::VIMAGE)
+    return {VIMGOps, nullptr};
+
+  // VOPD is the only case that yields a second (Y component) table.
+  if (AMDGPU::isVOPD(Desc.getOpcode()))
+    return {VOPDOpsX, VOPDOpsY};
+
+  assert(!(TSFlags & SIInstrFlags::MIMG));
+
+  if (TSFlags & (SIInstrFlags::VSAMPLE | SIInstrFlags::EXP))
+    llvm_unreachable("Sample and export VGPR lowering is not implemented and"
+                     " these instructions are not expected on gfx1250");
+
+  return {};
+}
+
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
uint64_t TSFlags = MII.get(Opcode).TSFlags;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 70dfb63cbe04..3fcd16f9290b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1517,6 +1517,7 @@ constexpr bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
case CallingConv::C:
case CallingConv::AMDGPU_Gfx:
+ case CallingConv::AMDGPU_Gfx_WholeWave:
return true;
default:
return canGuaranteeTCO(CC);
@@ -1590,7 +1591,14 @@ bool isInlineValue(unsigned Reg);
/// Is this an AMDGPU specific source operand? These include registers,
/// inline constants, literals and mandatory literals (KImm).
-bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
+constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo) {
+  // A source operand is one whose type lies in the inclusive range
+  // [OPERAND_SRC_FIRST, OPERAND_SRC_LAST].
+  const auto Ty = OpInfo.OperandType;
+  return AMDGPU::OPERAND_SRC_FIRST <= Ty && Ty <= AMDGPU::OPERAND_SRC_LAST;
+}
+
+/// Same check, applied to operand number \p OpNo of \p Desc.
+inline bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+  const MCOperandInfo &Selected = Desc.operands()[OpNo];
+  return isSISrcOperand(Selected);
+}
/// Is this a KImm operand?
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
@@ -1778,6 +1786,25 @@ bool isIntrinsicSourceOfDivergence(unsigned IntrID);
/// \returns true if the intrinsic is uniform
bool isIntrinsicAlwaysUniform(unsigned IntrID);
+/// \returns a register class for the physical register \p Reg if it is a VGPR
+/// or nullptr otherwise.
+const MCRegisterClass *getVGPRPhysRegClass(MCPhysReg Reg,
+ const MCRegisterInfo &MRI);
+
+/// \returns the MODE bits which have to be set by the S_SET_VGPR_MSB for the
+/// physical register \p Reg. This is the hardware register index of \p Reg
+/// shifted right by 8.
+unsigned getVGPREncodingMSBs(MCPhysReg Reg, const MCRegisterInfo &MRI);
+
+/// If \p Reg is a low VGPR return a corresponding high VGPR with \p MSBs set.
+/// \returns AMDGPU::NoRegister if the register index of \p Reg is already
+/// 0x100 or above, or if \p Reg belongs to no VGPR register class.
+MCPhysReg getVGPRWithMSBs(MCPhysReg Reg, unsigned MSBs,
+ const MCRegisterInfo &MRI);
+
+/// \returns a table for the opcode with a given \p Desc to map the VGPR MSB
+/// set by the S_SET_VGPR_MSB to one of 4 sources. In case of VOPD returns 2
+/// maps, one for X and one for Y component; otherwise the second pointer is
+/// nullptr. Both pointers are nullptr when no table applies.
+std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
+getVGPRLoweringOperandTables(const MCInstrDesc &Desc);
+
/// \returns true if a memory instruction supports scale_offset modifier.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index fd6253daa327..a7a0e33da5e4 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -1061,6 +1061,17 @@ VersionTuple AMDGPUPALMetadata::getPALVersion() {
return VersionTuple(getPALVersion(0), getPALVersion(1));
}
+// Raise the field in a given .hardware_stages entry to Val. An empty field
+// is set to Val; an existing one keeps the larger of its value and Val.
+void AMDGPUPALMetadata::updateHwStageMaximum(unsigned CC, StringRef field,
+                                             unsigned Val) {
+  msgpack::MapDocNode StageMap = getHwStage(CC);
+  auto &Entry = StageMap[field];
+  // Fold the stored value into Val first so a single store covers both cases.
+  if (!Entry.isEmpty())
+    Val = std::max<unsigned>(Entry.getUInt(), Val);
+  Entry = Val;
+}
+
// Set the field in a given .hardware_stages entry
void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, unsigned Val) {
getHwStage(CC)[field] = Val;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 4830db5fda50..e50150cc8de9 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -156,6 +156,7 @@ public:
unsigned getPALMinorVersion();
VersionTuple getPALVersion();
+ void updateHwStageMaximum(unsigned CC, StringRef field, unsigned Val);
void setHwStage(unsigned CC, StringRef field, unsigned Val);
void setHwStage(unsigned CC, StringRef field, bool Val);
void setHwStage(unsigned CC, StringRef field, msgpack::Type Type,