diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Utils')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 145 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 29 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h | 1 |
5 files changed, 178 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index c740b5e0f09d..14ebbf8e9c92 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -116,6 +116,8 @@ static constexpr CustomOperand MsgOperands[] = { {{"MSG_RTN_GET_TBA"}, ID_RTN_GET_TBA, isGFX11Plus}, {{"MSG_RTN_GET_TBA_TO_PC"}, ID_RTN_GET_TBA_TO_PC, isGFX11Plus}, {{"MSG_RTN_GET_SE_AID_ID"}, ID_RTN_GET_SE_AID_ID, isGFX12Plus}, + {{"MSG_RTN_GET_CLUSTER_BARRIER_STATE"}, ID_RTN_GET_CLUSTER_BARRIER_STATE, + isGFX1250}, }; static constexpr CustomOperand SysMsgOperands[] = { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 18ee9c16b3ff..9f4f42185d9a 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -934,6 +934,10 @@ std::optional<unsigned> InstInfo::getInvalidCompOperandIndex( if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx]) continue; + if (getVGPREncodingMSBs(OpXRegs[CompOprIdx], MRI) != + getVGPREncodingMSBs(OpYRegs[CompOprIdx], MRI)) + return CompOprIdx; + if (SkipSrc && CompOprIdx >= Component::DST_NUM) continue; @@ -1376,6 +1380,9 @@ unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, ? *EnableWavefrontSize32 : STI->getFeatureBits().test(FeatureWavefrontSize32); + if (STI->getFeatureBits().test(Feature1024AddressableVGPRs)) + return IsWave32 ? 16 : 8; + return IsWave32 ? 8 : 4; } @@ -1396,7 +1403,10 @@ unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; } unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize) { - if (STI->getFeatureBits().test(FeatureGFX90AInsts)) + const auto &Features = STI->getFeatureBits(); + if (Features.test(FeatureGFX1250Insts)) + return Features.test(FeatureWavefrontSize32) ? 1024 : 512; + if (Features.test(FeatureGFX90AInsts)) return 512; // Temporarily check the subtarget feature, until we fully switch to using @@ -2720,13 +2730,6 @@ bool isInlineValue(unsigned Reg) { #undef CASE_GFXPRE11_GFX11PLUS_TO #undef MAP_REG2REG -bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { - assert(OpNo < Desc.NumOperands); - unsigned OpType = Desc.operands()[OpNo].OperandType; - return OpType >= AMDGPU::OPERAND_SRC_FIRST && - OpType <= AMDGPU::OPERAND_SRC_LAST; -} - bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) { assert(OpNo < Desc.NumOperands); unsigned OpType = Desc.operands()[OpNo].OperandType; @@ -2776,6 +2779,7 @@ unsigned getRegBitWidth(unsigned RCID) { return 16; case AMDGPU::SGPR_32RegClassID: case AMDGPU::VGPR_32RegClassID: + case AMDGPU::VGPR_32_Lo256RegClassID: case AMDGPU::VRegOrLds_32RegClassID: case AMDGPU::AGPR_32RegClassID: case AMDGPU::VS_32RegClassID: @@ -2794,6 +2798,8 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_64_Align2RegClassID: case AMDGPU::AV_64RegClassID: case AMDGPU::AV_64_Align2RegClassID: + case AMDGPU::VReg_64_Lo256_Align2RegClassID: + case AMDGPU::VS_64_Lo256RegClassID: return 64; case AMDGPU::SGPR_96RegClassID: case AMDGPU::SReg_96RegClassID: @@ -2803,6 +2809,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_96_Align2RegClassID: case AMDGPU::AV_96RegClassID: case AMDGPU::AV_96_Align2RegClassID: + case AMDGPU::VReg_96_Lo256_Align2RegClassID: return 96; case AMDGPU::SGPR_128RegClassID: case AMDGPU::SReg_128RegClassID: @@ -2813,6 +2820,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AV_128RegClassID: case AMDGPU::AV_128_Align2RegClassID: case AMDGPU::SReg_128_XNULLRegClassID: + case AMDGPU::VReg_128_Lo256_Align2RegClassID: return 128; case AMDGPU::SGPR_160RegClassID: case AMDGPU::SReg_160RegClassID: @@ -2822,6 +2830,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_160_Align2RegClassID: case AMDGPU::AV_160RegClassID: case AMDGPU::AV_160_Align2RegClassID: + case AMDGPU::VReg_160_Lo256_Align2RegClassID: return 160; case AMDGPU::SGPR_192RegClassID: case AMDGPU::SReg_192RegClassID: @@ -2831,6 +2840,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_192_Align2RegClassID: case AMDGPU::AV_192RegClassID: case AMDGPU::AV_192_Align2RegClassID: + case AMDGPU::VReg_192_Lo256_Align2RegClassID: return 192; case AMDGPU::SGPR_224RegClassID: case AMDGPU::SReg_224RegClassID: @@ -2840,6 +2850,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_224_Align2RegClassID: case AMDGPU::AV_224RegClassID: case AMDGPU::AV_224_Align2RegClassID: + case AMDGPU::VReg_224_Lo256_Align2RegClassID: return 224; case AMDGPU::SGPR_256RegClassID: case AMDGPU::SReg_256RegClassID: @@ -2850,6 +2861,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AV_256RegClassID: case AMDGPU::AV_256_Align2RegClassID: case AMDGPU::SReg_256_XNULLRegClassID: + case AMDGPU::VReg_256_Lo256_Align2RegClassID: return 256; case AMDGPU::SGPR_288RegClassID: case AMDGPU::SReg_288RegClassID: @@ -2859,6 +2871,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_288_Align2RegClassID: case AMDGPU::AV_288RegClassID: case AMDGPU::AV_288_Align2RegClassID: + case AMDGPU::VReg_288_Lo256_Align2RegClassID: return 288; case AMDGPU::SGPR_320RegClassID: case AMDGPU::SReg_320RegClassID: @@ -2868,6 +2881,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_320_Align2RegClassID: case AMDGPU::AV_320RegClassID: case AMDGPU::AV_320_Align2RegClassID: + case AMDGPU::VReg_320_Lo256_Align2RegClassID: return 320; case AMDGPU::SGPR_352RegClassID: case AMDGPU::SReg_352RegClassID: @@ -2877,6 +2891,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_352_Align2RegClassID: case AMDGPU::AV_352RegClassID: case AMDGPU::AV_352_Align2RegClassID: + case AMDGPU::VReg_352_Lo256_Align2RegClassID: return 352; case AMDGPU::SGPR_384RegClassID: case AMDGPU::SReg_384RegClassID: @@ -2886,6 +2901,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_384_Align2RegClassID: case AMDGPU::AV_384RegClassID: case AMDGPU::AV_384_Align2RegClassID: + case AMDGPU::VReg_384_Lo256_Align2RegClassID: return 384; case AMDGPU::SGPR_512RegClassID: case AMDGPU::SReg_512RegClassID: @@ -2895,6 +2911,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_512_Align2RegClassID: case AMDGPU::AV_512RegClassID: case AMDGPU::AV_512_Align2RegClassID: + case AMDGPU::VReg_512_Lo256_Align2RegClassID: return 512; case AMDGPU::SGPR_1024RegClassID: case AMDGPU::SReg_1024RegClassID: @@ -2904,6 +2921,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_1024_Align2RegClassID: case AMDGPU::AV_1024RegClassID: case AMDGPU::AV_1024_Align2RegClassID: + case AMDGPU::VReg_1024_Lo256_Align2RegClassID: return 1024; default: llvm_unreachable("Unexpected register class"); @@ -3206,8 +3224,11 @@ bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer) { - if (isGFX12Plus(ST)) + if (isGFX12Plus(ST)) { + if (IsBuffer && EncodedOffset < 0) + return false; return isInt<24>(EncodedOffset); + } return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset); } @@ -3321,6 +3342,112 @@ const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, : getGfx9BufferFormatInfo(Format); } +const MCRegisterClass *getVGPRPhysRegClass(MCPhysReg Reg, + const MCRegisterInfo &MRI) { + const unsigned VGPRClasses[] = { + AMDGPU::VGPR_16RegClassID, AMDGPU::VGPR_32RegClassID, + AMDGPU::VReg_64RegClassID, AMDGPU::VReg_96RegClassID, + AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID, + AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID, + AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID, + AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID, + AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID, + AMDGPU::VReg_1024RegClassID}; + + for (unsigned RCID : VGPRClasses) { + const MCRegisterClass &RC = MRI.getRegClass(RCID); + if (RC.contains(Reg)) + return &RC; + } + + return nullptr; +} + +unsigned getVGPREncodingMSBs(MCPhysReg Reg, const MCRegisterInfo &MRI) { + unsigned Enc = MRI.getEncodingValue(Reg); + unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK; + return Idx >> 8; +} + +MCPhysReg getVGPRWithMSBs(MCPhysReg Reg, unsigned MSBs, + const MCRegisterInfo &MRI) { + unsigned Enc = MRI.getEncodingValue(Reg); + unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK; + if (Idx >= 0x100) + return AMDGPU::NoRegister; + + const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI); + if (!RC) + return AMDGPU::NoRegister; + return RC->getRegister(Idx | (MSBs << 8)); +} + +std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *> +getVGPRLoweringOperandTables(const MCInstrDesc &Desc) { + static const AMDGPU::OpName VOPOps[4] = { + AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2, + AMDGPU::OpName::vdst}; + static const AMDGPU::OpName VDSOps[4] = { + AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1, + AMDGPU::OpName::vdst}; + static const AMDGPU::OpName FLATOps[4] = { + AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata, + AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst}; + static const AMDGPU::OpName BUFOps[4] = { + AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES, + AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata}; + static const AMDGPU::OpName VIMGOps[4] = { + AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2, + AMDGPU::OpName::vdata}; + + // For VOPD instructions MSB of a corresponding Y component operand VGPR + // address is supposed to match X operand, otherwise VOPD shall not be + // combined. + static const AMDGPU::OpName VOPDOpsX[4] = { + AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X, + AMDGPU::OpName::vdstX}; + static const AMDGPU::OpName VOPDOpsY[4] = { + AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y, + AMDGPU::OpName::vdstY}; + + unsigned TSFlags = Desc.TSFlags; + + if (TSFlags & + (SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | SIInstrFlags::VOP3 | + SIInstrFlags::VOP3P | SIInstrFlags::VOPC | SIInstrFlags::DPP)) { + // LD_SCALE operands ignore MSB. + if (Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32 || + Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250 || + Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64 || + Desc.getOpcode() == AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250) + return {}; + return {VOPOps, nullptr}; + } + + if (TSFlags & SIInstrFlags::DS) + return {VDSOps, nullptr}; + + if (TSFlags & SIInstrFlags::FLAT) + return {FLATOps, nullptr}; + + if (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF)) + return {BUFOps, nullptr}; + + if (TSFlags & SIInstrFlags::VIMAGE) + return {VIMGOps, nullptr}; + + if (AMDGPU::isVOPD(Desc.getOpcode())) + return {VOPDOpsX, VOPDOpsY}; + + assert(!(TSFlags & SIInstrFlags::MIMG)); + + if (TSFlags & (SIInstrFlags::VSAMPLE | SIInstrFlags::EXP)) + llvm_unreachable("Sample and export VGPR lowering is not implemented and" + " these instructions are not expected on gfx1250"); + + return {}; +} + bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) { uint64_t TSFlags = MII.get(Opcode).TSFlags; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 70dfb63cbe04..3fcd16f9290b 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1517,6 +1517,7 @@ constexpr bool mayTailCallThisCC(CallingConv::ID CC) { switch (CC) { case CallingConv::C: case CallingConv::AMDGPU_Gfx: + case CallingConv::AMDGPU_Gfx_WholeWave: return true; default: return canGuaranteeTCO(CC); @@ -1590,7 +1591,14 @@ bool isInlineValue(unsigned Reg); /// Is this an AMDGPU specific source operand? These include registers, /// inline constants, literals and mandatory literals (KImm). -bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo); +constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo) { + return OpInfo.OperandType >= AMDGPU::OPERAND_SRC_FIRST && + OpInfo.OperandType <= AMDGPU::OPERAND_SRC_LAST; +} + +inline bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { + return isSISrcOperand(Desc.operands()[OpNo]); +} /// Is this a KImm operand? bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo); @@ -1778,6 +1786,25 @@ bool isIntrinsicSourceOfDivergence(unsigned IntrID); /// \returns true if the intrinsic is uniform bool isIntrinsicAlwaysUniform(unsigned IntrID); +/// \returns a register class for the physical register \p Reg if it is a VGPR +/// or nullptr otherwise. +const MCRegisterClass *getVGPRPhysRegClass(MCPhysReg Reg, + const MCRegisterInfo &MRI); + +/// \returns the MODE bits which have to be set by the S_SET_VGPR_MSB for the +/// physical register \p Reg. +unsigned getVGPREncodingMSBs(MCPhysReg Reg, const MCRegisterInfo &MRI); + +/// If \p Reg is a low VGPR return a corresponding high VGPR with \p MSBs set. +MCPhysReg getVGPRWithMSBs(MCPhysReg Reg, unsigned MSBs, + const MCRegisterInfo &MRI); + +// Returns a table for the opcode with a given \p Desc to map the VGPR MSB +// set by the S_SET_VGPR_MSB to one of 4 sources. In case of VOPD returns 2 +// maps, one for X and one for Y component. +std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *> +getVGPRLoweringOperandTables(const MCInstrDesc &Desc); + /// \returns true if a memory instruction supports scale_offset modifier. bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp index fd6253daa327..a7a0e33da5e4 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -1061,6 +1061,17 @@ VersionTuple AMDGPUPALMetadata::getPALVersion() { return VersionTuple(getPALVersion(0), getPALVersion(1)); } +// Set the field in a given .hardware_stages entry to a maximum value +void AMDGPUPALMetadata::updateHwStageMaximum(unsigned CC, StringRef field, + unsigned Val) { + msgpack::MapDocNode HwStageFieldMapNode = getHwStage(CC); + auto &Node = HwStageFieldMapNode[field]; + if (Node.isEmpty()) + Node = Val; + else + Node = std::max<unsigned>(Node.getUInt(), Val); +} + // Set the field in a given .hardware_stages entry void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, unsigned Val) { getHwStage(CC)[field] = Val; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h index 4830db5fda50..e50150cc8de9 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h @@ -156,6 +156,7 @@ public: unsigned getPALMinorVersion(); VersionTuple getPALVersion(); + void updateHwStageMaximum(unsigned CC, StringRef field, unsigned Val); void setHwStage(unsigned CC, StringRef field, unsigned Val); void setHwStage(unsigned CC, StringRef field, bool Val); void setHwStage(unsigned CC, StringRef field, msgpack::Type Type, |
