summary refs log tree commit diff
path: root/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp 221
1 files changed, 134 insertions, 87 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index bdb5a8d9a0a0..b08957d22ee7 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1314,6 +1314,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
/// }
private:
+ void createConstantSymbol(StringRef Id, int64_t Val);
+
bool ParseAsAbsoluteExpression(uint32_t &Ret);
bool OutOfRangeError(SMRange Range);
/// Calculate VGPR/SGPR blocks required for given target, reserved
@@ -1331,12 +1333,12 @@ private:
/// \param SGPRRange [in] Token range, used for SGPR diagnostics.
/// \param VGPRBlocks [out] Result VGPR block count.
/// \param SGPRBlocks [out] Result SGPR block count.
- bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
- bool FlatScrUsed, bool XNACKUsed,
+ bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
+ const MCExpr *FlatScrUsed, bool XNACKUsed,
std::optional<bool> EnableWavefrontSize32,
- unsigned NextFreeVGPR, SMRange VGPRRange,
- unsigned NextFreeSGPR, SMRange SGPRRange,
- unsigned &VGPRBlocks, unsigned &SGPRBlocks);
+ const MCExpr *NextFreeVGPR, SMRange VGPRRange,
+ const MCExpr *NextFreeSGPR, SMRange SGPRRange,
+ const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSACodeObjectVersion();
bool ParseDirectiveAMDHSAKernel();
@@ -1408,36 +1410,28 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
- {
- // TODO: make those pre-defined variables read-only.
- // Currently there is none suitable machinery in the core llvm-mc for this.
- // MCSymbol::isRedefinable is intended for another purpose, and
- // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
- MCContext &Ctx = getContext();
- if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
- MCSymbol *Sym =
- Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
- Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
- Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
- Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
- Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
- Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
- } else {
- MCSymbol *Sym =
- Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
- Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
- Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
- Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
- Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
- Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
- }
- if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
- initializeGprCountSymbol(IS_VGPR);
- initializeGprCountSymbol(IS_SGPR);
- } else
- KernelScope.initialize(getContext());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
+ if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
+ createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
+ createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
+ createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
+ } else {
+ createConstantSymbol(".option.machine_version_major", ISA.Major);
+ createConstantSymbol(".option.machine_version_minor", ISA.Minor);
+ createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
}
+ if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
+ initializeGprCountSymbol(IS_VGPR);
+ initializeGprCountSymbol(IS_SGPR);
+ } else
+ KernelScope.initialize(getContext());
+
+ for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
+ createConstantSymbol(Symbol, Code);
+
+ createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
+ createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
+ createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
}
bool hasMIMG_R128() const {
@@ -2486,6 +2480,16 @@ bool AMDGPUOperand::isInlineValue() const {
// AsmParser
//===----------------------------------------------------------------------===//
+void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
+ // TODO: make those pre-defined variables read-only.
+ // Currently there is none suitable machinery in the core llvm-mc for this.
+ // MCSymbol::isRedefinable is intended for another purpose, and
+ // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
+ MCContext &Ctx = getContext();
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
+ Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
+}
+
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
if (Is == IS_VGPR) {
switch (RegWidth) {
@@ -5352,41 +5356,64 @@ bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
}
bool AMDGPUAsmParser::calculateGPRBlocks(
- const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
- bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
- unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
- SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
+ const FeatureBitset &Features, const MCExpr *VCCUsed,
+ const MCExpr *FlatScrUsed, bool XNACKUsed,
+ std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
+ SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
+ const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
// TODO(scott.linder): These calculations are duplicated from
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
IsaVersion Version = getIsaVersion(getSTI().getCPU());
+ MCContext &Ctx = getContext();
- unsigned NumVGPRs = NextFreeVGPR;
- unsigned NumSGPRs = NextFreeSGPR;
+ const MCExpr *NumSGPRs = NextFreeSGPR;
+ int64_t EvaluatedSGPRs;
if (Version.Major >= 10)
- NumSGPRs = 0;
+ NumSGPRs = MCConstantExpr::create(0, Ctx);
else {
unsigned MaxAddressableNumSGPRs =
IsaInfo::getAddressableNumSGPRs(&getSTI());
- if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
- NumSGPRs > MaxAddressableNumSGPRs)
+ if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
+ !Features.test(FeatureSGPRInitBug) &&
+ static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
return OutOfRangeError(SGPRRange);
- NumSGPRs +=
- IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
+ const MCExpr *ExtraSGPRs =
+ AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
+ NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);
- if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
- NumSGPRs > MaxAddressableNumSGPRs)
+ if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
+ (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
+ static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
return OutOfRangeError(SGPRRange);
if (Features.test(FeatureSGPRInitBug))
- NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
- }
+ NumSGPRs =
+ MCConstantExpr::create(IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG, Ctx);
+ }
+
+ // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
+ // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
+ auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
+ unsigned Granule) -> const MCExpr * {
+ const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
+ const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
+ const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
+ const MCExpr *AlignToGPR =
+ AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
+ const MCExpr *DivGPR =
+ MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
+ const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
+ return SubGPR;
+ };
- VGPRBlocks = IsaInfo::getEncodedNumVGPRBlocks(&getSTI(), NumVGPRs,
- EnableWavefrontSize32);
- SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
+ VGPRBlocks = GetNumGPRBlocks(
+ NextFreeVGPR,
+ IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
+ SGPRBlocks =
+ GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));
return false;
}
@@ -5410,14 +5437,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
+ const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
+ const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
+
SMRange VGPRRange;
- uint64_t NextFreeVGPR = 0;
- uint64_t AccumOffset = 0;
+ const MCExpr *NextFreeVGPR = ZeroExpr;
+ const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
uint64_t SharedVGPRCount = 0;
uint64_t PreloadLength = 0;
uint64_t PreloadOffset = 0;
SMRange SGPRRange;
- uint64_t NextFreeSGPR = 0;
+ const MCExpr *NextFreeSGPR = ZeroExpr;
// Count the number of user SGPRs implied from the enabled feature bits.
unsigned ImpliedUserSGPRCount = 0;
@@ -5425,8 +5455,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
// Track if the asm explicitly contains the directive for the user SGPR
// count.
std::optional<unsigned> ExplicitUserSGPRCount;
- bool ReserveVCC = true;
- bool ReserveFlatScr = true;
+ const MCExpr *ReserveVCC = OneExpr;
+ const MCExpr *ReserveFlatScr = OneExpr;
std::optional<bool> EnableWavefrontSize32;
while (true) {
@@ -5620,34 +5650,29 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
ValRange);
} else if (ID == ".amdhsa_next_free_vgpr") {
- EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
VGPRRange = ValRange;
- NextFreeVGPR = Val;
+ NextFreeVGPR = ExprVal;
} else if (ID == ".amdhsa_next_free_sgpr") {
- EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
SGPRRange = ValRange;
- NextFreeSGPR = Val;
+ NextFreeSGPR = ExprVal;
} else if (ID == ".amdhsa_accum_offset") {
if (!isGFX90A())
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
- EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
- AccumOffset = Val;
+ AccumOffset = ExprVal;
} else if (ID == ".amdhsa_reserve_vcc") {
- EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
- if (!isUInt<1>(Val))
+ if (EvaluatableExpr && !isUInt<1>(Val))
return OutOfRangeError(ValRange);
- ReserveVCC = Val;
+ ReserveVCC = ExprVal;
} else if (ID == ".amdhsa_reserve_flat_scratch") {
- EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
if (IVersion.Major < 7)
return Error(IDRange.Start, "directive requires gfx7+", IDRange);
if (hasArchitectedFlatScratch())
return Error(IDRange.Start,
"directive is not supported with architected flat scratch",
IDRange);
- if (!isUInt<1>(Val))
+ if (EvaluatableExpr && !isUInt<1>(Val))
return OutOfRangeError(ValRange);
- ReserveFlatScr = Val;
+ ReserveFlatScr = ExprVal;
} else if (ID == ".amdhsa_reserve_xnack_mask") {
if (IVersion.Major < 8)
return Error(IDRange.Start, "directive requires gfx8+", IDRange);
@@ -5771,8 +5796,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (!Seen.contains(".amdhsa_next_free_sgpr"))
return TokError(".amdhsa_next_free_sgpr directive is required");
- unsigned VGPRBlocks;
- unsigned SGPRBlocks;
+ const MCExpr *VGPRBlocks;
+ const MCExpr *SGPRBlocks;
if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
getTargetStreamer().getTargetID()->isXnackOnOrAny(),
EnableWavefrontSize32, NextFreeVGPR,
@@ -5780,19 +5805,26 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
SGPRBlocks))
return true;
- if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
- VGPRBlocks))
+ int64_t EvaluatedVGPRBlocks;
+ bool VGPRBlocksEvaluatable =
+ VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
+ if (VGPRBlocksEvaluatable &&
+ !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
+ static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
return OutOfRangeError(VGPRRange);
+ }
AMDGPU::MCKernelDescriptor::bits_set(
- KD.compute_pgm_rsrc1, MCConstantExpr::create(VGPRBlocks, getContext()),
+ KD.compute_pgm_rsrc1, VGPRBlocks,
COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
- if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
- SGPRBlocks))
+ int64_t EvaluatedSGPRBlocks;
+ if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
+ !isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
+ static_cast<uint64_t>(EvaluatedSGPRBlocks)))
return OutOfRangeError(SGPRRange);
AMDGPU::MCKernelDescriptor::bits_set(
- KD.compute_pgm_rsrc1, MCConstantExpr::create(SGPRBlocks, getContext()),
+ KD.compute_pgm_rsrc1, SGPRBlocks,
COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
@@ -5822,16 +5854,28 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
if (isGFX90A()) {
if (!Seen.contains(".amdhsa_accum_offset"))
return TokError(".amdhsa_accum_offset directive is required");
- if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
+ int64_t EvaluatedAccum;
+ bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
+ uint64_t UEvaluatedAccum = EvaluatedAccum;
+ if (AccumEvaluatable &&
+ (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
return TokError("accum_offset should be in range [4..256] in "
"increments of 4");
- if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
+
+ int64_t EvaluatedNumVGPR;
+ if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
+ AccumEvaluatable &&
+ UEvaluatedAccum >
+ alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
return TokError("accum_offset exceeds total VGPR allocation");
- MCKernelDescriptor::bits_set(
- KD.compute_pgm_rsrc3,
- MCConstantExpr::create(AccumOffset / 4 - 1, getContext()),
- COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
- COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
+ const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
+ MCBinaryExpr::createDiv(
+ AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
+ MCConstantExpr::create(1, getContext()), getContext());
+ MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, AdjustedAccum,
+ COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
+ COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
+ getContext());
}
if (IVersion.Major >= 10 && IVersion.Major < 12) {
@@ -5840,7 +5884,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
return TokError("shared_vgpr_count directive not valid on "
"wavefront size 32");
}
- if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
+
+ if (VGPRBlocksEvaluatable &&
+ (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
+ 63)) {
return TokError("shared_vgpr_count*2 + "
"compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
"exceed 63\n");
@@ -8353,7 +8400,7 @@ void AMDGPUAsmParser::onBeginOfFile() {
/// max(expr, ...)
///
bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
- using AGVK = AMDGPUVariadicMCExpr::VariadicKind;
+ using AGVK = AMDGPUMCExpr::VariantKind;
if (isToken(AsmToken::Identifier)) {
StringRef TokenId = getTokenStr();
@@ -8383,7 +8430,7 @@ bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
"mismatch of commas in " + Twine(TokenId) + " expression");
return true;
}
- Res = AMDGPUVariadicMCExpr::create(VK, Exprs, getContext());
+ Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
return false;
}
const MCExpr *Expr;