diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 267 |
1 files changed, 155 insertions, 112 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index cad4a3430327..e49925f86bd9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -29,6 +29,7 @@ #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" +#include "Utils/SIDefinesUtils.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -135,15 +136,6 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) { getTargetStreamer()->getPALMetadata()->readFromIR(M); } -uint64_t AMDGPUAsmPrinter::getMCExprValue(const MCExpr *Value, MCContext &Ctx) { - int64_t Val; - if (!Value->evaluateAsAbsolute(Val)) { - Ctx.reportError(SMLoc(), "could not resolve expression when required."); - return 0; - } - return static_cast<uint64_t>(Val); -} - void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) { // Init target streamer if it has not yet happened if (!IsTargetStreamerInitialized) @@ -248,14 +240,14 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() { getNameWithPrefix(KernelName, &MF->getFunction()); getTargetStreamer()->EmitAmdhsaKernelDescriptor( STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo), - getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Context), - getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Context) - - IsaInfo::getNumExtraSGPRs( - &STM, getMCExprValue(CurrentProgramInfo.VCCUsed, Context), - getMCExprValue(CurrentProgramInfo.FlatUsed, Context), - getTargetStreamer()->getTargetID()->isXnackOnOrAny()), - getMCExprValue(CurrentProgramInfo.VCCUsed, Context), - getMCExprValue(CurrentProgramInfo.FlatUsed, Context)); + CurrentProgramInfo.NumVGPRsForWavesPerEU, + MCBinaryExpr::createSub( + CurrentProgramInfo.NumSGPRsForWavesPerEU, + AMDGPUMCExpr::createExtraSGPRs( + CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed, + getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Context), + Context), + CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed); Streamer.popSection(); } @@ -400,9 +392,40 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments( false); } -uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( +SmallString<128> AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) { + SmallString<128> Str; + raw_svector_ostream OSS(Str); + int64_t IVal; + if (Value->evaluateAsAbsolute(IVal)) { + OSS << static_cast<uint64_t>(IVal); + } else { + Value->print(OSS, MAI); + } + return Str; +} + +void AMDGPUAsmPrinter::emitCommonFunctionComments( + const MCExpr *NumVGPR, const MCExpr *NumAGPR, const MCExpr *TotalNumVGPR, + const MCExpr *NumSGPR, const MCExpr *ScratchSize, uint64_t CodeSize, + const AMDGPUMachineFunction *MFI) { + OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false); + OutStreamer->emitRawComment(" NumSgprs: " + getMCExprStr(NumSGPR), false); + OutStreamer->emitRawComment(" NumVgprs: " + getMCExprStr(NumVGPR), false); + if (NumAGPR && TotalNumVGPR) { + OutStreamer->emitRawComment(" NumAgprs: " + getMCExprStr(NumAGPR), false); + OutStreamer->emitRawComment(" TotalNumVgprs: " + getMCExprStr(TotalNumVGPR), + false); + } + OutStreamer->emitRawComment(" ScratchSize: " + getMCExprStr(ScratchSize), + false); + OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()), + false); +} + +const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( const MachineFunction &MF) const { const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); + MCContext &Ctx = MF.getContext(); uint16_t KernelCodeProperties = 0; const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI.getUserSGPRInfo(); @@ -430,16 +453,28 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; } + if (UserSGPRInfo.hasPrivateSegmentSize()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE; + } if (MF.getSubtarget<GCNSubtarget>().isWave32()) { KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; } - if (getMCExprValue(CurrentProgramInfo.DynamicCallStack, MF.getContext()) && - CodeObjectVersion >= AMDGPU::AMDHSA_COV5) - KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK; - - return KernelCodeProperties; + // CurrentProgramInfo.DynamicCallStack is a MCExpr and could be + // un-evaluatable at this point so it cannot be conditionally checked here. + // Instead, we'll directly shift the possibly unknown MCExpr into its place + // and bitwise-or it into KernelCodeProperties. + const MCExpr *KernelCodePropExpr = + MCConstantExpr::create(KernelCodeProperties, Ctx); + const MCExpr *OrValue = MCConstantExpr::create( + amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, Ctx); + OrValue = MCBinaryExpr::createShl(CurrentProgramInfo.DynamicCallStack, + OrValue, Ctx); + KernelCodePropExpr = MCBinaryExpr::createOr(KernelCodePropExpr, OrValue, Ctx); + + return KernelCodePropExpr; } MCKernelDescriptor @@ -462,11 +497,15 @@ AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF, KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM, Ctx); KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2(Ctx); - KernelDescriptor.kernel_code_properties = - MCConstantExpr::create(getAmdhsaKernelCodeProperties(MF), Ctx); - - assert(STM.hasGFX90AInsts() || - getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A, Ctx) == 0); + KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF); + + int64_t PGRM_Rsrc3 = 1; + bool EvaluatableRsrc3 = + CurrentProgramInfo.ComputePGMRSrc3GFX90A->evaluateAsAbsolute(PGRM_Rsrc3); + (void)PGRM_Rsrc3; + (void)EvaluatableRsrc3; + assert(STM.hasGFX90AInsts() || !EvaluatableRsrc3 || + static_cast<uint64_t>(PGRM_Rsrc3) == 0); KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3GFX90A; KernelDescriptor.kernarg_preload = MCConstantExpr::create( @@ -554,13 +593,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->emitRawComment(" Kernel info:", false); emitCommonFunctionComments( - getMCExprValue(CurrentProgramInfo.NumArchVGPR, Ctx), - STM.hasMAIInsts() ? getMCExprValue(CurrentProgramInfo.NumAccVGPR, Ctx) - : std::optional<uint32_t>(), - getMCExprValue(CurrentProgramInfo.NumVGPR, Ctx), - getMCExprValue(CurrentProgramInfo.NumSGPR, Ctx), - getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx), - getFunctionCodeSize(MF), MFI); + CurrentProgramInfo.NumArchVGPR, + STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR : nullptr, + CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR, + CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF), MFI); OutStreamer->emitRawComment( " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false); @@ -571,43 +607,38 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { " bytes/workgroup (compile time only)", false); OutStreamer->emitRawComment( - " SGPRBlocks: " + - Twine(getMCExprValue(CurrentProgramInfo.SGPRBlocks, Ctx)), - false); + " SGPRBlocks: " + getMCExprStr(CurrentProgramInfo.SGPRBlocks), false); + OutStreamer->emitRawComment( - " VGPRBlocks: " + - Twine(getMCExprValue(CurrentProgramInfo.VGPRBlocks, Ctx)), - false); + " VGPRBlocks: " + getMCExprStr(CurrentProgramInfo.VGPRBlocks), false); OutStreamer->emitRawComment( " NumSGPRsForWavesPerEU: " + - Twine( - getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx)), + getMCExprStr(CurrentProgramInfo.NumSGPRsForWavesPerEU), false); OutStreamer->emitRawComment( " NumVGPRsForWavesPerEU: " + - Twine( - getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx)), + getMCExprStr(CurrentProgramInfo.NumVGPRsForWavesPerEU), false); - if (STM.hasGFX90AInsts()) + if (STM.hasGFX90AInsts()) { + const MCExpr *AdjustedAccum = MCBinaryExpr::createAdd( + CurrentProgramInfo.AccumOffset, MCConstantExpr::create(1, Ctx), Ctx); + AdjustedAccum = MCBinaryExpr::createMul( + AdjustedAccum, MCConstantExpr::create(4, Ctx), Ctx); OutStreamer->emitRawComment( - " AccumOffset: " + - Twine((getMCExprValue(CurrentProgramInfo.AccumOffset, Ctx) + 1) * - 4), - false); + " AccumOffset: " + getMCExprStr(AdjustedAccum), false); + } OutStreamer->emitRawComment( - " Occupancy: " + - Twine(getMCExprValue(CurrentProgramInfo.Occupancy, Ctx)), - false); + " Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy), false); OutStreamer->emitRawComment( " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false); OutStreamer->emitRawComment( " COMPUTE_PGM_RSRC2:SCRATCH_EN: " + - Twine(getMCExprValue(CurrentProgramInfo.ScratchEnable, Ctx)), + getMCExprStr(CurrentProgramInfo.ScratchEnable), false); OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " + Twine(CurrentProgramInfo.UserSGPR), @@ -628,20 +659,25 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { Twine(CurrentProgramInfo.TIdIGCompCount), false); + [[maybe_unused]] int64_t PGMRSrc3; assert(STM.hasGFX90AInsts() || - getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A, Ctx) == 0); + (CurrentProgramInfo.ComputePGMRSrc3GFX90A->evaluateAsAbsolute( + PGMRSrc3) && + static_cast<uint64_t>(PGMRSrc3) == 0)); if (STM.hasGFX90AInsts()) { OutStreamer->emitRawComment( " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " + - Twine((AMDHSA_BITS_GET( - getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A, Ctx), - amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))), + getMCExprStr(MCKernelDescriptor::bits_get( + CurrentProgramInfo.ComputePGMRSrc3GFX90A, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)), false); OutStreamer->emitRawComment( " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " + - Twine((AMDHSA_BITS_GET( - getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A, Ctx), - amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))), + getMCExprStr(MCKernelDescriptor::bits_get( + CurrentProgramInfo.ComputePGMRSrc3GFX90A, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)), false); } } @@ -765,7 +801,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, // The calculations related to SGPR/VGPR blocks are // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be // unified. - const MCExpr *ExtraSGPRs = AMDGPUVariadicMCExpr::createExtraSGPRs( + const MCExpr *ExtraSGPRs = AMDGPUMCExpr::createExtraSGPRs( ProgInfo.VCCUsed, ProgInfo.FlatUsed, getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Ctx); @@ -858,27 +894,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } } } - ProgInfo.NumSGPR = AMDGPUVariadicMCExpr::createMax( + ProgInfo.NumSGPR = AMDGPUMCExpr::createMax( {ProgInfo.NumSGPR, CreateExpr(WaveDispatchNumSGPR)}, Ctx); - ProgInfo.NumArchVGPR = AMDGPUVariadicMCExpr::createMax( + ProgInfo.NumArchVGPR = AMDGPUMCExpr::createMax( {ProgInfo.NumVGPR, CreateExpr(WaveDispatchNumVGPR)}, Ctx); - ProgInfo.NumVGPR = AMDGPUVariadicMCExpr::createTotalNumVGPR( + ProgInfo.NumVGPR = AMDGPUMCExpr::createTotalNumVGPR( ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx); } // Adjust number of registers used to meet default/requested minimum/maximum // number of waves per execution unit request. unsigned MaxWaves = MFI->getMaxWavesPerEU(); - ProgInfo.NumSGPRsForWavesPerEU = AMDGPUVariadicMCExpr::createMax( - {ProgInfo.NumSGPR, CreateExpr(1ul), - CreateExpr(STM.getMinNumSGPRs(MaxWaves))}, - Ctx); - ProgInfo.NumVGPRsForWavesPerEU = AMDGPUVariadicMCExpr::createMax( - {ProgInfo.NumVGPR, CreateExpr(1ul), - CreateExpr(STM.getMinNumVGPRs(MaxWaves))}, - Ctx); + ProgInfo.NumSGPRsForWavesPerEU = + AMDGPUMCExpr::createMax({ProgInfo.NumSGPR, CreateExpr(1ul), + CreateExpr(STM.getMinNumSGPRs(MaxWaves))}, + Ctx); + ProgInfo.NumVGPRsForWavesPerEU = + AMDGPUMCExpr::createMax({ProgInfo.NumVGPR, CreateExpr(1ul), + CreateExpr(STM.getMinNumVGPRs(MaxWaves))}, + Ctx); if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS || STM.hasSGPRInitBug()) { @@ -927,10 +963,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, unsigned Granule) { const MCExpr *OneConst = CreateExpr(1ul); const MCExpr *GranuleConst = CreateExpr(Granule); - const MCExpr *MaxNumGPR = - AMDGPUVariadicMCExpr::createMax({NumGPR, OneConst}, Ctx); + const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx); const MCExpr *AlignToGPR = - AMDGPUVariadicMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx); + AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx); const MCExpr *DivGPR = MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx); const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx); @@ -972,7 +1007,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, // The MCExpr equivalent of divideCeil. auto DivideCeil = [&Ctx](const MCExpr *Numerator, const MCExpr *Denominator) { const MCExpr *Ceil = - AMDGPUVariadicMCExpr::createAlignTo(Numerator, Denominator, Ctx); + AMDGPUMCExpr::createAlignTo(Numerator, Denominator, Ctx); return MCBinaryExpr::createDiv(Ceil, Denominator, Ctx); }; @@ -1045,7 +1080,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT); } - ProgInfo.Occupancy = AMDGPUVariadicMCExpr::createOccupancy( + ProgInfo.Occupancy = AMDGPUMCExpr::createOccupancy( STM.computeOccupancy(F, ProgInfo.LDSSize), ProgInfo.NumSGPRsForWavesPerEU, ProgInfo.NumVGPRsForWavesPerEU, STM, Ctx); @@ -1207,41 +1242,49 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF, auto &Ctx = MF.getContext(); MD->setEntryPoint(CC, MF.getFunction().getName()); - MD->setNumUsedVgprs( - CC, getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx)); + MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx); // Only set AGPRs for supported devices const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); if (STM.hasMAIInsts()) { - MD->setNumUsedAgprs(CC, getMCExprValue(CurrentProgramInfo.NumAccVGPR, Ctx)); + MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR); } - MD->setNumUsedSgprs( - CC, getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx)); + MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx); if (MD->getPALMajorVersion() < 3) { - MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM)); + MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx); if (AMDGPU::isCompute(CC)) { - MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2()); + MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx); } else { - if (getMCExprValue(CurrentProgramInfo.ScratchBlocks, Ctx) > 0) - MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1)); + const MCExpr *HasScratchBlocks = + MCBinaryExpr::createGT(CurrentProgramInfo.ScratchBlocks, + MCConstantExpr::create(0, Ctx), Ctx); + auto [Shift, Mask] = getShiftMask(C_00B84C_SCRATCH_EN); + MD->setRsrc2(CC, maskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx); } } else { MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode); - MD->setHwStage(CC, ".scratch_en", - (bool)getMCExprValue(CurrentProgramInfo.ScratchEnable, Ctx)); + MD->setHwStage(CC, ".scratch_en", msgpack::Type::Boolean, + CurrentProgramInfo.ScratchEnable); EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM); } // ScratchSize is in bytes, 16 aligned. MD->setScratchSize( - CC, alignTo(getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx), 16)); + CC, + AMDGPUMCExpr::createAlignTo(CurrentProgramInfo.ScratchSize, + MCConstantExpr::create(16, Ctx), Ctx), + Ctx); + if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) { unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? divideCeil(CurrentProgramInfo.LDSBlocks, 2) : CurrentProgramInfo.LDSBlocks; if (MD->getPALMajorVersion() < 3) { - MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize)); + MD->setRsrc2( + CC, + MCConstantExpr::create(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize), Ctx), + Ctx); MD->setSpiPsInputEna(MFI->getPSInputEnable()); MD->setSpiPsInputAddr(MFI->getPSInputAddr()); } else { @@ -1288,20 +1331,19 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) { if (MD->getPALMajorVersion() < 3) { // Set compute registers - MD->setRsrc1(CallingConv::AMDGPU_CS, - CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST)); + MD->setRsrc1( + CallingConv::AMDGPU_CS, + CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST, Ctx), Ctx); MD->setRsrc2(CallingConv::AMDGPU_CS, - CurrentProgramInfo.getComputePGMRSrc2()); + CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx); } else { EmitPALMetadataCommon(MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST); } // Set optional info MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize); - MD->setFunctionNumUsedVgprs( - FnName, getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx)); - MD->setFunctionNumUsedSgprs( - FnName, getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx)); + MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU); + MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU); } // This is supposed to be log2(Size) @@ -1362,6 +1404,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out, if (UserSGPRInfo.hasFlatScratchInit()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; + if (UserSGPRInfo.hasPrivateSegmentSize()) + Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE; + if (UserSGPRInfo.hasDispatchPtr()) Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; @@ -1463,28 +1508,26 @@ void AMDGPUAsmPrinter::emitResourceUsageRemarks( // remarks to simulate newlines. If and when clang does accept newlines, this // formatting should be aggregated into one remark with newlines to avoid // printing multiple diagnostic location and diag opts. - MCContext &MCCtx = MF.getContext(); EmitResourceUsageRemark("FunctionName", "Function Name", MF.getFunction().getName()); EmitResourceUsageRemark("NumSGPR", "SGPRs", - getMCExprValue(CurrentProgramInfo.NumSGPR, MCCtx)); - EmitResourceUsageRemark( - "NumVGPR", "VGPRs", - getMCExprValue(CurrentProgramInfo.NumArchVGPR, MCCtx)); + getMCExprStr(CurrentProgramInfo.NumSGPR)); + EmitResourceUsageRemark("NumVGPR", "VGPRs", + getMCExprStr(CurrentProgramInfo.NumArchVGPR)); if (hasMAIInsts) { - EmitResourceUsageRemark( - "NumAGPR", "AGPRs", - getMCExprValue(CurrentProgramInfo.NumAccVGPR, MCCtx)); + EmitResourceUsageRemark("NumAGPR", "AGPRs", + getMCExprStr(CurrentProgramInfo.NumAccVGPR)); } - EmitResourceUsageRemark( - "ScratchSize", "ScratchSize [bytes/lane]", - getMCExprValue(CurrentProgramInfo.ScratchSize, MCCtx)); + EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]", + getMCExprStr(CurrentProgramInfo.ScratchSize)); + int64_t DynStack; + bool DynStackEvaluatable = + CurrentProgramInfo.DynamicCallStack->evaluateAsAbsolute(DynStack); StringRef DynamicStackStr = - getMCExprValue(CurrentProgramInfo.DynamicCallStack, MCCtx) ? "True" - : "False"; + DynStackEvaluatable && DynStack ? "True" : "False"; EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr); EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]", - getMCExprValue(CurrentProgramInfo.Occupancy, MCCtx)); + getMCExprStr(CurrentProgramInfo.Occupancy)); EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill", CurrentProgramInfo.SGPRSpill); EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill", |
