diff options
| author | Ivan Kosarev <ivan.kosarev@amd.com> | 2025-05-08 11:43:50 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-08 11:43:50 +0100 |
| commit | d9bdc2d6a2d3efcce81ecab151b393f19a81696b (patch) | |
| tree | 82721b32dcaba0468bdaf6a3357e9037a855f17d /llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | |
| parent | 92cc31b0f7737408dffd38c2384dff825abb8e3a (diff) | |
[AMDGPU][Disassembler][NFCI] Always defer immediate operands. (#138885)
Removes the need to parameterise decoders with OperandSemantics,
ImmWidth and MandatoryLiteral.
Likely allows further simplification of handling _DEFERRED immediates.
Tested to work downstream.
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 228 |
1 file changed, 119 insertions, 109 deletions
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 3fbba1715937..486c1defc332 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -45,6 +45,11 @@ using namespace llvm; using DecodeStatus = llvm::MCDisassembler::DecodeStatus; +static int64_t getInlineImmValF16(unsigned Imm); +static int64_t getInlineImmValBF16(unsigned Imm); +static int64_t getInlineImmVal32(unsigned Imm); +static int64_t getInlineImmVal64(unsigned Imm); + AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII) : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()), @@ -151,36 +156,31 @@ static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \ } -#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral, \ - ImmWidth) \ +#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm) \ static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \ const MCDisassembler *Decoder) { \ assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \ return addOperand(Inst, \ - DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm, \ - MandatoryLiteral, ImmWidth)); \ + DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm)); \ } static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, AMDGPUDisassembler::OpWidthTy OpWidth, unsigned Imm, unsigned EncImm, - bool MandatoryLiteral, unsigned ImmWidth, - AMDGPU::OperandSemantics Sema, const MCDisassembler *Decoder) { assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!"); const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); - return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral, - ImmWidth, Sema)); + return 
addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm)); } // Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to // get register class. Used by SGPR only operands. #define DECODE_OPERAND_SREG_7(RegClass, OpWidth) \ - DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0) + DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm) #define DECODE_OPERAND_SREG_8(RegClass, OpWidth) \ - DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm, false, 0) + DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm) // Decoder for registers. Imm(10-bit): Imm{7-0} is number of register, // Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC). @@ -190,7 +190,7 @@ template <AMDGPUDisassembler::OpWidthTy OpWidth> static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */, const MCDisassembler *Decoder) { return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR, - false, 0, AMDGPU::OperandSemantics::INT, Decoder); + Decoder); } // Decoder for Src(9-bit encoding) registers only. 
@@ -198,8 +198,7 @@ template <AMDGPUDisassembler::OpWidthTy OpWidth> static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */, const MCDisassembler *Decoder) { - return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0, - AMDGPU::OperandSemantics::INT, Decoder); + return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder); } // Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set @@ -208,8 +207,7 @@ static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, template <AMDGPUDisassembler::OpWidthTy OpWidth> static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */, const MCDisassembler *Decoder) { - return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0, - AMDGPU::OperandSemantics::INT, Decoder); + return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder); } // Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding @@ -218,42 +216,35 @@ template <AMDGPUDisassembler::OpWidthTy OpWidth> static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */, const MCDisassembler *Decoder) { - return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0, - AMDGPU::OperandSemantics::INT, Decoder); + return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder); } // Decoder for RegisterOperands using 9-bit Src encoding. Operand can be // register from RegClass or immediate. Registers that don't belong to RegClass // will be decoded and InstPrinter will report warning. Immediate will be -// decoded into constant of size ImmWidth, should match width of immediate used -// by OperandType (important for floating point types). -template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth, - unsigned OperandSemantics> +// decoded into constant matching the OperandType (important for floating point +// types). 
+template <AMDGPUDisassembler::OpWidthTy OpWidth> static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */, const MCDisassembler *Decoder) { - return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth, - (AMDGPU::OperandSemantics)OperandSemantics, Decoder); + return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder); } // Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc) // and decode using 'enum10' from decodeSrcOp. -template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth, - unsigned OperandSemantics> +template <AMDGPUDisassembler::OpWidthTy OpWidth> static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */, const MCDisassembler *Decoder) { - return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth, - (AMDGPU::OperandSemantics)OperandSemantics, Decoder); + return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder); } -template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth, - unsigned OperandSemantics> +template <AMDGPUDisassembler::OpWidthTy OpWidth> static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */, const MCDisassembler *Decoder) { - return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth, - (AMDGPU::OperandSemantics)OperandSemantics, Decoder); + return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder); } // Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass' @@ -320,8 +311,7 @@ DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi)); } -template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth, - unsigned OperandSemantics> +template <AMDGPUDisassembler::OpWidthTy OpWidth> static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, const MCDisassembler *Decoder) { @@ -333,13 +323,10 @@ static DecodeStatus 
decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, unsigned RegIdx = Imm & 0x7f; return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi)); } - return addOperand(Inst, DAsm->decodeNonVGPRSrcOp( - OpWidth, Imm & 0xFF, false, ImmWidth, - (AMDGPU::OperandSemantics)OperandSemantics)); + return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF)); } -template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth, - unsigned OperandSemantics> +template <AMDGPUDisassembler::OpWidthTy OpWidth> static DecodeStatus decodeOperand_VSrcT16_Lo128_Deferred(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, @@ -352,13 +339,10 @@ decodeOperand_VSrcT16_Lo128_Deferred(MCInst &Inst, unsigned Imm, unsigned RegIdx = Imm & 0x7f; return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi)); } - return addOperand(Inst, DAsm->decodeNonVGPRSrcOp( - OpWidth, Imm & 0xFF, true, ImmWidth, - (AMDGPU::OperandSemantics)OperandSemantics)); + return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF)); } -template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth, - unsigned OperandSemantics> +template <AMDGPUDisassembler::OpWidthTy OpWidth> static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, const MCDisassembler *Decoder) { @@ -370,9 +354,7 @@ static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, unsigned RegIdx = Imm & 0xff; return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi)); } - return addOperand(Inst, DAsm->decodeNonVGPRSrcOp( - OpWidth, Imm & 0xFF, false, ImmWidth, - (AMDGPU::OperandSemantics)OperandSemantics)); + return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(OpWidth, Imm & 0xFF)); } static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, @@ -462,9 +444,7 @@ static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, const MCDisassembler *Decoder) { assert(Imm < (1 << 9) && "9-bit encoding"); const auto *DAsm = static_cast<const AMDGPUDisassembler 
*>(Decoder); - return addOperand(Inst, - DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64, - AMDGPU::OperandSemantics::FP64)); + return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm)); } #define DECODE_SDWA(DecName) \ @@ -557,6 +537,78 @@ static inline DecoderUInt128 eat16Bytes(ArrayRef<uint8_t> &Bytes) { return DecoderUInt128(Lo, Hi); } +void AMDGPUDisassembler::decodeImmOperands(MCInst &MI, + const MCInstrInfo &MCII) const { + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) { + if (OpNo >= MI.getNumOperands()) + continue; + + // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands, + // defined to take VGPR_32, but in reality allowing inline constants. + bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType && + OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST; + if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER) + continue; + + MCOperand &Op = MI.getOperand(OpNo); + if (!Op.isImm()) + continue; + int64_t Imm = Op.getImm(); + if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm && + Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) { + Op = decodeIntImmed(Imm); + continue; + } + + if (Imm == AMDGPU::EncValues::LITERAL_CONST) { + switch (OpDesc.OperandType) { + case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: + case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: + Op = MCOperand::createImm(AMDGPU::EncValues::LITERAL_CONST); + continue; + default: + Op = decodeLiteralConstant(OpDesc.OperandType == + AMDGPU::OPERAND_REG_IMM_FP64); + continue; + } + } + + if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm && + Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) { + switch (OpDesc.OperandType) { + case AMDGPU::OPERAND_REG_IMM_BF16: + case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED: + case AMDGPU::OPERAND_REG_IMM_V2BF16: + case AMDGPU::OPERAND_REG_INLINE_C_BF16: + case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: + Imm = 
getInlineImmValBF16(Imm); + break; + case AMDGPU::OPERAND_REG_IMM_FP16: + case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: + case AMDGPU::OPERAND_REG_IMM_INT16: + case AMDGPU::OPERAND_REG_IMM_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_C_FP16: + case AMDGPU::OPERAND_REG_INLINE_C_INT16: + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + Imm = getInlineImmValF16(Imm); + break; + case AMDGPU::OPERAND_REG_IMM_FP64: + case AMDGPU::OPERAND_REG_IMM_INT64: + case AMDGPU::OPERAND_REG_INLINE_AC_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_FP64: + case AMDGPU::OPERAND_REG_INLINE_C_INT64: + Imm = getInlineImmVal64(Imm); + break; + default: + Imm = getInlineImmVal32(Imm); + } + Op.setImm(Imm); + } + } +} + DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes_, uint64_t Address, @@ -711,6 +763,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, DecodeStatus Status = MCDisassembler::Success; + decodeImmOperands(MI, *MCII); + if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) { if (isMacDPP(MI)) convertMacDPPInst(MI); @@ -1575,33 +1629,6 @@ static int64_t getInlineImmValBF16(unsigned Imm) { } } -static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) { - return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm) - : getInlineImmValF16(Imm); -} - -MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm, - AMDGPU::OperandSemantics Sema) { - assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN && - Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX); - - // ToDo: case 248: 1/(2*PI) - is allowed only on VI - // ImmWidth 0 is a default case where operand should not allow immediates. - // Imm value is still decoded into 32 bit immediate operand, inst printer will - // use it to print verbose error message. 
- switch (ImmWidth) { - case 0: - case 32: - return MCOperand::createImm(getInlineImmVal32(Imm)); - case 64: - return MCOperand::createImm(getInlineImmVal64(Imm)); - case 16: - return MCOperand::createImm(getInlineImmVal16(Imm, Sema)); - default: - llvm_unreachable("implement me"); - } -} - unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const { using namespace AMDGPU; @@ -1709,10 +1736,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const { return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1; } -MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, - bool MandatoryLiteral, - unsigned ImmWidth, - AMDGPU::OperandSemantics Sema) const { +MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, + unsigned Val) const { using namespace AMDGPU::EncValues; assert(Val < 1024); // enum10 @@ -1724,14 +1749,11 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val, return createRegOperand(IsAGPR ? getAgprClassId(Width) : getVgprClassId(Width), Val - VGPR_MIN); } - return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth, - Sema); + return decodeNonVGPRSrcOp(Width, Val & 0xFF); } -MCOperand -AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val, - bool MandatoryLiteral, unsigned ImmWidth, - AMDGPU::OperandSemantics Sema) const { +MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, + unsigned Val) const { // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been // decoded earlier. 
assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0"); @@ -1748,18 +1770,10 @@ AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val, return createSRegOperand(getTtmpClassId(Width), TTmpIdx); } - if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) - return decodeIntImmed(Val); - - if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) - return decodeFPImmed(ImmWidth, Val, Sema); - - if (Val == LITERAL_CONST) { - if (MandatoryLiteral) - // Keep a sentinel value for deferred setting - return MCOperand::createImm(LITERAL_CONST); - return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64); - } + if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) || + (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) || + Val == LITERAL_CONST) + return MCOperand::createImm(Val); switch (Width) { case OPW32: @@ -1878,10 +1892,8 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const { return errOperand(Val, "unknown operand encoding " + Twine(Val)); } -MCOperand -AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val, - unsigned ImmWidth, - AMDGPU::OperandSemantics Sema) const { +MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, + const unsigned Val) const { using namespace AMDGPU::SDWA; using namespace AMDGPU::EncValues; @@ -1908,11 +1920,9 @@ AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val, const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN; - if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) - return decodeIntImmed(SVal); - - if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX) - return decodeFPImmed(ImmWidth, SVal, Sema); + if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) || + (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)) + return MCOperand::createImm(SVal); return decodeSpecialReg32(SVal); } @@ -1922,11 +1932,11 @@ 
AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val, } MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const { - return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16); + return decodeSDWASrc(OPW16, Val); } MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const { - return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32); + return decodeSDWASrc(OPW32, Val); } MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { |
