summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp96
1 files changed, 74 insertions, 22 deletions
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 6a2beeed41df..6f6039bf4ec2 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -38,6 +38,7 @@
#include "llvm/Support/Compiler.h"
using namespace llvm;
+using namespace llvm::MCD;
#define DEBUG_TYPE "amdgpu-disassembler"
@@ -446,6 +447,14 @@ static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
#include "AMDGPUGenDisassemblerTables.inc"
+namespace {
+// Define bitwidths for various types used to instantiate the decoder.
+template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
+template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
+template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
+template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
+} // namespace
+
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
@@ -498,26 +507,24 @@ template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
return Res;
}
-static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
+static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
+ using namespace llvm::support::endian;
assert(Bytes.size() >= 12);
- uint64_t Lo =
- support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
+ std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
Bytes = Bytes.slice(8);
- uint64_t Hi =
- support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
+ std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
Bytes = Bytes.slice(4);
- return DecoderUInt128(Lo, Hi);
+ return (Hi << 64) | Lo;
}
-static inline DecoderUInt128 eat16Bytes(ArrayRef<uint8_t> &Bytes) {
+static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
+ using namespace llvm::support::endian;
assert(Bytes.size() >= 16);
- uint64_t Lo =
- support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
+ std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
Bytes = Bytes.slice(8);
- uint64_t Hi =
- support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
+ std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
Bytes = Bytes.slice(8);
- return DecoderUInt128(Lo, Hi);
+ return (Hi << 64) | Lo;
}
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
@@ -600,14 +607,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
// encodings
if (isGFX1250() && Bytes.size() >= 16) {
- DecoderUInt128 DecW = eat16Bytes(Bytes);
+ std::bitset<128> DecW = eat16Bytes(Bytes);
if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
break;
Bytes = Bytes_.slice(0, MaxInstBytesNum);
}
if (isGFX11Plus() && Bytes.size() >= 12) {
- DecoderUInt128 DecW = eat12Bytes(Bytes);
+ std::bitset<96> DecW = eat12Bytes(Bytes);
if (isGFX11() &&
tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
@@ -642,7 +649,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
} else if (Bytes.size() >= 16 &&
STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
- DecoderUInt128 DecW = eat16Bytes(Bytes);
+ std::bitset<128> DecW = eat16Bytes(Bytes);
if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
break;
@@ -836,6 +843,18 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
+ // Validate buffer instruction offsets for GFX12+ - must not be negative.
+ if (isGFX12Plus() && isBufferInstruction(MI)) {
+ int OffsetIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
+ if (OffsetIdx != -1) {
+ uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
+ int64_t SignedOffset = SignExtend64<24>(Imm);
+ if (SignedOffset < 0)
+ return MCDisassembler::Fail;
+ }
+ }
+
if (MCII->get(MI.getOpcode()).TSFlags &
(SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
int SWZOpIdx =
@@ -1216,6 +1235,26 @@ void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
}
}
+// Given a wide tuple \p Reg, check if it will overflow 256 registers.
+// \returns \p Reg on success or NoRegister otherwise.
+static unsigned CheckVGPROverflow(unsigned Reg, const MCRegisterClass &RC,
+ const MCRegisterInfo &MRI) {
+ unsigned NumRegs = RC.getSizeInBits() / 32;
+ MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
+ if (!Sub0)
+ return Reg;
+
+ MCRegister BaseReg;
+ if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
+ BaseReg = AMDGPU::VGPR0;
+ else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
+ BaseReg = AMDGPU::AGPR0;
+
+ assert(BaseReg && "Only vector registers expected");
+
+ return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : AMDGPU::NoRegister;
+}
+
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which could be not really so.
@@ -1320,8 +1359,9 @@ void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
- NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
- &MRI.getRegClass(DataRCID));
+ const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
+ NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
+ NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
if (!NewVdata) {
// It's possible to encode this such that the low register + enabled
// components exceeds the register count.
@@ -1340,8 +1380,9 @@ void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
- NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
- &MRI.getRegClass(AddrRCID));
+ const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
+ NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
+ NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
if (!NewVAddrSA)
return;
}
@@ -2598,9 +2639,6 @@ Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
- if (isGFX1250())
- PRINT_DIRECTIVE(".amdhsa_uses_cu_stores",
- KERNEL_CODE_PROPERTY_USES_CU_STORES);
if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
@@ -2743,6 +2781,20 @@ const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
return MCSymbolRefExpr::create(Sym, Ctx);
}
+bool AMDGPUDisassembler::isBufferInstruction(const MCInst &MI) const {
+ const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
+
+ // Check for MUBUF and MTBUF instructions
+ if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
+ return true;
+
+ // Check for SMEM buffer instructions (S_BUFFER_* instructions)
+ if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
+ return true;
+
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//