summaryrefslogtreecommitdiff
path: root/llvm/utils/TableGen/Common/VarLenCodeEmitterGen.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/utils/TableGen/Common/VarLenCodeEmitterGen.cpp')
-rw-r--r--llvm/utils/TableGen/Common/VarLenCodeEmitterGen.cpp526
1 files changed, 526 insertions, 0 deletions
diff --git a/llvm/utils/TableGen/Common/VarLenCodeEmitterGen.cpp b/llvm/utils/TableGen/Common/VarLenCodeEmitterGen.cpp
new file mode 100644
index 000000000000..4263d8f41715
--- /dev/null
+++ b/llvm/utils/TableGen/Common/VarLenCodeEmitterGen.cpp
@@ -0,0 +1,526 @@
+//===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The CodeEmitterGen component for variable-length instructions.
+//
+// The basic CodeEmitterGen is almost exclusively designed for fixed-
+// length instructions. A good analogy for its encoding scheme is how printf
+// works: The (immutable) formatting string represents the fixed values in the
+// encoded instruction. Placeholders (i.e. %something), on the other hand,
+// represent the encodings of instruction operands.
+// ```
+// printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
+// <encoded value for operand `dst`>);
+// ```
+// VarLenCodeEmitterGen in this file provides an alternative encoding scheme
+// that works more like a C++ stream operator:
+// ```
+// OS << 0b1101;
+// if (Cond)
+// OS << OperandEncoding0;
+// OS << 0b1001 << OperandEncoding1;
+// ```
+// You are free to concatenate encoding fragments of arbitrary types (and
+// sizes) at any bit position, which gives more flexibility when defining
+// encodings for variable-length instructions.
+//
+// In a more specific way, instruction encoding is represented by a DAG type
+// `Inst` field. Here is an example:
+// ```
+// dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
+// (operand "$dst", 4));
+// ```
+// It represents the following instruction encoding:
+// ```
+// MSB LSB
+// 1101<encoding for operand src>1001<encoding for operand dst>
+// ```
+// For more details about DAG operators in the above snippet, please
+// refer to \file include/llvm/Target/Target.td.
+//
+// VarLenCodeEmitter will convert the above DAG into the same helper function
+// generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except
+// for a few details).
+//
+//===----------------------------------------------------------------------===//
+
+#include "VarLenCodeEmitterGen.h"
+#include "CodeGenHwModes.h"
+#include "CodeGenInstruction.h"
+#include "CodeGenTarget.h"
+#include "InfoByHwMode.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+
+#include <algorithm>
+
+using namespace llvm;
+
+namespace {
+
+// TableGen backend state for emitting `getBinaryCodeForInstr` from DAG-typed
+// `Inst` fields.
+class VarLenCodeEmitterGen {
+  RecordKeeper &Records;
+
+  // Identifier of one alternative encoding; HwMode ids are stored in it.
+  using AltEncodingTy = int;
+  // Identifier used when an instruction defines only one encoding.
+  const AltEncodingTy Universal = -1;
+  // All alternative encodings seen so far, each mapped to the name suffix
+  // that is appended to generated identifiers to keep the output readable.
+  std::map<AltEncodingTy, std::string> Modes;
+
+  // Parsed encodings, keyed by instruction record and then by encoding id.
+  DenseMap<Record *, DenseMap<AltEncodingTy, VarLenInst>> VarLenInsts;
+
+  // Build the body of the generated `case` for one encoding of `R`; `I` is
+  // the indentation applied to every emitted line.
+  std::string getInstructionCaseForEncoding(Record *R, AltEncodingTy Mode,
+                                            const VarLenInst &VLI,
+                                            CodeGenTarget &Target, int I);
+  // Build the generated `case` body for `R`, covering all of its encodings.
+  std::string getInstructionCases(Record *R, CodeGenTarget &Target);
+
+  // Emit the base values (i.e. the fixed bits of the encoded instructions)
+  // for the encoding identified by `Mode`.
+  void emitInstructionBaseValues(
+      raw_ostream &OS,
+      ArrayRef<const CodeGenInstruction *> NumberedInstructions,
+      CodeGenTarget &Target, AltEncodingTy Mode);
+
+public:
+  explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {}
+
+  // Write the generated function to `OS`.
+  void run(raw_ostream &OS);
+};
+} // end anonymous namespace
+
+// Scan `Args` for `(encoder "function name")` and `(decoder "function name")`
+// directives and return `{encoder name, decoder name}`. A name stays empty
+// when its directive is absent; when a directive occurs more than once the
+// last occurrence wins. Arguments that are not such directives are skipped.
+static std::pair<StringRef, StringRef> getCustomCoders(ArrayRef<Init *> Args) {
+  StringRef Encoder, Decoder;
+  for (const Init *Arg : Args) {
+    const auto *Directive = dyn_cast<DagInit>(Arg);
+    if (!Directive)
+      continue;
+    const auto *OpDef = dyn_cast<DefInit>(Directive->getOperator());
+    if (!OpDef)
+      continue;
+    // syntax: `(<encoder | decoder> "function name")`
+    StringRef OpName = OpDef->getDef()->getName();
+    const bool IsEncoder = OpName == "encoder";
+    if (!IsEncoder && OpName != "decoder")
+      continue;
+    if (!Directive->getNumArgs() || !isa<StringInit>(Directive->getArg(0)))
+      PrintFatalError("expected '" + OpName +
+                      "' directive to be followed by a custom function name.");
+    StringRef FuncName = cast<StringInit>(Directive->getArg(0))->getValue();
+    if (IsEncoder)
+      Encoder = FuncName;
+    else
+      Decoder = FuncName;
+  }
+  return {Encoder, Decoder};
+}
+
+// Parse the encoding DAG `DI` into segments, then cache the total bit width
+// as the sum of all segment widths.
+VarLenInst::VarLenInst(const DagInit *DI, const RecordVal *TheDef)
+    : TheDef(TheDef), NumBits(0U), HasDynamicSegment(false) {
+  buildRec(DI);
+  for (const auto &Segment : Segments) {
+    NumBits += Segment.BitWidth;
+  }
+}
+
+// Recursively translate the encoding DAG `DI` into entries of `Segments`.
+//
+// Handled operators:
+//   * `ascend` / `descend`: a sequence of fixed bits and nested DAGs;
+//   * `operand`: a whole operand of a given width;
+//   * `slice`: a bit range of an operand.
+// A DAG with any other operator contributes no segment. Malformed input is
+// reported with PrintFatalError at the location of `TheDef`.
+void VarLenInst::buildRec(const DagInit *DI) {
+  assert(TheDef && "The def record is nullptr ?");
+
+  const std::string Op = DI->getOperator()->getAsString();
+
+  if (Op == "ascend" || Op == "descend") {
+    // `descend` arguments are visited last-to-first, `ascend` arguments
+    // first-to-last.
+    const bool Reverse = Op == "descend";
+    const unsigned NumArgs = DI->getNumArgs();
+    for (unsigned N = 0; N != NumArgs; ++N) {
+      const Init *Arg = DI->getArg(Reverse ? NumArgs - 1 - N : N);
+      if (const auto *Bits = dyn_cast<BitsInit>(Arg)) {
+        if (!Bits->isComplete())
+          PrintFatalError(TheDef->getLoc(),
+                          "Expecting complete bits init in `" + Op + "`");
+        Segments.push_back({Bits->getNumBits(), Bits});
+        continue;
+      }
+      if (const auto *Bit = dyn_cast<BitInit>(Arg)) {
+        if (!Bit->isConcrete())
+          PrintFatalError(TheDef->getLoc(),
+                          "Expecting concrete bit init in `" + Op + "`");
+        Segments.push_back({1, Bit});
+        continue;
+      }
+      if (const auto *Nested = dyn_cast<DagInit>(Arg)) {
+        buildRec(Nested);
+        continue;
+      }
+      PrintFatalError(TheDef->getLoc(), "Unrecognized type of argument in `" +
+                                            Op + "`: " + Arg->getAsString());
+    }
+    return;
+  }
+
+  if (Op == "operand") {
+    // (operand <operand name>, <# of bits>,
+    //          [(encoder <custom encoder>)][, (decoder <custom decoder>)])
+    if (DI->getNumArgs() < 2)
+      PrintFatalError(TheDef->getLoc(),
+                      "Expecting at least 2 arguments for `operand`");
+    HasDynamicSegment = true;
+    const Init *OperandName = DI->getArg(0);
+    const Init *WidthInit = DI->getArg(1);
+    if (!isa<StringInit>(OperandName) || !isa<IntInit>(WidthInit))
+      PrintFatalError(TheDef->getLoc(), "Invalid argument types for `operand`");
+
+    const auto Width = cast<IntInit>(WidthInit)->getValue();
+    if (Width <= 0)
+      PrintFatalError(TheDef->getLoc(), "Invalid number of bits for `operand`");
+
+    // Everything after the two mandatory arguments may name custom coders.
+    auto [CustomEncoder, CustomDecoder] =
+        getCustomCoders(DI->getArgs().slice(2));
+    Segments.push_back({static_cast<unsigned>(Width), OperandName,
+                        CustomEncoder, CustomDecoder});
+    return;
+  }
+
+  if (Op == "slice") {
+    // (slice <operand name>, <high / low bit>, <low / high bit>,
+    //        [(encoder <custom encoder>)][, (decoder <custom decoder>)])
+    if (DI->getNumArgs() < 3)
+      PrintFatalError(TheDef->getLoc(),
+                      "Expecting at least 3 arguments for `slice`");
+    HasDynamicSegment = true;
+    Init *OperandName = DI->getArg(0);
+    Init *HiBit = DI->getArg(1);
+    Init *LoBit = DI->getArg(2);
+    if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) ||
+        !isa<IntInit>(LoBit))
+      PrintFatalError(TheDef->getLoc(), "Invalid argument types for `slice`");
+
+    const auto HiBitVal = cast<IntInit>(HiBit)->getValue();
+    const auto LoBitVal = cast<IntInit>(LoBit)->getValue();
+    if (HiBitVal < 0 || LoBitVal < 0)
+      PrintFatalError(TheDef->getLoc(), "Invalid bit range for `slice`");
+
+    // The two bounds may be written in either order.
+    const bool NeedSwap = HiBitVal < LoBitVal;
+    const unsigned SliceBits = static_cast<unsigned>(
+        NeedSwap ? LoBitVal - HiBitVal + 1 : HiBitVal - LoBitVal + 1);
+
+    auto [CustomEncoder, CustomDecoder] =
+        getCustomCoders(DI->getArgs().slice(3));
+
+    const Init *SliceDag = DI;
+    if (NeedSwap) {
+      // Normalization: Hi bit should always be the second argument.
+      Init *const NewArgs[] = {OperandName, LoBit, HiBit};
+      SliceDag = DagInit::get(DI->getOperator(), nullptr, NewArgs, {});
+    }
+    Segments.push_back({SliceBits, SliceDag, CustomEncoder, CustomDecoder});
+  }
+}
+
+// Emit the definition of `<Target>MCCodeEmitter::getBinaryCodeForInstr` to
+// `OS`.
+//
+// Steps:
+//   1. Parse the `Inst` DAG of every instruction that is neither in the
+//      `TargetOpcode` namespace nor marked `isPseudo`, once per HwMode when
+//      the instruction provides `EncodingInfos`.
+//   2. Emit the base-value tables and one `getInstBits<suffix>` lambda per
+//      encoding mode.
+//   3. Emit a `switch` over the opcode; instructions whose generated bodies
+//      are identical share one case body.
+//
+// A missing or non-DAG `Inst` field is reported with PrintFatalError.
+void VarLenCodeEmitterGen::run(raw_ostream &OS) {
+  CodeGenTarget Target(Records);
+  auto Insts = Records.getAllDerivedDefinitions("Instruction");
+
+  auto NumberedInstructions = Target.getInstructionsByEnumValue();
+
+  // Parse the `Inst` field of `Owner`. Diagnose a missing or non-DAG field
+  // instead of dereferencing a null pointer or performing an invalid cast.
+  auto ParseInstField = [](Record *Owner) -> VarLenInst {
+    const RecordVal *InstVal = Owner->getValue("Inst");
+    const auto *InstDag =
+        InstVal ? dyn_cast_or_null<DagInit>(InstVal->getValue()) : nullptr;
+    if (!InstDag)
+      PrintFatalError(Owner, "Expecting a DAG-typed `Inst` field for "
+                             "variable-length encoding");
+    return VarLenInst(InstDag, InstVal);
+  };
+
+  // Set once any instruction provides a HwMode-specific encoding. Such
+  // instructions are emitted as `switch (Mode)`, so the generated function
+  // must declare `Mode` even when only one HwMode is in use.
+  bool HasModeSpecificEncoding = false;
+
+  for (const CodeGenInstruction *CGI : NumberedInstructions) {
+    Record *R = CGI->TheDef;
+    // Create the corresponding VarLenInst instance.
+    if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+        R->getValueAsBit("isPseudo"))
+      continue;
+
+    // Setup alternative encodings according to HwModes
+    if (const RecordVal *InfosVal = R->getValue("EncodingInfos")) {
+      if (auto *InfosDef = dyn_cast_or_null<DefInit>(InfosVal->getValue())) {
+        const CodeGenHwModes &HWM = Target.getHwModes();
+        EncodingInfoByHwMode EBM(InfosDef->getDef(), HWM);
+        for (auto &KV : EBM) {
+          AltEncodingTy Mode = KV.first;
+          Modes.insert({Mode, "_" + HWM.getMode(Mode).Name.str()});
+          VarLenInsts[R].insert({Mode, ParseInstField(KV.second)});
+          HasModeSpecificEncoding = true;
+        }
+        continue;
+      }
+    }
+    VarLenInsts[R].insert({Universal, ParseInstField(R)});
+  }
+
+  if (Modes.empty())
+    Modes.insert({Universal, ""}); // Base case, skip suffix.
+
+  // Emit function declaration
+  OS << "void " << Target.getName()
+     << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
+     << " SmallVectorImpl<MCFixup> &Fixups,\n"
+     << " APInt &Inst,\n"
+     << " APInt &Scratch,\n"
+     << " const MCSubtargetInfo &STI) const {\n";
+
+  // Emit instruction base values
+  for (const auto &Mode : Modes)
+    emitInstructionBaseValues(OS, NumberedInstructions, Target, Mode.first);
+
+  // `Mode` is read by every generated `switch (Mode)`.
+  if (HasModeSpecificEncoding) {
+    OS << " unsigned Mode = STI.getHwMode();\n";
+  }
+
+  for (const auto &Mode : Modes) {
+    // Emit helper function to retrieve base values.
+    OS << " auto getInstBits" << Mode.second
+       << " = [&](unsigned Opcode) -> APInt {\n"
+       << " unsigned NumBits = Index" << Mode.second << "[Opcode][0];\n"
+       << " if (!NumBits)\n"
+       << " return APInt::getZeroWidth();\n"
+       << " unsigned Idx = Index" << Mode.second << "[Opcode][1];\n"
+       << " ArrayRef<uint64_t> Data(&InstBits" << Mode.second << "[Idx], "
+       << "APInt::getNumWords(NumBits));\n"
+       << " return APInt(NumBits, Data);\n"
+       << " };\n";
+  }
+
+  // Map to accumulate all the cases: generated body -> opcodes sharing it.
+  std::map<std::string, std::vector<std::string>> CaseMap;
+
+  // Construct all cases statement for each opcode
+  for (Record *R : Insts) {
+    if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+        R->getValueAsBit("isPseudo"))
+      continue;
+    std::string InstName =
+        (R->getValueAsString("Namespace") + "::" + R->getName()).str();
+    std::string Case = getInstructionCases(R, Target);
+
+    CaseMap[Case].push_back(std::move(InstName));
+  }
+
+  // Emit initial function code
+  OS << " const unsigned opcode = MI.getOpcode();\n"
+     << " switch (opcode) {\n";
+
+  // Emit each case statement
+  for (const auto &C : CaseMap) {
+    const std::string &Case = C.first;
+    const auto &InstList = C.second;
+
+    ListSeparator LS("\n");
+    for (const auto &InstName : InstList)
+      OS << LS << " case " << InstName << ":";
+
+    OS << " {\n";
+    OS << Case;
+    OS << " break;\n"
+       << " }\n";
+  }
+  // Default case: unhandled opcode
+  OS << " default:\n"
+     << " std::string msg;\n"
+     << " raw_string_ostream Msg(msg);\n"
+     << " Msg << \"Not supported instr: \" << MI;\n"
+     << " report_fatal_error(Msg.str().c_str());\n"
+     << " }\n";
+  OS << "}\n\n";
+}
+
+// Append the index-table entry for `Bits` to `IS` and the 64-bit words of
+// `Bits` to `SS`. `Index` is the storage position recorded in the entry; it
+// is advanced by one for every word written. A value without any words only
+// produces a `{0, 0}` index entry and leaves `SS` and `Index` untouched.
+static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,
+                         unsigned &Index) {
+  const unsigned NumWords = Bits.getNumWords();
+  if (NumWords == 0) {
+    IS.indent(4) << "{/*NumBits*/0, /*Index*/0},";
+    return;
+  }
+
+  IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", /*Index*/"
+               << Index << "},";
+
+  SS.indent(4);
+  const uint64_t *Words = Bits.getRawData();
+  for (unsigned W = 0; W != NumWords; ++W) {
+    SS << "UINT64_C(" << utostr(Words[W]) << "),";
+    ++Index;
+  }
+}
+
+// Emit two static tables for the encoding identified by `Mode`:
+//   * `Index<suffix>`: one `{NumBits, Index}` entry per instruction in
+//     `NumberedInstructions` order, followed by a trailing `{0, 0}` entry;
+//   * `InstBits<suffix>`: the fixed-bit words the index entries refer to,
+//     followed by a trailing zero word.
+// Instructions in the `TargetOpcode` namespace, pseudo instructions and
+// instructions without an encoding for `Mode` (nor a universal one) get a
+// `{0, 0}` index entry. Operand bits are left as zero in the stored values.
+void VarLenCodeEmitterGen::emitInstructionBaseValues(
+    raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
+    CodeGenTarget &Target, AltEncodingTy Mode) {
+  std::string IndexArray, StorageArray;
+  raw_string_ostream IS(IndexArray), SS(StorageArray);
+
+  const std::string &Suffix = Modes[Mode];
+  IS << " static const unsigned Index" << Suffix << "[][2] = {\n";
+  SS << " static const uint64_t InstBits" << Suffix << "[] = {\n";
+
+  // Position of the next free word in the storage array.
+  unsigned NextWord = 0U;
+  for (const CodeGenInstruction *CGI : NumberedInstructions) {
+    Record *R = CGI->TheDef;
+
+    if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+        R->getValueAsBit("isPseudo")) {
+      IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n";
+      continue;
+    }
+
+    const auto InstIt = VarLenInsts.find(R);
+    if (InstIt == VarLenInsts.end())
+      PrintFatalError(R, "VarLenInst not found for this record");
+
+    // Prefer the encoding of the requested mode, else the universal one.
+    const auto &Encodings = InstIt->second;
+    auto EncIt = Encodings.find(Mode);
+    if (EncIt == Encodings.end())
+      EncIt = Encodings.find(Universal);
+    if (EncIt == Encodings.end()) {
+      IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\t" << "// " << R->getName()
+                   << " no encoding\n";
+      continue;
+    }
+    const VarLenInst &VLI = EncIt->second;
+    const unsigned BitWidth = VLI.size();
+
+    // Start from all zeros, then copy every fixed-bits segment to its bit
+    // position; other segments only advance the position.
+    APInt Value(BitWidth, 0);
+    unsigned BitPos = 0U;
+    for (auto SI = VLI.begin(), SE = VLI.end(); BitPos < BitWidth && SI != SE;
+         ++SI) {
+      const unsigned SegmentBits = SI->BitWidth;
+      if (const auto *Bits = dyn_cast<BitsInit>(SI->Value)) {
+        for (unsigned Idx = 0U; Idx != SegmentBits; ++Idx) {
+          auto *B = cast<BitInit>(Bits->getBit(Idx));
+          Value.setBitVal(BitPos + Idx, B->getValue());
+        }
+      } else if (const auto *Bit = dyn_cast<BitInit>(SI->Value)) {
+        Value.setBitVal(BitPos, Bit->getValue());
+      }
+      BitPos += SegmentBits;
+    }
+
+    emitInstBits(IS, SS, Value, NextWord);
+    IS << '\t' << "// " << R->getName() << "\n";
+    if (Value.getNumWords())
+      SS << '\t' << "// " << R->getName() << "\n";
+  }
+  IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n };\n";
+  SS.indent(4) << "UINT64_C(0)\n };\n";
+
+  OS << IS.str() << SS.str();
+}
+
+// Build the generated `case` body for instruction `R`.
+//
+// If `R` has exactly one encoding and it is the universal one, the body is
+// that encoding's code. Otherwise the body is a `switch (Mode)` with one case
+// per known mode; modes for which `R` defines no encoding are emitted as
+// `llvm_unreachable`. Reports a fatal error when `R` was never parsed.
+std::string VarLenCodeEmitterGen::getInstructionCases(Record *R,
+                                                      CodeGenTarget &Target) {
+  auto Found = VarLenInsts.find(R);
+  if (Found == VarLenInsts.end())
+    PrintFatalError(R, "Parsed encoding record not found");
+  const auto &Encodings = Found->second;
+
+  // Is this instruction's encoding universal (same for all modes)?
+  // Always true if there is only one mode.
+  const bool IsUniversal =
+      Encodings.size() == 1 && Encodings.begin()->first == Universal;
+  if (IsUniversal) {
+    // Universal, just pick the first mode.
+    return getInstructionCaseForEncoding(R, Modes.begin()->first,
+                                         Encodings.begin()->second, Target, 6);
+  }
+
+  std::string Case = " switch (Mode) {\n";
+  Case += " default: llvm_unreachable(\"Unhandled Mode\");\n";
+  for (const auto &ModeEntry : Modes) {
+    const AltEncodingTy ModeId = ModeEntry.first;
+    Case += " case " + itostr(ModeId) + ": {\n";
+    const auto EncIt = Encodings.find(ModeId);
+    if (EncIt != Encodings.end()) {
+      Case += getInstructionCaseForEncoding(R, EncIt->first, EncIt->second,
+                                            Target, 8);
+    } else {
+      Case += " llvm_unreachable(\"Undefined encoding in this mode\");\n";
+    }
+    Case += " break;\n";
+    Case += " }\n";
+  }
+  Case += " }\n";
+  return Case;
+}
+
+// Build the generated code that encodes instruction `R` with encoding `VLI`.
+//
+// The generated code loads the base value via `getInstBits<suffix>(opcode)`,
+// then, for every operand or operand-slice segment, computes the operand
+// value into `Scratch` (with the segment's custom encoder, else the operand's
+// encoder method, else `getMachineOpValue`) and inserts the selected bits
+// into `Inst` at the segment's bit offset. A `PostEncoderMethod`, if set on
+// `R`, is applied last. Unless the encoding is empty or fixed-value only, the
+// result starts with a statement growing `Scratch` to the highest bit
+// accessed. `I` is the indentation of each emitted line.
+std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
+    Record *R, AltEncodingTy Mode, const VarLenInst &VLI, CodeGenTarget &Target,
+    int I) {
+
+  CodeGenInstruction &CGI = Target.getInstruction(R);
+
+  std::string Body;
+  raw_string_ostream BodyOS(Body);
+  // Populate base value.
+  BodyOS.indent(I) << "Inst = getInstBits" << Modes[Mode] << "(opcode);\n";
+
+  // Walk the segments, tracking the bit offset of each one.
+  size_t BitOffset = 0U;
+  unsigned ScratchBitsNeeded = 0U;
+  for (const auto &Segment : VLI) {
+    const unsigned Width = Segment.BitWidth;
+    const Init *Val = Segment.Value;
+    const auto *NameInit = dyn_cast<StringInit>(Val);
+    const auto *SliceDag = dyn_cast<DagInit>(Val);
+    // Only a StringInit (whole operand) or a DagInit (slice of an operand)
+    // needs generated code; anything else just occupies its bits.
+    if (!NameInit && !SliceDag) {
+      BitOffset += Width;
+      continue;
+    }
+
+    StringRef OperandName;
+    unsigned LoBit = 0U;
+    if (NameInit) {
+      OperandName = NameInit->getValue();
+    } else {
+      // Normalized: (slice <operand name>, <high bit>, <low bit>)
+      OperandName = cast<StringInit>(SliceDag->getArg(0))->getValue();
+      LoBit =
+          static_cast<unsigned>(cast<IntInit>(SliceDag->getArg(2))->getValue());
+    }
+
+    auto OpIdx = CGI.Operands.ParseOperandName(OperandName);
+    unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);
+    // An encoder named on the segment overrides the operand's own one.
+    StringRef Encoder =
+        CGI.Operands[OpIdx.first].EncoderMethodNames[OpIdx.second];
+    if (!Segment.CustomEncoder.empty())
+      Encoder = Segment.CustomEncoder;
+
+    BodyOS.indent(I) << "Scratch.clearAllBits();\n";
+    BodyOS.indent(I) << "// op: " << OperandName.drop_front(1) << "\n";
+    if (Encoder.empty()) {
+      BodyOS.indent(I) << "getMachineOpValue(MI, MI.getOperand("
+                       << utostr(FlatOpIdx) << ")";
+    } else {
+      BodyOS.indent(I) << Encoder << "(MI, /*OpIdx=*/" << utostr(FlatOpIdx);
+    }
+    BodyOS << ", /*Pos=*/" << utostr(BitOffset)
+           << ", Scratch, Fixups, STI);\n";
+
+    BodyOS.indent(I) << "Inst.insertBits(Scratch.extractBits("
+                     << utostr(Width) << ", " << utostr(LoBit) << "), "
+                     << BitOffset << ");\n";
+
+    ScratchBitsNeeded = std::max(ScratchBitsNeeded, Width + LoBit);
+    BitOffset += Width;
+  }
+
+  StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
+  if (!PostEmitter.empty())
+    BodyOS.indent(I) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";
+
+  // Resize the scratch buffer if it's too small.
+  std::string ScratchResize;
+  if (VLI.size() && !VLI.isFixedValueOnly()) {
+    raw_string_ostream ResizeOS(ScratchResize);
+    ResizeOS.indent(I) << "if (Scratch.getBitWidth() < " << ScratchBitsNeeded
+                       << ") { Scratch = Scratch.zext(" << ScratchBitsNeeded
+                       << "); }\n";
+  }
+
+  return ScratchResize + Body;
+}
+
+namespace llvm {
+
+// Run the variable-length code emitter on the records in `R`, writing the
+// generated code to `OS`.
+void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) {
+  VarLenCodeEmitterGen Emitter(R);
+  Emitter.run(OS);
+}
+
+} // end namespace llvm