//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is part of the WebAssembly Disassembler.
///
/// It contains code to translate the data produced by the decoder into
/// MCInsts.
///
//===----------------------------------------------------------------------===//
17444ed5c5SDimitry Andric
18444ed5c5SDimitry Andric #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
19444ed5c5SDimitry Andric #include "llvm/MC/MCContext.h"
203ca95b02SDimitry Andric #include "llvm/MC/MCDisassembler/MCDisassembler.h"
214ba319b5SDimitry Andric #include "llvm/MC/MCFixedLenDisassembler.h"
22444ed5c5SDimitry Andric #include "llvm/MC/MCInst.h"
23444ed5c5SDimitry Andric #include "llvm/MC/MCInstrInfo.h"
24444ed5c5SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
25444ed5c5SDimitry Andric #include "llvm/MC/MCSymbol.h"
26444ed5c5SDimitry Andric #include "llvm/Support/Endian.h"
274ba319b5SDimitry Andric #include "llvm/Support/LEB128.h"
28444ed5c5SDimitry Andric #include "llvm/Support/TargetRegistry.h"
294ba319b5SDimitry Andric
30444ed5c5SDimitry Andric using namespace llvm;
31444ed5c5SDimitry Andric
32444ed5c5SDimitry Andric #define DEBUG_TYPE "wasm-disassembler"
33444ed5c5SDimitry Andric
344ba319b5SDimitry Andric using DecodeStatus = MCDisassembler::DecodeStatus;
354ba319b5SDimitry Andric
364ba319b5SDimitry Andric #include "WebAssemblyGenDisassemblerTables.inc"
374ba319b5SDimitry Andric
38444ed5c5SDimitry Andric namespace {
39*b5893f02SDimitry Andric static constexpr int WebAssemblyInstructionTableSize = 256;
40*b5893f02SDimitry Andric
41444ed5c5SDimitry Andric class WebAssemblyDisassembler final : public MCDisassembler {
42444ed5c5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII;
43444ed5c5SDimitry Andric
44444ed5c5SDimitry Andric DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
45444ed5c5SDimitry Andric ArrayRef<uint8_t> Bytes, uint64_t Address,
46444ed5c5SDimitry Andric raw_ostream &VStream,
47444ed5c5SDimitry Andric raw_ostream &CStream) const override;
48444ed5c5SDimitry Andric
49444ed5c5SDimitry Andric public:
WebAssemblyDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx,std::unique_ptr<const MCInstrInfo> MCII)50444ed5c5SDimitry Andric WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
51444ed5c5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII)
52444ed5c5SDimitry Andric : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
53444ed5c5SDimitry Andric };
54444ed5c5SDimitry Andric } // end anonymous namespace
55444ed5c5SDimitry Andric
createWebAssemblyDisassembler(const Target & T,const MCSubtargetInfo & STI,MCContext & Ctx)56444ed5c5SDimitry Andric static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
57444ed5c5SDimitry Andric const MCSubtargetInfo &STI,
58444ed5c5SDimitry Andric MCContext &Ctx) {
59444ed5c5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
60444ed5c5SDimitry Andric return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
61444ed5c5SDimitry Andric }
62444ed5c5SDimitry Andric
LLVMInitializeWebAssemblyDisassembler()63444ed5c5SDimitry Andric extern "C" void LLVMInitializeWebAssemblyDisassembler() {
64444ed5c5SDimitry Andric // Register the disassembler for each target.
65d88c1a5aSDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
66444ed5c5SDimitry Andric createWebAssemblyDisassembler);
67d88c1a5aSDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
68444ed5c5SDimitry Andric createWebAssemblyDisassembler);
69444ed5c5SDimitry Andric }
70444ed5c5SDimitry Andric
nextByte(ArrayRef<uint8_t> Bytes,uint64_t & Size)714ba319b5SDimitry Andric static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
724ba319b5SDimitry Andric if (Size >= Bytes.size())
734ba319b5SDimitry Andric return -1;
744ba319b5SDimitry Andric auto V = Bytes[Size];
754ba319b5SDimitry Andric Size++;
764ba319b5SDimitry Andric return V;
774ba319b5SDimitry Andric }
784ba319b5SDimitry Andric
nextLEB(int64_t & Val,ArrayRef<uint8_t> Bytes,uint64_t & Size,bool Signed=false)79*b5893f02SDimitry Andric static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
80*b5893f02SDimitry Andric bool Signed = false) {
814ba319b5SDimitry Andric unsigned N = 0;
824ba319b5SDimitry Andric const char *Error = nullptr;
83*b5893f02SDimitry Andric Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
844ba319b5SDimitry Andric Bytes.data() + Bytes.size(), &Error)
85*b5893f02SDimitry Andric : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
86*b5893f02SDimitry Andric Bytes.data() + Bytes.size(),
87*b5893f02SDimitry Andric &Error));
884ba319b5SDimitry Andric if (Error)
894ba319b5SDimitry Andric return false;
904ba319b5SDimitry Andric Size += N;
91*b5893f02SDimitry Andric return true;
92*b5893f02SDimitry Andric }
93*b5893f02SDimitry Andric
parseLEBImmediate(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes,bool Signed)94*b5893f02SDimitry Andric static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
95*b5893f02SDimitry Andric ArrayRef<uint8_t> Bytes, bool Signed) {
96*b5893f02SDimitry Andric int64_t Val;
97*b5893f02SDimitry Andric if (!nextLEB(Val, Bytes, Size, Signed))
98*b5893f02SDimitry Andric return false;
994ba319b5SDimitry Andric MI.addOperand(MCOperand::createImm(Val));
1004ba319b5SDimitry Andric return true;
1014ba319b5SDimitry Andric }
1024ba319b5SDimitry Andric
1034ba319b5SDimitry Andric template <typename T>
parseImmediate(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes)104*b5893f02SDimitry Andric bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
1054ba319b5SDimitry Andric if (Size + sizeof(T) > Bytes.size())
1064ba319b5SDimitry Andric return false;
1074ba319b5SDimitry Andric T Val;
1084ba319b5SDimitry Andric memcpy(&Val, Bytes.data() + Size, sizeof(T));
1094ba319b5SDimitry Andric support::endian::byte_swap<T, support::endianness::little>(Val);
1104ba319b5SDimitry Andric Size += sizeof(T);
111*b5893f02SDimitry Andric if (std::is_floating_point<T>::value) {
1124ba319b5SDimitry Andric MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val)));
113*b5893f02SDimitry Andric } else {
114*b5893f02SDimitry Andric MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
115*b5893f02SDimitry Andric }
1164ba319b5SDimitry Andric return true;
1174ba319b5SDimitry Andric }
1184ba319b5SDimitry Andric
getInstruction(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes,uint64_t,raw_ostream &,raw_ostream & CS) const119444ed5c5SDimitry Andric MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
120444ed5c5SDimitry Andric MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
1214ba319b5SDimitry Andric raw_ostream & /*OS*/, raw_ostream &CS) const {
1224ba319b5SDimitry Andric CommentStream = &CS;
1234ba319b5SDimitry Andric Size = 0;
124*b5893f02SDimitry Andric int Opc = nextByte(Bytes, Size);
1254ba319b5SDimitry Andric if (Opc < 0)
126444ed5c5SDimitry Andric return MCDisassembler::Fail;
1274ba319b5SDimitry Andric const auto *WasmInst = &InstructionTable0[Opc];
1284ba319b5SDimitry Andric // If this is a prefix byte, indirect to another table.
1294ba319b5SDimitry Andric if (WasmInst->ET == ET_Prefix) {
1304ba319b5SDimitry Andric WasmInst = nullptr;
1314ba319b5SDimitry Andric // Linear search, so far only 2 entries.
1324ba319b5SDimitry Andric for (auto PT = PrefixTable; PT->Table; PT++) {
1334ba319b5SDimitry Andric if (PT->Prefix == Opc) {
1344ba319b5SDimitry Andric WasmInst = PT->Table;
1354ba319b5SDimitry Andric break;
1364ba319b5SDimitry Andric }
1374ba319b5SDimitry Andric }
1384ba319b5SDimitry Andric if (!WasmInst)
1394ba319b5SDimitry Andric return MCDisassembler::Fail;
140*b5893f02SDimitry Andric int64_t PrefixedOpc;
141*b5893f02SDimitry Andric if (!nextLEB(PrefixedOpc, Bytes, Size))
1424ba319b5SDimitry Andric return MCDisassembler::Fail;
143*b5893f02SDimitry Andric if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
144*b5893f02SDimitry Andric return MCDisassembler::Fail;
145*b5893f02SDimitry Andric WasmInst += PrefixedOpc;
1464ba319b5SDimitry Andric }
1474ba319b5SDimitry Andric if (WasmInst->ET == ET_Unused)
1484ba319b5SDimitry Andric return MCDisassembler::Fail;
1494ba319b5SDimitry Andric // At this point we must have a valid instruction to decode.
1504ba319b5SDimitry Andric assert(WasmInst->ET == ET_Instruction);
1514ba319b5SDimitry Andric MI.setOpcode(WasmInst->Opcode);
1524ba319b5SDimitry Andric // Parse any operands.
1534ba319b5SDimitry Andric for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
154*b5893f02SDimitry Andric auto OT = OperandTable[WasmInst->OperandStart + OPI];
155*b5893f02SDimitry Andric switch (OT) {
1564ba319b5SDimitry Andric // ULEB operands:
1574ba319b5SDimitry Andric case WebAssembly::OPERAND_BASIC_BLOCK:
1584ba319b5SDimitry Andric case WebAssembly::OPERAND_LOCAL:
1594ba319b5SDimitry Andric case WebAssembly::OPERAND_GLOBAL:
1604ba319b5SDimitry Andric case WebAssembly::OPERAND_FUNCTION32:
1614ba319b5SDimitry Andric case WebAssembly::OPERAND_OFFSET32:
1624ba319b5SDimitry Andric case WebAssembly::OPERAND_P2ALIGN:
1634ba319b5SDimitry Andric case WebAssembly::OPERAND_TYPEINDEX:
1644ba319b5SDimitry Andric case MCOI::OPERAND_IMMEDIATE: {
1654ba319b5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false))
1664ba319b5SDimitry Andric return MCDisassembler::Fail;
1674ba319b5SDimitry Andric break;
1684ba319b5SDimitry Andric }
1694ba319b5SDimitry Andric // SLEB operands:
1704ba319b5SDimitry Andric case WebAssembly::OPERAND_I32IMM:
171*b5893f02SDimitry Andric case WebAssembly::OPERAND_I64IMM: {
1724ba319b5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, true))
1734ba319b5SDimitry Andric return MCDisassembler::Fail;
1744ba319b5SDimitry Andric break;
1754ba319b5SDimitry Andric }
176*b5893f02SDimitry Andric // block_type operands (uint8_t).
177*b5893f02SDimitry Andric case WebAssembly::OPERAND_SIGNATURE: {
178*b5893f02SDimitry Andric if (!parseImmediate<uint8_t>(MI, Size, Bytes))
179*b5893f02SDimitry Andric return MCDisassembler::Fail;
180*b5893f02SDimitry Andric break;
181*b5893f02SDimitry Andric }
1824ba319b5SDimitry Andric // FP operands.
1834ba319b5SDimitry Andric case WebAssembly::OPERAND_F32IMM: {
184*b5893f02SDimitry Andric if (!parseImmediate<float>(MI, Size, Bytes))
1854ba319b5SDimitry Andric return MCDisassembler::Fail;
1864ba319b5SDimitry Andric break;
1874ba319b5SDimitry Andric }
1884ba319b5SDimitry Andric case WebAssembly::OPERAND_F64IMM: {
189*b5893f02SDimitry Andric if (!parseImmediate<double>(MI, Size, Bytes))
1904ba319b5SDimitry Andric return MCDisassembler::Fail;
1914ba319b5SDimitry Andric break;
1924ba319b5SDimitry Andric }
193*b5893f02SDimitry Andric // Vector lane operands (not LEB encoded).
194*b5893f02SDimitry Andric case WebAssembly::OPERAND_VEC_I8IMM: {
195*b5893f02SDimitry Andric if (!parseImmediate<uint8_t>(MI, Size, Bytes))
196*b5893f02SDimitry Andric return MCDisassembler::Fail;
1974ba319b5SDimitry Andric break;
1984ba319b5SDimitry Andric }
199*b5893f02SDimitry Andric case WebAssembly::OPERAND_VEC_I16IMM: {
200*b5893f02SDimitry Andric if (!parseImmediate<uint16_t>(MI, Size, Bytes))
201*b5893f02SDimitry Andric return MCDisassembler::Fail;
202*b5893f02SDimitry Andric break;
203*b5893f02SDimitry Andric }
204*b5893f02SDimitry Andric case WebAssembly::OPERAND_VEC_I32IMM: {
205*b5893f02SDimitry Andric if (!parseImmediate<uint32_t>(MI, Size, Bytes))
206*b5893f02SDimitry Andric return MCDisassembler::Fail;
207*b5893f02SDimitry Andric break;
208*b5893f02SDimitry Andric }
209*b5893f02SDimitry Andric case WebAssembly::OPERAND_VEC_I64IMM: {
210*b5893f02SDimitry Andric if (!parseImmediate<uint64_t>(MI, Size, Bytes))
211*b5893f02SDimitry Andric return MCDisassembler::Fail;
212*b5893f02SDimitry Andric break;
213*b5893f02SDimitry Andric }
214*b5893f02SDimitry Andric case WebAssembly::OPERAND_BRLIST: {
215*b5893f02SDimitry Andric int64_t TargetTableLen;
216*b5893f02SDimitry Andric if (!nextLEB(TargetTableLen, Bytes, Size, false))
217*b5893f02SDimitry Andric return MCDisassembler::Fail;
218*b5893f02SDimitry Andric for (int64_t I = 0; I < TargetTableLen; I++) {
219*b5893f02SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false))
220*b5893f02SDimitry Andric return MCDisassembler::Fail;
221*b5893f02SDimitry Andric }
222*b5893f02SDimitry Andric // Default case.
223*b5893f02SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false))
224*b5893f02SDimitry Andric return MCDisassembler::Fail;
225*b5893f02SDimitry Andric break;
226*b5893f02SDimitry Andric }
227*b5893f02SDimitry Andric case MCOI::OPERAND_REGISTER:
228*b5893f02SDimitry Andric // The tablegen header currently does not have any register operands since
229*b5893f02SDimitry Andric // we use only the stack (_S) instructions.
230*b5893f02SDimitry Andric // If you hit this that probably means a bad instruction definition in
231*b5893f02SDimitry Andric // tablegen.
232*b5893f02SDimitry Andric llvm_unreachable("Register operand in WebAssemblyDisassembler");
2334ba319b5SDimitry Andric default:
2344ba319b5SDimitry Andric llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
2354ba319b5SDimitry Andric }
2364ba319b5SDimitry Andric }
2374ba319b5SDimitry Andric return MCDisassembler::Success;
238444ed5c5SDimitry Andric }
239