//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is part of the WebAssembly Disassembler.
///
/// It contains code to translate the data produced by the decoder into
/// MCInsts.
///
//===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric
170b57cec5SDimitry Andric #include "TargetInfo/WebAssemblyTargetInfo.h"
18*5f7ddb14SDimitry Andric #include "Utils/WebAssemblyTypeUtilities.h"
190b57cec5SDimitry Andric #include "llvm/MC/MCContext.h"
200b57cec5SDimitry Andric #include "llvm/MC/MCDisassembler/MCDisassembler.h"
210b57cec5SDimitry Andric #include "llvm/MC/MCFixedLenDisassembler.h"
220b57cec5SDimitry Andric #include "llvm/MC/MCInst.h"
230b57cec5SDimitry Andric #include "llvm/MC/MCInstrInfo.h"
240b57cec5SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
250b57cec5SDimitry Andric #include "llvm/MC/MCSymbol.h"
268bcb0991SDimitry Andric #include "llvm/MC/MCSymbolWasm.h"
270b57cec5SDimitry Andric #include "llvm/Support/Endian.h"
280b57cec5SDimitry Andric #include "llvm/Support/LEB128.h"
290b57cec5SDimitry Andric #include "llvm/Support/TargetRegistry.h"
300b57cec5SDimitry Andric
310b57cec5SDimitry Andric using namespace llvm;
320b57cec5SDimitry Andric
330b57cec5SDimitry Andric #define DEBUG_TYPE "wasm-disassembler"
340b57cec5SDimitry Andric
350b57cec5SDimitry Andric using DecodeStatus = MCDisassembler::DecodeStatus;
360b57cec5SDimitry Andric
370b57cec5SDimitry Andric #include "WebAssemblyGenDisassemblerTables.inc"
380b57cec5SDimitry Andric
390b57cec5SDimitry Andric namespace {
400b57cec5SDimitry Andric static constexpr int WebAssemblyInstructionTableSize = 256;
410b57cec5SDimitry Andric
420b57cec5SDimitry Andric class WebAssemblyDisassembler final : public MCDisassembler {
430b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII;
440b57cec5SDimitry Andric
450b57cec5SDimitry Andric DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
460b57cec5SDimitry Andric ArrayRef<uint8_t> Bytes, uint64_t Address,
470b57cec5SDimitry Andric raw_ostream &CStream) const override;
485ffd83dbSDimitry Andric Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
495ffd83dbSDimitry Andric ArrayRef<uint8_t> Bytes,
505ffd83dbSDimitry Andric uint64_t Address,
510b57cec5SDimitry Andric raw_ostream &CStream) const override;
520b57cec5SDimitry Andric
530b57cec5SDimitry Andric public:
WebAssemblyDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx,std::unique_ptr<const MCInstrInfo> MCII)540b57cec5SDimitry Andric WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
550b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII)
560b57cec5SDimitry Andric : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
570b57cec5SDimitry Andric };
580b57cec5SDimitry Andric } // end anonymous namespace
590b57cec5SDimitry Andric
createWebAssemblyDisassembler(const Target & T,const MCSubtargetInfo & STI,MCContext & Ctx)600b57cec5SDimitry Andric static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
610b57cec5SDimitry Andric const MCSubtargetInfo &STI,
620b57cec5SDimitry Andric MCContext &Ctx) {
630b57cec5SDimitry Andric std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
640b57cec5SDimitry Andric return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
650b57cec5SDimitry Andric }
660b57cec5SDimitry Andric
67480093f4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeWebAssemblyDisassembler()68480093f4SDimitry Andric LLVMInitializeWebAssemblyDisassembler() {
690b57cec5SDimitry Andric // Register the disassembler for each target.
700b57cec5SDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
710b57cec5SDimitry Andric createWebAssemblyDisassembler);
720b57cec5SDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
730b57cec5SDimitry Andric createWebAssemblyDisassembler);
740b57cec5SDimitry Andric }
750b57cec5SDimitry Andric
nextByte(ArrayRef<uint8_t> Bytes,uint64_t & Size)760b57cec5SDimitry Andric static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
770b57cec5SDimitry Andric if (Size >= Bytes.size())
780b57cec5SDimitry Andric return -1;
790b57cec5SDimitry Andric auto V = Bytes[Size];
800b57cec5SDimitry Andric Size++;
810b57cec5SDimitry Andric return V;
820b57cec5SDimitry Andric }
830b57cec5SDimitry Andric
nextLEB(int64_t & Val,ArrayRef<uint8_t> Bytes,uint64_t & Size,bool Signed)840b57cec5SDimitry Andric static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
850b57cec5SDimitry Andric bool Signed) {
860b57cec5SDimitry Andric unsigned N = 0;
870b57cec5SDimitry Andric const char *Error = nullptr;
880b57cec5SDimitry Andric Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
890b57cec5SDimitry Andric Bytes.data() + Bytes.size(), &Error)
900b57cec5SDimitry Andric : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
910b57cec5SDimitry Andric Bytes.data() + Bytes.size(),
920b57cec5SDimitry Andric &Error));
930b57cec5SDimitry Andric if (Error)
940b57cec5SDimitry Andric return false;
950b57cec5SDimitry Andric Size += N;
960b57cec5SDimitry Andric return true;
970b57cec5SDimitry Andric }
980b57cec5SDimitry Andric
parseLEBImmediate(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes,bool Signed)990b57cec5SDimitry Andric static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
1000b57cec5SDimitry Andric ArrayRef<uint8_t> Bytes, bool Signed) {
1010b57cec5SDimitry Andric int64_t Val;
1020b57cec5SDimitry Andric if (!nextLEB(Val, Bytes, Size, Signed))
1030b57cec5SDimitry Andric return false;
1040b57cec5SDimitry Andric MI.addOperand(MCOperand::createImm(Val));
1050b57cec5SDimitry Andric return true;
1060b57cec5SDimitry Andric }
1070b57cec5SDimitry Andric
1080b57cec5SDimitry Andric template <typename T>
parseImmediate(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes)1090b57cec5SDimitry Andric bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
1100b57cec5SDimitry Andric if (Size + sizeof(T) > Bytes.size())
1110b57cec5SDimitry Andric return false;
1120b57cec5SDimitry Andric T Val = support::endian::read<T, support::endianness::little, 1>(
1130b57cec5SDimitry Andric Bytes.data() + Size);
1140b57cec5SDimitry Andric Size += sizeof(T);
1150b57cec5SDimitry Andric if (std::is_floating_point<T>::value) {
116*5f7ddb14SDimitry Andric MI.addOperand(
117*5f7ddb14SDimitry Andric MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val))));
1180b57cec5SDimitry Andric } else {
1190b57cec5SDimitry Andric MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
1200b57cec5SDimitry Andric }
1210b57cec5SDimitry Andric return true;
1220b57cec5SDimitry Andric }
1230b57cec5SDimitry Andric
onSymbolStart(SymbolInfoTy & Symbol,uint64_t & Size,ArrayRef<uint8_t> Bytes,uint64_t Address,raw_ostream & CStream) const1245ffd83dbSDimitry Andric Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
1255ffd83dbSDimitry Andric SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
1265ffd83dbSDimitry Andric uint64_t Address, raw_ostream &CStream) const {
1270b57cec5SDimitry Andric Size = 0;
1280b57cec5SDimitry Andric if (Address == 0) {
1290b57cec5SDimitry Andric // Start of a code section: we're parsing only the function count.
1300b57cec5SDimitry Andric int64_t FunctionCount;
1310b57cec5SDimitry Andric if (!nextLEB(FunctionCount, Bytes, Size, false))
1325ffd83dbSDimitry Andric return None;
1330b57cec5SDimitry Andric outs() << " # " << FunctionCount << " functions in section.";
1340b57cec5SDimitry Andric } else {
1350b57cec5SDimitry Andric // Parse the start of a single function.
1360b57cec5SDimitry Andric int64_t BodySize, LocalEntryCount;
1370b57cec5SDimitry Andric if (!nextLEB(BodySize, Bytes, Size, false) ||
1380b57cec5SDimitry Andric !nextLEB(LocalEntryCount, Bytes, Size, false))
1395ffd83dbSDimitry Andric return None;
1400b57cec5SDimitry Andric if (LocalEntryCount) {
1410b57cec5SDimitry Andric outs() << " .local ";
1420b57cec5SDimitry Andric for (int64_t I = 0; I < LocalEntryCount; I++) {
1430b57cec5SDimitry Andric int64_t Count, Type;
1440b57cec5SDimitry Andric if (!nextLEB(Count, Bytes, Size, false) ||
1450b57cec5SDimitry Andric !nextLEB(Type, Bytes, Size, false))
1465ffd83dbSDimitry Andric return None;
1470b57cec5SDimitry Andric for (int64_t J = 0; J < Count; J++) {
1480b57cec5SDimitry Andric if (I || J)
1490b57cec5SDimitry Andric outs() << ", ";
1500b57cec5SDimitry Andric outs() << WebAssembly::anyTypeToString(Type);
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric }
1530b57cec5SDimitry Andric }
1540b57cec5SDimitry Andric }
1550b57cec5SDimitry Andric outs() << "\n";
1560b57cec5SDimitry Andric return MCDisassembler::Success;
1570b57cec5SDimitry Andric }
1580b57cec5SDimitry Andric
getInstruction(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes,uint64_t,raw_ostream & CS) const1590b57cec5SDimitry Andric MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
1600b57cec5SDimitry Andric MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
161480093f4SDimitry Andric raw_ostream &CS) const {
1620b57cec5SDimitry Andric CommentStream = &CS;
1630b57cec5SDimitry Andric Size = 0;
1640b57cec5SDimitry Andric int Opc = nextByte(Bytes, Size);
1650b57cec5SDimitry Andric if (Opc < 0)
1660b57cec5SDimitry Andric return MCDisassembler::Fail;
1670b57cec5SDimitry Andric const auto *WasmInst = &InstructionTable0[Opc];
1680b57cec5SDimitry Andric // If this is a prefix byte, indirect to another table.
1690b57cec5SDimitry Andric if (WasmInst->ET == ET_Prefix) {
1700b57cec5SDimitry Andric WasmInst = nullptr;
1710b57cec5SDimitry Andric // Linear search, so far only 2 entries.
1720b57cec5SDimitry Andric for (auto PT = PrefixTable; PT->Table; PT++) {
1730b57cec5SDimitry Andric if (PT->Prefix == Opc) {
1740b57cec5SDimitry Andric WasmInst = PT->Table;
1750b57cec5SDimitry Andric break;
1760b57cec5SDimitry Andric }
1770b57cec5SDimitry Andric }
1780b57cec5SDimitry Andric if (!WasmInst)
1790b57cec5SDimitry Andric return MCDisassembler::Fail;
1800b57cec5SDimitry Andric int64_t PrefixedOpc;
1810b57cec5SDimitry Andric if (!nextLEB(PrefixedOpc, Bytes, Size, false))
1820b57cec5SDimitry Andric return MCDisassembler::Fail;
1830b57cec5SDimitry Andric if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
1840b57cec5SDimitry Andric return MCDisassembler::Fail;
1850b57cec5SDimitry Andric WasmInst += PrefixedOpc;
1860b57cec5SDimitry Andric }
1870b57cec5SDimitry Andric if (WasmInst->ET == ET_Unused)
1880b57cec5SDimitry Andric return MCDisassembler::Fail;
1890b57cec5SDimitry Andric // At this point we must have a valid instruction to decode.
1900b57cec5SDimitry Andric assert(WasmInst->ET == ET_Instruction);
1910b57cec5SDimitry Andric MI.setOpcode(WasmInst->Opcode);
1920b57cec5SDimitry Andric // Parse any operands.
1930b57cec5SDimitry Andric for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
1940b57cec5SDimitry Andric auto OT = OperandTable[WasmInst->OperandStart + OPI];
1950b57cec5SDimitry Andric switch (OT) {
1960b57cec5SDimitry Andric // ULEB operands:
1970b57cec5SDimitry Andric case WebAssembly::OPERAND_BASIC_BLOCK:
1980b57cec5SDimitry Andric case WebAssembly::OPERAND_LOCAL:
1990b57cec5SDimitry Andric case WebAssembly::OPERAND_GLOBAL:
2000b57cec5SDimitry Andric case WebAssembly::OPERAND_FUNCTION32:
201af732203SDimitry Andric case WebAssembly::OPERAND_TABLE:
2020b57cec5SDimitry Andric case WebAssembly::OPERAND_OFFSET32:
2035ffd83dbSDimitry Andric case WebAssembly::OPERAND_OFFSET64:
2040b57cec5SDimitry Andric case WebAssembly::OPERAND_P2ALIGN:
2050b57cec5SDimitry Andric case WebAssembly::OPERAND_TYPEINDEX:
206*5f7ddb14SDimitry Andric case WebAssembly::OPERAND_TAG:
2070b57cec5SDimitry Andric case MCOI::OPERAND_IMMEDIATE: {
2080b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false))
2090b57cec5SDimitry Andric return MCDisassembler::Fail;
2100b57cec5SDimitry Andric break;
2110b57cec5SDimitry Andric }
2120b57cec5SDimitry Andric // SLEB operands:
2130b57cec5SDimitry Andric case WebAssembly::OPERAND_I32IMM:
2140b57cec5SDimitry Andric case WebAssembly::OPERAND_I64IMM: {
2150b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, true))
2160b57cec5SDimitry Andric return MCDisassembler::Fail;
2170b57cec5SDimitry Andric break;
2180b57cec5SDimitry Andric }
2198bcb0991SDimitry Andric // block_type operands:
2200b57cec5SDimitry Andric case WebAssembly::OPERAND_SIGNATURE: {
2218bcb0991SDimitry Andric int64_t Val;
2228bcb0991SDimitry Andric uint64_t PrevSize = Size;
2238bcb0991SDimitry Andric if (!nextLEB(Val, Bytes, Size, true))
2240b57cec5SDimitry Andric return MCDisassembler::Fail;
2258bcb0991SDimitry Andric if (Val < 0) {
2268bcb0991SDimitry Andric // Negative values are single septet value types or empty types
2278bcb0991SDimitry Andric if (Size != PrevSize + 1) {
2288bcb0991SDimitry Andric MI.addOperand(
2298bcb0991SDimitry Andric MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
2308bcb0991SDimitry Andric } else {
2318bcb0991SDimitry Andric MI.addOperand(MCOperand::createImm(Val & 0x7f));
2328bcb0991SDimitry Andric }
2338bcb0991SDimitry Andric } else {
2348bcb0991SDimitry Andric // We don't have access to the signature, so create a symbol without one
2358bcb0991SDimitry Andric MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
2368bcb0991SDimitry Andric auto *WasmSym = cast<MCSymbolWasm>(Sym);
2378bcb0991SDimitry Andric WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
2388bcb0991SDimitry Andric const MCExpr *Expr = MCSymbolRefExpr::create(
2398bcb0991SDimitry Andric WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext());
2408bcb0991SDimitry Andric MI.addOperand(MCOperand::createExpr(Expr));
2418bcb0991SDimitry Andric }
2420b57cec5SDimitry Andric break;
2430b57cec5SDimitry Andric }
244af732203SDimitry Andric // heap_type operands, for e.g. ref.null:
245af732203SDimitry Andric case WebAssembly::OPERAND_HEAPTYPE: {
246af732203SDimitry Andric int64_t Val;
247af732203SDimitry Andric uint64_t PrevSize = Size;
248af732203SDimitry Andric if (!nextLEB(Val, Bytes, Size, true))
249af732203SDimitry Andric return MCDisassembler::Fail;
250af732203SDimitry Andric if (Val < 0 && Size == PrevSize + 1) {
251af732203SDimitry Andric // The HeapType encoding is like BlockType, in that encodings that
252af732203SDimitry Andric // decode as negative values indicate ValTypes. In practice we expect
253af732203SDimitry Andric // either wasm::ValType::EXTERNREF or wasm::ValType::FUNCREF here.
254af732203SDimitry Andric //
255af732203SDimitry Andric // The positive SLEB values are reserved for future expansion and are
256af732203SDimitry Andric // expected to be type indices in the typed function references
257af732203SDimitry Andric // proposal, and should disassemble as MCSymbolRefExpr as in BlockType
258af732203SDimitry Andric // above.
259af732203SDimitry Andric MI.addOperand(MCOperand::createImm(Val & 0x7f));
260af732203SDimitry Andric } else {
261af732203SDimitry Andric MI.addOperand(
262af732203SDimitry Andric MCOperand::createImm(int64_t(WebAssembly::HeapType::Invalid)));
263af732203SDimitry Andric }
264af732203SDimitry Andric break;
265af732203SDimitry Andric }
2660b57cec5SDimitry Andric // FP operands.
2670b57cec5SDimitry Andric case WebAssembly::OPERAND_F32IMM: {
2680b57cec5SDimitry Andric if (!parseImmediate<float>(MI, Size, Bytes))
2690b57cec5SDimitry Andric return MCDisassembler::Fail;
2700b57cec5SDimitry Andric break;
2710b57cec5SDimitry Andric }
2720b57cec5SDimitry Andric case WebAssembly::OPERAND_F64IMM: {
2730b57cec5SDimitry Andric if (!parseImmediate<double>(MI, Size, Bytes))
2740b57cec5SDimitry Andric return MCDisassembler::Fail;
2750b57cec5SDimitry Andric break;
2760b57cec5SDimitry Andric }
2770b57cec5SDimitry Andric // Vector lane operands (not LEB encoded).
2780b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I8IMM: {
2790b57cec5SDimitry Andric if (!parseImmediate<uint8_t>(MI, Size, Bytes))
2800b57cec5SDimitry Andric return MCDisassembler::Fail;
2810b57cec5SDimitry Andric break;
2820b57cec5SDimitry Andric }
2830b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I16IMM: {
2840b57cec5SDimitry Andric if (!parseImmediate<uint16_t>(MI, Size, Bytes))
2850b57cec5SDimitry Andric return MCDisassembler::Fail;
2860b57cec5SDimitry Andric break;
2870b57cec5SDimitry Andric }
2880b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I32IMM: {
2890b57cec5SDimitry Andric if (!parseImmediate<uint32_t>(MI, Size, Bytes))
2900b57cec5SDimitry Andric return MCDisassembler::Fail;
2910b57cec5SDimitry Andric break;
2920b57cec5SDimitry Andric }
2930b57cec5SDimitry Andric case WebAssembly::OPERAND_VEC_I64IMM: {
2940b57cec5SDimitry Andric if (!parseImmediate<uint64_t>(MI, Size, Bytes))
2950b57cec5SDimitry Andric return MCDisassembler::Fail;
2960b57cec5SDimitry Andric break;
2970b57cec5SDimitry Andric }
2980b57cec5SDimitry Andric case WebAssembly::OPERAND_BRLIST: {
2990b57cec5SDimitry Andric int64_t TargetTableLen;
3000b57cec5SDimitry Andric if (!nextLEB(TargetTableLen, Bytes, Size, false))
3010b57cec5SDimitry Andric return MCDisassembler::Fail;
3020b57cec5SDimitry Andric for (int64_t I = 0; I < TargetTableLen; I++) {
3030b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false))
3040b57cec5SDimitry Andric return MCDisassembler::Fail;
3050b57cec5SDimitry Andric }
3060b57cec5SDimitry Andric // Default case.
3070b57cec5SDimitry Andric if (!parseLEBImmediate(MI, Size, Bytes, false))
3080b57cec5SDimitry Andric return MCDisassembler::Fail;
3090b57cec5SDimitry Andric break;
3100b57cec5SDimitry Andric }
3110b57cec5SDimitry Andric case MCOI::OPERAND_REGISTER:
3120b57cec5SDimitry Andric // The tablegen header currently does not have any register operands since
3130b57cec5SDimitry Andric // we use only the stack (_S) instructions.
3140b57cec5SDimitry Andric // If you hit this that probably means a bad instruction definition in
3150b57cec5SDimitry Andric // tablegen.
3160b57cec5SDimitry Andric llvm_unreachable("Register operand in WebAssemblyDisassembler");
3170b57cec5SDimitry Andric default:
3180b57cec5SDimitry Andric llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
3190b57cec5SDimitry Andric }
3200b57cec5SDimitry Andric }
3210b57cec5SDimitry Andric return MCDisassembler::Success;
3220b57cec5SDimitry Andric }
323