11a427287SDan Gohman //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// 21a427287SDan Gohman // 31a427287SDan Gohman // The LLVM Compiler Infrastructure 41a427287SDan Gohman // 51a427287SDan Gohman // This file is distributed under the University of Illinois Open Source 61a427287SDan Gohman // License. See LICENSE.TXT for details. 71a427287SDan Gohman // 81a427287SDan Gohman //===----------------------------------------------------------------------===// 91a427287SDan Gohman /// 101a427287SDan Gohman /// \file 115f8f34e4SAdrian Prantl /// This file is part of the WebAssembly Disassembler. 121a427287SDan Gohman /// 131a427287SDan Gohman /// It contains code to translate the data produced by the decoder into 141a427287SDan Gohman /// MCInsts. 151a427287SDan Gohman /// 161a427287SDan Gohman //===----------------------------------------------------------------------===// 171a427287SDan Gohman 18*f3b762a0SWouter van Oortmerssen #include "InstPrinter/WebAssemblyInstPrinter.h" 191a427287SDan Gohman #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 201a427287SDan Gohman #include "llvm/MC/MCContext.h" 21c50b8907SBenjamin Kramer #include "llvm/MC/MCDisassembler/MCDisassembler.h" 2216c16827SSam Clegg #include "llvm/MC/MCFixedLenDisassembler.h" 231a427287SDan Gohman #include "llvm/MC/MCInst.h" 241a427287SDan Gohman #include "llvm/MC/MCInstrInfo.h" 251a427287SDan Gohman #include "llvm/MC/MCSubtargetInfo.h" 261a427287SDan Gohman #include "llvm/MC/MCSymbol.h" 271a427287SDan Gohman #include "llvm/Support/Endian.h" 2816c16827SSam Clegg #include "llvm/Support/LEB128.h" 291a427287SDan Gohman #include "llvm/Support/TargetRegistry.h" 3016c16827SSam Clegg 311a427287SDan Gohman using namespace llvm; 321a427287SDan Gohman 331a427287SDan Gohman #define DEBUG_TYPE "wasm-disassembler" 341a427287SDan Gohman 3516c16827SSam Clegg using DecodeStatus = MCDisassembler::DecodeStatus; 3616c16827SSam Clegg 3716c16827SSam Clegg #include "WebAssemblyGenDisassemblerTables.inc" 3816c16827SSam Clegg 391a427287SDan Gohman namespace { 4049550663SFangrui Song static constexpr int WebAssemblyInstructionTableSize = 256; 4149550663SFangrui Song 421a427287SDan Gohman class WebAssemblyDisassembler final : public MCDisassembler { 431a427287SDan Gohman std::unique_ptr<const MCInstrInfo> MCII; 441a427287SDan Gohman 451a427287SDan Gohman DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 461a427287SDan Gohman ArrayRef<uint8_t> Bytes, uint64_t Address, 471a427287SDan Gohman raw_ostream &VStream, 481a427287SDan Gohman raw_ostream &CStream) const override; 49*f3b762a0SWouter van Oortmerssen DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, 50*f3b762a0SWouter van Oortmerssen ArrayRef<uint8_t> Bytes, uint64_t Address, 51*f3b762a0SWouter van Oortmerssen raw_ostream &VStream, 52*f3b762a0SWouter van Oortmerssen raw_ostream &CStream) const override; 531a427287SDan Gohman 541a427287SDan Gohman public: 551a427287SDan Gohman WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 561a427287SDan Gohman std::unique_ptr<const MCInstrInfo> MCII) 571a427287SDan Gohman : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 581a427287SDan Gohman }; 591a427287SDan Gohman } // end anonymous namespace 601a427287SDan Gohman 611a427287SDan Gohman static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 621a427287SDan Gohman const MCSubtargetInfo &STI, 631a427287SDan Gohman MCContext &Ctx) { 641a427287SDan Gohman std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 651a427287SDan Gohman return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 661a427287SDan Gohman } 671a427287SDan Gohman 681a427287SDan Gohman extern "C" void LLVMInitializeWebAssemblyDisassembler() { 691a427287SDan Gohman // Register the disassembler for each target. 70f42454b9SMehdi Amini TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 711a427287SDan Gohman createWebAssemblyDisassembler); 72f42454b9SMehdi Amini TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 731a427287SDan Gohman createWebAssemblyDisassembler); 741a427287SDan Gohman } 751a427287SDan Gohman 76fc3163b6SThomas Lively static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 7716c16827SSam Clegg if (Size >= Bytes.size()) 7816c16827SSam Clegg return -1; 7916c16827SSam Clegg auto V = Bytes[Size]; 8016c16827SSam Clegg Size++; 8116c16827SSam Clegg return V; 8216c16827SSam Clegg } 8316c16827SSam Clegg 842faf0794SThomas Lively static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, 85*f3b762a0SWouter van Oortmerssen bool Signed) { 8616c16827SSam Clegg unsigned N = 0; 8716c16827SSam Clegg const char *Error = nullptr; 882faf0794SThomas Lively Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, 8916c16827SSam Clegg Bytes.data() + Bytes.size(), &Error) 902faf0794SThomas Lively : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, 912faf0794SThomas Lively Bytes.data() + Bytes.size(), 922faf0794SThomas Lively &Error)); 9316c16827SSam Clegg if (Error) 9416c16827SSam Clegg return false; 9516c16827SSam Clegg Size += N; 962faf0794SThomas Lively return true; 972faf0794SThomas Lively } 982faf0794SThomas Lively 992faf0794SThomas Lively static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 1002faf0794SThomas Lively ArrayRef<uint8_t> Bytes, bool Signed) { 1012faf0794SThomas Lively int64_t Val; 1022faf0794SThomas Lively if (!nextLEB(Val, Bytes, Size, Signed)) 1032faf0794SThomas Lively return false; 10416c16827SSam Clegg MI.addOperand(MCOperand::createImm(Val)); 10516c16827SSam Clegg return true; 10616c16827SSam Clegg } 10716c16827SSam Clegg 10816c16827SSam Clegg template <typename T> 10922442924SThomas Lively bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 11016c16827SSam Clegg if (Size + sizeof(T) > Bytes.size()) 11116c16827SSam Clegg return false; 11216c16827SSam Clegg T Val; 11316c16827SSam Clegg memcpy(&Val, Bytes.data() + Size, sizeof(T)); 11416c16827SSam Clegg support::endian::byte_swap<T, support::endianness::little>(Val); 11516c16827SSam Clegg Size += sizeof(T); 11622442924SThomas Lively if (std::is_floating_point<T>::value) { 11716c16827SSam Clegg MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val))); 11822442924SThomas Lively } else { 11922442924SThomas Lively MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 12022442924SThomas Lively } 12116c16827SSam Clegg return true; 12216c16827SSam Clegg } 12316c16827SSam Clegg 124*f3b762a0SWouter van Oortmerssen MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart( 125*f3b762a0SWouter van Oortmerssen StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, 126*f3b762a0SWouter van Oortmerssen raw_ostream &VStream, raw_ostream &CStream) const { 127*f3b762a0SWouter van Oortmerssen Size = 0; 128*f3b762a0SWouter van Oortmerssen if (Address == 0) { 129*f3b762a0SWouter van Oortmerssen // Start of a code section: we're parsing only the function count. 130*f3b762a0SWouter van Oortmerssen int64_t FunctionCount; 131*f3b762a0SWouter van Oortmerssen if (!nextLEB(FunctionCount, Bytes, Size, false)) 132*f3b762a0SWouter van Oortmerssen return MCDisassembler::Fail; 133*f3b762a0SWouter van Oortmerssen outs() << " # " << FunctionCount << " functions in section."; 134*f3b762a0SWouter van Oortmerssen } else { 135*f3b762a0SWouter van Oortmerssen // Parse the start of a single function. 136*f3b762a0SWouter van Oortmerssen int64_t BodySize, LocalEntryCount; 137*f3b762a0SWouter van Oortmerssen if (!nextLEB(BodySize, Bytes, Size, false) || 138*f3b762a0SWouter van Oortmerssen !nextLEB(LocalEntryCount, Bytes, Size, false)) 139*f3b762a0SWouter van Oortmerssen return MCDisassembler::Fail; 140*f3b762a0SWouter van Oortmerssen if (LocalEntryCount) { 141*f3b762a0SWouter van Oortmerssen outs() << " .local "; 142*f3b762a0SWouter van Oortmerssen for (int64_t I = 0; I < LocalEntryCount; I++) { 143*f3b762a0SWouter van Oortmerssen int64_t Count, Type; 144*f3b762a0SWouter van Oortmerssen if (!nextLEB(Count, Bytes, Size, false) || 145*f3b762a0SWouter van Oortmerssen !nextLEB(Type, Bytes, Size, false)) 146*f3b762a0SWouter van Oortmerssen return MCDisassembler::Fail; 147*f3b762a0SWouter van Oortmerssen for (int64_t J = 0; J < Count; J++) { 148*f3b762a0SWouter van Oortmerssen if (I || J) 149*f3b762a0SWouter van Oortmerssen outs() << ", "; 150*f3b762a0SWouter van Oortmerssen outs() << WebAssembly::anyTypeToString(Type); 151*f3b762a0SWouter van Oortmerssen } 152*f3b762a0SWouter van Oortmerssen } 153*f3b762a0SWouter van Oortmerssen } 154*f3b762a0SWouter van Oortmerssen } 155*f3b762a0SWouter van Oortmerssen outs() << "\n"; 156*f3b762a0SWouter van Oortmerssen return MCDisassembler::Success; 157*f3b762a0SWouter van Oortmerssen } 158*f3b762a0SWouter van Oortmerssen 1591a427287SDan Gohman MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 1601a427287SDan Gohman MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, 16116c16827SSam Clegg raw_ostream & /*OS*/, raw_ostream &CS) const { 16216c16827SSam Clegg CommentStream = &CS; 16316c16827SSam Clegg Size = 0; 164fc3163b6SThomas Lively int Opc = nextByte(Bytes, Size); 16516c16827SSam Clegg if (Opc < 0) 1661a427287SDan Gohman return MCDisassembler::Fail; 16716c16827SSam Clegg const auto *WasmInst = &InstructionTable0[Opc]; 16816c16827SSam Clegg // If this is a prefix byte, indirect to another table. 16916c16827SSam Clegg if (WasmInst->ET == ET_Prefix) { 17016c16827SSam Clegg WasmInst = nullptr; 17116c16827SSam Clegg // Linear search, so far only 2 entries. 17216c16827SSam Clegg for (auto PT = PrefixTable; PT->Table; PT++) { 17316c16827SSam Clegg if (PT->Prefix == Opc) { 17416c16827SSam Clegg WasmInst = PT->Table; 17516c16827SSam Clegg break; 17616c16827SSam Clegg } 17716c16827SSam Clegg } 17816c16827SSam Clegg if (!WasmInst) 17916c16827SSam Clegg return MCDisassembler::Fail; 1802faf0794SThomas Lively int64_t PrefixedOpc; 181*f3b762a0SWouter van Oortmerssen if (!nextLEB(PrefixedOpc, Bytes, Size, false)) 18216c16827SSam Clegg return MCDisassembler::Fail; 1832faf0794SThomas Lively if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) 1842faf0794SThomas Lively return MCDisassembler::Fail; 1852faf0794SThomas Lively WasmInst += PrefixedOpc; 18616c16827SSam Clegg } 18716c16827SSam Clegg if (WasmInst->ET == ET_Unused) 18816c16827SSam Clegg return MCDisassembler::Fail; 18916c16827SSam Clegg // At this point we must have a valid instruction to decode. 19016c16827SSam Clegg assert(WasmInst->ET == ET_Instruction); 19116c16827SSam Clegg MI.setOpcode(WasmInst->Opcode); 19216c16827SSam Clegg // Parse any operands. 19316c16827SSam Clegg for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 194820c6263SWouter van Oortmerssen auto OT = OperandTable[WasmInst->OperandStart + OPI]; 195820c6263SWouter van Oortmerssen switch (OT) { 19616c16827SSam Clegg // ULEB operands: 19716c16827SSam Clegg case WebAssembly::OPERAND_BASIC_BLOCK: 19816c16827SSam Clegg case WebAssembly::OPERAND_LOCAL: 19916c16827SSam Clegg case WebAssembly::OPERAND_GLOBAL: 20016c16827SSam Clegg case WebAssembly::OPERAND_FUNCTION32: 20116c16827SSam Clegg case WebAssembly::OPERAND_OFFSET32: 20216c16827SSam Clegg case WebAssembly::OPERAND_P2ALIGN: 20316c16827SSam Clegg case WebAssembly::OPERAND_TYPEINDEX: 20416c16827SSam Clegg case MCOI::OPERAND_IMMEDIATE: { 20516c16827SSam Clegg if (!parseLEBImmediate(MI, Size, Bytes, false)) 20616c16827SSam Clegg return MCDisassembler::Fail; 20716c16827SSam Clegg break; 20816c16827SSam Clegg } 20916c16827SSam Clegg // SLEB operands: 21016c16827SSam Clegg case WebAssembly::OPERAND_I32IMM: 211ad72f685SWouter van Oortmerssen case WebAssembly::OPERAND_I64IMM: { 21216c16827SSam Clegg if (!parseLEBImmediate(MI, Size, Bytes, true)) 21316c16827SSam Clegg return MCDisassembler::Fail; 21416c16827SSam Clegg break; 21516c16827SSam Clegg } 216ad72f685SWouter van Oortmerssen // block_type operands (uint8_t). 217ad72f685SWouter van Oortmerssen case WebAssembly::OPERAND_SIGNATURE: { 218ad72f685SWouter van Oortmerssen if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 219ad72f685SWouter van Oortmerssen return MCDisassembler::Fail; 220ad72f685SWouter van Oortmerssen break; 221ad72f685SWouter van Oortmerssen } 22216c16827SSam Clegg // FP operands. 22316c16827SSam Clegg case WebAssembly::OPERAND_F32IMM: { 22422442924SThomas Lively if (!parseImmediate<float>(MI, Size, Bytes)) 22516c16827SSam Clegg return MCDisassembler::Fail; 22616c16827SSam Clegg break; 22716c16827SSam Clegg } 22816c16827SSam Clegg case WebAssembly::OPERAND_F64IMM: { 22922442924SThomas Lively if (!parseImmediate<double>(MI, Size, Bytes)) 23022442924SThomas Lively return MCDisassembler::Fail; 23122442924SThomas Lively break; 23222442924SThomas Lively } 23322442924SThomas Lively // Vector lane operands (not LEB encoded). 23422442924SThomas Lively case WebAssembly::OPERAND_VEC_I8IMM: { 23522442924SThomas Lively if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 23622442924SThomas Lively return MCDisassembler::Fail; 23722442924SThomas Lively break; 23822442924SThomas Lively } 23922442924SThomas Lively case WebAssembly::OPERAND_VEC_I16IMM: { 24022442924SThomas Lively if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 24122442924SThomas Lively return MCDisassembler::Fail; 24222442924SThomas Lively break; 24322442924SThomas Lively } 24422442924SThomas Lively case WebAssembly::OPERAND_VEC_I32IMM: { 24522442924SThomas Lively if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 24622442924SThomas Lively return MCDisassembler::Fail; 24722442924SThomas Lively break; 24822442924SThomas Lively } 24922442924SThomas Lively case WebAssembly::OPERAND_VEC_I64IMM: { 25022442924SThomas Lively if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 25116c16827SSam Clegg return MCDisassembler::Fail; 25216c16827SSam Clegg break; 25316c16827SSam Clegg } 254820c6263SWouter van Oortmerssen case WebAssembly::OPERAND_BRLIST: { 255820c6263SWouter van Oortmerssen int64_t TargetTableLen; 256820c6263SWouter van Oortmerssen if (!nextLEB(TargetTableLen, Bytes, Size, false)) 257820c6263SWouter van Oortmerssen return MCDisassembler::Fail; 258820c6263SWouter van Oortmerssen for (int64_t I = 0; I < TargetTableLen; I++) { 259820c6263SWouter van Oortmerssen if (!parseLEBImmediate(MI, Size, Bytes, false)) 260820c6263SWouter van Oortmerssen return MCDisassembler::Fail; 261820c6263SWouter van Oortmerssen } 262820c6263SWouter van Oortmerssen // Default case. 263820c6263SWouter van Oortmerssen if (!parseLEBImmediate(MI, Size, Bytes, false)) 264820c6263SWouter van Oortmerssen return MCDisassembler::Fail; 265820c6263SWouter van Oortmerssen break; 266820c6263SWouter van Oortmerssen } 267a733d08dSWouter van Oortmerssen case MCOI::OPERAND_REGISTER: 268a733d08dSWouter van Oortmerssen // The tablegen header currently does not have any register operands since 269a733d08dSWouter van Oortmerssen // we use only the stack (_S) instructions. 270a733d08dSWouter van Oortmerssen // If you hit this that probably means a bad instruction definition in 271a733d08dSWouter van Oortmerssen // tablegen. 272a733d08dSWouter van Oortmerssen llvm_unreachable("Register operand in WebAssemblyDisassembler"); 27316c16827SSam Clegg default: 27416c16827SSam Clegg llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 27516c16827SSam Clegg } 27616c16827SSam Clegg } 27716c16827SSam Clegg return MCDisassembler::Success; 2781a427287SDan Gohman } 279