//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is part of the WebAssembly Disassembler.
///
/// It contains code to translate the data produced by the decoder into
/// MCInsts.
///
//===----------------------------------------------------------------------===//
161a427287SDan Gohman 
17a263aa25SDavid L. Jones #include "MCTargetDesc/WebAssemblyInstPrinter.h"
181a427287SDan Gohman #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
19c6c42137SRichard Trieu #include "TargetInfo/WebAssemblyTargetInfo.h"
201a427287SDan Gohman #include "llvm/MC/MCContext.h"
21c50b8907SBenjamin Kramer #include "llvm/MC/MCDisassembler/MCDisassembler.h"
2216c16827SSam Clegg #include "llvm/MC/MCFixedLenDisassembler.h"
231a427287SDan Gohman #include "llvm/MC/MCInst.h"
241a427287SDan Gohman #include "llvm/MC/MCInstrInfo.h"
251a427287SDan Gohman #include "llvm/MC/MCSubtargetInfo.h"
261a427287SDan Gohman #include "llvm/MC/MCSymbol.h"
272cb27072SThomas Lively #include "llvm/MC/MCSymbolWasm.h"
281a427287SDan Gohman #include "llvm/Support/Endian.h"
2916c16827SSam Clegg #include "llvm/Support/LEB128.h"
301a427287SDan Gohman #include "llvm/Support/TargetRegistry.h"
3116c16827SSam Clegg 
321a427287SDan Gohman using namespace llvm;
331a427287SDan Gohman 
341a427287SDan Gohman #define DEBUG_TYPE "wasm-disassembler"
351a427287SDan Gohman 
3616c16827SSam Clegg using DecodeStatus = MCDisassembler::DecodeStatus;
3716c16827SSam Clegg 
3816c16827SSam Clegg #include "WebAssemblyGenDisassemblerTables.inc"
3916c16827SSam Clegg 
401a427287SDan Gohman namespace {
4149550663SFangrui Song static constexpr int WebAssemblyInstructionTableSize = 256;
4249550663SFangrui Song 
431a427287SDan Gohman class WebAssemblyDisassembler final : public MCDisassembler {
441a427287SDan Gohman   std::unique_ptr<const MCInstrInfo> MCII;
451a427287SDan Gohman 
461a427287SDan Gohman   DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
471a427287SDan Gohman                               ArrayRef<uint8_t> Bytes, uint64_t Address,
481a427287SDan Gohman                               raw_ostream &CStream) const override;
495bd33de9SRonak Chauhan   Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
50480a16d5SRonak Chauhan                                        ArrayRef<uint8_t> Bytes,
51480a16d5SRonak Chauhan                                        uint64_t Address,
52f3b762a0SWouter van Oortmerssen                                        raw_ostream &CStream) const override;
531a427287SDan Gohman 
541a427287SDan Gohman public:
551a427287SDan Gohman   WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
561a427287SDan Gohman                           std::unique_ptr<const MCInstrInfo> MCII)
571a427287SDan Gohman       : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
581a427287SDan Gohman };
591a427287SDan Gohman } // end anonymous namespace
601a427287SDan Gohman 
611a427287SDan Gohman static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
621a427287SDan Gohman                                                      const MCSubtargetInfo &STI,
631a427287SDan Gohman                                                      MCContext &Ctx) {
641a427287SDan Gohman   std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
651a427287SDan Gohman   return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
661a427287SDan Gohman }
671a427287SDan Gohman 
680dbcb363STom Stellard extern "C" LLVM_EXTERNAL_VISIBILITY void
690dbcb363STom Stellard LLVMInitializeWebAssemblyDisassembler() {
701a427287SDan Gohman   // Register the disassembler for each target.
71f42454b9SMehdi Amini   TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
721a427287SDan Gohman                                          createWebAssemblyDisassembler);
73f42454b9SMehdi Amini   TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
741a427287SDan Gohman                                          createWebAssemblyDisassembler);
751a427287SDan Gohman }
761a427287SDan Gohman 
77fc3163b6SThomas Lively static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
7816c16827SSam Clegg   if (Size >= Bytes.size())
7916c16827SSam Clegg     return -1;
8016c16827SSam Clegg   auto V = Bytes[Size];
8116c16827SSam Clegg   Size++;
8216c16827SSam Clegg   return V;
8316c16827SSam Clegg }
8416c16827SSam Clegg 
852faf0794SThomas Lively static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
86f3b762a0SWouter van Oortmerssen                     bool Signed) {
8716c16827SSam Clegg   unsigned N = 0;
8816c16827SSam Clegg   const char *Error = nullptr;
892faf0794SThomas Lively   Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
9016c16827SSam Clegg                                Bytes.data() + Bytes.size(), &Error)
912faf0794SThomas Lively                : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
922faf0794SThomas Lively                                                     Bytes.data() + Bytes.size(),
932faf0794SThomas Lively                                                     &Error));
9416c16827SSam Clegg   if (Error)
9516c16827SSam Clegg     return false;
9616c16827SSam Clegg   Size += N;
972faf0794SThomas Lively   return true;
982faf0794SThomas Lively }
992faf0794SThomas Lively 
1002faf0794SThomas Lively static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
1012faf0794SThomas Lively                               ArrayRef<uint8_t> Bytes, bool Signed) {
1022faf0794SThomas Lively   int64_t Val;
1032faf0794SThomas Lively   if (!nextLEB(Val, Bytes, Size, Signed))
1042faf0794SThomas Lively     return false;
10516c16827SSam Clegg   MI.addOperand(MCOperand::createImm(Val));
10616c16827SSam Clegg   return true;
10716c16827SSam Clegg }
10816c16827SSam Clegg 
10916c16827SSam Clegg template <typename T>
11022442924SThomas Lively bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
11116c16827SSam Clegg   if (Size + sizeof(T) > Bytes.size())
11216c16827SSam Clegg     return false;
113df6770f0SHeejin Ahn   T Val = support::endian::read<T, support::endianness::little, 1>(
1140d9f3f7fSWouter van Oortmerssen       Bytes.data() + Size);
11516c16827SSam Clegg   Size += sizeof(T);
11622442924SThomas Lively   if (std::is_floating_point<T>::value) {
117*e5269da9SFangrui Song     MI.addOperand(
118*e5269da9SFangrui Song         MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val))));
11922442924SThomas Lively   } else {
12022442924SThomas Lively     MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
12122442924SThomas Lively   }
12216c16827SSam Clegg   return true;
12316c16827SSam Clegg }
12416c16827SSam Clegg 
125480a16d5SRonak Chauhan Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
1265bd33de9SRonak Chauhan     SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
1275bd33de9SRonak Chauhan     uint64_t Address, raw_ostream &CStream) const {
128f3b762a0SWouter van Oortmerssen   Size = 0;
129f3b762a0SWouter van Oortmerssen   if (Address == 0) {
130f3b762a0SWouter van Oortmerssen     // Start of a code section: we're parsing only the function count.
131f3b762a0SWouter van Oortmerssen     int64_t FunctionCount;
132f3b762a0SWouter van Oortmerssen     if (!nextLEB(FunctionCount, Bytes, Size, false))
133480a16d5SRonak Chauhan       return None;
134f3b762a0SWouter van Oortmerssen     outs() << "        # " << FunctionCount << " functions in section.";
135f3b762a0SWouter van Oortmerssen   } else {
136f3b762a0SWouter van Oortmerssen     // Parse the start of a single function.
137f3b762a0SWouter van Oortmerssen     int64_t BodySize, LocalEntryCount;
138f3b762a0SWouter van Oortmerssen     if (!nextLEB(BodySize, Bytes, Size, false) ||
139f3b762a0SWouter van Oortmerssen         !nextLEB(LocalEntryCount, Bytes, Size, false))
140480a16d5SRonak Chauhan       return None;
141f3b762a0SWouter van Oortmerssen     if (LocalEntryCount) {
142f3b762a0SWouter van Oortmerssen       outs() << "        .local ";
143f3b762a0SWouter van Oortmerssen       for (int64_t I = 0; I < LocalEntryCount; I++) {
144f3b762a0SWouter van Oortmerssen         int64_t Count, Type;
145f3b762a0SWouter van Oortmerssen         if (!nextLEB(Count, Bytes, Size, false) ||
146f3b762a0SWouter van Oortmerssen             !nextLEB(Type, Bytes, Size, false))
147480a16d5SRonak Chauhan           return None;
148f3b762a0SWouter van Oortmerssen         for (int64_t J = 0; J < Count; J++) {
149f3b762a0SWouter van Oortmerssen           if (I || J)
150f3b762a0SWouter van Oortmerssen             outs() << ", ";
151f3b762a0SWouter van Oortmerssen           outs() << WebAssembly::anyTypeToString(Type);
152f3b762a0SWouter van Oortmerssen         }
153f3b762a0SWouter van Oortmerssen       }
154f3b762a0SWouter van Oortmerssen     }
155f3b762a0SWouter van Oortmerssen   }
156f3b762a0SWouter van Oortmerssen   outs() << "\n";
157f3b762a0SWouter van Oortmerssen   return MCDisassembler::Success;
158f3b762a0SWouter van Oortmerssen }
159f3b762a0SWouter van Oortmerssen 
1601a427287SDan Gohman MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
1611a427287SDan Gohman     MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
1626fdd6a7bSFangrui Song     raw_ostream &CS) const {
16316c16827SSam Clegg   CommentStream = &CS;
16416c16827SSam Clegg   Size = 0;
165fc3163b6SThomas Lively   int Opc = nextByte(Bytes, Size);
16616c16827SSam Clegg   if (Opc < 0)
1671a427287SDan Gohman     return MCDisassembler::Fail;
16816c16827SSam Clegg   const auto *WasmInst = &InstructionTable0[Opc];
16916c16827SSam Clegg   // If this is a prefix byte, indirect to another table.
17016c16827SSam Clegg   if (WasmInst->ET == ET_Prefix) {
17116c16827SSam Clegg     WasmInst = nullptr;
17216c16827SSam Clegg     // Linear search, so far only 2 entries.
17316c16827SSam Clegg     for (auto PT = PrefixTable; PT->Table; PT++) {
17416c16827SSam Clegg       if (PT->Prefix == Opc) {
17516c16827SSam Clegg         WasmInst = PT->Table;
17616c16827SSam Clegg         break;
17716c16827SSam Clegg       }
17816c16827SSam Clegg     }
17916c16827SSam Clegg     if (!WasmInst)
18016c16827SSam Clegg       return MCDisassembler::Fail;
1812faf0794SThomas Lively     int64_t PrefixedOpc;
182f3b762a0SWouter van Oortmerssen     if (!nextLEB(PrefixedOpc, Bytes, Size, false))
18316c16827SSam Clegg       return MCDisassembler::Fail;
1842faf0794SThomas Lively     if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
1852faf0794SThomas Lively       return MCDisassembler::Fail;
1862faf0794SThomas Lively     WasmInst += PrefixedOpc;
18716c16827SSam Clegg   }
18816c16827SSam Clegg   if (WasmInst->ET == ET_Unused)
18916c16827SSam Clegg     return MCDisassembler::Fail;
19016c16827SSam Clegg   // At this point we must have a valid instruction to decode.
19116c16827SSam Clegg   assert(WasmInst->ET == ET_Instruction);
19216c16827SSam Clegg   MI.setOpcode(WasmInst->Opcode);
19316c16827SSam Clegg   // Parse any operands.
19416c16827SSam Clegg   for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
195820c6263SWouter van Oortmerssen     auto OT = OperandTable[WasmInst->OperandStart + OPI];
196820c6263SWouter van Oortmerssen     switch (OT) {
19716c16827SSam Clegg     // ULEB operands:
19816c16827SSam Clegg     case WebAssembly::OPERAND_BASIC_BLOCK:
19916c16827SSam Clegg     case WebAssembly::OPERAND_LOCAL:
20016c16827SSam Clegg     case WebAssembly::OPERAND_GLOBAL:
20116c16827SSam Clegg     case WebAssembly::OPERAND_FUNCTION32:
20269e2797eSPaulo Matos     case WebAssembly::OPERAND_TABLE:
20316c16827SSam Clegg     case WebAssembly::OPERAND_OFFSET32:
204d9e0bbd1SWouter van Oortmerssen     case WebAssembly::OPERAND_OFFSET64:
20516c16827SSam Clegg     case WebAssembly::OPERAND_P2ALIGN:
20616c16827SSam Clegg     case WebAssembly::OPERAND_TYPEINDEX:
2078a28ce1aSWouter van Oortmerssen     case WebAssembly::OPERAND_EVENT:
20816c16827SSam Clegg     case MCOI::OPERAND_IMMEDIATE: {
20916c16827SSam Clegg       if (!parseLEBImmediate(MI, Size, Bytes, false))
21016c16827SSam Clegg         return MCDisassembler::Fail;
21116c16827SSam Clegg       break;
21216c16827SSam Clegg     }
21316c16827SSam Clegg     // SLEB operands:
21416c16827SSam Clegg     case WebAssembly::OPERAND_I32IMM:
215ad72f685SWouter van Oortmerssen     case WebAssembly::OPERAND_I64IMM: {
21616c16827SSam Clegg       if (!parseLEBImmediate(MI, Size, Bytes, true))
21716c16827SSam Clegg         return MCDisassembler::Fail;
21816c16827SSam Clegg       break;
21916c16827SSam Clegg     }
2202cb27072SThomas Lively     // block_type operands:
221ad72f685SWouter van Oortmerssen     case WebAssembly::OPERAND_SIGNATURE: {
2222cb27072SThomas Lively       int64_t Val;
2232cb27072SThomas Lively       uint64_t PrevSize = Size;
2242cb27072SThomas Lively       if (!nextLEB(Val, Bytes, Size, true))
225ad72f685SWouter van Oortmerssen         return MCDisassembler::Fail;
2262cb27072SThomas Lively       if (Val < 0) {
2272cb27072SThomas Lively         // Negative values are single septet value types or empty types
2282cb27072SThomas Lively         if (Size != PrevSize + 1) {
2292cb27072SThomas Lively           MI.addOperand(
2302cb27072SThomas Lively               MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
2312cb27072SThomas Lively         } else {
2322cb27072SThomas Lively           MI.addOperand(MCOperand::createImm(Val & 0x7f));
2332cb27072SThomas Lively         }
2342cb27072SThomas Lively       } else {
2352cb27072SThomas Lively         // We don't have access to the signature, so create a symbol without one
2362cb27072SThomas Lively         MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
2372cb27072SThomas Lively         auto *WasmSym = cast<MCSymbolWasm>(Sym);
2382cb27072SThomas Lively         WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
2392cb27072SThomas Lively         const MCExpr *Expr = MCSymbolRefExpr::create(
2402cb27072SThomas Lively             WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext());
2412cb27072SThomas Lively         MI.addOperand(MCOperand::createExpr(Expr));
2422cb27072SThomas Lively       }
243ad72f685SWouter van Oortmerssen       break;
244ad72f685SWouter van Oortmerssen     }
245107c3a12SAndy Wingo     // heap_type operands, for e.g. ref.null:
246107c3a12SAndy Wingo     case WebAssembly::OPERAND_HEAPTYPE: {
247107c3a12SAndy Wingo       int64_t Val;
248107c3a12SAndy Wingo       uint64_t PrevSize = Size;
249107c3a12SAndy Wingo       if (!nextLEB(Val, Bytes, Size, true))
250107c3a12SAndy Wingo         return MCDisassembler::Fail;
251107c3a12SAndy Wingo       if (Val < 0 && Size == PrevSize + 1) {
252107c3a12SAndy Wingo         // The HeapType encoding is like BlockType, in that encodings that
253107c3a12SAndy Wingo         // decode as negative values indicate ValTypes.  In practice we expect
254107c3a12SAndy Wingo         // either wasm::ValType::EXTERNREF or wasm::ValType::FUNCREF here.
255107c3a12SAndy Wingo         //
256107c3a12SAndy Wingo         // The positive SLEB values are reserved for future expansion and are
257107c3a12SAndy Wingo         // expected to be type indices in the typed function references
258107c3a12SAndy Wingo         // proposal, and should disassemble as MCSymbolRefExpr as in BlockType
259107c3a12SAndy Wingo         // above.
260107c3a12SAndy Wingo         MI.addOperand(MCOperand::createImm(Val & 0x7f));
261107c3a12SAndy Wingo       } else {
262107c3a12SAndy Wingo         MI.addOperand(
263107c3a12SAndy Wingo             MCOperand::createImm(int64_t(WebAssembly::HeapType::Invalid)));
264107c3a12SAndy Wingo       }
265107c3a12SAndy Wingo       break;
266107c3a12SAndy Wingo     }
26716c16827SSam Clegg     // FP operands.
26816c16827SSam Clegg     case WebAssembly::OPERAND_F32IMM: {
26922442924SThomas Lively       if (!parseImmediate<float>(MI, Size, Bytes))
27016c16827SSam Clegg         return MCDisassembler::Fail;
27116c16827SSam Clegg       break;
27216c16827SSam Clegg     }
27316c16827SSam Clegg     case WebAssembly::OPERAND_F64IMM: {
27422442924SThomas Lively       if (!parseImmediate<double>(MI, Size, Bytes))
27522442924SThomas Lively         return MCDisassembler::Fail;
27622442924SThomas Lively       break;
27722442924SThomas Lively     }
27822442924SThomas Lively     // Vector lane operands (not LEB encoded).
27922442924SThomas Lively     case WebAssembly::OPERAND_VEC_I8IMM: {
28022442924SThomas Lively       if (!parseImmediate<uint8_t>(MI, Size, Bytes))
28122442924SThomas Lively         return MCDisassembler::Fail;
28222442924SThomas Lively       break;
28322442924SThomas Lively     }
28422442924SThomas Lively     case WebAssembly::OPERAND_VEC_I16IMM: {
28522442924SThomas Lively       if (!parseImmediate<uint16_t>(MI, Size, Bytes))
28622442924SThomas Lively         return MCDisassembler::Fail;
28722442924SThomas Lively       break;
28822442924SThomas Lively     }
28922442924SThomas Lively     case WebAssembly::OPERAND_VEC_I32IMM: {
29022442924SThomas Lively       if (!parseImmediate<uint32_t>(MI, Size, Bytes))
29122442924SThomas Lively         return MCDisassembler::Fail;
29222442924SThomas Lively       break;
29322442924SThomas Lively     }
29422442924SThomas Lively     case WebAssembly::OPERAND_VEC_I64IMM: {
29522442924SThomas Lively       if (!parseImmediate<uint64_t>(MI, Size, Bytes))
29616c16827SSam Clegg         return MCDisassembler::Fail;
29716c16827SSam Clegg       break;
29816c16827SSam Clegg     }
299820c6263SWouter van Oortmerssen     case WebAssembly::OPERAND_BRLIST: {
300820c6263SWouter van Oortmerssen       int64_t TargetTableLen;
301820c6263SWouter van Oortmerssen       if (!nextLEB(TargetTableLen, Bytes, Size, false))
302820c6263SWouter van Oortmerssen         return MCDisassembler::Fail;
303820c6263SWouter van Oortmerssen       for (int64_t I = 0; I < TargetTableLen; I++) {
304820c6263SWouter van Oortmerssen         if (!parseLEBImmediate(MI, Size, Bytes, false))
305820c6263SWouter van Oortmerssen           return MCDisassembler::Fail;
306820c6263SWouter van Oortmerssen       }
307820c6263SWouter van Oortmerssen       // Default case.
308820c6263SWouter van Oortmerssen       if (!parseLEBImmediate(MI, Size, Bytes, false))
309820c6263SWouter van Oortmerssen         return MCDisassembler::Fail;
310820c6263SWouter van Oortmerssen       break;
311820c6263SWouter van Oortmerssen     }
312a733d08dSWouter van Oortmerssen     case MCOI::OPERAND_REGISTER:
313a733d08dSWouter van Oortmerssen       // The tablegen header currently does not have any register operands since
314a733d08dSWouter van Oortmerssen       // we use only the stack (_S) instructions.
315a733d08dSWouter van Oortmerssen       // If you hit this that probably means a bad instruction definition in
316a733d08dSWouter van Oortmerssen       // tablegen.
317a733d08dSWouter van Oortmerssen       llvm_unreachable("Register operand in WebAssemblyDisassembler");
31816c16827SSam Clegg     default:
31916c16827SSam Clegg       llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
32016c16827SSam Clegg     }
32116c16827SSam Clegg   }
32216c16827SSam Clegg   return MCDisassembler::Success;
3231a427287SDan Gohman }
324