1 //==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file is part of the WebAssembly Assembler.
12 ///
13 /// It contains code to translate a parsed .s file into MCInsts.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
18 #include "MCTargetDesc/WebAssemblyTargetStreamer.h"
19 #include "WebAssembly.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCInst.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
24 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
25 #include "llvm/MC/MCStreamer.h"
26 #include "llvm/MC/MCSubtargetInfo.h"
27 #include "llvm/MC/MCSymbol.h"
28 #include "llvm/MC/MCSymbolWasm.h"
29 #include "llvm/Support/Endian.h"
30 #include "llvm/Support/TargetRegistry.h"
31 
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "wasm-asm-parser"
35 
36 namespace {
37 
38 /// WebAssemblyOperand - Instances of this class represent the operands in a
39 /// parsed WASM machine instruction.
40 struct WebAssemblyOperand : public MCParsedAsmOperand {
41   enum KindTy { Token, Integer, Float, Symbol, BrList } Kind;
42 
43   SMLoc StartLoc, EndLoc;
44 
45   struct TokOp {
46     StringRef Tok;
47   };
48 
49   struct IntOp {
50     int64_t Val;
51   };
52 
53   struct FltOp {
54     double Val;
55   };
56 
57   struct SymOp {
58     const MCExpr *Exp;
59   };
60 
61   struct BrLOp {
62     std::vector<unsigned> List;
63   };
64 
65   union {
66     struct TokOp Tok;
67     struct IntOp Int;
68     struct FltOp Flt;
69     struct SymOp Sym;
70     struct BrLOp BrL;
71   };
72 
73   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T)
74       : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
75   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I)
76       : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
77   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F)
78       : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
79   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S)
80       : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
81   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End)
82       : Kind(K), StartLoc(Start), EndLoc(End), BrL() {}
83 
84   ~WebAssemblyOperand() {
85     if (isBrList())
86       BrL.~BrLOp();
87   }
88 
89   bool isToken() const override { return Kind == Token; }
90   bool isImm() const override {
91     return Kind == Integer || Kind == Float || Kind == Symbol;
92   }
93   bool isMem() const override { return false; }
94   bool isReg() const override { return false; }
95   bool isBrList() const { return Kind == BrList; }
96 
97   unsigned getReg() const override {
98     llvm_unreachable("Assembly inspects a register operand");
99     return 0;
100   }
101 
102   StringRef getToken() const {
103     assert(isToken());
104     return Tok.Tok;
105   }
106 
107   SMLoc getStartLoc() const override { return StartLoc; }
108   SMLoc getEndLoc() const override { return EndLoc; }
109 
110   void addRegOperands(MCInst &, unsigned) const {
111     // Required by the assembly matcher.
112     llvm_unreachable("Assembly matcher creates register operands");
113   }
114 
115   void addImmOperands(MCInst &Inst, unsigned N) const {
116     assert(N == 1 && "Invalid number of operands!");
117     if (Kind == Integer)
118       Inst.addOperand(MCOperand::createImm(Int.Val));
119     else if (Kind == Float)
120       Inst.addOperand(MCOperand::createFPImm(Flt.Val));
121     else if (Kind == Symbol)
122       Inst.addOperand(MCOperand::createExpr(Sym.Exp));
123     else
124       llvm_unreachable("Should be immediate or symbol!");
125   }
126 
127   void addBrListOperands(MCInst &Inst, unsigned N) const {
128     assert(N == 1 && isBrList() && "Invalid BrList!");
129     for (auto Br : BrL.List)
130       Inst.addOperand(MCOperand::createImm(Br));
131   }
132 
133   void print(raw_ostream &OS) const override {
134     switch (Kind) {
135     case Token:
136       OS << "Tok:" << Tok.Tok;
137       break;
138     case Integer:
139       OS << "Int:" << Int.Val;
140       break;
141     case Float:
142       OS << "Flt:" << Flt.Val;
143       break;
144     case Symbol:
145       OS << "Sym:" << Sym.Exp;
146       break;
147     case BrList:
148       OS << "BrList:" << BrL.List.size();
149       break;
150     }
151   }
152 };
153 
154 class WebAssemblyAsmParser final : public MCTargetAsmParser {
155   MCAsmParser &Parser;
156   MCAsmLexer &Lexer;
157 
158   // Much like WebAssemblyAsmPrinter in the backend, we have to own these.
159   std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
160 
161   // Order of labels, directives and instructions in a .s file have no
162   // syntactical enforcement. This class is a callback from the actual parser,
163   // and yet we have to be feeding data to the streamer in a very particular
164   // order to ensure a correct binary encoding that matches the regular backend
165   // (the streamer does not enforce this). This "state machine" enum helps
166   // guarantee that correct order.
167   enum ParserState {
168     FileStart,
169     Label,
170     FunctionStart,
171     FunctionLocals,
172     Instructions,
173   } CurrentState = FileStart;
174 
175   // We track this to see if a .functype following a label is the same,
176   // as this is how we recognize the start of a function.
177   MCSymbol *LastLabel = nullptr;
178 
179 public:
180   WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
181                        const MCInstrInfo &MII, const MCTargetOptions &Options)
182       : MCTargetAsmParser(Options, STI, MII), Parser(Parser),
183         Lexer(Parser.getLexer()) {
184     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
185   }
186 
187   void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
188     Signatures.push_back(std::move(Sig));
189   }
190 
191 #define GET_ASSEMBLER_HEADER
192 #include "WebAssemblyGenAsmMatcher.inc"
193 
194   // TODO: This is required to be implemented, but appears unused.
195   bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/,
196                      SMLoc & /*EndLoc*/) override {
197     llvm_unreachable("ParseRegister is not implemented.");
198   }
199 
200   bool error(const StringRef &Msg, const AsmToken &Tok) {
201     return Parser.Error(Tok.getLoc(), Msg + Tok.getString());
202   }
203 
204   bool isNext(AsmToken::TokenKind Kind) {
205     auto Ok = Lexer.is(Kind);
206     if (Ok)
207       Parser.Lex();
208     return Ok;
209   }
210 
211   bool expect(AsmToken::TokenKind Kind, const char *KindName) {
212     if (!isNext(Kind))
213       return error(std::string("Expected ") + KindName + ", instead got: ",
214                    Lexer.getTok());
215     return false;
216   }
217 
218   StringRef expectIdent() {
219     if (!Lexer.is(AsmToken::Identifier)) {
220       error("Expected identifier, got: ", Lexer.getTok());
221       return StringRef();
222     }
223     auto Name = Lexer.getTok().getString();
224     Parser.Lex();
225     return Name;
226   }
227 
228   Optional<wasm::ValType> parseType(const StringRef &Type) {
229     // FIXME: can't use StringSwitch because wasm::ValType doesn't have a
230     // "invalid" value.
231     if (Type == "i32")
232       return wasm::ValType::I32;
233     if (Type == "i64")
234       return wasm::ValType::I64;
235     if (Type == "f32")
236       return wasm::ValType::F32;
237     if (Type == "f64")
238       return wasm::ValType::F64;
239     if (Type == "v128" || Type == "i8x16" || Type == "i16x8" ||
240         Type == "i32x4" || Type == "i64x2" || Type == "f32x4" ||
241         Type == "f64x2")
242       return wasm::ValType::V128;
243     return Optional<wasm::ValType>();
244   }
245 
246   bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) {
247     while (Lexer.is(AsmToken::Identifier)) {
248       auto Type = parseType(Lexer.getTok().getString());
249       if (!Type)
250         return true;
251       Types.push_back(Type.getValue());
252       Parser.Lex();
253       if (!isNext(AsmToken::Comma))
254         break;
255     }
256     return false;
257   }
258 
259   void parseSingleInteger(bool IsNegative, OperandVector &Operands) {
260     auto &Int = Lexer.getTok();
261     int64_t Val = Int.getIntVal();
262     if (IsNegative)
263       Val = -Val;
264     Operands.push_back(make_unique<WebAssemblyOperand>(
265         WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(),
266         WebAssemblyOperand::IntOp{Val}));
267     Parser.Lex();
268   }
269 
270   bool parseOperandStartingWithInteger(bool IsNegative, OperandVector &Operands,
271                                        StringRef InstName) {
272     parseSingleInteger(IsNegative, Operands);
273     // FIXME: there is probably a cleaner way to do this.
274     auto IsLoadStore = InstName.startswith("load") ||
275                        InstName.startswith("store") ||
276                        InstName.startswith("atomic_load") ||
277                        InstName.startswith("atomic_store");
278     if (IsLoadStore) {
279       // Parse load/store operands of the form: offset align
280       auto &Offset = Lexer.getTok();
281       if (Offset.is(AsmToken::Integer)) {
282         parseSingleInteger(false, Operands);
283       } else {
284         // Alignment not specified.
285         // FIXME: correctly derive a default from the instruction.
286         // We can't just call WebAssembly::GetDefaultP2Align since we don't have
287         // an opcode until after the assembly matcher.
288         Operands.push_back(make_unique<WebAssemblyOperand>(
289             WebAssemblyOperand::Integer, Offset.getLoc(), Offset.getEndLoc(),
290             WebAssemblyOperand::IntOp{0}));
291       }
292     }
293     return false;
294   }
295 
296   bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
297                         SMLoc NameLoc, OperandVector &Operands) override {
298     // Note: Name does NOT point into the sourcecode, but to a local, so
299     // use NameLoc instead.
300     Name = StringRef(NameLoc.getPointer(), Name.size());
301 
302     // WebAssembly has instructions with / in them, which AsmLexer parses
303     // as seperate tokens, so if we find such tokens immediately adjacent (no
304     // whitespace), expand the name to include them:
305     for (;;) {
306       auto &Sep = Lexer.getTok();
307       if (Sep.getLoc().getPointer() != Name.end() ||
308           Sep.getKind() != AsmToken::Slash)
309         break;
310       // Extend name with /
311       Name = StringRef(Name.begin(), Name.size() + Sep.getString().size());
312       Parser.Lex();
313       // We must now find another identifier, or error.
314       auto &Id = Lexer.getTok();
315       if (Id.getKind() != AsmToken::Identifier ||
316           Id.getLoc().getPointer() != Name.end())
317         return error("Incomplete instruction name: ", Id);
318       Name = StringRef(Name.begin(), Name.size() + Id.getString().size());
319       Parser.Lex();
320     }
321 
322     // Now construct the name as first operand.
323     Operands.push_back(make_unique<WebAssemblyOperand>(
324         WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
325         WebAssemblyOperand::TokOp{Name}));
326     auto NamePair = Name.split('.');
327     // If no '.', there is no type prefix.
328     auto BaseName = NamePair.second.empty() ? NamePair.first : NamePair.second;
329 
330     while (Lexer.isNot(AsmToken::EndOfStatement)) {
331       auto &Tok = Lexer.getTok();
332       switch (Tok.getKind()) {
333       case AsmToken::Identifier: {
334         auto &Id = Lexer.getTok();
335         const MCExpr *Val;
336         SMLoc End;
337         if (Parser.parsePrimaryExpr(Val, End))
338           return error("Cannot parse symbol: ", Lexer.getTok());
339         Operands.push_back(make_unique<WebAssemblyOperand>(
340             WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
341             WebAssemblyOperand::SymOp{Val}));
342         break;
343       }
344       case AsmToken::Minus:
345         Parser.Lex();
346         if (Lexer.isNot(AsmToken::Integer))
347           return error("Expected integer instead got: ", Lexer.getTok());
348         if (parseOperandStartingWithInteger(true, Operands, BaseName))
349           return true;
350         break;
351       case AsmToken::Integer:
352         if (parseOperandStartingWithInteger(false, Operands, BaseName))
353           return true;
354         break;
355       case AsmToken::Real: {
356         double Val;
357         if (Tok.getString().getAsDouble(Val, false))
358           return error("Cannot parse real: ", Tok);
359         Operands.push_back(make_unique<WebAssemblyOperand>(
360             WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(),
361             WebAssemblyOperand::FltOp{Val}));
362         Parser.Lex();
363         break;
364       }
365       case AsmToken::LCurly: {
366         Parser.Lex();
367         auto Op = make_unique<WebAssemblyOperand>(
368             WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc());
369         if (!Lexer.is(AsmToken::RCurly))
370           for (;;) {
371             Op->BrL.List.push_back(Lexer.getTok().getIntVal());
372             expect(AsmToken::Integer, "integer");
373             if (!isNext(AsmToken::Comma))
374               break;
375           }
376         expect(AsmToken::RCurly, "}");
377         Operands.push_back(std::move(Op));
378         break;
379       }
380       default:
381         return error("Unexpected token in operand: ", Tok);
382       }
383       if (Lexer.isNot(AsmToken::EndOfStatement)) {
384         if (expect(AsmToken::Comma, ","))
385           return true;
386       }
387     }
388     Parser.Lex();
389 
390     // Block instructions require a signature index, but these are missing in
391     // assembly, so we add a dummy one explicitly (since we have no control
392     // over signature tables here, we assume these will be regenerated when
393     // the wasm module is generated).
394     if (BaseName == "block" || BaseName == "loop" || BaseName == "try") {
395       Operands.push_back(make_unique<WebAssemblyOperand>(
396           WebAssemblyOperand::Integer, NameLoc, NameLoc,
397           WebAssemblyOperand::IntOp{-1}));
398     }
399     return false;
400   }
401 
402   void onLabelParsed(MCSymbol *Symbol) override {
403     LastLabel = Symbol;
404     CurrentState = Label;
405   }
406 
407   bool parseSignature(wasm::WasmSignature *Signature) {
408     if (expect(AsmToken::LParen, "("))
409       return true;
410     if (parseRegTypeList(Signature->Params))
411       return true;
412     if (expect(AsmToken::RParen, ")"))
413       return true;
414     if (expect(AsmToken::MinusGreater, "->"))
415       return true;
416     if (expect(AsmToken::LParen, "("))
417       return true;
418     if (parseRegTypeList(Signature->Returns))
419       return true;
420     if (expect(AsmToken::RParen, ")"))
421       return true;
422     return false;
423   }
424 
425   // This function processes wasm-specific directives streamed to
426   // WebAssemblyTargetStreamer, all others go to the generic parser
427   // (see WasmAsmParser).
428   bool ParseDirective(AsmToken DirectiveID) override {
429     // This function has a really weird return value behavior that is different
430     // from all the other parsing functions:
431     // - return true && no tokens consumed -> don't know this directive / let
432     //   the generic parser handle it.
433     // - return true && tokens consumed -> a parsing error occurred.
434     // - return false -> processed this directive successfully.
435     assert(DirectiveID.getKind() == AsmToken::Identifier);
436     auto &Out = getStreamer();
437     auto &TOut =
438         reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
439 
440     // TODO: any time we return an error, at least one token must have been
441     // consumed, otherwise this will not signal an error to the caller.
442     if (DirectiveID.getString() == ".globaltype") {
443       auto SymName = expectIdent();
444       if (SymName.empty())
445         return true;
446       if (expect(AsmToken::Comma, ","))
447         return true;
448       auto TypeTok = Lexer.getTok();
449       auto TypeName = expectIdent();
450       if (TypeName.empty())
451         return true;
452       auto Type = parseType(TypeName);
453       if (!Type)
454         return error("Unknown type in .globaltype directive: ", TypeTok);
455       // Now set this symbol with the correct type.
456       auto WasmSym = cast<MCSymbolWasm>(
457           TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
458       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
459       WasmSym->setGlobalType(
460           wasm::WasmGlobalType{uint8_t(Type.getValue()), true});
461       // And emit the directive again.
462       TOut.emitGlobalType(WasmSym);
463       return expect(AsmToken::EndOfStatement, "EOL");
464     }
465 
466     if (DirectiveID.getString() == ".functype") {
467       // This code has to send things to the streamer similar to
468       // WebAssemblyAsmPrinter::EmitFunctionBodyStart.
469       // TODO: would be good to factor this into a common function, but the
470       // assembler and backend really don't share any common code, and this code
471       // parses the locals seperately.
472       auto SymName = expectIdent();
473       if (SymName.empty())
474         return true;
475       auto WasmSym = cast<MCSymbolWasm>(
476           TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
477       if (CurrentState == Label && WasmSym == LastLabel) {
478         // This .functype indicates a start of a function.
479         CurrentState = FunctionStart;
480       }
481       auto Signature = make_unique<wasm::WasmSignature>();
482       if (parseSignature(Signature.get()))
483         return true;
484       WasmSym->setSignature(Signature.get());
485       addSignature(std::move(Signature));
486       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
487       TOut.emitFunctionType(WasmSym);
488       // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
489       return expect(AsmToken::EndOfStatement, "EOL");
490     }
491 
492     if (DirectiveID.getString() == ".eventtype") {
493       auto SymName = expectIdent();
494       if (SymName.empty())
495         return true;
496       auto WasmSym = cast<MCSymbolWasm>(
497           TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
498       auto Signature = make_unique<wasm::WasmSignature>();
499       if (parseRegTypeList(Signature->Params))
500         return true;
501       WasmSym->setSignature(Signature.get());
502       addSignature(std::move(Signature));
503       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT);
504       TOut.emitEventType(WasmSym);
505       // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
506       return expect(AsmToken::EndOfStatement, "EOL");
507     }
508 
509     if (DirectiveID.getString() == ".local") {
510       if (CurrentState != FunctionStart)
511         return error(".local directive should follow the start of a function",
512                      Lexer.getTok());
513       SmallVector<wasm::ValType, 4> Locals;
514       if (parseRegTypeList(Locals))
515         return true;
516       TOut.emitLocal(Locals);
517       CurrentState = FunctionLocals;
518       return expect(AsmToken::EndOfStatement, "EOL");
519     }
520 
521     return true; // We didn't process this directive.
522   }
523 
524   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
525                                OperandVector &Operands, MCStreamer &Out,
526                                uint64_t &ErrorInfo,
527                                bool MatchingInlineAsm) override {
528     MCInst Inst;
529     unsigned MatchResult =
530         MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
531     switch (MatchResult) {
532     case Match_Success: {
533       if (CurrentState == FunctionStart) {
534         // This is the first instruction in a function, but we haven't seen
535         // a .local directive yet. The streamer requires locals to be encoded
536         // as a prelude to the instructions, so emit an empty list of locals
537         // here.
538         auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
539             *Out.getTargetStreamer());
540         TOut.emitLocal(SmallVector<wasm::ValType, 0>());
541       }
542       CurrentState = Instructions;
543       Out.EmitInstruction(Inst, getSTI());
544       return false;
545     }
546     case Match_MissingFeature:
547       return Parser.Error(
548           IDLoc, "instruction requires a WASM feature not currently enabled");
549     case Match_MnemonicFail:
550       return Parser.Error(IDLoc, "invalid instruction");
551     case Match_NearMisses:
552       return Parser.Error(IDLoc, "ambiguous instruction");
553     case Match_InvalidTiedOperand:
554     case Match_InvalidOperand: {
555       SMLoc ErrorLoc = IDLoc;
556       if (ErrorInfo != ~0ULL) {
557         if (ErrorInfo >= Operands.size())
558           return Parser.Error(IDLoc, "too few operands for instruction");
559         ErrorLoc = Operands[ErrorInfo]->getStartLoc();
560         if (ErrorLoc == SMLoc())
561           ErrorLoc = IDLoc;
562       }
563       return Parser.Error(ErrorLoc, "invalid operand for instruction");
564     }
565     }
566     llvm_unreachable("Implement any new match types added!");
567   }
568 };
569 } // end anonymous namespace
570 
571 // Force static initialization.
572 extern "C" void LLVMInitializeWebAssemblyAsmParser() {
573   RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32());
574   RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64());
575 }
576 
577 #define GET_REGISTER_MATCHER
578 #define GET_MATCHER_IMPLEMENTATION
579 #include "WebAssemblyGenAsmMatcher.inc"
580