1 //==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file is part of the WebAssembly Assembler.
12 ///
13 /// It contains code to translate a parsed .s file into MCInsts.
14 ///
15 //===----------------------------------------------------------------------===//
16 
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "WebAssembly.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/TargetRegistry.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "wasm-asm-parser"
34 
35 namespace {
36 
37 /// WebAssemblyOperand - Instances of this class represent the operands in a
38 /// parsed WASM machine instruction.
39 struct WebAssemblyOperand : public MCParsedAsmOperand {
40   enum KindTy { Token, Integer, Float, Symbol } Kind;
41 
42   SMLoc StartLoc, EndLoc;
43 
44   struct TokOp {
45     StringRef Tok;
46   };
47 
48   struct IntOp {
49     int64_t Val;
50   };
51 
52   struct FltOp {
53     double Val;
54   };
55 
56   struct SymOp {
57     const MCExpr *Exp;
58   };
59 
60   union {
61     struct TokOp Tok;
62     struct IntOp Int;
63     struct FltOp Flt;
64     struct SymOp Sym;
65   };
66 
67   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T)
68       : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
69   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I)
70       : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
71   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F)
72       : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
73   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S)
74       : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
75 
76   bool isToken() const override { return Kind == Token; }
77   bool isImm() const override {
78     return Kind == Integer || Kind == Float || Kind == Symbol;
79   }
80   bool isMem() const override { return false; }
81   bool isReg() const override { return false; }
82 
83   unsigned getReg() const override {
84     llvm_unreachable("Assembly inspects a register operand");
85     return 0;
86   }
87 
88   StringRef getToken() const {
89     assert(isToken());
90     return Tok.Tok;
91   }
92 
93   SMLoc getStartLoc() const override { return StartLoc; }
94   SMLoc getEndLoc() const override { return EndLoc; }
95 
96   void addRegOperands(MCInst &, unsigned) const {
97     // Required by the assembly matcher.
98     llvm_unreachable("Assembly matcher creates register operands");
99   }
100 
101   void addImmOperands(MCInst &Inst, unsigned N) const {
102     assert(N == 1 && "Invalid number of operands!");
103     if (Kind == Integer)
104       Inst.addOperand(MCOperand::createImm(Int.Val));
105     else if (Kind == Float)
106       Inst.addOperand(MCOperand::createFPImm(Flt.Val));
107     else if (Kind == Symbol)
108       Inst.addOperand(MCOperand::createExpr(Sym.Exp));
109     else
110       llvm_unreachable("Should be immediate or symbol!");
111   }
112 
113   void print(raw_ostream &OS) const override {
114     switch (Kind) {
115     case Token:
116       OS << "Tok:" << Tok.Tok;
117       break;
118     case Integer:
119       OS << "Int:" << Int.Val;
120       break;
121     case Float:
122       OS << "Flt:" << Flt.Val;
123       break;
124     case Symbol:
125       OS << "Sym:" << Sym.Exp;
126       break;
127     }
128   }
129 };
130 
131 class WebAssemblyAsmParser final : public MCTargetAsmParser {
132   MCAsmParser &Parser;
133   MCAsmLexer &Lexer;
134   MCSymbol *LastLabel;
135 
136 public:
137   WebAssemblyAsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
138                        const MCInstrInfo &mii, const MCTargetOptions &Options)
139       : MCTargetAsmParser(Options, sti, mii), Parser(Parser),
140         Lexer(Parser.getLexer()), LastLabel(nullptr) {
141     setAvailableFeatures(ComputeAvailableFeatures(sti.getFeatureBits()));
142   }
143 
144 #define GET_ASSEMBLER_HEADER
145 #include "WebAssemblyGenAsmMatcher.inc"
146 
147   // TODO: This is required to be implemented, but appears unused.
148   bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/,
149                      SMLoc & /*EndLoc*/) override {
150     llvm_unreachable("ParseRegister is not implemented.");
151   }
152 
153   bool Error(const StringRef &msg, const AsmToken &tok) {
154     return Parser.Error(tok.getLoc(), msg + tok.getString());
155   }
156 
157   bool IsNext(AsmToken::TokenKind Kind) {
158     auto ok = Lexer.is(Kind);
159     if (ok)
160       Parser.Lex();
161     return ok;
162   }
163 
164   bool Expect(AsmToken::TokenKind Kind, const char *KindName) {
165     if (!IsNext(Kind))
166       return Error(std::string("Expected ") + KindName + ", instead got: ",
167                    Lexer.getTok());
168     return false;
169   }
170 
171   MVT::SimpleValueType ParseRegType(const StringRef &RegType) {
172     // Derive type from .param .local decls, or the instruction itself.
173     return StringSwitch<MVT::SimpleValueType>(RegType)
174         .Case("i32", MVT::i32)
175         .Case("i64", MVT::i64)
176         .Case("f32", MVT::f32)
177         .Case("f64", MVT::f64)
178         .Case("i8x16", MVT::v16i8)
179         .Case("i16x8", MVT::v8i16)
180         .Case("i32x4", MVT::v4i32)
181         .Case("i64x2", MVT::v2i64)
182         .Case("f32x4", MVT::v4f32)
183         .Case("f64x2", MVT::v2f64)
184         // arbitrarily chosen vector type to associate with "v128"
185         // FIXME: should these be EVTs to avoid this arbitrary hack? Do we want
186         // to accept more specific SIMD register types?
187         .Case("v128", MVT::v16i8)
188         .Default(MVT::INVALID_SIMPLE_VALUE_TYPE);
189   }
190 
191   void ParseSingleInteger(bool IsNegative, OperandVector &Operands) {
192     auto &Int = Lexer.getTok();
193     int64_t Val = Int.getIntVal();
194     if (IsNegative)
195       Val = -Val;
196     Operands.push_back(make_unique<WebAssemblyOperand>(
197         WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(),
198         WebAssemblyOperand::IntOp{Val}));
199     Parser.Lex();
200   }
201 
202   bool ParseOperandStartingWithInteger(bool IsNegative, OperandVector &Operands,
203                                        StringRef InstName) {
204     ParseSingleInteger(IsNegative, Operands);
205     // FIXME: there is probably a cleaner way to do this.
206     auto IsLoadStore = InstName.startswith("load") ||
207                        InstName.startswith("store") ||
208                        InstName.startswith("atomic_load") ||
209                        InstName.startswith("atomic_store");
210     if (IsLoadStore) {
211       // Parse load/store operands of the form: offset align
212       auto &Offset = Lexer.getTok();
213       if (Offset.is(AsmToken::Integer)) {
214         ParseSingleInteger(false, Operands);
215       } else {
216         // Alignment not specified.
217         // FIXME: correctly derive a default from the instruction.
218         // We can't just call WebAssembly::GetDefaultP2Align since we don't have
219         // an opcode until after the assembly matcher.
220         Operands.push_back(make_unique<WebAssemblyOperand>(
221             WebAssemblyOperand::Integer, Offset.getLoc(), Offset.getEndLoc(),
222             WebAssemblyOperand::IntOp{0}));
223       }
224     }
225     return false;
226   }
227 
228   bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
229                         SMLoc NameLoc, OperandVector &Operands) override {
230     // Note: Name does NOT point into the sourcecode, but to a local, so
231     // use NameLoc instead.
232     Name = StringRef(NameLoc.getPointer(), Name.size());
233     // WebAssembly has instructions with / in them, which AsmLexer parses
234     // as seperate tokens, so if we find such tokens immediately adjacent (no
235     // whitespace), expand the name to include them:
236     for (;;) {
237       auto &Sep = Lexer.getTok();
238       if (Sep.getLoc().getPointer() != Name.end() ||
239           Sep.getKind() != AsmToken::Slash) break;
240       // Extend name with /
241       Name = StringRef(Name.begin(), Name.size() + Sep.getString().size());
242       Parser.Lex();
243       // We must now find another identifier, or error.
244       auto &Id = Lexer.getTok();
245       if (Id.getKind() != AsmToken::Identifier ||
246           Id.getLoc().getPointer() != Name.end())
247         return Error("Incomplete instruction name: ", Id);
248       Name = StringRef(Name.begin(), Name.size() + Id.getString().size());
249       Parser.Lex();
250     }
251     // Now construct the name as first operand.
252     Operands.push_back(make_unique<WebAssemblyOperand>(
253         WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
254         WebAssemblyOperand::TokOp{Name}));
255     auto NamePair = Name.split('.');
256     // If no '.', there is no type prefix.
257     auto BaseName = NamePair.second.empty() ? NamePair.first : NamePair.second;
258     while (Lexer.isNot(AsmToken::EndOfStatement)) {
259       auto &Tok = Lexer.getTok();
260       switch (Tok.getKind()) {
261       case AsmToken::Identifier: {
262         auto &Id = Lexer.getTok();
263         const MCExpr *Val;
264         SMLoc End;
265         if (Parser.parsePrimaryExpr(Val, End))
266           return Error("Cannot parse symbol: ", Lexer.getTok());
267         Operands.push_back(make_unique<WebAssemblyOperand>(
268             WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(),
269             WebAssemblyOperand::SymOp{Val}));
270         break;
271       }
272       case AsmToken::Minus:
273         Parser.Lex();
274         if (Lexer.isNot(AsmToken::Integer))
275           return Error("Expected integer instead got: ", Lexer.getTok());
276         if (ParseOperandStartingWithInteger(true, Operands, BaseName))
277           return true;
278         break;
279       case AsmToken::Integer:
280         if (ParseOperandStartingWithInteger(false, Operands, BaseName))
281           return true;
282         break;
283       case AsmToken::Real: {
284         double Val;
285         if (Tok.getString().getAsDouble(Val, false))
286           return Error("Cannot parse real: ", Tok);
287         Operands.push_back(make_unique<WebAssemblyOperand>(
288             WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(),
289             WebAssemblyOperand::FltOp{Val}));
290         Parser.Lex();
291         break;
292       }
293       default:
294         return Error("Unexpected token in operand: ", Tok);
295       }
296       if (Lexer.isNot(AsmToken::EndOfStatement)) {
297         if (Expect(AsmToken::Comma, ","))
298           return true;
299       }
300     }
301     Parser.Lex();
302     // Block instructions require a signature index, but these are missing in
303     // assembly, so we add a dummy one explicitly (since we have no control
304     // over signature tables here, we assume these will be regenerated when
305     // the wasm module is generated).
306     if (BaseName == "block" || BaseName == "loop") {
307       Operands.push_back(make_unique<WebAssemblyOperand>(
308           WebAssemblyOperand::Integer, NameLoc, NameLoc,
309           WebAssemblyOperand::IntOp{-1}));
310     }
311     return false;
312   }
313 
314   void onLabelParsed(MCSymbol *Symbol) override { LastLabel = Symbol; }
315 
316   bool ParseDirective(AsmToken DirectiveID) override {
317     assert(DirectiveID.getKind() == AsmToken::Identifier);
318     auto &Out = getStreamer();
319     auto &TOut =
320         reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
321     // TODO: we're just parsing the subset of directives we're interested in,
322     // and ignoring ones we don't recognise. We should ideally verify
323     // all directives here.
324     if (DirectiveID.getString() == ".type") {
325       // This could be the start of a function, check if followed by
326       // "label,@function"
327       if (!(IsNext(AsmToken::Identifier) && IsNext(AsmToken::Comma) &&
328             IsNext(AsmToken::At) && Lexer.is(AsmToken::Identifier)))
329         return Error("Expected label,@type declaration, got: ", Lexer.getTok());
330       Parser.Lex();
331       // Out.EmitSymbolAttribute(??, MCSA_ELF_TypeFunction);
332     } else if (DirectiveID.getString() == ".param" ||
333                DirectiveID.getString() == ".local") {
334       // Track the number of locals, needed for correct virtual register
335       // assignment elsewhere.
336       // Also output a directive to the streamer.
337       std::vector<MVT> Params;
338       std::vector<MVT> Locals;
339       while (Lexer.is(AsmToken::Identifier)) {
340         auto RegType = ParseRegType(Lexer.getTok().getString());
341         if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE)
342           return true;
343         if (DirectiveID.getString() == ".param") {
344           Params.push_back(RegType);
345         } else {
346           Locals.push_back(RegType);
347         }
348         Parser.Lex();
349         if (!IsNext(AsmToken::Comma))
350           break;
351       }
352       assert(LastLabel);
353       TOut.emitParam(LastLabel, Params);
354       TOut.emitLocal(Locals);
355     } else {
356       // For now, ignore anydirective we don't recognize:
357       while (Lexer.isNot(AsmToken::EndOfStatement))
358         Parser.Lex();
359     }
360     return Expect(AsmToken::EndOfStatement, "EOL");
361   }
362 
363   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
364                                OperandVector &Operands, MCStreamer &Out,
365                                uint64_t &ErrorInfo,
366                                bool MatchingInlineAsm) override {
367     MCInst Inst;
368     unsigned MatchResult =
369         MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
370     switch (MatchResult) {
371     case Match_Success: {
372       Out.EmitInstruction(Inst, getSTI());
373       return false;
374     }
375     case Match_MissingFeature:
376       return Parser.Error(
377           IDLoc, "instruction requires a WASM feature not currently enabled");
378     case Match_MnemonicFail:
379       return Parser.Error(IDLoc, "invalid instruction");
380     case Match_NearMisses:
381       return Parser.Error(IDLoc, "ambiguous instruction");
382     case Match_InvalidTiedOperand:
383     case Match_InvalidOperand: {
384       SMLoc ErrorLoc = IDLoc;
385       if (ErrorInfo != ~0ULL) {
386         if (ErrorInfo >= Operands.size())
387           return Parser.Error(IDLoc, "too few operands for instruction");
388         ErrorLoc = Operands[ErrorInfo]->getStartLoc();
389         if (ErrorLoc == SMLoc())
390           ErrorLoc = IDLoc;
391       }
392       return Parser.Error(ErrorLoc, "invalid operand for instruction");
393     }
394     }
395     llvm_unreachable("Implement any new match types added!");
396   }
397 };
398 } // end anonymous namespace
399 
400 // Force static initialization.
401 extern "C" void LLVMInitializeWebAssemblyAsmParser() {
402   RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32());
403   RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64());
404 }
405 
406 #define GET_REGISTER_MATCHER
407 #define GET_MATCHER_IMPLEMENTATION
408 #include "WebAssemblyGenAsmMatcher.inc"
409