1 //==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// This file is part of the WebAssembly Assembler. 12 /// 13 /// It contains code to translate a parsed .s file into MCInsts. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 18 #include "MCTargetDesc/WebAssemblyTargetStreamer.h" 19 #include "WebAssembly.h" 20 #include "llvm/MC/MCContext.h" 21 #include "llvm/MC/MCInst.h" 22 #include "llvm/MC/MCInstrInfo.h" 23 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 24 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 25 #include "llvm/MC/MCStreamer.h" 26 #include "llvm/MC/MCSubtargetInfo.h" 27 #include "llvm/MC/MCSymbol.h" 28 #include "llvm/MC/MCSymbolWasm.h" 29 #include "llvm/Support/Endian.h" 30 #include "llvm/Support/TargetRegistry.h" 31 32 using namespace llvm; 33 34 #define DEBUG_TYPE "wasm-asm-parser" 35 36 namespace { 37 38 /// WebAssemblyOperand - Instances of this class represent the operands in a 39 /// parsed WASM machine instruction. 40 struct WebAssemblyOperand : public MCParsedAsmOperand { 41 enum KindTy { Token, Integer, Float, Symbol } Kind; 42 43 SMLoc StartLoc, EndLoc; 44 45 struct TokOp { 46 StringRef Tok; 47 }; 48 49 struct IntOp { 50 int64_t Val; 51 }; 52 53 struct FltOp { 54 double Val; 55 }; 56 57 struct SymOp { 58 const MCExpr *Exp; 59 }; 60 61 union { 62 struct TokOp Tok; 63 struct IntOp Int; 64 struct FltOp Flt; 65 struct SymOp Sym; 66 }; 67 68 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T) 69 : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {} 70 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I) 71 : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {} 72 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F) 73 : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {} 74 WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S) 75 : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {} 76 77 bool isToken() const override { return Kind == Token; } 78 bool isImm() const override { 79 return Kind == Integer || Kind == Float || Kind == Symbol; 80 } 81 bool isMem() const override { return false; } 82 bool isReg() const override { return false; } 83 84 unsigned getReg() const override { 85 llvm_unreachable("Assembly inspects a register operand"); 86 return 0; 87 } 88 89 StringRef getToken() const { 90 assert(isToken()); 91 return Tok.Tok; 92 } 93 94 SMLoc getStartLoc() const override { return StartLoc; } 95 SMLoc getEndLoc() const override { return EndLoc; } 96 97 void addRegOperands(MCInst &, unsigned) const { 98 // Required by the assembly matcher. 99 llvm_unreachable("Assembly matcher creates register operands"); 100 } 101 102 void addImmOperands(MCInst &Inst, unsigned N) const { 103 assert(N == 1 && "Invalid number of operands!"); 104 if (Kind == Integer) 105 Inst.addOperand(MCOperand::createImm(Int.Val)); 106 else if (Kind == Float) 107 Inst.addOperand(MCOperand::createFPImm(Flt.Val)); 108 else if (Kind == Symbol) 109 Inst.addOperand(MCOperand::createExpr(Sym.Exp)); 110 else 111 llvm_unreachable("Should be immediate or symbol!"); 112 } 113 114 void print(raw_ostream &OS) const override { 115 switch (Kind) { 116 case Token: 117 OS << "Tok:" << Tok.Tok; 118 break; 119 case Integer: 120 OS << "Int:" << Int.Val; 121 break; 122 case Float: 123 OS << "Flt:" << Flt.Val; 124 break; 125 case Symbol: 126 OS << "Sym:" << Sym.Exp; 127 break; 128 } 129 } 130 }; 131 132 class WebAssemblyAsmParser final : public MCTargetAsmParser { 133 MCAsmParser &Parser; 134 MCAsmLexer &Lexer; 135 136 // Much like WebAssemblyAsmPrinter in the backend, we have to own these. 137 std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures; 138 139 // Order of labels, directives and instructions in a .s file have no 140 // syntactical enforcement. This class is a callback from the actual parser, 141 // and yet we have to be feeding data to the streamer in a very particular 142 // order to ensure a correct binary encoding that matches the regular backend 143 // (the streamer does not enforce this). This "state machine" enum helps 144 // guarantee that correct order. 145 enum ParserState { 146 FileStart, 147 Label, 148 FunctionStart, 149 FunctionLocals, 150 Instructions, 151 } CurrentState = FileStart; 152 153 // We track this to see if a .functype following a label is the same, 154 // as this is how we recognize the start of a function. 155 MCSymbol *LastLabel = nullptr; 156 157 public: 158 WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, 159 const MCInstrInfo &MII, const MCTargetOptions &Options) 160 : MCTargetAsmParser(Options, STI, MII), Parser(Parser), 161 Lexer(Parser.getLexer()) { 162 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); 163 } 164 165 void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) { 166 Signatures.push_back(std::move(Sig)); 167 } 168 169 #define GET_ASSEMBLER_HEADER 170 #include "WebAssemblyGenAsmMatcher.inc" 171 172 // TODO: This is required to be implemented, but appears unused. 173 bool ParseRegister(unsigned & /*RegNo*/, SMLoc & /*StartLoc*/, 174 SMLoc & /*EndLoc*/) override { 175 llvm_unreachable("ParseRegister is not implemented."); 176 } 177 178 bool error(const StringRef &Msg, const AsmToken &Tok) { 179 return Parser.Error(Tok.getLoc(), Msg + Tok.getString()); 180 } 181 182 bool isNext(AsmToken::TokenKind Kind) { 183 auto Ok = Lexer.is(Kind); 184 if (Ok) 185 Parser.Lex(); 186 return Ok; 187 } 188 189 bool expect(AsmToken::TokenKind Kind, const char *KindName) { 190 if (!isNext(Kind)) 191 return error(std::string("Expected ") + KindName + ", instead got: ", 192 Lexer.getTok()); 193 return false; 194 } 195 196 StringRef expectIdent() { 197 if (!Lexer.is(AsmToken::Identifier)) { 198 error("Expected identifier, got: ", Lexer.getTok()); 199 return StringRef(); 200 } 201 auto Name = Lexer.getTok().getString(); 202 Parser.Lex(); 203 return Name; 204 } 205 206 Optional<wasm::ValType> parseType(const StringRef &Type) { 207 // FIXME: can't use StringSwitch because wasm::ValType doesn't have a 208 // "invalid" value. 209 if (Type == "i32") 210 return wasm::ValType::I32; 211 if (Type == "i64") 212 return wasm::ValType::I64; 213 if (Type == "f32") 214 return wasm::ValType::F32; 215 if (Type == "f64") 216 return wasm::ValType::F64; 217 if (Type == "v128" || Type == "i8x16" || Type == "i16x8" || 218 Type == "i32x4" || Type == "i64x2" || Type == "f32x4" || 219 Type == "f64x2") 220 return wasm::ValType::V128; 221 return Optional<wasm::ValType>(); 222 } 223 224 bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) { 225 while (Lexer.is(AsmToken::Identifier)) { 226 auto Type = parseType(Lexer.getTok().getString()); 227 if (!Type) 228 return true; 229 Types.push_back(Type.getValue()); 230 Parser.Lex(); 231 if (!isNext(AsmToken::Comma)) 232 break; 233 } 234 return false; 235 } 236 237 void parseSingleInteger(bool IsNegative, OperandVector &Operands) { 238 auto &Int = Lexer.getTok(); 239 int64_t Val = Int.getIntVal(); 240 if (IsNegative) 241 Val = -Val; 242 Operands.push_back(make_unique<WebAssemblyOperand>( 243 WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(), 244 WebAssemblyOperand::IntOp{Val})); 245 Parser.Lex(); 246 } 247 248 bool parseOperandStartingWithInteger(bool IsNegative, OperandVector &Operands, 249 StringRef InstName) { 250 parseSingleInteger(IsNegative, Operands); 251 // FIXME: there is probably a cleaner way to do this. 252 auto IsLoadStore = InstName.startswith("load") || 253 InstName.startswith("store") || 254 InstName.startswith("atomic_load") || 255 InstName.startswith("atomic_store"); 256 if (IsLoadStore) { 257 // Parse load/store operands of the form: offset align 258 auto &Offset = Lexer.getTok(); 259 if (Offset.is(AsmToken::Integer)) { 260 parseSingleInteger(false, Operands); 261 } else { 262 // Alignment not specified. 263 // FIXME: correctly derive a default from the instruction. 264 // We can't just call WebAssembly::GetDefaultP2Align since we don't have 265 // an opcode until after the assembly matcher. 266 Operands.push_back(make_unique<WebAssemblyOperand>( 267 WebAssemblyOperand::Integer, Offset.getLoc(), Offset.getEndLoc(), 268 WebAssemblyOperand::IntOp{0})); 269 } 270 } 271 return false; 272 } 273 274 bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name, 275 SMLoc NameLoc, OperandVector &Operands) override { 276 // Note: Name does NOT point into the sourcecode, but to a local, so 277 // use NameLoc instead. 278 Name = StringRef(NameLoc.getPointer(), Name.size()); 279 280 // WebAssembly has instructions with / in them, which AsmLexer parses 281 // as seperate tokens, so if we find such tokens immediately adjacent (no 282 // whitespace), expand the name to include them: 283 for (;;) { 284 auto &Sep = Lexer.getTok(); 285 if (Sep.getLoc().getPointer() != Name.end() || 286 Sep.getKind() != AsmToken::Slash) 287 break; 288 // Extend name with / 289 Name = StringRef(Name.begin(), Name.size() + Sep.getString().size()); 290 Parser.Lex(); 291 // We must now find another identifier, or error. 292 auto &Id = Lexer.getTok(); 293 if (Id.getKind() != AsmToken::Identifier || 294 Id.getLoc().getPointer() != Name.end()) 295 return error("Incomplete instruction name: ", Id); 296 Name = StringRef(Name.begin(), Name.size() + Id.getString().size()); 297 Parser.Lex(); 298 } 299 300 // Now construct the name as first operand. 301 Operands.push_back(make_unique<WebAssemblyOperand>( 302 WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()), 303 WebAssemblyOperand::TokOp{Name})); 304 auto NamePair = Name.split('.'); 305 // If no '.', there is no type prefix. 306 auto BaseName = NamePair.second.empty() ? NamePair.first : NamePair.second; 307 308 while (Lexer.isNot(AsmToken::EndOfStatement)) { 309 auto &Tok = Lexer.getTok(); 310 switch (Tok.getKind()) { 311 case AsmToken::Identifier: { 312 auto &Id = Lexer.getTok(); 313 const MCExpr *Val; 314 SMLoc End; 315 if (Parser.parsePrimaryExpr(Val, End)) 316 return error("Cannot parse symbol: ", Lexer.getTok()); 317 Operands.push_back(make_unique<WebAssemblyOperand>( 318 WebAssemblyOperand::Symbol, Id.getLoc(), Id.getEndLoc(), 319 WebAssemblyOperand::SymOp{Val})); 320 break; 321 } 322 case AsmToken::Minus: 323 Parser.Lex(); 324 if (Lexer.isNot(AsmToken::Integer)) 325 return error("Expected integer instead got: ", Lexer.getTok()); 326 if (parseOperandStartingWithInteger(true, Operands, BaseName)) 327 return true; 328 break; 329 case AsmToken::Integer: 330 if (parseOperandStartingWithInteger(false, Operands, BaseName)) 331 return true; 332 break; 333 case AsmToken::Real: { 334 double Val; 335 if (Tok.getString().getAsDouble(Val, false)) 336 return error("Cannot parse real: ", Tok); 337 Operands.push_back(make_unique<WebAssemblyOperand>( 338 WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(), 339 WebAssemblyOperand::FltOp{Val})); 340 Parser.Lex(); 341 break; 342 } 343 default: 344 return error("Unexpected token in operand: ", Tok); 345 } 346 if (Lexer.isNot(AsmToken::EndOfStatement)) { 347 if (expect(AsmToken::Comma, ",")) 348 return true; 349 } 350 } 351 Parser.Lex(); 352 353 // Block instructions require a signature index, but these are missing in 354 // assembly, so we add a dummy one explicitly (since we have no control 355 // over signature tables here, we assume these will be regenerated when 356 // the wasm module is generated). 357 if (BaseName == "block" || BaseName == "loop" || BaseName == "try") { 358 Operands.push_back(make_unique<WebAssemblyOperand>( 359 WebAssemblyOperand::Integer, NameLoc, NameLoc, 360 WebAssemblyOperand::IntOp{-1})); 361 } 362 return false; 363 } 364 365 void onLabelParsed(MCSymbol *Symbol) override { 366 LastLabel = Symbol; 367 CurrentState = Label; 368 } 369 370 bool parseSignature(wasm::WasmSignature *Signature) { 371 if (expect(AsmToken::LParen, "(")) 372 return true; 373 if (parseRegTypeList(Signature->Params)) 374 return true; 375 if (expect(AsmToken::RParen, ")")) 376 return true; 377 if (expect(AsmToken::MinusGreater, "->")) 378 return true; 379 if (expect(AsmToken::LParen, "(")) 380 return true; 381 if (parseRegTypeList(Signature->Returns)) 382 return true; 383 if (expect(AsmToken::RParen, ")")) 384 return true; 385 return false; 386 } 387 388 // This function processes wasm-specific directives streamed to 389 // WebAssemblyTargetStreamer, all others go to the generic parser 390 // (see WasmAsmParser). 391 bool ParseDirective(AsmToken DirectiveID) override { 392 // This function has a really weird return value behavior that is different 393 // from all the other parsing functions: 394 // - return true && no tokens consumed -> don't know this directive / let 395 // the generic parser handle it. 396 // - return true && tokens consumed -> a parsing error occurred. 397 // - return false -> processed this directive successfully. 398 assert(DirectiveID.getKind() == AsmToken::Identifier); 399 auto &Out = getStreamer(); 400 auto &TOut = 401 reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer()); 402 403 // TODO: any time we return an error, at least one token must have been 404 // consumed, otherwise this will not signal an error to the caller. 405 if (DirectiveID.getString() == ".globaltype") { 406 auto SymName = expectIdent(); 407 if (SymName.empty()) 408 return true; 409 if (expect(AsmToken::Comma, ",")) 410 return true; 411 auto TypeTok = Lexer.getTok(); 412 auto TypeName = expectIdent(); 413 if (TypeName.empty()) 414 return true; 415 auto Type = parseType(TypeName); 416 if (!Type) 417 return error("Unknown type in .globaltype directive: ", TypeTok); 418 // Now set this symbol with the correct type. 419 auto WasmSym = cast<MCSymbolWasm>( 420 TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); 421 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); 422 WasmSym->setGlobalType( 423 wasm::WasmGlobalType{uint8_t(Type.getValue()), true}); 424 // And emit the directive again. 425 TOut.emitGlobalType(WasmSym); 426 return expect(AsmToken::EndOfStatement, "EOL"); 427 } 428 429 if (DirectiveID.getString() == ".functype") { 430 // This code has to send things to the streamer similar to 431 // WebAssemblyAsmPrinter::EmitFunctionBodyStart. 432 // TODO: would be good to factor this into a common function, but the 433 // assembler and backend really don't share any common code, and this code 434 // parses the locals seperately. 435 auto SymName = expectIdent(); 436 if (SymName.empty()) 437 return true; 438 auto WasmSym = cast<MCSymbolWasm>( 439 TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); 440 if (CurrentState == Label && WasmSym == LastLabel) { 441 // This .functype indicates a start of a function. 442 CurrentState = FunctionStart; 443 } 444 auto Signature = make_unique<wasm::WasmSignature>(); 445 if (parseSignature(Signature.get())) 446 return true; 447 WasmSym->setSignature(Signature.get()); 448 addSignature(std::move(Signature)); 449 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 450 TOut.emitFunctionType(WasmSym); 451 // TODO: backend also calls TOut.emitIndIdx, but that is not implemented. 452 return expect(AsmToken::EndOfStatement, "EOL"); 453 } 454 455 if (DirectiveID.getString() == ".eventtype") { 456 auto SymName = expectIdent(); 457 if (SymName.empty()) 458 return true; 459 auto WasmSym = cast<MCSymbolWasm>( 460 TOut.getStreamer().getContext().getOrCreateSymbol(SymName)); 461 auto Signature = make_unique<wasm::WasmSignature>(); 462 if (parseRegTypeList(Signature->Params)) 463 return true; 464 WasmSym->setSignature(Signature.get()); 465 addSignature(std::move(Signature)); 466 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_EVENT); 467 TOut.emitEventType(WasmSym); 468 // TODO: backend also calls TOut.emitIndIdx, but that is not implemented. 469 return expect(AsmToken::EndOfStatement, "EOL"); 470 } 471 472 if (DirectiveID.getString() == ".local") { 473 if (CurrentState != FunctionStart) 474 return error(".local directive should follow the start of a function", 475 Lexer.getTok()); 476 SmallVector<wasm::ValType, 4> Locals; 477 if (parseRegTypeList(Locals)) 478 return true; 479 TOut.emitLocal(Locals); 480 CurrentState = FunctionLocals; 481 return expect(AsmToken::EndOfStatement, "EOL"); 482 } 483 484 return true; // We didn't process this directive. 485 } 486 487 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/, 488 OperandVector &Operands, MCStreamer &Out, 489 uint64_t &ErrorInfo, 490 bool MatchingInlineAsm) override { 491 MCInst Inst; 492 unsigned MatchResult = 493 MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); 494 switch (MatchResult) { 495 case Match_Success: { 496 if (CurrentState == FunctionStart) { 497 // This is the first instruction in a function, but we haven't seen 498 // a .local directive yet. The streamer requires locals to be encoded 499 // as a prelude to the instructions, so emit an empty list of locals 500 // here. 501 auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>( 502 *Out.getTargetStreamer()); 503 TOut.emitLocal(SmallVector<wasm::ValType, 0>()); 504 } 505 CurrentState = Instructions; 506 Out.EmitInstruction(Inst, getSTI()); 507 return false; 508 } 509 case Match_MissingFeature: 510 return Parser.Error( 511 IDLoc, "instruction requires a WASM feature not currently enabled"); 512 case Match_MnemonicFail: 513 return Parser.Error(IDLoc, "invalid instruction"); 514 case Match_NearMisses: 515 return Parser.Error(IDLoc, "ambiguous instruction"); 516 case Match_InvalidTiedOperand: 517 case Match_InvalidOperand: { 518 SMLoc ErrorLoc = IDLoc; 519 if (ErrorInfo != ~0ULL) { 520 if (ErrorInfo >= Operands.size()) 521 return Parser.Error(IDLoc, "too few operands for instruction"); 522 ErrorLoc = Operands[ErrorInfo]->getStartLoc(); 523 if (ErrorLoc == SMLoc()) 524 ErrorLoc = IDLoc; 525 } 526 return Parser.Error(ErrorLoc, "invalid operand for instruction"); 527 } 528 } 529 llvm_unreachable("Implement any new match types added!"); 530 } 531 }; 532 } // end anonymous namespace 533 534 // Force static initialization. 535 extern "C" void LLVMInitializeWebAssemblyAsmParser() { 536 RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32()); 537 RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64()); 538 } 539 540 #define GET_REGISTER_MATCHER 541 #define GET_MATCHER_IMPLEMENTATION 542 #include "WebAssemblyGenAsmMatcher.inc" 543