1 //===- MILexer.cpp - Machine instructions lexer implementation ------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the lexing of machine instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MILexer.h" 15 #include "llvm/ADT/APSInt.h" 16 #include "llvm/ADT/None.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/ADT/Twine.h" 22 #include <algorithm> 23 #include <cassert> 24 #include <cctype> 25 #include <string> 26 27 using namespace llvm; 28 29 namespace { 30 31 using ErrorCallbackType = 32 function_ref<void(StringRef::iterator Loc, const Twine &)>; 33 34 /// This class provides a way to iterate and get characters from the source 35 /// string. 36 class Cursor { 37 const char *Ptr = nullptr; 38 const char *End = nullptr; 39 40 public: 41 Cursor(NoneType) {} 42 43 explicit Cursor(StringRef Str) { 44 Ptr = Str.data(); 45 End = Ptr + Str.size(); 46 } 47 48 bool isEOF() const { return Ptr == End; } 49 50 char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 51 52 void advance(unsigned I = 1) { Ptr += I; } 53 54 StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 55 56 StringRef upto(Cursor C) const { 57 assert(C.Ptr >= Ptr && C.Ptr <= End); 58 return StringRef(Ptr, C.Ptr - Ptr); 59 } 60 61 StringRef::iterator location() const { return Ptr; } 62 63 operator bool() const { return Ptr != nullptr; } 64 }; 65 66 } // end anonymous namespace 67 68 MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { 69 this->Kind = Kind; 70 this->Range = Range; 71 return *this; 72 } 73 74 MIToken &MIToken::setStringValue(StringRef StrVal) { 75 StringValue = StrVal; 76 return *this; 77 } 78 79 MIToken &MIToken::setOwnedStringValue(std::string StrVal) { 80 StringValueStorage = std::move(StrVal); 81 StringValue = StringValueStorage; 82 return *this; 83 } 84 85 MIToken &MIToken::setIntegerValue(APSInt IntVal) { 86 this->IntVal = std::move(IntVal); 87 return *this; 88 } 89 90 /// Skip the leading whitespace characters and return the updated cursor. 91 static Cursor skipWhitespace(Cursor C) { 92 while (isblank(C.peek())) 93 C.advance(); 94 return C; 95 } 96 97 static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; } 98 99 /// Skip a line comment and return the updated cursor. 100 static Cursor skipComment(Cursor C) { 101 if (C.peek() != ';') 102 return C; 103 while (!isNewlineChar(C.peek()) && !C.isEOF()) 104 C.advance(); 105 return C; 106 } 107 108 /// Return true if the given character satisfies the following regular 109 /// expression: [-a-zA-Z$._0-9] 110 static bool isIdentifierChar(char C) { 111 return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || 112 C == '$'; 113 } 114 115 /// Unescapes the given string value. 116 /// 117 /// Expects the string value to be quoted. 118 static std::string unescapeQuotedString(StringRef Value) { 119 assert(Value.front() == '"' && Value.back() == '"'); 120 Cursor C = Cursor(Value.substr(1, Value.size() - 2)); 121 122 std::string Str; 123 Str.reserve(C.remaining().size()); 124 while (!C.isEOF()) { 125 char Char = C.peek(); 126 if (Char == '\\') { 127 if (C.peek(1) == '\\') { 128 // Two '\' become one 129 Str += '\\'; 130 C.advance(2); 131 continue; 132 } 133 if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { 134 Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); 135 C.advance(3); 136 continue; 137 } 138 } 139 Str += Char; 140 C.advance(); 141 } 142 return Str; 143 } 144 145 /// Lex a string constant using the following regular expression: \"[^\"]*\" 146 static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) { 147 assert(C.peek() == '"'); 148 for (C.advance(); C.peek() != '"'; C.advance()) { 149 if (C.isEOF() || isNewlineChar(C.peek())) { 150 ErrorCallback( 151 C.location(), 152 "end of machine instruction reached before the closing '\"'"); 153 return None; 154 } 155 } 156 C.advance(); 157 return C; 158 } 159 160 static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, 161 unsigned PrefixLength, ErrorCallbackType ErrorCallback) { 162 auto Range = C; 163 C.advance(PrefixLength); 164 if (C.peek() == '"') { 165 if (Cursor R = lexStringConstant(C, ErrorCallback)) { 166 StringRef String = Range.upto(R); 167 Token.reset(Type, String) 168 .setOwnedStringValue( 169 unescapeQuotedString(String.drop_front(PrefixLength))); 170 return R; 171 } 172 Token.reset(MIToken::Error, Range.remaining()); 173 return Range; 174 } 175 while (isIdentifierChar(C.peek())) 176 C.advance(); 177 Token.reset(Type, Range.upto(C)) 178 .setStringValue(Range.upto(C).drop_front(PrefixLength)); 179 return C; 180 } 181 182 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 183 return StringSwitch<MIToken::TokenKind>(Identifier) 184 .Case("_", MIToken::underscore) 185 .Case("implicit", MIToken::kw_implicit) 186 .Case("implicit-def", MIToken::kw_implicit_define) 187 .Case("def", MIToken::kw_def) 188 .Case("dead", MIToken::kw_dead) 189 .Case("killed", MIToken::kw_killed) 190 .Case("undef", MIToken::kw_undef) 191 .Case("internal", MIToken::kw_internal) 192 .Case("early-clobber", MIToken::kw_early_clobber) 193 .Case("debug-use", MIToken::kw_debug_use) 194 .Case("renamable", MIToken::kw_renamable) 195 .Case("tied-def", MIToken::kw_tied_def) 196 .Case("frame-setup", MIToken::kw_frame_setup) 197 .Case("frame-destroy", MIToken::kw_frame_destroy) 198 .Case("nnan", MIToken::kw_nnan) 199 .Case("ninf", MIToken::kw_ninf) 200 .Case("nsz", MIToken::kw_nsz) 201 .Case("arcp", MIToken::kw_arcp) 202 .Case("contract", MIToken::kw_contract) 203 .Case("afn", MIToken::kw_afn) 204 .Case("reassoc", MIToken::kw_reassoc) 205 .Case("nuw" , MIToken::kw_nuw) 206 .Case("nsw" , MIToken::kw_nsw) 207 .Case("exact" , MIToken::kw_exact) 208 .Case("debug-location", MIToken::kw_debug_location) 209 .Case("same_value", MIToken::kw_cfi_same_value) 210 .Case("offset", MIToken::kw_cfi_offset) 211 .Case("rel_offset", MIToken::kw_cfi_rel_offset) 212 .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register) 213 .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) 214 .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset) 215 .Case("escape", MIToken::kw_cfi_escape) 216 .Case("def_cfa", MIToken::kw_cfi_def_cfa) 217 .Case("remember_state", MIToken::kw_cfi_remember_state) 218 .Case("restore", MIToken::kw_cfi_restore) 219 .Case("restore_state", MIToken::kw_cfi_restore_state) 220 .Case("undefined", MIToken::kw_cfi_undefined) 221 .Case("register", MIToken::kw_cfi_register) 222 .Case("window_save", MIToken::kw_cfi_window_save) 223 .Case("blockaddress", MIToken::kw_blockaddress) 224 .Case("intrinsic", MIToken::kw_intrinsic) 225 .Case("target-index", MIToken::kw_target_index) 226 .Case("half", MIToken::kw_half) 227 .Case("float", MIToken::kw_float) 228 .Case("double", MIToken::kw_double) 229 .Case("x86_fp80", MIToken::kw_x86_fp80) 230 .Case("fp128", MIToken::kw_fp128) 231 .Case("ppc_fp128", MIToken::kw_ppc_fp128) 232 .Case("target-flags", MIToken::kw_target_flags) 233 .Case("volatile", MIToken::kw_volatile) 234 .Case("non-temporal", MIToken::kw_non_temporal) 235 .Case("dereferenceable", MIToken::kw_dereferenceable) 236 .Case("invariant", MIToken::kw_invariant) 237 .Case("align", MIToken::kw_align) 238 .Case("addrspace", MIToken::kw_addrspace) 239 .Case("stack", MIToken::kw_stack) 240 .Case("got", MIToken::kw_got) 241 .Case("jump-table", MIToken::kw_jump_table) 242 .Case("constant-pool", MIToken::kw_constant_pool) 243 .Case("call-entry", MIToken::kw_call_entry) 244 .Case("liveout", MIToken::kw_liveout) 245 .Case("address-taken", MIToken::kw_address_taken) 246 .Case("landing-pad", MIToken::kw_landing_pad) 247 .Case("liveins", MIToken::kw_liveins) 248 .Case("successors", MIToken::kw_successors) 249 .Case("floatpred", MIToken::kw_floatpred) 250 .Case("intpred", MIToken::kw_intpred) 251 .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol) 252 .Case("post-instr-symbol", MIToken::kw_post_instr_symbol) 253 .Case("unknown-size", MIToken::kw_unknown_size) 254 .Default(MIToken::Identifier); 255 } 256 257 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 258 if (!isalpha(C.peek()) && C.peek() != '_') 259 return None; 260 auto Range = C; 261 while (isIdentifierChar(C.peek())) 262 C.advance(); 263 auto Identifier = Range.upto(C); 264 Token.reset(getIdentifierKind(Identifier), Identifier) 265 .setStringValue(Identifier); 266 return C; 267 } 268 269 static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, 270 ErrorCallbackType ErrorCallback) { 271 bool IsReference = C.remaining().startswith("%bb."); 272 if (!IsReference && !C.remaining().startswith("bb.")) 273 return None; 274 auto Range = C; 275 unsigned PrefixLength = IsReference ? 4 : 3; 276 C.advance(PrefixLength); // Skip '%bb.' or 'bb.' 277 if (!isdigit(C.peek())) { 278 Token.reset(MIToken::Error, C.remaining()); 279 ErrorCallback(C.location(), "expected a number after '%bb.'"); 280 return C; 281 } 282 auto NumberRange = C; 283 while (isdigit(C.peek())) 284 C.advance(); 285 StringRef Number = NumberRange.upto(C); 286 unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' 287 // TODO: The format bb.<id>.<irname> is supported only when it's not a 288 // reference. Once we deprecate the format where the irname shows up, we 289 // should only lex forward if it is a reference. 290 if (C.peek() == '.') { 291 C.advance(); // Skip '.' 292 ++StringOffset; 293 while (isIdentifierChar(C.peek())) 294 C.advance(); 295 } 296 Token.reset(IsReference ? MIToken::MachineBasicBlock 297 : MIToken::MachineBasicBlockLabel, 298 Range.upto(C)) 299 .setIntegerValue(APSInt(Number)) 300 .setStringValue(Range.upto(C).drop_front(StringOffset)); 301 return C; 302 } 303 304 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, 305 MIToken::TokenKind Kind) { 306 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 307 return None; 308 auto Range = C; 309 C.advance(Rule.size()); 310 auto NumberRange = C; 311 while (isdigit(C.peek())) 312 C.advance(); 313 Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); 314 return C; 315 } 316 317 static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, 318 MIToken::TokenKind Kind) { 319 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 320 return None; 321 auto Range = C; 322 C.advance(Rule.size()); 323 auto NumberRange = C; 324 while (isdigit(C.peek())) 325 C.advance(); 326 StringRef Number = NumberRange.upto(C); 327 unsigned StringOffset = Rule.size() + Number.size(); 328 if (C.peek() == '.') { 329 C.advance(); 330 ++StringOffset; 331 while (isIdentifierChar(C.peek())) 332 C.advance(); 333 } 334 Token.reset(Kind, Range.upto(C)) 335 .setIntegerValue(APSInt(Number)) 336 .setStringValue(Range.upto(C).drop_front(StringOffset)); 337 return C; 338 } 339 340 static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { 341 return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); 342 } 343 344 static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { 345 return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); 346 } 347 348 static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { 349 return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); 350 } 351 352 static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { 353 return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); 354 } 355 356 static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, 357 ErrorCallbackType ErrorCallback) { 358 const StringRef Rule = "%subreg."; 359 if (!C.remaining().startswith(Rule)) 360 return None; 361 return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(), 362 ErrorCallback); 363 } 364 365 static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, 366 ErrorCallbackType ErrorCallback) { 367 const StringRef Rule = "%ir-block."; 368 if (!C.remaining().startswith(Rule)) 369 return None; 370 if (isdigit(C.peek(Rule.size()))) 371 return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); 372 return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); 373 } 374 375 static Cursor maybeLexIRValue(Cursor C, MIToken &Token, 376 ErrorCallbackType ErrorCallback) { 377 const StringRef Rule = "%ir."; 378 if (!C.remaining().startswith(Rule)) 379 return None; 380 if (isdigit(C.peek(Rule.size()))) 381 return maybeLexIndex(C, Token, Rule, MIToken::IRValue); 382 return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); 383 } 384 385 static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, 386 ErrorCallbackType ErrorCallback) { 387 if (C.peek() != '"') 388 return None; 389 return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0, 390 ErrorCallback); 391 } 392 393 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { 394 auto Range = C; 395 C.advance(); // Skip '%' 396 auto NumberRange = C; 397 while (isdigit(C.peek())) 398 C.advance(); 399 Token.reset(MIToken::VirtualRegister, Range.upto(C)) 400 .setIntegerValue(APSInt(NumberRange.upto(C))); 401 return C; 402 } 403 404 /// Returns true for a character allowed in a register name. 405 static bool isRegisterChar(char C) { 406 return isIdentifierChar(C) && C != '.'; 407 } 408 409 static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) { 410 Cursor Range = C; 411 C.advance(); // Skip '%' 412 while (isRegisterChar(C.peek())) 413 C.advance(); 414 Token.reset(MIToken::NamedVirtualRegister, Range.upto(C)) 415 .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' 416 return C; 417 } 418 419 static Cursor maybeLexRegister(Cursor C, MIToken &Token, 420 ErrorCallbackType ErrorCallback) { 421 if (C.peek() != '%' && C.peek() != '$') 422 return None; 423 424 if (C.peek() == '%') { 425 if (isdigit(C.peek(1))) 426 return lexVirtualRegister(C, Token); 427 428 if (isRegisterChar(C.peek(1))) 429 return lexNamedVirtualRegister(C, Token); 430 431 return None; 432 } 433 434 assert(C.peek() == '$'); 435 auto Range = C; 436 C.advance(); // Skip '$' 437 while (isRegisterChar(C.peek())) 438 C.advance(); 439 Token.reset(MIToken::NamedRegister, Range.upto(C)) 440 .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$' 441 return C; 442 } 443 444 static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, 445 ErrorCallbackType ErrorCallback) { 446 if (C.peek() != '@') 447 return None; 448 if (!isdigit(C.peek(1))) 449 return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1, 450 ErrorCallback); 451 auto Range = C; 452 C.advance(1); // Skip the '@' 453 auto NumberRange = C; 454 while (isdigit(C.peek())) 455 C.advance(); 456 Token.reset(MIToken::GlobalValue, Range.upto(C)) 457 .setIntegerValue(APSInt(NumberRange.upto(C))); 458 return C; 459 } 460 461 static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, 462 ErrorCallbackType ErrorCallback) { 463 if (C.peek() != '&') 464 return None; 465 return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, 466 ErrorCallback); 467 } 468 469 static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, 470 ErrorCallbackType ErrorCallback) { 471 const StringRef Rule = "<mcsymbol "; 472 if (!C.remaining().startswith(Rule)) 473 return None; 474 auto Start = C; 475 C.advance(Rule.size()); 476 477 // Try a simple unquoted name. 478 if (C.peek() != '"') { 479 while (isIdentifierChar(C.peek())) 480 C.advance(); 481 StringRef String = Start.upto(C).drop_front(Rule.size()); 482 if (C.peek() != '>') { 483 ErrorCallback(C.location(), 484 "expected the '<mcsymbol ...' to be closed by a '>'"); 485 Token.reset(MIToken::Error, Start.remaining()); 486 return Start; 487 } 488 C.advance(); 489 490 Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String); 491 return C; 492 } 493 494 // Otherwise lex out a quoted name. 495 Cursor R = lexStringConstant(C, ErrorCallback); 496 if (!R) { 497 ErrorCallback(C.location(), 498 "unable to parse quoted string from opening quote"); 499 Token.reset(MIToken::Error, Start.remaining()); 500 return Start; 501 } 502 StringRef String = Start.upto(R).drop_front(Rule.size()); 503 if (R.peek() != '>') { 504 ErrorCallback(R.location(), 505 "expected the '<mcsymbol ...' to be closed by a '>'"); 506 Token.reset(MIToken::Error, Start.remaining()); 507 return Start; 508 } 509 R.advance(); 510 511 Token.reset(MIToken::MCSymbol, Start.upto(R)) 512 .setOwnedStringValue(unescapeQuotedString(String)); 513 return R; 514 } 515 516 static bool isValidHexFloatingPointPrefix(char C) { 517 return C == 'H' || C == 'K' || C == 'L' || C == 'M'; 518 } 519 520 static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { 521 C.advance(); 522 // Skip over [0-9]*([eE][-+]?[0-9]+)? 523 while (isdigit(C.peek())) 524 C.advance(); 525 if ((C.peek() == 'e' || C.peek() == 'E') && 526 (isdigit(C.peek(1)) || 527 ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) { 528 C.advance(2); 529 while (isdigit(C.peek())) 530 C.advance(); 531 } 532 Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 533 return C; 534 } 535 536 static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) { 537 if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X')) 538 return None; 539 Cursor Range = C; 540 C.advance(2); 541 unsigned PrefLen = 2; 542 if (isValidHexFloatingPointPrefix(C.peek())) { 543 C.advance(); 544 PrefLen++; 545 } 546 while (isxdigit(C.peek())) 547 C.advance(); 548 StringRef StrVal = Range.upto(C); 549 if (StrVal.size() <= PrefLen) 550 return None; 551 if (PrefLen == 2) 552 Token.reset(MIToken::HexLiteral, Range.upto(C)); 553 else // It must be 3, which means that there was a floating-point prefix. 554 Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 555 return C; 556 } 557 558 static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { 559 if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 560 return None; 561 auto Range = C; 562 C.advance(); 563 while (isdigit(C.peek())) 564 C.advance(); 565 if (C.peek() == '.') 566 return lexFloatingPointLiteral(Range, C, Token); 567 StringRef StrVal = Range.upto(C); 568 Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); 569 return C; 570 } 571 572 static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { 573 return StringSwitch<MIToken::TokenKind>(Identifier) 574 .Case("!tbaa", MIToken::md_tbaa) 575 .Case("!alias.scope", MIToken::md_alias_scope) 576 .Case("!noalias", MIToken::md_noalias) 577 .Case("!range", MIToken::md_range) 578 .Case("!DIExpression", MIToken::md_diexpr) 579 .Default(MIToken::Error); 580 } 581 582 static Cursor maybeLexExlaim(Cursor C, MIToken &Token, 583 ErrorCallbackType ErrorCallback) { 584 if (C.peek() != '!') 585 return None; 586 auto Range = C; 587 C.advance(1); 588 if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) { 589 Token.reset(MIToken::exclaim, Range.upto(C)); 590 return C; 591 } 592 while (isIdentifierChar(C.peek())) 593 C.advance(); 594 StringRef StrVal = Range.upto(C); 595 Token.reset(getMetadataKeywordKind(StrVal), StrVal); 596 if (Token.isError()) 597 ErrorCallback(Token.location(), 598 "use of unknown metadata keyword '" + StrVal + "'"); 599 return C; 600 } 601 602 static MIToken::TokenKind symbolToken(char C) { 603 switch (C) { 604 case ',': 605 return MIToken::comma; 606 case '.': 607 return MIToken::dot; 608 case '=': 609 return MIToken::equal; 610 case ':': 611 return MIToken::colon; 612 case '(': 613 return MIToken::lparen; 614 case ')': 615 return MIToken::rparen; 616 case '{': 617 return MIToken::lbrace; 618 case '}': 619 return MIToken::rbrace; 620 case '+': 621 return MIToken::plus; 622 case '-': 623 return MIToken::minus; 624 case '<': 625 return MIToken::less; 626 case '>': 627 return MIToken::greater; 628 default: 629 return MIToken::Error; 630 } 631 } 632 633 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 634 MIToken::TokenKind Kind; 635 unsigned Length = 1; 636 if (C.peek() == ':' && C.peek(1) == ':') { 637 Kind = MIToken::coloncolon; 638 Length = 2; 639 } else 640 Kind = symbolToken(C.peek()); 641 if (Kind == MIToken::Error) 642 return None; 643 auto Range = C; 644 C.advance(Length); 645 Token.reset(Kind, Range.upto(C)); 646 return C; 647 } 648 649 static Cursor maybeLexNewline(Cursor C, MIToken &Token) { 650 if (!isNewlineChar(C.peek())) 651 return None; 652 auto Range = C; 653 C.advance(); 654 Token.reset(MIToken::Newline, Range.upto(C)); 655 return C; 656 } 657 658 static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, 659 ErrorCallbackType ErrorCallback) { 660 if (C.peek() != '`') 661 return None; 662 auto Range = C; 663 C.advance(); 664 auto StrRange = C; 665 while (C.peek() != '`') { 666 if (C.isEOF() || isNewlineChar(C.peek())) { 667 ErrorCallback( 668 C.location(), 669 "end of machine instruction reached before the closing '`'"); 670 Token.reset(MIToken::Error, Range.remaining()); 671 return C; 672 } 673 C.advance(); 674 } 675 StringRef Value = StrRange.upto(C); 676 C.advance(); 677 Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value); 678 return C; 679 } 680 681 StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, 682 ErrorCallbackType ErrorCallback) { 683 auto C = skipComment(skipWhitespace(Cursor(Source))); 684 if (C.isEOF()) { 685 Token.reset(MIToken::Eof, C.remaining()); 686 return C.remaining(); 687 } 688 689 if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 690 return R.remaining(); 691 if (Cursor R = maybeLexIdentifier(C, Token)) 692 return R.remaining(); 693 if (Cursor R = maybeLexJumpTableIndex(C, Token)) 694 return R.remaining(); 695 if (Cursor R = maybeLexStackObject(C, Token)) 696 return R.remaining(); 697 if (Cursor R = maybeLexFixedStackObject(C, Token)) 698 return R.remaining(); 699 if (Cursor R = maybeLexConstantPoolItem(C, Token)) 700 return R.remaining(); 701 if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback)) 702 return R.remaining(); 703 if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) 704 return R.remaining(); 705 if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) 706 return R.remaining(); 707 if (Cursor R = maybeLexRegister(C, Token, ErrorCallback)) 708 return R.remaining(); 709 if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) 710 return R.remaining(); 711 if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) 712 return R.remaining(); 713 if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback)) 714 return R.remaining(); 715 if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) 716 return R.remaining(); 717 if (Cursor R = maybeLexNumericalLiteral(C, Token)) 718 return R.remaining(); 719 if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) 720 return R.remaining(); 721 if (Cursor R = maybeLexSymbol(C, Token)) 722 return R.remaining(); 723 if (Cursor R = maybeLexNewline(C, Token)) 724 return R.remaining(); 725 if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) 726 return R.remaining(); 727 if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) 728 return R.remaining(); 729 730 Token.reset(MIToken::Error, C.remaining()); 731 ErrorCallback(C.location(), 732 Twine("unexpected character '") + Twine(C.peek()) + "'"); 733 return C.remaining(); 734 } 735