1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This class implements the lexer for assembly files. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/MC/MCParser/AsmLexer.h" 15 #include "llvm/ADT/APInt.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/MC/MCAsmInfo.h" 20 #include "llvm/MC/MCParser/MCAsmLexer.h" 21 #include "llvm/Support/SMLoc.h" 22 #include "llvm/Support/SaveAndRestore.h" 23 #include <cassert> 24 #include <cctype> 25 #include <cstdio> 26 #include <cstring> 27 #include <string> 28 #include <tuple> 29 #include <utility> 30 31 using namespace llvm; 32 33 AsmLexer::AsmLexer(const MCAsmInfo &MAI) 34 : MAI(MAI), CurPtr(nullptr), IsAtStartOfLine(true), 35 IsAtStartOfStatement(true), IsParsingMSInlineAsm(false), 36 IsPeeking(false) { 37 AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); 38 } 39 40 AsmLexer::~AsmLexer() { 41 } 42 43 void AsmLexer::setBuffer(StringRef Buf, const char *ptr) { 44 CurBuf = Buf; 45 46 if (ptr) 47 CurPtr = ptr; 48 else 49 CurPtr = CurBuf.begin(); 50 51 TokStart = nullptr; 52 } 53 54 /// ReturnError - Set the error to the specified string at the specified 55 /// location. This is defined to always return AsmToken::Error. 56 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { 57 SetError(SMLoc::getFromPointer(Loc), Msg); 58 59 return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc)); 60 } 61 62 int AsmLexer::getNextChar() { 63 if (CurPtr == CurBuf.end()) 64 return EOF; 65 return (unsigned char)*CurPtr++; 66 } 67 68 /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? 69 /// 70 /// The leading integral digit sequence and dot should have already been 71 /// consumed, some or all of the fractional digit sequence *can* have been 72 /// consumed. 73 AsmToken AsmLexer::LexFloatLiteral() { 74 // Skip the fractional digit sequence. 75 while (isdigit(*CurPtr)) 76 ++CurPtr; 77 78 // Check for exponent; we intentionally accept a slighlty wider set of 79 // literals here and rely on the upstream client to reject invalid ones (e.g., 80 // "1e+"). 81 if (*CurPtr == 'e' || *CurPtr == 'E') { 82 ++CurPtr; 83 if (*CurPtr == '-' || *CurPtr == '+') 84 ++CurPtr; 85 while (isdigit(*CurPtr)) 86 ++CurPtr; 87 } 88 89 return AsmToken(AsmToken::Real, 90 StringRef(TokStart, CurPtr - TokStart)); 91 } 92 93 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+ 94 /// while making sure there are enough actual digits around for the constant to 95 /// be valid. 96 /// 97 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed 98 /// before we get here. 99 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { 100 assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') && 101 "unexpected parse state in floating hex"); 102 bool NoFracDigits = true; 103 104 // Skip the fractional part if there is one 105 if (*CurPtr == '.') { 106 ++CurPtr; 107 108 const char *FracStart = CurPtr; 109 while (isxdigit(*CurPtr)) 110 ++CurPtr; 111 112 NoFracDigits = CurPtr == FracStart; 113 } 114 115 if (NoIntDigits && NoFracDigits) 116 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 117 "expected at least one significand digit"); 118 119 // Make sure we do have some kind of proper exponent part 120 if (*CurPtr != 'p' && *CurPtr != 'P') 121 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 122 "expected exponent part 'p'"); 123 ++CurPtr; 124 125 if (*CurPtr == '+' || *CurPtr == '-') 126 ++CurPtr; 127 128 // N.b. exponent digits are *not* hex 129 const char *ExpStart = CurPtr; 130 while (isdigit(*CurPtr)) 131 ++CurPtr; 132 133 if (CurPtr == ExpStart) 134 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 135 "expected at least one exponent digit"); 136 137 return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); 138 } 139 140 /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* 141 static bool IsIdentifierChar(char c, bool AllowAt) { 142 return isalnum(c) || c == '_' || c == '$' || c == '.' || 143 (c == '@' && AllowAt) || c == '?'; 144 } 145 146 AsmToken AsmLexer::LexIdentifier() { 147 // Check for floating point literals. 148 if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { 149 // Disambiguate a .1243foo identifier from a floating literal. 150 while (isdigit(*CurPtr)) 151 ++CurPtr; 152 if (*CurPtr == 'e' || *CurPtr == 'E' || 153 !IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) 154 return LexFloatLiteral(); 155 } 156 157 while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) 158 ++CurPtr; 159 160 // Handle . as a special case. 161 if (CurPtr == TokStart+1 && TokStart[0] == '.') 162 return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); 163 164 return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); 165 } 166 167 /// LexSlash: Slash: / 168 /// C-Style Comment: /* ... */ 169 AsmToken AsmLexer::LexSlash() { 170 switch (*CurPtr) { 171 case '*': 172 IsAtStartOfStatement = false; 173 break; // C style comment. 174 case '/': 175 ++CurPtr; 176 return LexLineComment(); 177 default: 178 IsAtStartOfStatement = false; 179 return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); 180 } 181 182 // C Style comment. 183 ++CurPtr; // skip the star. 184 const char *CommentTextStart = CurPtr; 185 while (CurPtr != CurBuf.end()) { 186 switch (*CurPtr++) { 187 case '*': 188 // End of the comment? 189 if (*CurPtr != '/') 190 break; 191 // If we have a CommentConsumer, notify it about the comment. 192 if (CommentConsumer) { 193 CommentConsumer->HandleComment( 194 SMLoc::getFromPointer(CommentTextStart), 195 StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart)); 196 } 197 ++CurPtr; // End the */. 198 return AsmToken(AsmToken::Comment, 199 StringRef(TokStart, CurPtr - TokStart)); 200 } 201 } 202 return ReturnError(TokStart, "unterminated comment"); 203 } 204 205 /// LexLineComment: Comment: #[^\n]* 206 /// : //[^\n]* 207 AsmToken AsmLexer::LexLineComment() { 208 // Mark This as an end of statement with a body of the 209 // comment. While it would be nicer to leave this two tokens, 210 // backwards compatability with TargetParsers makes keeping this in this form 211 // better. 212 const char *CommentTextStart = CurPtr; 213 int CurChar = getNextChar(); 214 while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) 215 CurChar = getNextChar(); 216 217 // If we have a CommentConsumer, notify it about the comment. 218 if (CommentConsumer) { 219 CommentConsumer->HandleComment( 220 SMLoc::getFromPointer(CommentTextStart), 221 StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart)); 222 } 223 224 IsAtStartOfLine = true; 225 // This is a whole line comment. leave newline 226 if (IsAtStartOfStatement) 227 return AsmToken(AsmToken::EndOfStatement, 228 StringRef(TokStart, CurPtr - TokStart)); 229 IsAtStartOfStatement = true; 230 231 return AsmToken(AsmToken::EndOfStatement, 232 StringRef(TokStart, CurPtr - 1 - TokStart)); 233 } 234 235 static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { 236 // Skip ULL, UL, U, L and LL suffices. 237 if (CurPtr[0] == 'U') 238 ++CurPtr; 239 if (CurPtr[0] == 'L') 240 ++CurPtr; 241 if (CurPtr[0] == 'L') 242 ++CurPtr; 243 } 244 245 // Look ahead to search for first non-hex digit, if it's [hH], then we treat the 246 // integer as a hexadecimal, possibly with leading zeroes. 247 static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { 248 const char *FirstHex = nullptr; 249 const char *LookAhead = CurPtr; 250 while (true) { 251 if (isdigit(*LookAhead)) { 252 ++LookAhead; 253 } else if (isxdigit(*LookAhead)) { 254 if (!FirstHex) 255 FirstHex = LookAhead; 256 ++LookAhead; 257 } else { 258 break; 259 } 260 } 261 bool isHex = *LookAhead == 'h' || *LookAhead == 'H'; 262 CurPtr = isHex || !FirstHex ? LookAhead : FirstHex; 263 if (isHex) 264 return 16; 265 return DefaultRadix; 266 } 267 268 static AsmToken intToken(StringRef Ref, APInt &Value) 269 { 270 if (Value.isIntN(64)) 271 return AsmToken(AsmToken::Integer, Ref, Value); 272 return AsmToken(AsmToken::BigNum, Ref, Value); 273 } 274 275 /// LexDigit: First character is [0-9]. 276 /// Local Label: [0-9][:] 277 /// Forward/Backward Label: [0-9][fb] 278 /// Binary integer: 0b[01]+ 279 /// Octal integer: 0[0-7]+ 280 /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] 281 /// Decimal integer: [1-9][0-9]* 282 AsmToken AsmLexer::LexDigit() { 283 // MASM-flavor binary integer: [01]+[bB] 284 // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH] 285 if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) { 286 const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? 287 CurPtr - 1 : nullptr; 288 const char *OldCurPtr = CurPtr; 289 while (isxdigit(*CurPtr)) { 290 if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary) 291 FirstNonBinary = CurPtr; 292 ++CurPtr; 293 } 294 295 unsigned Radix = 0; 296 if (*CurPtr == 'h' || *CurPtr == 'H') { 297 // hexadecimal number 298 ++CurPtr; 299 Radix = 16; 300 } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr && 301 (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) 302 Radix = 2; 303 304 if (Radix == 2 || Radix == 16) { 305 StringRef Result(TokStart, CurPtr - TokStart); 306 APInt Value(128, 0, true); 307 308 if (Result.drop_back().getAsInteger(Radix, Value)) 309 return ReturnError(TokStart, Radix == 2 ? "invalid binary number" : 310 "invalid hexdecimal number"); 311 312 // MSVC accepts and ignores type suffices on integer literals. 313 SkipIgnoredIntegerSuffix(CurPtr); 314 315 return intToken(Result, Value); 316 } 317 318 // octal/decimal integers, or floating point numbers, fall through 319 CurPtr = OldCurPtr; 320 } 321 322 // Decimal integer: [1-9][0-9]* 323 if (CurPtr[-1] != '0' || CurPtr[0] == '.') { 324 unsigned Radix = doLookAhead(CurPtr, 10); 325 bool isHex = Radix == 16; 326 // Check for floating point literals. 327 if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { 328 ++CurPtr; 329 return LexFloatLiteral(); 330 } 331 332 StringRef Result(TokStart, CurPtr - TokStart); 333 334 APInt Value(128, 0, true); 335 if (Result.getAsInteger(Radix, Value)) 336 return ReturnError(TokStart, !isHex ? "invalid decimal number" : 337 "invalid hexdecimal number"); 338 339 // Consume the [bB][hH]. 340 if (Radix == 2 || Radix == 16) 341 ++CurPtr; 342 343 // The darwin/x86 (and x86-64) assembler accepts and ignores type 344 // suffices on integer literals. 345 SkipIgnoredIntegerSuffix(CurPtr); 346 347 return intToken(Result, Value); 348 } 349 350 if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) { 351 ++CurPtr; 352 // See if we actually have "0b" as part of something like "jmp 0b\n" 353 if (!isdigit(CurPtr[0])) { 354 --CurPtr; 355 StringRef Result(TokStart, CurPtr - TokStart); 356 return AsmToken(AsmToken::Integer, Result, 0); 357 } 358 const char *NumStart = CurPtr; 359 while (CurPtr[0] == '0' || CurPtr[0] == '1') 360 ++CurPtr; 361 362 // Requires at least one binary digit. 363 if (CurPtr == NumStart) 364 return ReturnError(TokStart, "invalid binary number"); 365 366 StringRef Result(TokStart, CurPtr - TokStart); 367 368 APInt Value(128, 0, true); 369 if (Result.substr(2).getAsInteger(2, Value)) 370 return ReturnError(TokStart, "invalid binary number"); 371 372 // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 373 // suffixes on integer literals. 374 SkipIgnoredIntegerSuffix(CurPtr); 375 376 return intToken(Result, Value); 377 } 378 379 if ((*CurPtr == 'x') || (*CurPtr == 'X')) { 380 ++CurPtr; 381 const char *NumStart = CurPtr; 382 while (isxdigit(CurPtr[0])) 383 ++CurPtr; 384 385 // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be 386 // diagnosed by LexHexFloatLiteral). 387 if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P') 388 return LexHexFloatLiteral(NumStart == CurPtr); 389 390 // Otherwise requires at least one hex digit. 391 if (CurPtr == NumStart) 392 return ReturnError(CurPtr-2, "invalid hexadecimal number"); 393 394 APInt Result(128, 0); 395 if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) 396 return ReturnError(TokStart, "invalid hexadecimal number"); 397 398 // Consume the optional [hH]. 399 if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H')) 400 ++CurPtr; 401 402 // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 403 // suffixes on integer literals. 404 SkipIgnoredIntegerSuffix(CurPtr); 405 406 return intToken(StringRef(TokStart, CurPtr - TokStart), Result); 407 } 408 409 // Either octal or hexadecimal. 410 APInt Value(128, 0, true); 411 unsigned Radix = doLookAhead(CurPtr, 8); 412 bool isHex = Radix == 16; 413 StringRef Result(TokStart, CurPtr - TokStart); 414 if (Result.getAsInteger(Radix, Value)) 415 return ReturnError(TokStart, !isHex ? "invalid octal number" : 416 "invalid hexdecimal number"); 417 418 // Consume the [hH]. 419 if (Radix == 16) 420 ++CurPtr; 421 422 // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 423 // suffixes on integer literals. 424 SkipIgnoredIntegerSuffix(CurPtr); 425 426 return intToken(Result, Value); 427 } 428 429 /// LexSingleQuote: Integer: 'b' 430 AsmToken AsmLexer::LexSingleQuote() { 431 int CurChar = getNextChar(); 432 433 if (CurChar == '\\') 434 CurChar = getNextChar(); 435 436 if (CurChar == EOF) 437 return ReturnError(TokStart, "unterminated single quote"); 438 439 CurChar = getNextChar(); 440 441 if (CurChar != '\'') 442 return ReturnError(TokStart, "single quote way too long"); 443 444 // The idea here being that 'c' is basically just an integral 445 // constant. 446 StringRef Res = StringRef(TokStart,CurPtr - TokStart); 447 long long Value; 448 449 if (Res.startswith("\'\\")) { 450 char theChar = Res[2]; 451 switch (theChar) { 452 default: Value = theChar; break; 453 case '\'': Value = '\''; break; 454 case 't': Value = '\t'; break; 455 case 'n': Value = '\n'; break; 456 case 'b': Value = '\b'; break; 457 } 458 } else 459 Value = TokStart[1]; 460 461 return AsmToken(AsmToken::Integer, Res, Value); 462 } 463 464 /// LexQuote: String: "..." 465 AsmToken AsmLexer::LexQuote() { 466 int CurChar = getNextChar(); 467 // TODO: does gas allow multiline string constants? 468 while (CurChar != '"') { 469 if (CurChar == '\\') { 470 // Allow \", etc. 471 CurChar = getNextChar(); 472 } 473 474 if (CurChar == EOF) 475 return ReturnError(TokStart, "unterminated string constant"); 476 477 CurChar = getNextChar(); 478 } 479 480 return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); 481 } 482 483 StringRef AsmLexer::LexUntilEndOfStatement() { 484 TokStart = CurPtr; 485 486 while (!isAtStartOfComment(CurPtr) && // Start of line comment. 487 !isAtStatementSeparator(CurPtr) && // End of statement marker. 488 *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { 489 ++CurPtr; 490 } 491 return StringRef(TokStart, CurPtr-TokStart); 492 } 493 494 StringRef AsmLexer::LexUntilEndOfLine() { 495 TokStart = CurPtr; 496 497 while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { 498 ++CurPtr; 499 } 500 return StringRef(TokStart, CurPtr-TokStart); 501 } 502 503 size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, 504 bool ShouldSkipSpace) { 505 SaveAndRestore<const char *> SavedTokenStart(TokStart); 506 SaveAndRestore<const char *> SavedCurPtr(CurPtr); 507 SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine); 508 SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement); 509 SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace); 510 SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true); 511 std::string SavedErr = getErr(); 512 SMLoc SavedErrLoc = getErrLoc(); 513 514 size_t ReadCount; 515 for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { 516 AsmToken Token = LexToken(); 517 518 Buf[ReadCount] = Token; 519 520 if (Token.is(AsmToken::Eof)) 521 break; 522 } 523 524 SetError(SavedErrLoc, SavedErr); 525 return ReadCount; 526 } 527 528 bool AsmLexer::isAtStartOfComment(const char *Ptr) { 529 StringRef CommentString = MAI.getCommentString(); 530 531 if (CommentString.size() == 1) 532 return CommentString[0] == Ptr[0]; 533 534 // Allow # preprocessor commments also be counted as comments for "##" cases 535 if (CommentString[1] == '#') 536 return CommentString[0] == Ptr[0]; 537 538 return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0; 539 } 540 541 bool AsmLexer::isAtStatementSeparator(const char *Ptr) { 542 return strncmp(Ptr, MAI.getSeparatorString(), 543 strlen(MAI.getSeparatorString())) == 0; 544 } 545 546 AsmToken AsmLexer::LexToken() { 547 TokStart = CurPtr; 548 // This always consumes at least one character. 549 int CurChar = getNextChar(); 550 551 if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) { 552 // If this starts with a '#', this may be a cpp 553 // hash directive and otherwise a line comment. 554 AsmToken TokenBuf[2]; 555 MutableArrayRef<AsmToken> Buf(TokenBuf, 2); 556 size_t num = peekTokens(Buf, true); 557 // There cannot be a space preceeding this 558 if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) && 559 TokenBuf[1].is(AsmToken::String)) { 560 CurPtr = TokStart; // reset curPtr; 561 StringRef s = LexUntilEndOfLine(); 562 UnLex(TokenBuf[1]); 563 UnLex(TokenBuf[0]); 564 return AsmToken(AsmToken::HashDirective, s); 565 } 566 return LexLineComment(); 567 } 568 569 if (isAtStartOfComment(TokStart)) 570 return LexLineComment(); 571 572 if (isAtStatementSeparator(TokStart)) { 573 CurPtr += strlen(MAI.getSeparatorString()) - 1; 574 IsAtStartOfLine = true; 575 IsAtStartOfStatement = true; 576 return AsmToken(AsmToken::EndOfStatement, 577 StringRef(TokStart, strlen(MAI.getSeparatorString()))); 578 } 579 580 // If we're missing a newline at EOF, make sure we still get an 581 // EndOfStatement token before the Eof token. 582 if (CurChar == EOF && !IsAtStartOfStatement) { 583 IsAtStartOfLine = true; 584 IsAtStartOfStatement = true; 585 return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 586 } 587 IsAtStartOfLine = false; 588 bool OldIsAtStartOfStatement = IsAtStartOfStatement; 589 IsAtStartOfStatement = false; 590 switch (CurChar) { 591 default: 592 // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* 593 if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') 594 return LexIdentifier(); 595 596 // Unknown character, emit an error. 597 return ReturnError(TokStart, "invalid character in input"); 598 case EOF: 599 IsAtStartOfLine = true; 600 IsAtStartOfStatement = true; 601 return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); 602 case 0: 603 case ' ': 604 case '\t': 605 IsAtStartOfStatement = OldIsAtStartOfStatement; 606 while (*CurPtr == ' ' || *CurPtr == '\t') 607 CurPtr++; 608 if (SkipSpace) 609 return LexToken(); // Ignore whitespace. 610 else 611 return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart)); 612 case '\n': 613 case '\r': 614 IsAtStartOfLine = true; 615 IsAtStartOfStatement = true; 616 return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 617 case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); 618 case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); 619 case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); 620 case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); 621 case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); 622 case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); 623 case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); 624 case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); 625 case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); 626 case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); 627 case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); 628 case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); 629 case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); 630 case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); 631 case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); 632 case '=': 633 if (*CurPtr == '=') { 634 ++CurPtr; 635 return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); 636 } 637 return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); 638 case '|': 639 if (*CurPtr == '|') { 640 ++CurPtr; 641 return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); 642 } 643 return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); 644 case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); 645 case '&': 646 if (*CurPtr == '&') { 647 ++CurPtr; 648 return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); 649 } 650 return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); 651 case '!': 652 if (*CurPtr == '=') { 653 ++CurPtr; 654 return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); 655 } 656 return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); 657 case '%': 658 if (MAI.hasMipsExpressions()) { 659 AsmToken::TokenKind Operator; 660 unsigned OperatorLength; 661 662 std::tie(Operator, OperatorLength) = 663 StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>( 664 StringRef(CurPtr)) 665 .StartsWith("call16", {AsmToken::PercentCall16, 7}) 666 .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8}) 667 .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8}) 668 .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10}) 669 .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10}) 670 .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9}) 671 .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7}) 672 .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7}) 673 .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9}) 674 .StartsWith("got_page", {AsmToken::PercentGot_Page, 9}) 675 .StartsWith("gottprel", {AsmToken::PercentGottprel, 9}) 676 .StartsWith("got", {AsmToken::PercentGot, 4}) 677 .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7}) 678 .StartsWith("higher", {AsmToken::PercentHigher, 7}) 679 .StartsWith("highest", {AsmToken::PercentHighest, 8}) 680 .StartsWith("hi", {AsmToken::PercentHi, 3}) 681 .StartsWith("lo", {AsmToken::PercentLo, 3}) 682 .StartsWith("neg", {AsmToken::PercentNeg, 4}) 683 .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9}) 684 .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9}) 685 .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6}) 686 .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7}) 687 .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9}) 688 .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9}) 689 .Default({AsmToken::Percent, 1}); 690 691 if (Operator != AsmToken::Percent) { 692 CurPtr += OperatorLength - 1; 693 return AsmToken(Operator, StringRef(TokStart, OperatorLength)); 694 } 695 } 696 return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); 697 case '/': 698 IsAtStartOfStatement = OldIsAtStartOfStatement; 699 return LexSlash(); 700 case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 701 case '\'': return LexSingleQuote(); 702 case '"': return LexQuote(); 703 case '0': case '1': case '2': case '3': case '4': 704 case '5': case '6': case '7': case '8': case '9': 705 return LexDigit(); 706 case '<': 707 switch (*CurPtr) { 708 case '<': 709 ++CurPtr; 710 return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2)); 711 case '=': 712 ++CurPtr; 713 return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2)); 714 case '>': 715 ++CurPtr; 716 return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2)); 717 default: 718 return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); 719 } 720 case '>': 721 switch (*CurPtr) { 722 case '>': 723 ++CurPtr; 724 return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2)); 725 case '=': 726 ++CurPtr; 727 return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2)); 728 default: 729 return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); 730 } 731 732 // TODO: Quoted identifiers (objc methods etc) 733 // local labels: [0-9][:] 734 // Forward/backward labels: [0-9][fb] 735 // Integers, fp constants, character constants. 736 } 737 } 738