1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This class implements the lexer for assembly files.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/MC/MCParser/AsmLexer.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/MC/MCAsmInfo.h"
20 #include "llvm/MC/MCParser/MCAsmLexer.h"
21 #include "llvm/Support/SMLoc.h"
22 #include "llvm/Support/SaveAndRestore.h"
23 #include <cassert>
24 #include <cctype>
25 #include <cstdio>
26 #include <cstring>
27 #include <string>
28 #include <tuple>
29 #include <utility>
30 
31 using namespace llvm;
32 
33 AsmLexer::AsmLexer(const MCAsmInfo &MAI)
34     : MAI(MAI), CurPtr(nullptr), IsAtStartOfLine(true),
35       IsAtStartOfStatement(true), IsParsingMSInlineAsm(false),
36       IsPeeking(false) {
37   AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
38 }
39 
40 AsmLexer::~AsmLexer() {
41 }
42 
43 void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
44   CurBuf = Buf;
45 
46   if (ptr)
47     CurPtr = ptr;
48   else
49     CurPtr = CurBuf.begin();
50 
51   TokStart = nullptr;
52 }
53 
54 /// ReturnError - Set the error to the specified string at the specified
55 /// location.  This is defined to always return AsmToken::Error.
56 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
57   SetError(SMLoc::getFromPointer(Loc), Msg);
58 
59   return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
60 }
61 
62 int AsmLexer::getNextChar() {
63   if (CurPtr == CurBuf.end())
64     return EOF;
65   return (unsigned char)*CurPtr++;
66 }
67 
68 /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
69 ///
70 /// The leading integral digit sequence and dot should have already been
71 /// consumed, some or all of the fractional digit sequence *can* have been
72 /// consumed.
73 AsmToken AsmLexer::LexFloatLiteral() {
74   // Skip the fractional digit sequence.
75   while (isdigit(*CurPtr))
76     ++CurPtr;
77 
78   // Check for exponent; we intentionally accept a slighlty wider set of
79   // literals here and rely on the upstream client to reject invalid ones (e.g.,
80   // "1e+").
81   if (*CurPtr == 'e' || *CurPtr == 'E') {
82     ++CurPtr;
83     if (*CurPtr == '-' || *CurPtr == '+')
84       ++CurPtr;
85     while (isdigit(*CurPtr))
86       ++CurPtr;
87   }
88 
89   return AsmToken(AsmToken::Real,
90                   StringRef(TokStart, CurPtr - TokStart));
91 }
92 
93 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
94 /// while making sure there are enough actual digits around for the constant to
95 /// be valid.
96 ///
97 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
98 /// before we get here.
99 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
100   assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
101          "unexpected parse state in floating hex");
102   bool NoFracDigits = true;
103 
104   // Skip the fractional part if there is one
105   if (*CurPtr == '.') {
106     ++CurPtr;
107 
108     const char *FracStart = CurPtr;
109     while (isxdigit(*CurPtr))
110       ++CurPtr;
111 
112     NoFracDigits = CurPtr == FracStart;
113   }
114 
115   if (NoIntDigits && NoFracDigits)
116     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
117                                  "expected at least one significand digit");
118 
119   // Make sure we do have some kind of proper exponent part
120   if (*CurPtr != 'p' && *CurPtr != 'P')
121     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
122                                  "expected exponent part 'p'");
123   ++CurPtr;
124 
125   if (*CurPtr == '+' || *CurPtr == '-')
126     ++CurPtr;
127 
128   // N.b. exponent digits are *not* hex
129   const char *ExpStart = CurPtr;
130   while (isdigit(*CurPtr))
131     ++CurPtr;
132 
133   if (CurPtr == ExpStart)
134     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
135                                  "expected at least one exponent digit");
136 
137   return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
138 }
139 
/// Identifier grammar: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
///
/// \returns true if \p c may appear after the first character of an
/// identifier: an alphanumeric character, '_', '$', '.', '?', or '@' when
/// \p AllowAt is set.
static bool IsIdentifierChar(char c, bool AllowAt) {
  // isalnum() is only defined for values representable as unsigned char (or
  // EOF); convert first so bytes >= 0x80 are classified safely where plain
  // char is signed.
  return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
         c == '.' || (c == '@' && AllowAt) || c == '?';
}
145 
146 AsmToken AsmLexer::LexIdentifier() {
147   // Check for floating point literals.
148   if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
149     // Disambiguate a .1243foo identifier from a floating literal.
150     while (isdigit(*CurPtr))
151       ++CurPtr;
152     if (*CurPtr == 'e' || *CurPtr == 'E' ||
153         !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
154       return LexFloatLiteral();
155   }
156 
157   while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
158     ++CurPtr;
159 
160   // Handle . as a special case.
161   if (CurPtr == TokStart+1 && TokStart[0] == '.')
162     return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
163 
164   return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
165 }
166 
167 /// LexSlash: Slash: /
168 ///           C-Style Comment: /* ... */
169 AsmToken AsmLexer::LexSlash() {
170   switch (*CurPtr) {
171   case '*':
172     IsAtStartOfStatement = false;
173     break; // C style comment.
174   case '/':
175     ++CurPtr;
176     return LexLineComment();
177   default:
178     IsAtStartOfStatement = false;
179     return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
180   }
181 
182   // C Style comment.
183   ++CurPtr;  // skip the star.
184   const char *CommentTextStart = CurPtr;
185   while (CurPtr != CurBuf.end()) {
186     switch (*CurPtr++) {
187     case '*':
188       // End of the comment?
189       if (*CurPtr != '/')
190         break;
191       // If we have a CommentConsumer, notify it about the comment.
192       if (CommentConsumer) {
193         CommentConsumer->HandleComment(
194             SMLoc::getFromPointer(CommentTextStart),
195             StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
196       }
197       ++CurPtr;   // End the */.
198       return AsmToken(AsmToken::Comment,
199                       StringRef(TokStart, CurPtr - TokStart));
200     }
201   }
202   return ReturnError(TokStart, "unterminated comment");
203 }
204 
205 /// LexLineComment: Comment: #[^\n]*
206 ///                        : //[^\n]*
207 AsmToken AsmLexer::LexLineComment() {
208   // Mark This as an end of statement with a body of the
209   // comment. While it would be nicer to leave this two tokens,
210   // backwards compatability with TargetParsers makes keeping this in this form
211   // better.
212   const char *CommentTextStart = CurPtr;
213   int CurChar = getNextChar();
214   while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
215     CurChar = getNextChar();
216 
217   // If we have a CommentConsumer, notify it about the comment.
218   if (CommentConsumer) {
219     CommentConsumer->HandleComment(
220         SMLoc::getFromPointer(CommentTextStart),
221         StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
222   }
223 
224   IsAtStartOfLine = true;
225   // This is a whole line comment. leave newline
226   if (IsAtStartOfStatement)
227     return AsmToken(AsmToken::EndOfStatement,
228                     StringRef(TokStart, CurPtr - TokStart));
229   IsAtStartOfStatement = true;
230 
231   return AsmToken(AsmToken::EndOfStatement,
232                   StringRef(TokStart, CurPtr - 1 - TokStart));
233 }
234 
/// Advance \p CurPtr past an integer type suffix: an optional 'U' followed by
/// at most two 'L' characters (U, L, LL, UL, ULL). Anything else is left in
/// place.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  const char *Cursor = CurPtr;

  if (*Cursor == 'U')
    ++Cursor;

  for (int Seen = 0; Seen < 2 && *Cursor == 'L'; ++Seen)
    ++Cursor;

  CurPtr = Cursor;
}
244 
// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
// integer as a hexadecimal, possibly with leading zeroes.
//
// On return CurPtr points at the [hH] suffix when one was found, at the first
// non-decimal hex digit when the run contained one but no suffix followed, and
// otherwise at the first character that is not a digit.
//
// Returns 16 when the [hH] suffix is present and DefaultRadix otherwise.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
  const char *FirstHex = nullptr;
  const char *LookAhead = CurPtr;
  for (;; ++LookAhead) {
    // The <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF); convert so bytes >= 0x80 are handled safely.
    const unsigned char C = static_cast<unsigned char>(*LookAhead);
    if (isdigit(C))
      continue;
    if (!isxdigit(C))
      break;
    // A hex digit that is not a decimal digit: remember the first one.
    if (!FirstHex)
      FirstHex = LookAhead;
  }

  const bool IsHex = *LookAhead == 'h' || *LookAhead == 'H';
  CurPtr = (IsHex || !FirstHex) ? LookAhead : FirstHex;
  return IsHex ? 16 : DefaultRadix;
}
267 
268 static AsmToken intToken(StringRef Ref, APInt &Value)
269 {
270   if (Value.isIntN(64))
271     return AsmToken(AsmToken::Integer, Ref, Value);
272   return AsmToken(AsmToken::BigNum, Ref, Value);
273 }
274 
275 /// LexDigit: First character is [0-9].
276 ///   Local Label: [0-9][:]
277 ///   Forward/Backward Label: [0-9][fb]
278 ///   Binary integer: 0b[01]+
279 ///   Octal integer: 0[0-7]+
280 ///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
281 ///   Decimal integer: [1-9][0-9]*
282 AsmToken AsmLexer::LexDigit() {
283   // MASM-flavor binary integer: [01]+[bB]
284   // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
285   if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) {
286     const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
287                                    CurPtr - 1 : nullptr;
288     const char *OldCurPtr = CurPtr;
289     while (isxdigit(*CurPtr)) {
290       if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
291         FirstNonBinary = CurPtr;
292       ++CurPtr;
293     }
294 
295     unsigned Radix = 0;
296     if (*CurPtr == 'h' || *CurPtr == 'H') {
297       // hexadecimal number
298       ++CurPtr;
299       Radix = 16;
300     } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
301                (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
302       Radix = 2;
303 
304     if (Radix == 2 || Radix == 16) {
305       StringRef Result(TokStart, CurPtr - TokStart);
306       APInt Value(128, 0, true);
307 
308       if (Result.drop_back().getAsInteger(Radix, Value))
309         return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
310                              "invalid hexdecimal number");
311 
312       // MSVC accepts and ignores type suffices on integer literals.
313       SkipIgnoredIntegerSuffix(CurPtr);
314 
315       return intToken(Result, Value);
316    }
317 
318     // octal/decimal integers, or floating point numbers, fall through
319     CurPtr = OldCurPtr;
320   }
321 
322   // Decimal integer: [1-9][0-9]*
323   if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
324     unsigned Radix = doLookAhead(CurPtr, 10);
325     bool isHex = Radix == 16;
326     // Check for floating point literals.
327     if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
328       ++CurPtr;
329       return LexFloatLiteral();
330     }
331 
332     StringRef Result(TokStart, CurPtr - TokStart);
333 
334     APInt Value(128, 0, true);
335     if (Result.getAsInteger(Radix, Value))
336       return ReturnError(TokStart, !isHex ? "invalid decimal number" :
337                            "invalid hexdecimal number");
338 
339     // Consume the [bB][hH].
340     if (Radix == 2 || Radix == 16)
341       ++CurPtr;
342 
343     // The darwin/x86 (and x86-64) assembler accepts and ignores type
344     // suffices on integer literals.
345     SkipIgnoredIntegerSuffix(CurPtr);
346 
347     return intToken(Result, Value);
348   }
349 
350   if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
351     ++CurPtr;
352     // See if we actually have "0b" as part of something like "jmp 0b\n"
353     if (!isdigit(CurPtr[0])) {
354       --CurPtr;
355       StringRef Result(TokStart, CurPtr - TokStart);
356       return AsmToken(AsmToken::Integer, Result, 0);
357     }
358     const char *NumStart = CurPtr;
359     while (CurPtr[0] == '0' || CurPtr[0] == '1')
360       ++CurPtr;
361 
362     // Requires at least one binary digit.
363     if (CurPtr == NumStart)
364       return ReturnError(TokStart, "invalid binary number");
365 
366     StringRef Result(TokStart, CurPtr - TokStart);
367 
368     APInt Value(128, 0, true);
369     if (Result.substr(2).getAsInteger(2, Value))
370       return ReturnError(TokStart, "invalid binary number");
371 
372     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
373     // suffixes on integer literals.
374     SkipIgnoredIntegerSuffix(CurPtr);
375 
376     return intToken(Result, Value);
377   }
378 
379   if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
380     ++CurPtr;
381     const char *NumStart = CurPtr;
382     while (isxdigit(CurPtr[0]))
383       ++CurPtr;
384 
385     // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
386     // diagnosed by LexHexFloatLiteral).
387     if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
388       return LexHexFloatLiteral(NumStart == CurPtr);
389 
390     // Otherwise requires at least one hex digit.
391     if (CurPtr == NumStart)
392       return ReturnError(CurPtr-2, "invalid hexadecimal number");
393 
394     APInt Result(128, 0);
395     if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
396       return ReturnError(TokStart, "invalid hexadecimal number");
397 
398     // Consume the optional [hH].
399     if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H'))
400       ++CurPtr;
401 
402     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
403     // suffixes on integer literals.
404     SkipIgnoredIntegerSuffix(CurPtr);
405 
406     return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
407   }
408 
409   // Either octal or hexadecimal.
410   APInt Value(128, 0, true);
411   unsigned Radix = doLookAhead(CurPtr, 8);
412   bool isHex = Radix == 16;
413   StringRef Result(TokStart, CurPtr - TokStart);
414   if (Result.getAsInteger(Radix, Value))
415     return ReturnError(TokStart, !isHex ? "invalid octal number" :
416                        "invalid hexdecimal number");
417 
418   // Consume the [hH].
419   if (Radix == 16)
420     ++CurPtr;
421 
422   // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
423   // suffixes on integer literals.
424   SkipIgnoredIntegerSuffix(CurPtr);
425 
426   return intToken(Result, Value);
427 }
428 
429 /// LexSingleQuote: Integer: 'b'
430 AsmToken AsmLexer::LexSingleQuote() {
431   int CurChar = getNextChar();
432 
433   if (CurChar == '\\')
434     CurChar = getNextChar();
435 
436   if (CurChar == EOF)
437     return ReturnError(TokStart, "unterminated single quote");
438 
439   CurChar = getNextChar();
440 
441   if (CurChar != '\'')
442     return ReturnError(TokStart, "single quote way too long");
443 
444   // The idea here being that 'c' is basically just an integral
445   // constant.
446   StringRef Res = StringRef(TokStart,CurPtr - TokStart);
447   long long Value;
448 
449   if (Res.startswith("\'\\")) {
450     char theChar = Res[2];
451     switch (theChar) {
452       default: Value = theChar; break;
453       case '\'': Value = '\''; break;
454       case 't': Value = '\t'; break;
455       case 'n': Value = '\n'; break;
456       case 'b': Value = '\b'; break;
457     }
458   } else
459     Value = TokStart[1];
460 
461   return AsmToken(AsmToken::Integer, Res, Value);
462 }
463 
464 /// LexQuote: String: "..."
465 AsmToken AsmLexer::LexQuote() {
466   int CurChar = getNextChar();
467   // TODO: does gas allow multiline string constants?
468   while (CurChar != '"') {
469     if (CurChar == '\\') {
470       // Allow \", etc.
471       CurChar = getNextChar();
472     }
473 
474     if (CurChar == EOF)
475       return ReturnError(TokStart, "unterminated string constant");
476 
477     CurChar = getNextChar();
478   }
479 
480   return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
481 }
482 
483 StringRef AsmLexer::LexUntilEndOfStatement() {
484   TokStart = CurPtr;
485 
486   while (!isAtStartOfComment(CurPtr) &&     // Start of line comment.
487          !isAtStatementSeparator(CurPtr) && // End of statement marker.
488          *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
489     ++CurPtr;
490   }
491   return StringRef(TokStart, CurPtr-TokStart);
492 }
493 
494 StringRef AsmLexer::LexUntilEndOfLine() {
495   TokStart = CurPtr;
496 
497   while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
498     ++CurPtr;
499   }
500   return StringRef(TokStart, CurPtr-TokStart);
501 }
502 
503 size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
504                             bool ShouldSkipSpace) {
505   SaveAndRestore<const char *> SavedTokenStart(TokStart);
506   SaveAndRestore<const char *> SavedCurPtr(CurPtr);
507   SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine);
508   SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement);
509   SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace);
510   SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true);
511   std::string SavedErr = getErr();
512   SMLoc SavedErrLoc = getErrLoc();
513 
514   size_t ReadCount;
515   for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
516     AsmToken Token = LexToken();
517 
518     Buf[ReadCount] = Token;
519 
520     if (Token.is(AsmToken::Eof))
521       break;
522   }
523 
524   SetError(SavedErrLoc, SavedErr);
525   return ReadCount;
526 }
527 
528 bool AsmLexer::isAtStartOfComment(const char *Ptr) {
529   StringRef CommentString = MAI.getCommentString();
530 
531   if (CommentString.size() == 1)
532     return CommentString[0] == Ptr[0];
533 
534   // Allow # preprocessor commments also be counted as comments for "##" cases
535   if (CommentString[1] == '#')
536     return CommentString[0] == Ptr[0];
537 
538   return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
539 }
540 
541 bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
542   return strncmp(Ptr, MAI.getSeparatorString(),
543                  strlen(MAI.getSeparatorString())) == 0;
544 }
545 
/// Lex and return the next token, starting at the current read position.
///
/// Besides producing the token this maintains IsAtStartOfLine and
/// IsAtStartOfStatement: both are cleared before ordinary tokens are
/// dispatched and set again whenever an end of statement (line terminator,
/// statement separator, line comment or end of input) is produced. Whitespace
/// and '/' restore the statement flag to its previous value.
AsmToken AsmLexer::LexToken() {
  TokStart = CurPtr;
  // This consumes one character unless the read position is already at the
  // end of the buffer, in which case getNextChar() returns EOF.
  int CurChar = getNextChar();

  if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
    // If this starts with a '#', this may be a cpp
    // hash directive and otherwise a line comment.
    // Peek at the next two tokens (skipping spaces) to tell the two apart.
    AsmToken TokenBuf[2];
    MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
    size_t num = peekTokens(Buf, true);
    // There cannot be a space preceding this
    if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
        TokenBuf[1].is(AsmToken::String)) {
      // '#' <integer> <string>: return the whole line as a HashDirective and
      // push the two peeked tokens back with UnLex.
      CurPtr = TokStart; // reset curPtr;
      StringRef s = LexUntilEndOfLine();
      UnLex(TokenBuf[1]);
      UnLex(TokenBuf[0]);
      return AsmToken(AsmToken::HashDirective, s);
    }
    return LexLineComment();
  }

  // Target-specific line comment introducer.
  if (isAtStartOfComment(TokStart))
    return LexLineComment();

  // Target-specific statement separator. One character of it has already been
  // consumed above, so skip only the remainder.
  if (isAtStatementSeparator(TokStart)) {
    CurPtr += strlen(MAI.getSeparatorString()) - 1;
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    return AsmToken(AsmToken::EndOfStatement,
                    StringRef(TokStart, strlen(MAI.getSeparatorString())));
  }

  // If we're missing a newline at EOF, make sure we still get an
  // EndOfStatement token before the Eof token.
  if (CurChar == EOF && !IsAtStartOfStatement) {
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
  }
  // From here on we are lexing an ordinary token. Clear both flags, but keep
  // the old statement flag for the cases that must not disturb it.
  IsAtStartOfLine = false;
  bool OldIsAtStartOfStatement = IsAtStartOfStatement;
  IsAtStartOfStatement = false;
  switch (CurChar) {
  default:
    // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
    if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
      return LexIdentifier();

    // Unknown character, emit an error.
    return ReturnError(TokStart, "invalid character in input");
  case EOF:
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
  // A NUL byte is handled the same way as a space or tab.
  case 0:
  case ' ':
  case '\t':
    // Whitespace does not end the "start of statement" state.
    IsAtStartOfStatement = OldIsAtStartOfStatement;
    while (*CurPtr == ' ' || *CurPtr == '\t')
      CurPtr++;
    if (SkipSpace)
      return LexToken(); // Ignore whitespace.
    else
      return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
  case '\n':
  case '\r':
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
  // Single-character punctuation.
  case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
  case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
  case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
  case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
  case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
  case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
  case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
  case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
  case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
  case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
  case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
  case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
  case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
  case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
  // Operators that may be one or two characters long.
  case '=':
    if (*CurPtr == '=') {
      ++CurPtr;
      return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
    }
    return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
  case '|':
    if (*CurPtr == '|') {
      ++CurPtr;
      return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
    }
    return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
  case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
  case '&':
    if (*CurPtr == '&') {
      ++CurPtr;
      return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
    }
    return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
  case '!':
    if (*CurPtr == '=') {
      ++CurPtr;
      return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
    }
    return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
  case '%':
    // When the target has MIPS expressions, "%name" operators are lexed as
    // single tokens.
    if (MAI.hasMipsExpressions()) {
      AsmToken::TokenKind Operator;
      unsigned OperatorLength;

      // Each entry pairs a token kind with the operator's full length,
      // including the leading '%'. The first matching prefix wins, so longer
      // names are listed before names that are a prefix of them (e.g.
      // "got_disp" before "got").
      std::tie(Operator, OperatorLength) =
          StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>(
              StringRef(CurPtr))
              .StartsWith("call16", {AsmToken::PercentCall16, 7})
              .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
              .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
              .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
              .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
              .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
              .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
              .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
              .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
              .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
              .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
              .StartsWith("got", {AsmToken::PercentGot, 4})
              .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
              .StartsWith("higher", {AsmToken::PercentHigher, 7})
              .StartsWith("highest", {AsmToken::PercentHighest, 8})
              .StartsWith("hi", {AsmToken::PercentHi, 3})
              .StartsWith("lo", {AsmToken::PercentLo, 3})
              .StartsWith("neg", {AsmToken::PercentNeg, 4})
              .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
              .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
              .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
              .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
              .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
              .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
              .Default({AsmToken::Percent, 1});

      if (Operator != AsmToken::Percent) {
        // The '%' itself has already been consumed.
        CurPtr += OperatorLength - 1;
        return AsmToken(Operator, StringRef(TokStart, OperatorLength));
      }
    }
    return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
  case '/':
    // LexSlash decides for itself whether the statement flag is cleared.
    IsAtStartOfStatement = OldIsAtStartOfStatement;
    return LexSlash();
  case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
  case '\'': return LexSingleQuote();
  case '"': return LexQuote();
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    return LexDigit();
  case '<':
    switch (*CurPtr) {
    case '<':
      ++CurPtr;
      return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2));
    case '=':
      ++CurPtr;
      return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2));
    case '>':
      ++CurPtr;
      return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2));
    default:
      return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
    }
  case '>':
    switch (*CurPtr) {
    case '>':
      ++CurPtr;
      return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2));
    case '=':
      ++CurPtr;
      return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2));
    default:
      return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
    }

  // TODO: Quoted identifiers (objc methods etc)
  // local labels: [0-9][:]
  // Forward/backward labels: [0-9][fb]
  // Integers, fp constants, character constants.
  }
}
738