111d26bd1SRiver Riddle //===- Lexer.cpp ----------------------------------------------------------===//
211d26bd1SRiver Riddle //
311d26bd1SRiver Riddle // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
411d26bd1SRiver Riddle // See https://llvm.org/LICENSE.txt for license information.
511d26bd1SRiver Riddle // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
611d26bd1SRiver Riddle //
711d26bd1SRiver Riddle //===----------------------------------------------------------------------===//
811d26bd1SRiver Riddle 
911d26bd1SRiver Riddle #include "Lexer.h"
1011d26bd1SRiver Riddle #include "mlir/Support/LogicalResult.h"
1111d26bd1SRiver Riddle #include "mlir/Tools/PDLL/AST/Diagnostic.h"
12008de486SRiver Riddle #include "mlir/Tools/PDLL/Parser/CodeComplete.h"
1311d26bd1SRiver Riddle #include "llvm/ADT/StringExtras.h"
1411d26bd1SRiver Riddle #include "llvm/ADT/StringSwitch.h"
1511d26bd1SRiver Riddle #include "llvm/Support/SourceMgr.h"
1611d26bd1SRiver Riddle 
1711d26bd1SRiver Riddle using namespace mlir;
1811d26bd1SRiver Riddle using namespace mlir::pdll;
1911d26bd1SRiver Riddle 
2011d26bd1SRiver Riddle //===----------------------------------------------------------------------===//
2111d26bd1SRiver Riddle // Token
2211d26bd1SRiver Riddle //===----------------------------------------------------------------------===//
2311d26bd1SRiver Riddle 
getStringValue() const2411d26bd1SRiver Riddle std::string Token::getStringValue() const {
2541d2c6dfSRiver Riddle   assert(getKind() == string || getKind() == string_block ||
2641d2c6dfSRiver Riddle          getKind() == code_complete_string);
2711d26bd1SRiver Riddle 
2811d26bd1SRiver Riddle   // Start by dropping the quotes.
2941d2c6dfSRiver Riddle   StringRef bytes = getSpelling();
3041d2c6dfSRiver Riddle   if (is(string))
3141d2c6dfSRiver Riddle     bytes = bytes.drop_front().drop_back();
3241d2c6dfSRiver Riddle   else if (is(string_block))
3341d2c6dfSRiver Riddle     bytes = bytes.drop_front(2).drop_back(2);
3411d26bd1SRiver Riddle 
3511d26bd1SRiver Riddle   std::string result;
3611d26bd1SRiver Riddle   result.reserve(bytes.size());
3711d26bd1SRiver Riddle   for (unsigned i = 0, e = bytes.size(); i != e;) {
3811d26bd1SRiver Riddle     auto c = bytes[i++];
3911d26bd1SRiver Riddle     if (c != '\\') {
4011d26bd1SRiver Riddle       result.push_back(c);
4111d26bd1SRiver Riddle       continue;
4211d26bd1SRiver Riddle     }
4311d26bd1SRiver Riddle 
4411d26bd1SRiver Riddle     assert(i + 1 <= e && "invalid string should be caught by lexer");
4511d26bd1SRiver Riddle     auto c1 = bytes[i++];
4611d26bd1SRiver Riddle     switch (c1) {
4711d26bd1SRiver Riddle     case '"':
4811d26bd1SRiver Riddle     case '\\':
4911d26bd1SRiver Riddle       result.push_back(c1);
5011d26bd1SRiver Riddle       continue;
5111d26bd1SRiver Riddle     case 'n':
5211d26bd1SRiver Riddle       result.push_back('\n');
5311d26bd1SRiver Riddle       continue;
5411d26bd1SRiver Riddle     case 't':
5511d26bd1SRiver Riddle       result.push_back('\t');
5611d26bd1SRiver Riddle       continue;
5711d26bd1SRiver Riddle     default:
5811d26bd1SRiver Riddle       break;
5911d26bd1SRiver Riddle     }
6011d26bd1SRiver Riddle 
6111d26bd1SRiver Riddle     assert(i + 1 <= e && "invalid string should be caught by lexer");
6211d26bd1SRiver Riddle     auto c2 = bytes[i++];
6311d26bd1SRiver Riddle 
6411d26bd1SRiver Riddle     assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
6511d26bd1SRiver Riddle     result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
6611d26bd1SRiver Riddle   }
6711d26bd1SRiver Riddle 
6811d26bd1SRiver Riddle   return result;
6911d26bd1SRiver Riddle }
7011d26bd1SRiver Riddle 
7111d26bd1SRiver Riddle //===----------------------------------------------------------------------===//
7211d26bd1SRiver Riddle // Lexer
7311d26bd1SRiver Riddle //===----------------------------------------------------------------------===//
7411d26bd1SRiver Riddle 
Lexer(llvm::SourceMgr & mgr,ast::DiagnosticEngine & diagEngine,CodeCompleteContext * codeCompleteContext)75008de486SRiver Riddle Lexer::Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine,
76008de486SRiver Riddle              CodeCompleteContext *codeCompleteContext)
77008de486SRiver Riddle     : srcMgr(mgr), diagEngine(diagEngine), addedHandlerToDiagEngine(false),
78008de486SRiver Riddle       codeCompletionLocation(nullptr) {
7911d26bd1SRiver Riddle   curBufferID = mgr.getMainFileID();
8011d26bd1SRiver Riddle   curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
8111d26bd1SRiver Riddle   curPtr = curBuffer.begin();
8211d26bd1SRiver Riddle 
83008de486SRiver Riddle   // Set the code completion location if necessary.
84008de486SRiver Riddle   if (codeCompleteContext) {
85008de486SRiver Riddle     codeCompletionLocation =
86008de486SRiver Riddle         codeCompleteContext->getCodeCompleteLoc().getPointer();
87008de486SRiver Riddle   }
88008de486SRiver Riddle 
8911d26bd1SRiver Riddle   // If the diag engine has no handler, add a default that emits to the
9011d26bd1SRiver Riddle   // SourceMgr.
9111d26bd1SRiver Riddle   if (!diagEngine.getHandlerFn()) {
9211d26bd1SRiver Riddle     diagEngine.setHandlerFn([&](const ast::Diagnostic &diag) {
9311d26bd1SRiver Riddle       srcMgr.PrintMessage(diag.getLocation().Start, diag.getSeverity(),
9411d26bd1SRiver Riddle                           diag.getMessage());
9511d26bd1SRiver Riddle       for (const ast::Diagnostic &note : diag.getNotes())
9611d26bd1SRiver Riddle         srcMgr.PrintMessage(note.getLocation().Start, note.getSeverity(),
9711d26bd1SRiver Riddle                             note.getMessage());
9811d26bd1SRiver Riddle     });
9911d26bd1SRiver Riddle     addedHandlerToDiagEngine = true;
10011d26bd1SRiver Riddle   }
10111d26bd1SRiver Riddle }
10211d26bd1SRiver Riddle 
~Lexer()10311d26bd1SRiver Riddle Lexer::~Lexer() {
104*b7f93c28SJeff Niu   if (addedHandlerToDiagEngine)
105*b7f93c28SJeff Niu     diagEngine.setHandlerFn(nullptr);
10611d26bd1SRiver Riddle }
10711d26bd1SRiver Riddle 
pushInclude(StringRef filename,SMRange includeLoc)10809af7fefSRiver Riddle LogicalResult Lexer::pushInclude(StringRef filename, SMRange includeLoc) {
10911d26bd1SRiver Riddle   std::string includedFile;
11009af7fefSRiver Riddle   int bufferID =
11109af7fefSRiver Riddle       srcMgr.AddIncludeFile(filename.str(), includeLoc.End, includedFile);
11209af7fefSRiver Riddle   if (!bufferID)
11309af7fefSRiver Riddle     return failure();
11411d26bd1SRiver Riddle 
11511d26bd1SRiver Riddle   curBufferID = bufferID;
11611d26bd1SRiver Riddle   curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
11711d26bd1SRiver Riddle   curPtr = curBuffer.begin();
11811d26bd1SRiver Riddle   return success();
11911d26bd1SRiver Riddle }
12011d26bd1SRiver Riddle 
emitError(SMRange loc,const Twine & msg)1216842ec42SRiver Riddle Token Lexer::emitError(SMRange loc, const Twine &msg) {
12211d26bd1SRiver Riddle   diagEngine.emitError(loc, msg);
12311d26bd1SRiver Riddle   return formToken(Token::error, loc.Start.getPointer());
12411d26bd1SRiver Riddle }
emitErrorAndNote(SMRange loc,const Twine & msg,SMRange noteLoc,const Twine & note)125*b7f93c28SJeff Niu Token Lexer::emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc,
126*b7f93c28SJeff Niu                               const Twine &note) {
12711d26bd1SRiver Riddle   diagEngine.emitError(loc, msg)->attachNote(note, noteLoc);
12811d26bd1SRiver Riddle   return formToken(Token::error, loc.Start.getPointer());
12911d26bd1SRiver Riddle }
emitError(const char * loc,const Twine & msg)13011d26bd1SRiver Riddle Token Lexer::emitError(const char *loc, const Twine &msg) {
131*b7f93c28SJeff Niu   return emitError(
132*b7f93c28SJeff Niu       SMRange(SMLoc::getFromPointer(loc), SMLoc::getFromPointer(loc + 1)), msg);
13311d26bd1SRiver Riddle }
13411d26bd1SRiver Riddle 
getNextChar()13511d26bd1SRiver Riddle int Lexer::getNextChar() {
13611d26bd1SRiver Riddle   char curChar = *curPtr++;
13711d26bd1SRiver Riddle   switch (curChar) {
13811d26bd1SRiver Riddle   default:
13911d26bd1SRiver Riddle     return static_cast<unsigned char>(curChar);
14011d26bd1SRiver Riddle   case 0: {
14111d26bd1SRiver Riddle     // A nul character in the stream is either the end of the current buffer
14211d26bd1SRiver Riddle     // or a random nul in the file. Disambiguate that here.
143*b7f93c28SJeff Niu     if (curPtr - 1 != curBuffer.end())
144*b7f93c28SJeff Niu       return 0;
14511d26bd1SRiver Riddle 
14611d26bd1SRiver Riddle     // Otherwise, return end of file.
14711d26bd1SRiver Riddle     --curPtr;
14811d26bd1SRiver Riddle     return EOF;
14911d26bd1SRiver Riddle   }
15011d26bd1SRiver Riddle   case '\n':
15111d26bd1SRiver Riddle   case '\r':
15211d26bd1SRiver Riddle     // Handle the newline character by ignoring it and incrementing the line
15311d26bd1SRiver Riddle     // count. However, be careful about 'dos style' files with \n\r in them.
15411d26bd1SRiver Riddle     // Only treat a \n\r or \r\n as a single line.
15511d26bd1SRiver Riddle     if ((*curPtr == '\n' || (*curPtr == '\r')) && *curPtr != curChar)
15611d26bd1SRiver Riddle       ++curPtr;
15711d26bd1SRiver Riddle     return '\n';
15811d26bd1SRiver Riddle   }
15911d26bd1SRiver Riddle }
16011d26bd1SRiver Riddle 
lexToken()16111d26bd1SRiver Riddle Token Lexer::lexToken() {
16211d26bd1SRiver Riddle   while (true) {
16311d26bd1SRiver Riddle     const char *tokStart = curPtr;
16411d26bd1SRiver Riddle 
165008de486SRiver Riddle     // Check to see if this token is at the code completion location.
166008de486SRiver Riddle     if (tokStart == codeCompletionLocation)
167008de486SRiver Riddle       return formToken(Token::code_complete, tokStart);
168008de486SRiver Riddle 
16911d26bd1SRiver Riddle     // This always consumes at least one character.
17011d26bd1SRiver Riddle     int curChar = getNextChar();
17111d26bd1SRiver Riddle     switch (curChar) {
17211d26bd1SRiver Riddle     default:
17311d26bd1SRiver Riddle       // Handle identifiers: [a-zA-Z_]
174*b7f93c28SJeff Niu       if (isalpha(curChar) || curChar == '_')
175*b7f93c28SJeff Niu         return lexIdentifier(tokStart);
17611d26bd1SRiver Riddle 
17711d26bd1SRiver Riddle       // Unknown character, emit an error.
17811d26bd1SRiver Riddle       return emitError(tokStart, "unexpected character");
17911d26bd1SRiver Riddle     case EOF: {
18011d26bd1SRiver Riddle       // Return EOF denoting the end of lexing.
18111d26bd1SRiver Riddle       Token eof = formToken(Token::eof, tokStart);
18211d26bd1SRiver Riddle 
18311d26bd1SRiver Riddle       // Check to see if we are in an included file.
1846842ec42SRiver Riddle       SMLoc parentIncludeLoc = srcMgr.getParentIncludeLoc(curBufferID);
18511d26bd1SRiver Riddle       if (parentIncludeLoc.isValid()) {
18611d26bd1SRiver Riddle         curBufferID = srcMgr.FindBufferContainingLoc(parentIncludeLoc);
18711d26bd1SRiver Riddle         curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
18811d26bd1SRiver Riddle         curPtr = parentIncludeLoc.getPointer();
18911d26bd1SRiver Riddle       }
19011d26bd1SRiver Riddle 
19111d26bd1SRiver Riddle       return eof;
19211d26bd1SRiver Riddle     }
19311d26bd1SRiver Riddle 
19411d26bd1SRiver Riddle     // Lex punctuation.
19511d26bd1SRiver Riddle     case '-':
19611d26bd1SRiver Riddle       if (*curPtr == '>') {
19711d26bd1SRiver Riddle         ++curPtr;
19811d26bd1SRiver Riddle         return formToken(Token::arrow, tokStart);
19911d26bd1SRiver Riddle       }
20011d26bd1SRiver Riddle       return emitError(tokStart, "unexpected character");
20111d26bd1SRiver Riddle     case ':':
20211d26bd1SRiver Riddle       return formToken(Token::colon, tokStart);
20311d26bd1SRiver Riddle     case ',':
20411d26bd1SRiver Riddle       return formToken(Token::comma, tokStart);
20511d26bd1SRiver Riddle     case '.':
20611d26bd1SRiver Riddle       return formToken(Token::dot, tokStart);
20711d26bd1SRiver Riddle     case '=':
20811d26bd1SRiver Riddle       if (*curPtr == '>') {
20911d26bd1SRiver Riddle         ++curPtr;
21011d26bd1SRiver Riddle         return formToken(Token::equal_arrow, tokStart);
21111d26bd1SRiver Riddle       }
21211d26bd1SRiver Riddle       return formToken(Token::equal, tokStart);
21311d26bd1SRiver Riddle     case ';':
21411d26bd1SRiver Riddle       return formToken(Token::semicolon, tokStart);
21511d26bd1SRiver Riddle     case '[':
21611d26bd1SRiver Riddle       if (*curPtr == '{') {
21711d26bd1SRiver Riddle         ++curPtr;
21811d26bd1SRiver Riddle         return lexString(tokStart, /*isStringBlock=*/true);
21911d26bd1SRiver Riddle       }
22011d26bd1SRiver Riddle       return formToken(Token::l_square, tokStart);
22111d26bd1SRiver Riddle     case ']':
22211d26bd1SRiver Riddle       return formToken(Token::r_square, tokStart);
22311d26bd1SRiver Riddle 
22411d26bd1SRiver Riddle     case '<':
22511d26bd1SRiver Riddle       return formToken(Token::less, tokStart);
22611d26bd1SRiver Riddle     case '>':
22711d26bd1SRiver Riddle       return formToken(Token::greater, tokStart);
22811d26bd1SRiver Riddle     case '{':
22911d26bd1SRiver Riddle       return formToken(Token::l_brace, tokStart);
23011d26bd1SRiver Riddle     case '}':
23111d26bd1SRiver Riddle       return formToken(Token::r_brace, tokStart);
23211d26bd1SRiver Riddle     case '(':
23311d26bd1SRiver Riddle       return formToken(Token::l_paren, tokStart);
23411d26bd1SRiver Riddle     case ')':
23511d26bd1SRiver Riddle       return formToken(Token::r_paren, tokStart);
23611d26bd1SRiver Riddle     case '/':
23711d26bd1SRiver Riddle       if (*curPtr == '/') {
23811d26bd1SRiver Riddle         lexComment();
23911d26bd1SRiver Riddle         continue;
24011d26bd1SRiver Riddle       }
24111d26bd1SRiver Riddle       return emitError(tokStart, "unexpected character");
24211d26bd1SRiver Riddle 
24311d26bd1SRiver Riddle     // Ignore whitespace characters.
24411d26bd1SRiver Riddle     case 0:
24511d26bd1SRiver Riddle     case ' ':
24611d26bd1SRiver Riddle     case '\t':
24711d26bd1SRiver Riddle     case '\n':
24811d26bd1SRiver Riddle       return lexToken();
24911d26bd1SRiver Riddle 
25011d26bd1SRiver Riddle     case '#':
25111d26bd1SRiver Riddle       return lexDirective(tokStart);
25211d26bd1SRiver Riddle     case '"':
25311d26bd1SRiver Riddle       return lexString(tokStart, /*isStringBlock=*/false);
25411d26bd1SRiver Riddle 
25511d26bd1SRiver Riddle     case '0':
25611d26bd1SRiver Riddle     case '1':
25711d26bd1SRiver Riddle     case '2':
25811d26bd1SRiver Riddle     case '3':
25911d26bd1SRiver Riddle     case '4':
26011d26bd1SRiver Riddle     case '5':
26111d26bd1SRiver Riddle     case '6':
26211d26bd1SRiver Riddle     case '7':
26311d26bd1SRiver Riddle     case '8':
26411d26bd1SRiver Riddle     case '9':
26511d26bd1SRiver Riddle       return lexNumber(tokStart);
26611d26bd1SRiver Riddle     }
26711d26bd1SRiver Riddle   }
26811d26bd1SRiver Riddle }
26911d26bd1SRiver Riddle 
27011d26bd1SRiver Riddle /// Skip a comment line, starting with a '//'.
lexComment()27111d26bd1SRiver Riddle void Lexer::lexComment() {
27211d26bd1SRiver Riddle   // Advance over the second '/' in a '//' comment.
27311d26bd1SRiver Riddle   assert(*curPtr == '/');
27411d26bd1SRiver Riddle   ++curPtr;
27511d26bd1SRiver Riddle 
27611d26bd1SRiver Riddle   while (true) {
27711d26bd1SRiver Riddle     switch (*curPtr++) {
27811d26bd1SRiver Riddle     case '\n':
27911d26bd1SRiver Riddle     case '\r':
28011d26bd1SRiver Riddle       // Newline is end of comment.
28111d26bd1SRiver Riddle       return;
28211d26bd1SRiver Riddle     case 0:
28311d26bd1SRiver Riddle       // If this is the end of the buffer, end the comment.
28411d26bd1SRiver Riddle       if (curPtr - 1 == curBuffer.end()) {
28511d26bd1SRiver Riddle         --curPtr;
28611d26bd1SRiver Riddle         return;
28711d26bd1SRiver Riddle       }
28811d26bd1SRiver Riddle       LLVM_FALLTHROUGH;
28911d26bd1SRiver Riddle     default:
29011d26bd1SRiver Riddle       // Skip over other characters.
29111d26bd1SRiver Riddle       break;
29211d26bd1SRiver Riddle     }
29311d26bd1SRiver Riddle   }
29411d26bd1SRiver Riddle }
29511d26bd1SRiver Riddle 
lexDirective(const char * tokStart)29611d26bd1SRiver Riddle Token Lexer::lexDirective(const char *tokStart) {
29711d26bd1SRiver Riddle   // Match the rest with an identifier regex: [0-9a-zA-Z_]*
298*b7f93c28SJeff Niu   while (isalnum(*curPtr) || *curPtr == '_')
299*b7f93c28SJeff Niu     ++curPtr;
30011d26bd1SRiver Riddle 
30111d26bd1SRiver Riddle   StringRef str(tokStart, curPtr - tokStart);
30211d26bd1SRiver Riddle   return Token(Token::directive, str);
30311d26bd1SRiver Riddle }
30411d26bd1SRiver Riddle 
lexIdentifier(const char * tokStart)30511d26bd1SRiver Riddle Token Lexer::lexIdentifier(const char *tokStart) {
30611d26bd1SRiver Riddle   // Match the rest of the identifier regex: [0-9a-zA-Z_]*
307*b7f93c28SJeff Niu   while (isalnum(*curPtr) || *curPtr == '_')
308*b7f93c28SJeff Niu     ++curPtr;
30911d26bd1SRiver Riddle 
31011d26bd1SRiver Riddle   // Check to see if this identifier is a keyword.
31111d26bd1SRiver Riddle   StringRef str(tokStart, curPtr - tokStart);
31211d26bd1SRiver Riddle   Token::Kind kind = StringSwitch<Token::Kind>(str)
31311d26bd1SRiver Riddle                          .Case("attr", Token::kw_attr)
31411d26bd1SRiver Riddle                          .Case("Attr", Token::kw_Attr)
31511d26bd1SRiver Riddle                          .Case("erase", Token::kw_erase)
31611d26bd1SRiver Riddle                          .Case("let", Token::kw_let)
31711d26bd1SRiver Riddle                          .Case("Constraint", Token::kw_Constraint)
31811d26bd1SRiver Riddle                          .Case("op", Token::kw_op)
31911d26bd1SRiver Riddle                          .Case("Op", Token::kw_Op)
32011d26bd1SRiver Riddle                          .Case("OpName", Token::kw_OpName)
32111d26bd1SRiver Riddle                          .Case("Pattern", Token::kw_Pattern)
32211d26bd1SRiver Riddle                          .Case("replace", Token::kw_replace)
323faf42264SRiver Riddle                          .Case("return", Token::kw_return)
32411d26bd1SRiver Riddle                          .Case("rewrite", Token::kw_rewrite)
325faf42264SRiver Riddle                          .Case("Rewrite", Token::kw_Rewrite)
32611d26bd1SRiver Riddle                          .Case("type", Token::kw_type)
32711d26bd1SRiver Riddle                          .Case("Type", Token::kw_Type)
32811d26bd1SRiver Riddle                          .Case("TypeRange", Token::kw_TypeRange)
32911d26bd1SRiver Riddle                          .Case("Value", Token::kw_Value)
33011d26bd1SRiver Riddle                          .Case("ValueRange", Token::kw_ValueRange)
33111d26bd1SRiver Riddle                          .Case("with", Token::kw_with)
33211d26bd1SRiver Riddle                          .Case("_", Token::underscore)
33311d26bd1SRiver Riddle                          .Default(Token::identifier);
33411d26bd1SRiver Riddle   return Token(kind, str);
33511d26bd1SRiver Riddle }
33611d26bd1SRiver Riddle 
lexNumber(const char * tokStart)33711d26bd1SRiver Riddle Token Lexer::lexNumber(const char *tokStart) {
33811d26bd1SRiver Riddle   assert(isdigit(curPtr[-1]));
33911d26bd1SRiver Riddle 
34011d26bd1SRiver Riddle   // Handle the normal decimal case.
341*b7f93c28SJeff Niu   while (isdigit(*curPtr))
342*b7f93c28SJeff Niu     ++curPtr;
34311d26bd1SRiver Riddle 
34411d26bd1SRiver Riddle   return formToken(Token::integer, tokStart);
34511d26bd1SRiver Riddle }
34611d26bd1SRiver Riddle 
lexString(const char * tokStart,bool isStringBlock)34711d26bd1SRiver Riddle Token Lexer::lexString(const char *tokStart, bool isStringBlock) {
34811d26bd1SRiver Riddle   while (true) {
34941d2c6dfSRiver Riddle     // Check to see if there is a code completion location within the string. In
35041d2c6dfSRiver Riddle     // these cases we generate a completion location and place the currently
35141d2c6dfSRiver Riddle     // lexed string within the token (without the quotes). This allows for the
35241d2c6dfSRiver Riddle     // parser to use the partially lexed string when computing the completion
35341d2c6dfSRiver Riddle     // results.
35441d2c6dfSRiver Riddle     if (curPtr == codeCompletionLocation) {
35541d2c6dfSRiver Riddle       return formToken(Token::code_complete_string,
35641d2c6dfSRiver Riddle                        tokStart + (isStringBlock ? 2 : 1));
35741d2c6dfSRiver Riddle     }
35841d2c6dfSRiver Riddle 
35911d26bd1SRiver Riddle     switch (*curPtr++) {
36011d26bd1SRiver Riddle     case '"':
36111d26bd1SRiver Riddle       // If this is a string block, we only end the string when we encounter a
36211d26bd1SRiver Riddle       // `}]`.
363ebb1e900SRiver Riddle       if (!isStringBlock)
364ebb1e900SRiver Riddle         return formToken(Token::string, tokStart);
36511d26bd1SRiver Riddle       continue;
36611d26bd1SRiver Riddle     case '}':
36711d26bd1SRiver Riddle       // If this is a string block, we only end the string when we encounter a
36811d26bd1SRiver Riddle       // `}]`.
369ebb1e900SRiver Riddle       if (!isStringBlock || *curPtr != ']')
370ebb1e900SRiver Riddle         continue;
37111d26bd1SRiver Riddle       ++curPtr;
37211d26bd1SRiver Riddle       return formToken(Token::string_block, tokStart);
373ebb1e900SRiver Riddle     case 0: {
37411d26bd1SRiver Riddle       // If this is a random nul character in the middle of a string, just
37511d26bd1SRiver Riddle       // include it. If it is the end of file, then it is an error.
376ebb1e900SRiver Riddle       if (curPtr - 1 != curBuffer.end())
377ebb1e900SRiver Riddle         continue;
378ebb1e900SRiver Riddle       --curPtr;
379ebb1e900SRiver Riddle 
380ebb1e900SRiver Riddle       StringRef expectedEndStr = isStringBlock ? "}]" : "\"";
381ebb1e900SRiver Riddle       return emitError(curPtr - 1,
382ebb1e900SRiver Riddle                        "expected '" + expectedEndStr + "' in string literal");
383ebb1e900SRiver Riddle     }
384ebb1e900SRiver Riddle 
38511d26bd1SRiver Riddle     case '\n':
38611d26bd1SRiver Riddle     case '\v':
38711d26bd1SRiver Riddle     case '\f':
38811d26bd1SRiver Riddle       // String blocks allow multiple lines.
38911d26bd1SRiver Riddle       if (!isStringBlock)
39011d26bd1SRiver Riddle         return emitError(curPtr - 1, "expected '\"' in string literal");
39111d26bd1SRiver Riddle       continue;
39211d26bd1SRiver Riddle 
39311d26bd1SRiver Riddle     case '\\':
39411d26bd1SRiver Riddle       // Handle explicitly a few escapes.
39511d26bd1SRiver Riddle       if (*curPtr == '"' || *curPtr == '\\' || *curPtr == 'n' ||
39611d26bd1SRiver Riddle           *curPtr == 't') {
39711d26bd1SRiver Riddle         ++curPtr;
39811d26bd1SRiver Riddle       } else if (llvm::isHexDigit(*curPtr) && llvm::isHexDigit(curPtr[1])) {
39911d26bd1SRiver Riddle         // Support \xx for two hex digits.
40011d26bd1SRiver Riddle         curPtr += 2;
40111d26bd1SRiver Riddle       } else {
40211d26bd1SRiver Riddle         return emitError(curPtr - 1, "unknown escape in string literal");
40311d26bd1SRiver Riddle       }
40411d26bd1SRiver Riddle       continue;
40511d26bd1SRiver Riddle 
40611d26bd1SRiver Riddle     default:
40711d26bd1SRiver Riddle       continue;
40811d26bd1SRiver Riddle     }
40911d26bd1SRiver Riddle   }
41011d26bd1SRiver Riddle }
411