//===- Lexer.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
811d26bd1SRiver Riddle
911d26bd1SRiver Riddle #include "Lexer.h"
1011d26bd1SRiver Riddle #include "mlir/Support/LogicalResult.h"
1111d26bd1SRiver Riddle #include "mlir/Tools/PDLL/AST/Diagnostic.h"
12008de486SRiver Riddle #include "mlir/Tools/PDLL/Parser/CodeComplete.h"
1311d26bd1SRiver Riddle #include "llvm/ADT/StringExtras.h"
1411d26bd1SRiver Riddle #include "llvm/ADT/StringSwitch.h"
1511d26bd1SRiver Riddle #include "llvm/Support/SourceMgr.h"
1611d26bd1SRiver Riddle
1711d26bd1SRiver Riddle using namespace mlir;
1811d26bd1SRiver Riddle using namespace mlir::pdll;
1911d26bd1SRiver Riddle
//===----------------------------------------------------------------------===//
// Token
//===----------------------------------------------------------------------===//
2311d26bd1SRiver Riddle
getStringValue() const2411d26bd1SRiver Riddle std::string Token::getStringValue() const {
2541d2c6dfSRiver Riddle assert(getKind() == string || getKind() == string_block ||
2641d2c6dfSRiver Riddle getKind() == code_complete_string);
2711d26bd1SRiver Riddle
2811d26bd1SRiver Riddle // Start by dropping the quotes.
2941d2c6dfSRiver Riddle StringRef bytes = getSpelling();
3041d2c6dfSRiver Riddle if (is(string))
3141d2c6dfSRiver Riddle bytes = bytes.drop_front().drop_back();
3241d2c6dfSRiver Riddle else if (is(string_block))
3341d2c6dfSRiver Riddle bytes = bytes.drop_front(2).drop_back(2);
3411d26bd1SRiver Riddle
3511d26bd1SRiver Riddle std::string result;
3611d26bd1SRiver Riddle result.reserve(bytes.size());
3711d26bd1SRiver Riddle for (unsigned i = 0, e = bytes.size(); i != e;) {
3811d26bd1SRiver Riddle auto c = bytes[i++];
3911d26bd1SRiver Riddle if (c != '\\') {
4011d26bd1SRiver Riddle result.push_back(c);
4111d26bd1SRiver Riddle continue;
4211d26bd1SRiver Riddle }
4311d26bd1SRiver Riddle
4411d26bd1SRiver Riddle assert(i + 1 <= e && "invalid string should be caught by lexer");
4511d26bd1SRiver Riddle auto c1 = bytes[i++];
4611d26bd1SRiver Riddle switch (c1) {
4711d26bd1SRiver Riddle case '"':
4811d26bd1SRiver Riddle case '\\':
4911d26bd1SRiver Riddle result.push_back(c1);
5011d26bd1SRiver Riddle continue;
5111d26bd1SRiver Riddle case 'n':
5211d26bd1SRiver Riddle result.push_back('\n');
5311d26bd1SRiver Riddle continue;
5411d26bd1SRiver Riddle case 't':
5511d26bd1SRiver Riddle result.push_back('\t');
5611d26bd1SRiver Riddle continue;
5711d26bd1SRiver Riddle default:
5811d26bd1SRiver Riddle break;
5911d26bd1SRiver Riddle }
6011d26bd1SRiver Riddle
6111d26bd1SRiver Riddle assert(i + 1 <= e && "invalid string should be caught by lexer");
6211d26bd1SRiver Riddle auto c2 = bytes[i++];
6311d26bd1SRiver Riddle
6411d26bd1SRiver Riddle assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
6511d26bd1SRiver Riddle result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
6611d26bd1SRiver Riddle }
6711d26bd1SRiver Riddle
6811d26bd1SRiver Riddle return result;
6911d26bd1SRiver Riddle }
7011d26bd1SRiver Riddle
//===----------------------------------------------------------------------===//
// Lexer
//===----------------------------------------------------------------------===//
7411d26bd1SRiver Riddle
Lexer(llvm::SourceMgr & mgr,ast::DiagnosticEngine & diagEngine,CodeCompleteContext * codeCompleteContext)75008de486SRiver Riddle Lexer::Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine,
76008de486SRiver Riddle CodeCompleteContext *codeCompleteContext)
77008de486SRiver Riddle : srcMgr(mgr), diagEngine(diagEngine), addedHandlerToDiagEngine(false),
78008de486SRiver Riddle codeCompletionLocation(nullptr) {
7911d26bd1SRiver Riddle curBufferID = mgr.getMainFileID();
8011d26bd1SRiver Riddle curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
8111d26bd1SRiver Riddle curPtr = curBuffer.begin();
8211d26bd1SRiver Riddle
83008de486SRiver Riddle // Set the code completion location if necessary.
84008de486SRiver Riddle if (codeCompleteContext) {
85008de486SRiver Riddle codeCompletionLocation =
86008de486SRiver Riddle codeCompleteContext->getCodeCompleteLoc().getPointer();
87008de486SRiver Riddle }
88008de486SRiver Riddle
8911d26bd1SRiver Riddle // If the diag engine has no handler, add a default that emits to the
9011d26bd1SRiver Riddle // SourceMgr.
9111d26bd1SRiver Riddle if (!diagEngine.getHandlerFn()) {
9211d26bd1SRiver Riddle diagEngine.setHandlerFn([&](const ast::Diagnostic &diag) {
9311d26bd1SRiver Riddle srcMgr.PrintMessage(diag.getLocation().Start, diag.getSeverity(),
9411d26bd1SRiver Riddle diag.getMessage());
9511d26bd1SRiver Riddle for (const ast::Diagnostic ¬e : diag.getNotes())
9611d26bd1SRiver Riddle srcMgr.PrintMessage(note.getLocation().Start, note.getSeverity(),
9711d26bd1SRiver Riddle note.getMessage());
9811d26bd1SRiver Riddle });
9911d26bd1SRiver Riddle addedHandlerToDiagEngine = true;
10011d26bd1SRiver Riddle }
10111d26bd1SRiver Riddle }
10211d26bd1SRiver Riddle
~Lexer()10311d26bd1SRiver Riddle Lexer::~Lexer() {
104*b7f93c28SJeff Niu if (addedHandlerToDiagEngine)
105*b7f93c28SJeff Niu diagEngine.setHandlerFn(nullptr);
10611d26bd1SRiver Riddle }
10711d26bd1SRiver Riddle
pushInclude(StringRef filename,SMRange includeLoc)10809af7fefSRiver Riddle LogicalResult Lexer::pushInclude(StringRef filename, SMRange includeLoc) {
10911d26bd1SRiver Riddle std::string includedFile;
11009af7fefSRiver Riddle int bufferID =
11109af7fefSRiver Riddle srcMgr.AddIncludeFile(filename.str(), includeLoc.End, includedFile);
11209af7fefSRiver Riddle if (!bufferID)
11309af7fefSRiver Riddle return failure();
11411d26bd1SRiver Riddle
11511d26bd1SRiver Riddle curBufferID = bufferID;
11611d26bd1SRiver Riddle curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
11711d26bd1SRiver Riddle curPtr = curBuffer.begin();
11811d26bd1SRiver Riddle return success();
11911d26bd1SRiver Riddle }
12011d26bd1SRiver Riddle
emitError(SMRange loc,const Twine & msg)1216842ec42SRiver Riddle Token Lexer::emitError(SMRange loc, const Twine &msg) {
12211d26bd1SRiver Riddle diagEngine.emitError(loc, msg);
12311d26bd1SRiver Riddle return formToken(Token::error, loc.Start.getPointer());
12411d26bd1SRiver Riddle }
emitErrorAndNote(SMRange loc,const Twine & msg,SMRange noteLoc,const Twine & note)125*b7f93c28SJeff Niu Token Lexer::emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc,
126*b7f93c28SJeff Niu const Twine ¬e) {
12711d26bd1SRiver Riddle diagEngine.emitError(loc, msg)->attachNote(note, noteLoc);
12811d26bd1SRiver Riddle return formToken(Token::error, loc.Start.getPointer());
12911d26bd1SRiver Riddle }
emitError(const char * loc,const Twine & msg)13011d26bd1SRiver Riddle Token Lexer::emitError(const char *loc, const Twine &msg) {
131*b7f93c28SJeff Niu return emitError(
132*b7f93c28SJeff Niu SMRange(SMLoc::getFromPointer(loc), SMLoc::getFromPointer(loc + 1)), msg);
13311d26bd1SRiver Riddle }
13411d26bd1SRiver Riddle
getNextChar()13511d26bd1SRiver Riddle int Lexer::getNextChar() {
13611d26bd1SRiver Riddle char curChar = *curPtr++;
13711d26bd1SRiver Riddle switch (curChar) {
13811d26bd1SRiver Riddle default:
13911d26bd1SRiver Riddle return static_cast<unsigned char>(curChar);
14011d26bd1SRiver Riddle case 0: {
14111d26bd1SRiver Riddle // A nul character in the stream is either the end of the current buffer
14211d26bd1SRiver Riddle // or a random nul in the file. Disambiguate that here.
143*b7f93c28SJeff Niu if (curPtr - 1 != curBuffer.end())
144*b7f93c28SJeff Niu return 0;
14511d26bd1SRiver Riddle
14611d26bd1SRiver Riddle // Otherwise, return end of file.
14711d26bd1SRiver Riddle --curPtr;
14811d26bd1SRiver Riddle return EOF;
14911d26bd1SRiver Riddle }
15011d26bd1SRiver Riddle case '\n':
15111d26bd1SRiver Riddle case '\r':
15211d26bd1SRiver Riddle // Handle the newline character by ignoring it and incrementing the line
15311d26bd1SRiver Riddle // count. However, be careful about 'dos style' files with \n\r in them.
15411d26bd1SRiver Riddle // Only treat a \n\r or \r\n as a single line.
15511d26bd1SRiver Riddle if ((*curPtr == '\n' || (*curPtr == '\r')) && *curPtr != curChar)
15611d26bd1SRiver Riddle ++curPtr;
15711d26bd1SRiver Riddle return '\n';
15811d26bd1SRiver Riddle }
15911d26bd1SRiver Riddle }
16011d26bd1SRiver Riddle
lexToken()16111d26bd1SRiver Riddle Token Lexer::lexToken() {
16211d26bd1SRiver Riddle while (true) {
16311d26bd1SRiver Riddle const char *tokStart = curPtr;
16411d26bd1SRiver Riddle
165008de486SRiver Riddle // Check to see if this token is at the code completion location.
166008de486SRiver Riddle if (tokStart == codeCompletionLocation)
167008de486SRiver Riddle return formToken(Token::code_complete, tokStart);
168008de486SRiver Riddle
16911d26bd1SRiver Riddle // This always consumes at least one character.
17011d26bd1SRiver Riddle int curChar = getNextChar();
17111d26bd1SRiver Riddle switch (curChar) {
17211d26bd1SRiver Riddle default:
17311d26bd1SRiver Riddle // Handle identifiers: [a-zA-Z_]
174*b7f93c28SJeff Niu if (isalpha(curChar) || curChar == '_')
175*b7f93c28SJeff Niu return lexIdentifier(tokStart);
17611d26bd1SRiver Riddle
17711d26bd1SRiver Riddle // Unknown character, emit an error.
17811d26bd1SRiver Riddle return emitError(tokStart, "unexpected character");
17911d26bd1SRiver Riddle case EOF: {
18011d26bd1SRiver Riddle // Return EOF denoting the end of lexing.
18111d26bd1SRiver Riddle Token eof = formToken(Token::eof, tokStart);
18211d26bd1SRiver Riddle
18311d26bd1SRiver Riddle // Check to see if we are in an included file.
1846842ec42SRiver Riddle SMLoc parentIncludeLoc = srcMgr.getParentIncludeLoc(curBufferID);
18511d26bd1SRiver Riddle if (parentIncludeLoc.isValid()) {
18611d26bd1SRiver Riddle curBufferID = srcMgr.FindBufferContainingLoc(parentIncludeLoc);
18711d26bd1SRiver Riddle curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
18811d26bd1SRiver Riddle curPtr = parentIncludeLoc.getPointer();
18911d26bd1SRiver Riddle }
19011d26bd1SRiver Riddle
19111d26bd1SRiver Riddle return eof;
19211d26bd1SRiver Riddle }
19311d26bd1SRiver Riddle
19411d26bd1SRiver Riddle // Lex punctuation.
19511d26bd1SRiver Riddle case '-':
19611d26bd1SRiver Riddle if (*curPtr == '>') {
19711d26bd1SRiver Riddle ++curPtr;
19811d26bd1SRiver Riddle return formToken(Token::arrow, tokStart);
19911d26bd1SRiver Riddle }
20011d26bd1SRiver Riddle return emitError(tokStart, "unexpected character");
20111d26bd1SRiver Riddle case ':':
20211d26bd1SRiver Riddle return formToken(Token::colon, tokStart);
20311d26bd1SRiver Riddle case ',':
20411d26bd1SRiver Riddle return formToken(Token::comma, tokStart);
20511d26bd1SRiver Riddle case '.':
20611d26bd1SRiver Riddle return formToken(Token::dot, tokStart);
20711d26bd1SRiver Riddle case '=':
20811d26bd1SRiver Riddle if (*curPtr == '>') {
20911d26bd1SRiver Riddle ++curPtr;
21011d26bd1SRiver Riddle return formToken(Token::equal_arrow, tokStart);
21111d26bd1SRiver Riddle }
21211d26bd1SRiver Riddle return formToken(Token::equal, tokStart);
21311d26bd1SRiver Riddle case ';':
21411d26bd1SRiver Riddle return formToken(Token::semicolon, tokStart);
21511d26bd1SRiver Riddle case '[':
21611d26bd1SRiver Riddle if (*curPtr == '{') {
21711d26bd1SRiver Riddle ++curPtr;
21811d26bd1SRiver Riddle return lexString(tokStart, /*isStringBlock=*/true);
21911d26bd1SRiver Riddle }
22011d26bd1SRiver Riddle return formToken(Token::l_square, tokStart);
22111d26bd1SRiver Riddle case ']':
22211d26bd1SRiver Riddle return formToken(Token::r_square, tokStart);
22311d26bd1SRiver Riddle
22411d26bd1SRiver Riddle case '<':
22511d26bd1SRiver Riddle return formToken(Token::less, tokStart);
22611d26bd1SRiver Riddle case '>':
22711d26bd1SRiver Riddle return formToken(Token::greater, tokStart);
22811d26bd1SRiver Riddle case '{':
22911d26bd1SRiver Riddle return formToken(Token::l_brace, tokStart);
23011d26bd1SRiver Riddle case '}':
23111d26bd1SRiver Riddle return formToken(Token::r_brace, tokStart);
23211d26bd1SRiver Riddle case '(':
23311d26bd1SRiver Riddle return formToken(Token::l_paren, tokStart);
23411d26bd1SRiver Riddle case ')':
23511d26bd1SRiver Riddle return formToken(Token::r_paren, tokStart);
23611d26bd1SRiver Riddle case '/':
23711d26bd1SRiver Riddle if (*curPtr == '/') {
23811d26bd1SRiver Riddle lexComment();
23911d26bd1SRiver Riddle continue;
24011d26bd1SRiver Riddle }
24111d26bd1SRiver Riddle return emitError(tokStart, "unexpected character");
24211d26bd1SRiver Riddle
24311d26bd1SRiver Riddle // Ignore whitespace characters.
24411d26bd1SRiver Riddle case 0:
24511d26bd1SRiver Riddle case ' ':
24611d26bd1SRiver Riddle case '\t':
24711d26bd1SRiver Riddle case '\n':
24811d26bd1SRiver Riddle return lexToken();
24911d26bd1SRiver Riddle
25011d26bd1SRiver Riddle case '#':
25111d26bd1SRiver Riddle return lexDirective(tokStart);
25211d26bd1SRiver Riddle case '"':
25311d26bd1SRiver Riddle return lexString(tokStart, /*isStringBlock=*/false);
25411d26bd1SRiver Riddle
25511d26bd1SRiver Riddle case '0':
25611d26bd1SRiver Riddle case '1':
25711d26bd1SRiver Riddle case '2':
25811d26bd1SRiver Riddle case '3':
25911d26bd1SRiver Riddle case '4':
26011d26bd1SRiver Riddle case '5':
26111d26bd1SRiver Riddle case '6':
26211d26bd1SRiver Riddle case '7':
26311d26bd1SRiver Riddle case '8':
26411d26bd1SRiver Riddle case '9':
26511d26bd1SRiver Riddle return lexNumber(tokStart);
26611d26bd1SRiver Riddle }
26711d26bd1SRiver Riddle }
26811d26bd1SRiver Riddle }
26911d26bd1SRiver Riddle
27011d26bd1SRiver Riddle /// Skip a comment line, starting with a '//'.
lexComment()27111d26bd1SRiver Riddle void Lexer::lexComment() {
27211d26bd1SRiver Riddle // Advance over the second '/' in a '//' comment.
27311d26bd1SRiver Riddle assert(*curPtr == '/');
27411d26bd1SRiver Riddle ++curPtr;
27511d26bd1SRiver Riddle
27611d26bd1SRiver Riddle while (true) {
27711d26bd1SRiver Riddle switch (*curPtr++) {
27811d26bd1SRiver Riddle case '\n':
27911d26bd1SRiver Riddle case '\r':
28011d26bd1SRiver Riddle // Newline is end of comment.
28111d26bd1SRiver Riddle return;
28211d26bd1SRiver Riddle case 0:
28311d26bd1SRiver Riddle // If this is the end of the buffer, end the comment.
28411d26bd1SRiver Riddle if (curPtr - 1 == curBuffer.end()) {
28511d26bd1SRiver Riddle --curPtr;
28611d26bd1SRiver Riddle return;
28711d26bd1SRiver Riddle }
28811d26bd1SRiver Riddle LLVM_FALLTHROUGH;
28911d26bd1SRiver Riddle default:
29011d26bd1SRiver Riddle // Skip over other characters.
29111d26bd1SRiver Riddle break;
29211d26bd1SRiver Riddle }
29311d26bd1SRiver Riddle }
29411d26bd1SRiver Riddle }
29511d26bd1SRiver Riddle
lexDirective(const char * tokStart)29611d26bd1SRiver Riddle Token Lexer::lexDirective(const char *tokStart) {
29711d26bd1SRiver Riddle // Match the rest with an identifier regex: [0-9a-zA-Z_]*
298*b7f93c28SJeff Niu while (isalnum(*curPtr) || *curPtr == '_')
299*b7f93c28SJeff Niu ++curPtr;
30011d26bd1SRiver Riddle
30111d26bd1SRiver Riddle StringRef str(tokStart, curPtr - tokStart);
30211d26bd1SRiver Riddle return Token(Token::directive, str);
30311d26bd1SRiver Riddle }
30411d26bd1SRiver Riddle
lexIdentifier(const char * tokStart)30511d26bd1SRiver Riddle Token Lexer::lexIdentifier(const char *tokStart) {
30611d26bd1SRiver Riddle // Match the rest of the identifier regex: [0-9a-zA-Z_]*
307*b7f93c28SJeff Niu while (isalnum(*curPtr) || *curPtr == '_')
308*b7f93c28SJeff Niu ++curPtr;
30911d26bd1SRiver Riddle
31011d26bd1SRiver Riddle // Check to see if this identifier is a keyword.
31111d26bd1SRiver Riddle StringRef str(tokStart, curPtr - tokStart);
31211d26bd1SRiver Riddle Token::Kind kind = StringSwitch<Token::Kind>(str)
31311d26bd1SRiver Riddle .Case("attr", Token::kw_attr)
31411d26bd1SRiver Riddle .Case("Attr", Token::kw_Attr)
31511d26bd1SRiver Riddle .Case("erase", Token::kw_erase)
31611d26bd1SRiver Riddle .Case("let", Token::kw_let)
31711d26bd1SRiver Riddle .Case("Constraint", Token::kw_Constraint)
31811d26bd1SRiver Riddle .Case("op", Token::kw_op)
31911d26bd1SRiver Riddle .Case("Op", Token::kw_Op)
32011d26bd1SRiver Riddle .Case("OpName", Token::kw_OpName)
32111d26bd1SRiver Riddle .Case("Pattern", Token::kw_Pattern)
32211d26bd1SRiver Riddle .Case("replace", Token::kw_replace)
323faf42264SRiver Riddle .Case("return", Token::kw_return)
32411d26bd1SRiver Riddle .Case("rewrite", Token::kw_rewrite)
325faf42264SRiver Riddle .Case("Rewrite", Token::kw_Rewrite)
32611d26bd1SRiver Riddle .Case("type", Token::kw_type)
32711d26bd1SRiver Riddle .Case("Type", Token::kw_Type)
32811d26bd1SRiver Riddle .Case("TypeRange", Token::kw_TypeRange)
32911d26bd1SRiver Riddle .Case("Value", Token::kw_Value)
33011d26bd1SRiver Riddle .Case("ValueRange", Token::kw_ValueRange)
33111d26bd1SRiver Riddle .Case("with", Token::kw_with)
33211d26bd1SRiver Riddle .Case("_", Token::underscore)
33311d26bd1SRiver Riddle .Default(Token::identifier);
33411d26bd1SRiver Riddle return Token(kind, str);
33511d26bd1SRiver Riddle }
33611d26bd1SRiver Riddle
lexNumber(const char * tokStart)33711d26bd1SRiver Riddle Token Lexer::lexNumber(const char *tokStart) {
33811d26bd1SRiver Riddle assert(isdigit(curPtr[-1]));
33911d26bd1SRiver Riddle
34011d26bd1SRiver Riddle // Handle the normal decimal case.
341*b7f93c28SJeff Niu while (isdigit(*curPtr))
342*b7f93c28SJeff Niu ++curPtr;
34311d26bd1SRiver Riddle
34411d26bd1SRiver Riddle return formToken(Token::integer, tokStart);
34511d26bd1SRiver Riddle }
34611d26bd1SRiver Riddle
lexString(const char * tokStart,bool isStringBlock)34711d26bd1SRiver Riddle Token Lexer::lexString(const char *tokStart, bool isStringBlock) {
34811d26bd1SRiver Riddle while (true) {
34941d2c6dfSRiver Riddle // Check to see if there is a code completion location within the string. In
35041d2c6dfSRiver Riddle // these cases we generate a completion location and place the currently
35141d2c6dfSRiver Riddle // lexed string within the token (without the quotes). This allows for the
35241d2c6dfSRiver Riddle // parser to use the partially lexed string when computing the completion
35341d2c6dfSRiver Riddle // results.
35441d2c6dfSRiver Riddle if (curPtr == codeCompletionLocation) {
35541d2c6dfSRiver Riddle return formToken(Token::code_complete_string,
35641d2c6dfSRiver Riddle tokStart + (isStringBlock ? 2 : 1));
35741d2c6dfSRiver Riddle }
35841d2c6dfSRiver Riddle
35911d26bd1SRiver Riddle switch (*curPtr++) {
36011d26bd1SRiver Riddle case '"':
36111d26bd1SRiver Riddle // If this is a string block, we only end the string when we encounter a
36211d26bd1SRiver Riddle // `}]`.
363ebb1e900SRiver Riddle if (!isStringBlock)
364ebb1e900SRiver Riddle return formToken(Token::string, tokStart);
36511d26bd1SRiver Riddle continue;
36611d26bd1SRiver Riddle case '}':
36711d26bd1SRiver Riddle // If this is a string block, we only end the string when we encounter a
36811d26bd1SRiver Riddle // `}]`.
369ebb1e900SRiver Riddle if (!isStringBlock || *curPtr != ']')
370ebb1e900SRiver Riddle continue;
37111d26bd1SRiver Riddle ++curPtr;
37211d26bd1SRiver Riddle return formToken(Token::string_block, tokStart);
373ebb1e900SRiver Riddle case 0: {
37411d26bd1SRiver Riddle // If this is a random nul character in the middle of a string, just
37511d26bd1SRiver Riddle // include it. If it is the end of file, then it is an error.
376ebb1e900SRiver Riddle if (curPtr - 1 != curBuffer.end())
377ebb1e900SRiver Riddle continue;
378ebb1e900SRiver Riddle --curPtr;
379ebb1e900SRiver Riddle
380ebb1e900SRiver Riddle StringRef expectedEndStr = isStringBlock ? "}]" : "\"";
381ebb1e900SRiver Riddle return emitError(curPtr - 1,
382ebb1e900SRiver Riddle "expected '" + expectedEndStr + "' in string literal");
383ebb1e900SRiver Riddle }
384ebb1e900SRiver Riddle
38511d26bd1SRiver Riddle case '\n':
38611d26bd1SRiver Riddle case '\v':
38711d26bd1SRiver Riddle case '\f':
38811d26bd1SRiver Riddle // String blocks allow multiple lines.
38911d26bd1SRiver Riddle if (!isStringBlock)
39011d26bd1SRiver Riddle return emitError(curPtr - 1, "expected '\"' in string literal");
39111d26bd1SRiver Riddle continue;
39211d26bd1SRiver Riddle
39311d26bd1SRiver Riddle case '\\':
39411d26bd1SRiver Riddle // Handle explicitly a few escapes.
39511d26bd1SRiver Riddle if (*curPtr == '"' || *curPtr == '\\' || *curPtr == 'n' ||
39611d26bd1SRiver Riddle *curPtr == 't') {
39711d26bd1SRiver Riddle ++curPtr;
39811d26bd1SRiver Riddle } else if (llvm::isHexDigit(*curPtr) && llvm::isHexDigit(curPtr[1])) {
39911d26bd1SRiver Riddle // Support \xx for two hex digits.
40011d26bd1SRiver Riddle curPtr += 2;
40111d26bd1SRiver Riddle } else {
40211d26bd1SRiver Riddle return emitError(curPtr - 1, "unknown escape in string literal");
40311d26bd1SRiver Riddle }
40411d26bd1SRiver Riddle continue;
40511d26bd1SRiver Riddle
40611d26bd1SRiver Riddle default:
40711d26bd1SRiver Riddle continue;
40811d26bd1SRiver Riddle }
40911d26bd1SRiver Riddle }
41011d26bd1SRiver Riddle }
411