1 //===- Lexer.h - MLIR PDLL Frontend Lexer -----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LIB_TOOLS_PDLL_PARSER_LEXER_H_ 10 #define LIB_TOOLS_PDLL_PARSER_LEXER_H_ 11 12 #include "mlir/Support/LLVM.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/SMLoc.h" 15 16 namespace llvm { 17 class SourceMgr; 18 } // namespace llvm 19 20 namespace mlir { 21 struct LogicalResult; 22 23 namespace pdll { 24 class CodeCompleteContext; 25 26 namespace ast { 27 class DiagnosticEngine; 28 } // namespace ast 29 30 //===----------------------------------------------------------------------===// 31 // Token 32 //===----------------------------------------------------------------------===// 33 34 class Token { 35 public: 36 enum Kind { 37 /// Markers. 38 eof, 39 error, 40 /// Token signifying a code completion location. 41 code_complete, 42 /// Token signifying a code completion location within a string. 43 code_complete_string, 44 45 /// Keywords. 46 KW_BEGIN, 47 /// Dependent keywords, i.e. those that are treated as keywords depending on 48 /// the current parser context. 49 KW_DEPENDENT_BEGIN, 50 kw_attr, 51 kw_op, 52 kw_type, 53 KW_DEPENDENT_END, 54 55 /// General keywords. 56 kw_Attr, 57 kw_erase, 58 kw_let, 59 kw_Constraint, 60 kw_Op, 61 kw_OpName, 62 kw_Pattern, 63 kw_replace, 64 kw_return, 65 kw_rewrite, 66 kw_Rewrite, 67 kw_Type, 68 kw_TypeRange, 69 kw_Value, 70 kw_ValueRange, 71 kw_with, 72 KW_END, 73 74 /// Punctuation. 75 arrow, 76 colon, 77 comma, 78 dot, 79 equal, 80 equal_arrow, 81 semicolon, 82 /// Paired punctuation. 83 less, 84 greater, 85 l_brace, 86 r_brace, 87 l_paren, 88 r_paren, 89 l_square, 90 r_square, 91 underscore, 92 93 /// Tokens. 94 directive, 95 identifier, 96 integer, 97 string_block, 98 string 99 }; Token(Kind kind,StringRef spelling)100 Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {} 101 102 /// Given a token containing a string literal, return its value, including 103 /// removing the quote characters and unescaping the contents of the string. 104 std::string getStringValue() const; 105 106 /// Returns true if the current token is a string literal. isString()107 bool isString() const { return isAny(Token::string, Token::string_block); } 108 109 /// Returns true if the current token is a keyword. isKeyword()110 bool isKeyword() const { 111 return kind > Token::KW_BEGIN && kind < Token::KW_END; 112 } 113 114 /// Returns true if the current token is a keyword in a dependent context, and 115 /// in any other situation (e.g. variable names) may be treated as an 116 /// identifier. isDependentKeyword()117 bool isDependentKeyword() const { 118 return kind > Token::KW_DEPENDENT_BEGIN && kind < Token::KW_DEPENDENT_END; 119 } 120 121 /// Return the bytes that make up this token. getSpelling()122 StringRef getSpelling() const { return spelling; } 123 124 /// Return the kind of this token. getKind()125 Kind getKind() const { return kind; } 126 127 /// Return true if this token is one of the specified kinds. isAny(Kind k1,Kind k2)128 bool isAny(Kind k1, Kind k2) const { return is(k1) || is(k2); } 129 template <typename... T> isAny(Kind k1,Kind k2,Kind k3,T...others)130 bool isAny(Kind k1, Kind k2, Kind k3, T... others) const { 131 return is(k1) || isAny(k2, k3, others...); 132 } 133 134 /// Return if the token does not have the given kind. isNot(Kind k)135 bool isNot(Kind k) const { return k != kind; } 136 template <typename... T> isNot(Kind k1,Kind k2,T...others)137 bool isNot(Kind k1, Kind k2, T... others) const { 138 return !isAny(k1, k2, others...); 139 } 140 141 /// Return if the token has the given kind. is(Kind k)142 bool is(Kind k) const { return kind == k; } 143 144 /// Return a location for the start of this token. getStartLoc()145 SMLoc getStartLoc() const { return SMLoc::getFromPointer(spelling.data()); } 146 /// Return a location at the end of this token. getEndLoc()147 SMLoc getEndLoc() const { 148 return SMLoc::getFromPointer(spelling.data() + spelling.size()); 149 } 150 /// Return a location for the range of this token. getLoc()151 SMRange getLoc() const { return SMRange(getStartLoc(), getEndLoc()); } 152 153 private: 154 /// Discriminator that indicates the kind of token this is. 155 Kind kind; 156 157 /// A reference to the entire token contents; this is always a pointer into 158 /// a memory buffer owned by the source manager. 159 StringRef spelling; 160 }; 161 162 //===----------------------------------------------------------------------===// 163 // Lexer 164 //===----------------------------------------------------------------------===// 165 166 class Lexer { 167 public: 168 Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine, 169 CodeCompleteContext *codeCompleteContext); 170 ~Lexer(); 171 172 /// Return a reference to the source manager used by the lexer. getSourceMgr()173 llvm::SourceMgr &getSourceMgr() { return srcMgr; } 174 175 /// Return a reference to the diagnostic engine used by the lexer. getDiagEngine()176 ast::DiagnosticEngine &getDiagEngine() { return diagEngine; } 177 178 /// Push an include of the given file. This will cause the lexer to start 179 /// processing the provided file. Returns failure if the file could not be 180 /// opened, success otherwise. 181 LogicalResult pushInclude(StringRef filename, SMRange includeLoc); 182 183 /// Lex the next token and return it. 184 Token lexToken(); 185 186 /// Change the position of the lexer cursor. The next token we lex will start 187 /// at the designated point in the input. resetPointer(const char * newPointer)188 void resetPointer(const char *newPointer) { curPtr = newPointer; } 189 190 /// Emit an error to the lexer with the given location and message. 191 Token emitError(SMRange loc, const Twine &msg); 192 Token emitError(const char *loc, const Twine &msg); 193 Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc, 194 const Twine ¬e); 195 196 private: formToken(Token::Kind kind,const char * tokStart)197 Token formToken(Token::Kind kind, const char *tokStart) { 198 return Token(kind, StringRef(tokStart, curPtr - tokStart)); 199 } 200 201 /// Return the next character in the stream. 202 int getNextChar(); 203 204 /// Lex methods. 205 void lexComment(); 206 Token lexDirective(const char *tokStart); 207 Token lexIdentifier(const char *tokStart); 208 Token lexNumber(const char *tokStart); 209 Token lexString(const char *tokStart, bool isStringBlock); 210 211 llvm::SourceMgr &srcMgr; 212 int curBufferID; 213 StringRef curBuffer; 214 const char *curPtr; 215 216 /// The engine used to emit diagnostics during lexing/parsing. 217 ast::DiagnosticEngine &diagEngine; 218 219 /// A flag indicating if we added a default diagnostic handler to the provided 220 /// diagEngine. 221 bool addedHandlerToDiagEngine; 222 223 /// The optional code completion point within the input file. 224 const char *codeCompletionLocation; 225 }; 226 } // namespace pdll 227 } // namespace mlir 228 229 #endif // LIB_TOOLS_PDLL_PARSER_LEXER_H_ 230